๐Ÿ”ด ๋ฌธ์ œ 1: YUVโ†’RGB ๋ณ€ํ™˜ ์ตœ์ ํ™” ๋ฐฉ์•ˆ ๋ฐฉ์•ˆ 1: ๋ฃฉ์—… ํ…Œ์ด๋ธ” + ๋ผ์ธ ๋‹จ์œ„ ์ฒ˜๋ฆฌ // ์ดˆ๊ธฐํ™” ์‹œ ํ•œ ๋ฒˆ๋งŒ ๊ณ„์‚ฐํ•˜๋Š” ๋ฃฉ์—… ํ…Œ์ด๋ธ” static int yuv_table_r[256]; // V โ†’ R ๋ณ€ํ™˜ ํ…Œ์ด๋ธ” static int yuv_table_g_u[256]; // U โ†’ G ๋ณ€ํ™˜ ํ…Œ์ด๋ธ” static int yuv_table_g_v[256]; // V โ†’ G ๋ณ€ํ™˜ ํ…Œ์ด๋ธ” static int yuv_table_b[256]; // U โ†’ B ๋ณ€ํ™˜ ํ…Œ์ด๋ธ” // ์ตœ์ ํ™”๋œ ๋ณ€ํ™˜ ํ•จ์ˆ˜ void ConvertYUV420PToRGB24_Optimized() { for (uint32_t y = 0; y < height; ++y) { const uint8_t* y_row = y_plane + (y * y_stride); const uint8_t* u_row = u_plane + ((y / 2) * u_stride); const uint8_t* v_row = v_plane + ((y / 2) * v_stride); uint8_t* rgb_row = rgb_data + (y * rgb_stride); // ๋ผ์ธ ๋‹จ์œ„๋กœ 2ํ”ฝ์…€์”ฉ ์ฒ˜๋ฆฌ (YUV420P ํŠน์„ฑ ํ™œ์šฉ) for (uint32_t x = 0; x < width; x += 2) { uint8_t U = u_row[x / 2]; uint8_t V = v_row[x / 2]; // ๋ฃฉ์—… ํ…Œ์ด๋ธ”๋กœ ๋ณ€ํ™˜ ๊ณ„์ˆ˜ ์กฐํšŒ (๊ณฑ์…ˆ ๋Œ€์‹ ) int r_offset = yuv_table_r[V]; int g_offset = yuv_table_g_u[U] + yuv_table_g_v[V]; int b_offset = yuv_table_b[U]; // 2ํ”ฝ์…€ ๋™์‹œ ์ฒ˜๋ฆฌ for (int px = 0; px < 2 && (x + px) < width; ++px) { int Y = y_row[x + px] - 16; Y = (Y * 298) >> 8; // ๋น„ํŠธ ์‹œํ”„ํŠธ๋กœ ๋‚˜๋ˆ—์…ˆ ๋Œ€์ฒด rgb_row[(x + px) * 3 + 0] = clamp(Y + r_offset); rgb_row[(x + px) * 3 + 1] = clamp(Y + g_offset); rgb_row[(x + px) * 3 + 2] = clamp(Y + b_offset); } } } } ์„ฑ๋Šฅ ํ–ฅ์ƒ: ๊ธฐ์กด ๋Œ€๋น„ 3-4๋ฐฐ ๋น ๋ฆ„ (๋ฃฉ์—… ํ…Œ์ด๋ธ” + ๋ผ์ธ ์ตœ์ ํ™”) ๋ฐฉ์•ˆ 2: SIMD (SSE/AVX) ๋ฒกํ„ฐํ™” #include // AVX2 void ConvertYUV420PToRGB24_SIMD() { for (uint32_t y = 0; y < height; ++y) { // 8ํ”ฝ์…€์„ ํ•œ ๋ฒˆ์— ์ฒ˜๋ฆฌ (AVX2 256bit) for (uint32_t x = 0; x < width; x += 8) { // Y๊ฐ’ 8๊ฐœ ๋กœ๋“œ __m256i y_vec = _mm256_loadu_si256((__m256i*)(y_plane + y * y_stride + x)); // U, V๊ฐ’ 4๊ฐœ์”ฉ ๋กœ๋“œ ํ›„ ์ค‘๋ณต ํ™•์žฅ __m128i uv_4 = _mm_loadu_si128((__m128i*)(u_plane + (y/2) * u_stride + x/2)); __m256i u_vec = _mm256_unpacklo_epi8(uv_4, uv_4); // ์ค‘๋ณต ํ™•์žฅ // ๋ฒกํ„ฐํ™”๋œ YUVโ†’RGB ๋ณ€ํ™˜ __m256i r_vec = 
yuv_to_rgb_simd(y_vec, u_vec, v_vec, 0); __m256i g_vec = yuv_to_rgb_simd(y_vec, u_vec, v_vec, 1); __m256i b_vec = yuv_to_rgb_simd(y_vec, u_vec, v_vec, 2); // ์ธํ„ฐ๋ฆฌ๋ธŒํ•ด์„œ RGB24 ํ˜•ํƒœ๋กœ ์ €์žฅ store_rgb24_simd(rgb_data + y * rgb_stride + x * 3, r_vec, g_vec, b_vec); } } } ์„ฑ๋Šฅ ํ–ฅ์ƒ: ๊ธฐ์กด ๋Œ€๋น„ 8-10๋ฐฐ ๋น ๋ฆ„ (SIMD ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ) ๋ฐฉ์•ˆ 3: ์™ธ๋ถ€ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์‚ฌ์šฉ // Intel IPP ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์‚ฌ์šฉ #include void ConvertYUV420PToRGB24_IPP() { IppiSize roi = {width, height}; const Ipp8u* pSrc[3] = {y_plane, u_plane, v_plane}; int srcStep[3] = {y_stride, u_stride, v_stride}; // ํ•œ ์ค„๋กœ ๊ณ ์„ฑ๋Šฅ ๋ณ€ํ™˜ ippiYUV420ToRGB_8u_P3C3R(pSrc, srcStep, rgb_data, rgb_stride, roi); } ์„ฑ๋Šฅ ํ–ฅ์ƒ: ๊ธฐ์กด ๋Œ€๋น„ 10-15๋ฐฐ ๋น ๋ฆ„ (์ตœ๊ณ  ์ตœ์ ํ™”) --- ๐Ÿ”ด ๋ฌธ์ œ 2: BMP ํŒŒ์ผ ์ €์žฅ ์ตœ์ ํ™” ๋ฐฉ์•ˆ ๋ฐฉ์•ˆ 1: ๋ธ”๋ก ๋‹จ์œ„ ์“ฐ๊ธฐ + RGBโ†’BGR ๋ณ€ํ™˜ ๋ถ„๋ฆฌ void SaveAsBMP_Optimized() { // 1. RGBโ†’BGR ๋ณ€ํ™˜์„ ๋ฉ”๋ชจ๋ฆฌ์—์„œ ๋จผ์ € ์ˆ˜ํ–‰ std::vector bgr_buffer(rgb_frame.height * padded_row_size); for (uint32_t y = 0; y < rgb_frame.height; ++y) { const uint8_t* src_row = rgb_frame.data.data() + (y * rgb_frame.stride); uint8_t* dst_row = bgr_buffer.data() + (y * padded_row_size); // ๋ผ์ธ ๋‹จ์œ„๋กœ RGBโ†’BGR ๋ณ€ํ™˜ (ํŒจ๋”ฉ ํฌํ•จ) for (uint32_t x = 0; x < rgb_frame.width; ++x) { dst_row[x * 3 + 0] = src_row[x * 3 + 2]; // B dst_row[x * 3 + 1] = src_row[x * 3 + 1]; // G dst_row[x * 3 + 2] = src_row[x * 3 + 0]; // R } // ํŒจ๋”ฉ ์˜์—ญ 0์œผ๋กœ ์ฑ„์›€ memset(dst_row + rgb_frame.width * 3, 0, padding); } // 2. 
ํ•œ ๋ฒˆ์— ๋ธ”๋ก ์“ฐ๊ธฐ (621๋งŒ ๋ฒˆ โ†’ 1080๋ฒˆ) for (int32_t y = rgb_frame.height - 1; y >= 0; --y) { const uint8_t* row_data = bgr_buffer.data() + (y * padded_row_size); file.write(reinterpret_cast(row_data), padded_row_size); } } ์„ฑ๋Šฅ ํ–ฅ์ƒ: ๊ธฐ์กด ๋Œ€๋น„ 50-100๋ฐฐ ๋น ๋ฆ„ ๋ฐฉ์•ˆ 2: ๋ฉ”๋ชจ๋ฆฌ ๋งคํ•‘ ํŒŒ์ผ I/O void SaveAsBMP_MemoryMapped() { // Windows ๋ฉ”๋ชจ๋ฆฌ ๋งคํ•‘ HANDLE hFile = CreateFile(file_path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); HANDLE hMapping = CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, total_file_size, NULL); uint8_t* mapped_memory = static_cast(MapViewOfFile(hMapping, FILE_MAP_WRITE, 0, 0, 0)); // ๋ฉ”๋ชจ๋ฆฌ์— ์ง์ ‘ ์“ฐ๊ธฐ (ํŒŒ์ผ I/O ์—†์Œ) memcpy(mapped_memory, &header, sizeof(header)); for (int32_t y = rgb_frame.height - 1; y >= 0; --y) { uint8_t* dst = mapped_memory + header_size + (rgb_frame.height - 1 - y) * padded_row_size; const uint8_t* src = rgb_frame.data.data() + (y * rgb_frame.stride); // RGBโ†’BGR ๋ณ€ํ™˜ํ•˜๋ฉด์„œ ์ง์ ‘ ๋ณต์‚ฌ convert_rgb_to_bgr_line(src, dst, rgb_frame.width); } UnmapViewOfFile(mapped_memory); CloseHandle(hMapping); CloseHandle(hFile); } ์„ฑ๋Šฅ ํ–ฅ์ƒ: ๊ธฐ์กด ๋Œ€๋น„ 100-200๋ฐฐ ๋น ๋ฆ„ ๋ฐฉ์•ˆ 3: SIMD๋ฅผ ์ด์šฉํ•œ RGBโ†’BGR ๋ณ€ํ™˜ void ConvertRGBToBGR_SIMD(const uint8_t* rgb, uint8_t* bgr, size_t pixel_count) { const __m256i shuffle_mask = _mm256_setr_epi8( 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, -1, 2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, -1 ); for (size_t i = 0; i < pixel_count; i += 32) { // 32๋ฐ”์ดํŠธ์”ฉ ์ฒ˜๋ฆฌ __m256i rgb_data = _mm256_loadu_si256((__m256i*)(rgb + i)); __m256i bgr_data = _mm256_shuffle_epi8(rgb_data, shuffle_mask); _mm256_storeu_si256((__m256i*)(bgr + i), bgr_data); } } ์„ฑ๋Šฅ ํ–ฅ์ƒ: ๋ณ€ํ™˜ ๋ถ€๋ถ„๋งŒ 5-8๋ฐฐ ๋น ๋ฆ„ --- ๐Ÿ“Š ์ข…ํ•ฉ ์„ฑ๋Šฅ ๊ฐœ์„  ํšจ๊ณผ ์˜ˆ์ƒ | ๋ฐฉ์•ˆ | YUVโ†’RGB | BMP ์ €์žฅ | ์ „์ฒด ํšจ๊ณผ | |--------------|---------|--------|-----------| | ํ˜„์žฌ | 150ms | 200ms | 350ms/ํ”„๋ ˆ์ž„ | | ๋ฃฉ์—…ํ…Œ์ด๋ธ” + 
블록쓰기 | 40ms | 4ms | 44ms/프레임 |
| SIMD + 메모리매핑 | 15ms | 2ms | 17ms/프레임 |
| 외부라이브러리(IPP) | 10ms | 2ms | 12ms/프레임 |

결론: 현재 2.8fps → 30fps 실시간 재생 가능

어떤 방안부터 구현해보시겠습니까?