diff --git a/test/ref/repack.txt b/test/ref/repack.txt new file mode 100644 index 0000000000..3946a6eb47 --- /dev/null +++ b/test/ref/repack.txt @@ -0,0 +1,163 @@ +0bgr => [pa] [un] gbrp | a=1:1 [tu] [tp] +0rgb => [pa] [un] gbrp | a=1:1 [tu] [tp] +abgr => [pa] [un] gbrap | a=1:1 [tu] [tp] +argb => [pa] [un] gbrap | a=1:1 [tu] [tp] +ayuv64 => [pa] [un] yuva444p16 | a=1:1 [tu] [tp] +ayuv64be => [pa] [un] yuva444p16 | a=1:1 [tu] [tp] +bayer_bggr16 => no +bayer_bggr16be => no +bayer_bggr8 => no +bayer_gbrg16 => no +bayer_gbrg16be => no +bayer_gbrg8 => no +bayer_grbg16 => no +bayer_grbg16be => no +bayer_grbg8 => no +bayer_rggb16 => no +bayer_rggb16be => no +bayer_rggb8 => no +bgr0 => [pa] [un] gbrp | a=1:1 [tu] [tp] +bgr24 => [pa] [un] gbrp | a=1:1 +bgr4 => no +bgr444 => [pa] [un] gbrp4 | a=1:1 +bgr444 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr444be => [pa] [un] gbrp4 | a=1:1 +bgr444be => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr48 => [pa] [un] gbrp16 | a=1:1 +bgr48be => [pa] [un] gbrp16 | a=1:1 +bgr4_byte => [pa] [un] gbrp2 | a=1:1 +bgr4_byte => [pa] [un] gbrp1 | a=1:1 [round-down] +bgr4_byte => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr555 => [pa] [un] gbrp5 | a=1:1 +bgr555 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr555be => [pa] [un] gbrp5 | a=1:1 +bgr555be => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr565 => [pa] [un] gbrp6 | a=1:1 +bgr565 => [pa] [un] gbrp5 | a=1:1 [round-down] +bgr565 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr565be => [pa] [un] gbrp6 | a=1:1 +bgr565be => [pa] [un] gbrp5 | a=1:1 [round-down] +bgr565be => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgr8 => [pa] [un] gbrp3 | a=1:1 +bgr8 => [pa] [un] gbrp2 | a=1:1 [round-down] +bgr8 => [pa] [un] gbrp | a=1:1 [expand-8bit] +bgra => [pa] [un] gbrap | a=1:1 [tu] [tp] +bgra64 => [pa] [un] gbrap16 | a=1:1 +bgra64be => [pa] [un] gbrap16 | a=1:1 +cuda => no +d3d11 => no +d3d11va_vld => no +drm_prime => no +dxva2_vld => no +gbrap10be => [pa] [un] gbrap10 | a=1:1 +gbrap12be => [pa] [un] gbrap12 | a=1:1 +gbrap16be 
=> [pa] [un] gbrap16 | a=1:1 +gbrapf32be => [pa] [un] gbrapf32 | a=1:1 +gbrp10be => [pa] [un] gbrp10 | a=1:1 +gbrp12be => [pa] [un] gbrp12 | a=1:1 +gbrp14be => [pa] [un] gbrp14 | a=1:1 +gbrp16be => [pa] [un] gbrp16 | a=1:1 +gbrp9be => [pa] [un] gbrp9 | a=1:1 +gbrpf32be => [pa] [un] gbrpf32 | a=1:1 +gray10be => [pa] [un] gray10 | a=1:1 +gray12be => [pa] [un] gray12 | a=1:1 +gray14be => [pa] [un] gray14 | a=1:1 +gray16be => [pa] [un] gray16 | a=1:1 +gray9be => [pa] [un] gray9 | a=1:1 +grayf32be => [pa] [un] grayf32 | a=1:1 +mediacodec => no +mmal => no +monob => [pa] [un] y1 | a=8:1 [tu] [tp] +monob => [pa] [un] gray | a=8:1 [expand-8bit] +monow => [pa] [un] y1 | a=8:1 [tu] [tp] +monow => [pa] [un] gray | a=8:1 [expand-8bit] +nv12 => [pa] [un] yuv420p | a=2:2 [tu] [tp] +nv16 => [pa] [un] yuv422p | a=2:1 +nv20 => [pa] [un] yuv422p10 | a=2:1 +nv20be => [pa] [un] yuv422p10 | a=2:1 +nv21 => [pa] [un] yuv420p | a=2:2 [tu] [tp] +nv24 => [pa] [un] yuv444p | a=1:1 +nv42 => [pa] [un] yuv444p | a=1:1 +opencl => no +p010 => [pa] [un] yuv420p16 | a=2:2 +p010be => [pa] [un] yuv420p16 | a=2:2 +p016 => [pa] [un] yuv420p16 | a=2:2 +p016be => [pa] [un] yuv420p16 | a=2:2 +pal8 => [un] gbrap | a=1:1 +qsv => no +rgb0 => [pa] [un] gbrp | a=1:1 [tu] [tp] +rgb24 => [pa] [un] gbrp | a=1:1 +rgb30 => [pa] [un] gbrp10 | a=1:1 +rgb4 => no +rgb444 => [pa] [un] gbrp4 | a=1:1 +rgb444 => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb444be => [pa] [un] gbrp4 | a=1:1 +rgb444be => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb48 => [pa] [un] gbrp16 | a=1:1 +rgb48be => [pa] [un] gbrp16 | a=1:1 [tu] [tp] +rgb4_byte => [pa] [un] gbrp2 | a=1:1 +rgb4_byte => [pa] [un] gbrp1 | a=1:1 [round-down] +rgb4_byte => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb555 => [pa] [un] gbrp5 | a=1:1 +rgb555 => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb555be => [pa] [un] gbrp5 | a=1:1 +rgb555be => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb565 => [pa] [un] gbrp6 | a=1:1 +rgb565 => [pa] [un] gbrp5 | a=1:1 [round-down] +rgb565 => [pa] [un] 
gbrp | a=1:1 [expand-8bit] +rgb565be => [pa] [un] gbrp6 | a=1:1 +rgb565be => [pa] [un] gbrp5 | a=1:1 [round-down] +rgb565be => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgb8 => [pa] [un] gbrp3 | a=1:1 +rgb8 => [pa] [un] gbrp2 | a=1:1 [round-down] +rgb8 => [pa] [un] gbrp | a=1:1 [expand-8bit] +rgba => [pa] [un] gbrap | a=1:1 [tu] [tp] +rgba64 => [pa] [un] gbrap16 | a=1:1 [tu] [tp] +rgba64be => [pa] [un] gbrap16 | a=1:1 +uyvy422 => [pa] [un] yuv422p | a=2:1 +uyyvyy411 => no +vaapi => no +vaapi_idct => no +vaapi_moco => no +vdpau => no +vdpau_output => no +videotoolbox => no +vulkan => no +xvmc => no +xyz12 => [pa] [un] gbrp16 | a=1:1 +xyz12be => [pa] [un] gbrp16 | a=1:1 +y210 => [pa] [un] yuv422p16 | a=2:1 +y210be => [pa] [un] yuv422p16 | a=2:1 +ya16 => [pa] [un] yap16 | a=1:1 [tu] [tp] +ya16be => [pa] [un] yap16 | a=1:1 +ya8 => [pa] [un] yap8 | a=1:1 +yuv420p10be => [pa] [un] yuv420p10 | a=2:2 +yuv420p12be => [pa] [un] yuv420p12 | a=2:2 +yuv420p14be => [pa] [un] yuv420p14 | a=2:2 +yuv420p16be => [pa] [un] yuv420p16 | a=2:2 +yuv420p9be => [pa] [un] yuv420p9 | a=2:2 +yuv422p10be => [pa] [un] yuv422p10 | a=2:1 +yuv422p12be => [pa] [un] yuv422p12 | a=2:1 +yuv422p14be => [pa] [un] yuv422p14 | a=2:1 +yuv422p16be => [pa] [un] yuv422p16 | a=2:1 [tu] [tp] +yuv422p9be => [pa] [un] yuv422p9 | a=2:1 +yuv440p10be => [pa] [un] yuv440p10 | a=1:2 +yuv440p12be => [pa] [un] yuv440p12 | a=1:2 +yuv444p10be => [pa] [un] yuv444p10 | a=1:1 +yuv444p12be => [pa] [un] yuv444p12 | a=1:1 +yuv444p14be => [pa] [un] yuv444p14 | a=1:1 +yuv444p16be => [pa] [un] yuv444p16 | a=1:1 +yuv444p9be => [pa] [un] yuv444p9 | a=1:1 +yuva420p10be => [pa] [un] yuva420p10 | a=2:2 +yuva420p16be => [pa] [un] yuva420p16 | a=2:2 +yuva420p9be => [pa] [un] yuva420p9 | a=2:2 +yuva422p10be => [pa] [un] yuva422p10 | a=2:1 +yuva422p12be => [pa] [un] yuva422p12 | a=2:1 +yuva422p16be => [pa] [un] yuva422p16 | a=2:1 +yuva422p9be => [pa] [un] yuva422p9 | a=2:1 +yuva444p10be => [pa] [un] yuva444p10 | a=1:1 +yuva444p12be => [pa] 
[un] yuva444p12 | a=1:1 +yuva444p16be => [pa] [un] yuva444p16 | a=1:1 +yuva444p9be => [pa] [un] yuva444p9 | a=1:1 +yuyv422 => [pa] [un] yuv422p | a=2:1 +yvyu422 => [pa] [un] yuv422p | a=2:1 [tu] [tp] diff --git a/test/repack.c b/test/repack.c new file mode 100644 index 0000000000..ede6046350 --- /dev/null +++ b/test/repack.c @@ -0,0 +1,249 @@ +#include + +#include "common/common.h" +#include "tests.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "video/repack.h" +#include "video/zimg.h" + +// Excuse the utter stupidity. +#define UNFUCK(v) ((v) > 0 ? (v) : pixfmt2imgfmt(-(v))) +static_assert(IMGFMT_START > 0, ""); +#define IMGFMT_GBRP (-AV_PIX_FMT_GBRP) +#define IMGFMT_GBRAP (-AV_PIX_FMT_GBRAP) + +struct entry { + int w, h; + int fmt_a; + const void *const a[4]; + int fmt_b; + const void *const b[4]; + int flags; +}; + +#define P8(...) (const uint8_t[]){__VA_ARGS__} +#define P16(...) (const uint16_t[]){__VA_ARGS__} + +// Warning: only entries that match existing conversions are tested. +static const struct entry repack_tests[] = { + // Note: the '0' tests rely on 0 being written, although by definition the + // contents of this padding is undefined. The repacker always writes + // it this way, though. 
+ {1, 1, IMGFMT_RGB0, {P8(1, 2, 3, 0)}, + IMGFMT_GBRP, {P8(2), P8(3), P8(1)}}, + {1, 1, IMGFMT_BGR0, {P8(1, 2, 3, 0)}, + IMGFMT_GBRP, {P8(2), P8(1), P8(3)}}, + {1, 1, IMGFMT_0RGB, {P8(0, 1, 2, 3)}, + IMGFMT_GBRP, {P8(2), P8(3), P8(1)}}, + {1, 1, IMGFMT_0BGR, {P8(0, 1, 2, 3)}, + IMGFMT_GBRP, {P8(2), P8(1), P8(3)}}, + {1, 1, IMGFMT_RGBA, {P8(1, 2, 3, 4)}, + IMGFMT_GBRAP, {P8(2), P8(3), P8(1), P8(4)}}, + {1, 1, IMGFMT_BGRA, {P8(1, 2, 3, 4)}, + IMGFMT_GBRAP, {P8(2), P8(1), P8(3), P8(4)}}, + {1, 1, IMGFMT_ARGB, {P8(4, 1, 2, 3)}, + IMGFMT_GBRAP, {P8(2), P8(3), P8(1), P8(4)}}, + {1, 1, IMGFMT_ABGR, {P8(4, 1, 2, 3)}, + IMGFMT_GBRAP, {P8(2), P8(1), P8(3), P8(4)}}, + {1, 1, IMGFMT_RGBA64, {P16(0x1a1b, 0x2a2b, 0x3a3b, 0x4a4b)}, + -AV_PIX_FMT_GBRAP16, {P16(0x2a2b), P16(0x3a3b), + P16(0x1a1b), P16(0x4a4b)}}, + {1, 1, -AV_PIX_FMT_RGB48BE, {P16(0x1a1b, 0x2a2b, 0x3a3b)}, + -AV_PIX_FMT_GBRP16, {P16(0x2b2a), P16(0x3b3a), + P16(0x1b1a)}}, + {8, 1, -AV_PIX_FMT_MONOWHITE, {P8(0xAA)}, + IMGFMT_Y1, {P8(0, 1, 0, 1, 0, 1, 0, 1)}}, + {8, 1, -AV_PIX_FMT_MONOBLACK, {P8(0xAA)}, + IMGFMT_Y1, {P8(1, 0, 1, 0, 1, 0, 1, 0)}}, + {2, 2, IMGFMT_NV12, {P8(1, 2, 3, 4), P8(5, 6)}, + IMGFMT_420P, {P8(1, 2, 3, 4), P8(5), P8(6)}}, + {2, 2, -AV_PIX_FMT_NV21, {P8(1, 2, 3, 4), P8(5, 6)}, + IMGFMT_420P, {P8(1, 2, 3, 4), P8(6), P8(5)}}, + {1, 1, -AV_PIX_FMT_AYUV64, {P16(1, 2, 3, 4)}, + -AV_PIX_FMT_YUVA444P16, {P16(2), P16(3), P16(4), P16(1)}}, + {1, 1, -AV_PIX_FMT_AYUV64BE, {P16(0x0100, 0x0200, 0x0300, 0x0400)}, + -AV_PIX_FMT_YUVA444P16, {P16(2), P16(3), P16(4), P16(1)}}, + {2, 1, -AV_PIX_FMT_YVYU422, {P8(1, 2, 3, 4)}, + -AV_PIX_FMT_YUV422P, {P8(1, 3), P8(4), P8(2)}}, + {1, 1, -AV_PIX_FMT_YA16, {P16(1, 2)}, + IMGFMT_YAP16, {P16(1), P16(2)}}, + {2, 1, -AV_PIX_FMT_YUV422P16BE, {P16(0x1a1b, 0x2a2b), P16(0x3a3b), + P16(0x4a4b)}, + -AV_PIX_FMT_YUV422P16, {P16(0x1b1a, 0x2b2a), P16(0x3b3a), + P16(0x4b4a)}}, +}; + +static bool is_true_planar(int imgfmt) +{ + struct mp_regular_imgfmt desc; + if 
(!mp_get_regular_imgfmt(&desc, imgfmt)) + return false; + + for (int n = 0; n < desc.num_planes; n++) { + if (desc.planes[n].num_components != 1) + return false; + } + + return true; +} + +static int try_repack(struct test_ctx *ctx, FILE *f, int imgfmt, int flags, + int not_if_fmt) +{ + char *head = mp_tprintf(80, "%-15s =>", mp_imgfmt_to_name(imgfmt)); + struct mp_repack *un = mp_repack_create_planar(imgfmt, false, flags); + struct mp_repack *pa = mp_repack_create_planar(imgfmt, true, flags); + + // If both exists, they must be always symmetric. + if (un && pa) { + assert(mp_repack_get_format_src(pa) == mp_repack_get_format_dst(un)); + assert(mp_repack_get_format_src(un) == mp_repack_get_format_dst(pa)); + assert(mp_repack_get_align_x(pa) == mp_repack_get_align_x(un)); + assert(mp_repack_get_align_y(pa) == mp_repack_get_align_y(un)); + } + + int a = 0; + int b = 0; + if (un) { + a = mp_repack_get_format_src(un); + b = mp_repack_get_format_dst(un); + } else if (pa) { + a = mp_repack_get_format_dst(pa); + b = mp_repack_get_format_src(pa); + } + + // Skip the identity ones because they're uninteresting, and add too much + // noise. But still make sure they behave as expected. + if (is_true_planar(imgfmt)) { + // (note that we require alpha-enabled zimg) + assert(mp_zimg_supports_in_format(imgfmt)); + assert(un && pa); + assert(a == imgfmt && b == imgfmt); + talloc_free(pa); + talloc_free(un); + return 0; + } + + struct mp_repack *rp = pa ? pa : un; + if (!rp) { + if (!flags) + fprintf(f, "%s no\n", head); + return 0; + } + + assert(a == imgfmt); + if (b && b == not_if_fmt) { + talloc_free(pa); + talloc_free(un); + return 0; + } + + fprintf(f, "%s %4s %4s %-15s |", head, pa ? "[pa]" : "", un ? 
"[un]" : "", + mp_imgfmt_to_name(b)); + + fprintf(f, " a=%d:%d", mp_repack_get_align_x(rp), mp_repack_get_align_y(rp)); + + if (flags & REPACK_CREATE_ROUND_DOWN) + fprintf(f, " [round-down]"); + if (flags & REPACK_CREATE_EXPAND_8BIT) + fprintf(f, " [expand-8bit]"); + + // LCM of alignment of all packers. + int ax = mp_repack_get_align_x(rp); + int ay = mp_repack_get_align_y(rp); + if (pa && un) { + ax = MPMAX(mp_repack_get_align_x(pa), mp_repack_get_align_x(un)); + ay = MPMAX(mp_repack_get_align_y(pa), mp_repack_get_align_y(un)); + } + + for (int n = 0; n < MP_ARRAY_SIZE(repack_tests); n++) { + const struct entry *e = &repack_tests[n]; + int fmt_a = UNFUCK(e->fmt_a); + int fmt_b = UNFUCK(e->fmt_b); + if (!(fmt_a == a && fmt_b == b && e->flags == flags)) + continue; + + // We convert a "random" macro pixel to catch potential addressing bugs + // that might be ignored with (0, 0) origins. + struct mp_image *ia = mp_image_alloc(fmt_a, e->w * 5 * ax, e->h * 5 * ay); + struct mp_image *ib = mp_image_alloc(fmt_b, e->w * 7 * ax, e->h * 6 * ay); + int sx = 4 * ax, sy = 3 * ay, dx = 3 * ax, dy = 2 * ay; + + assert(ia && ib); + + for (int pack = 0; pack < 2; pack++) { + struct mp_repack *repacker = pack ? pa : un; + if (!repacker) + continue; + + mp_image_clear(ia, 0, 0, ia->w, ia->h); + mp_image_clear(ib, 0, 0, ib->w, ib->h); + + const void *const *dstd = pack ? e->a : e->b; + const void *const *srcd = pack ? e->b : e->a; + struct mp_image *dsti = pack ? ia : ib; + struct mp_image *srci = pack ? 
ib : ia; + + bool r = repack_config_buffers(repacker, 0, dsti, 0, srci, NULL); + assert(r); + + for (int p = 0; p < srci->num_planes; p++) { + uint8_t *ptr = mp_image_pixel_ptr(srci, p, sx, sy); + for (int y = 0; y < e->h >> srci->fmt.ys[p]; y++) { + int w = e->w >> srci->fmt.xs[p]; + int wb = (w * srci->fmt.bpp[p] + 7) / 8; + const void *cptr = (uint8_t *)srcd[p] + wb * y; + memcpy(ptr + srci->stride[p] * y, cptr, wb); + } + } + + repack_line(repacker, dx, dy, sx, sy, e->w); + + for (int p = 0; p < dsti->num_planes; p++) { + uint8_t *ptr = mp_image_pixel_ptr(dsti, p, dx, dy); + for (int y = 0; y < e->h >> dsti->fmt.ys[p]; y++) { + int w = e->w >> dsti->fmt.xs[p]; + int wb = (w * dsti->fmt.bpp[p] + 7) / 8; + const void *cptr = (uint8_t *)dstd[p] + wb * y; + assert_memcmp(ptr + dsti->stride[p] * y, cptr, wb); + } + } + + fprintf(f, " [t%s]", pack ? "p" : "u"); + } + + talloc_free(ia); + talloc_free(ib); + } + + fprintf(f, "\n"); + + talloc_free(pa); + talloc_free(un); + return b; +} + +static void run(struct test_ctx *ctx) +{ + FILE *f = test_open_out(ctx, "repack.txt"); + + init_imgfmts_list(); + for (int n = 0; n < num_imgfmts; n++) { + int imgfmt = imgfmts[n]; + + int other = try_repack(ctx, f, imgfmt, 0, 0); + try_repack(ctx, f, imgfmt, REPACK_CREATE_ROUND_DOWN, other); + try_repack(ctx, f, imgfmt, REPACK_CREATE_EXPAND_8BIT, other); + } + + fclose(f); + + assert_text_files_equal(ctx, "repack.txt", "repack.txt", + "This can fail if FFmpeg/libswscale adds or removes pixfmts."); +} + +const struct unittest test_repack = { + .name = "repack", + .run = run, +}; diff --git a/test/tests.c b/test/tests.c index 9ef88f4a8d..d8df43f319 100644 --- a/test/tests.c +++ b/test/tests.c @@ -12,6 +12,7 @@ static const struct unittest *unittests[] = { &test_paths, &test_repack_sws, #if HAVE_ZIMG + &test_repack, // zimg only due to cross-checking with zimg.c &test_repack_zimg, #endif NULL @@ -128,3 +129,25 @@ void assert_text_files_equal_impl(const char *file, int line, abort(); } } 
+ +static void hexdump(const uint8_t *d, size_t size) +{ + printf("|"); + while (size--) { + printf(" %02x", d[0]); + d++; + } + printf(" |\n"); +} + +void assert_memcmp_impl(const char *file, int line, + const void *a, const void *b, size_t size) +{ + if (memcmp(a, b, size) == 0) + return; + + printf("%s:%d: mismatching data:\n", file, line); + hexdump(a, size); + hexdump(b, size); + abort(); +} diff --git a/test/tests.h b/test/tests.h index f4065f596f..8b2eb98174 100644 --- a/test/tests.h +++ b/test/tests.h @@ -43,6 +43,7 @@ extern const struct unittest test_json; extern const struct unittest test_linked_list; extern const struct unittest test_repack_sws; extern const struct unittest test_repack_zimg; +extern const struct unittest test_repack; extern const struct unittest test_paths; #define assert_true(x) assert(x) @@ -54,6 +55,10 @@ extern const struct unittest test_paths; #define assert_float_equal(a, b, tolerance) \ assert_float_equal_impl(__FILE__, __LINE__, (a), (b), (tolerance)) +// Assert that memcmp(a,b,s)==0, or hexdump output on failure. +#define assert_memcmp(a, b, s) \ + assert_memcmp_impl(__FILE__, __LINE__, (a), (b), (s)) + // Require that the files "ref" and "new" are the same. The paths can be // relative to ref_path and out_path respectively. If they're not the same, // the output of "diff" is shown, the err message (if not NULL), and the test @@ -69,6 +74,8 @@ void assert_float_equal_impl(const char *file, int line, void assert_text_files_equal_impl(const char *file, int line, struct test_ctx *ctx, const char *ref, const char *new, const char *err); +void assert_memcmp_impl(const char *file, int line, + const void *a, const void *b, size_t size); // Open a new file in the out_path. Always succeeds. 
FILE *test_open_out(struct test_ctx *ctx, const char *name); diff --git a/video/img_format.h b/video/img_format.h index 8e55cc9493..b0fdef8a50 100644 --- a/video/img_format.h +++ b/video/img_format.h @@ -69,8 +69,9 @@ struct mp_imgfmt_desc { int flags; // MP_IMGFLAG_* bitfield int8_t num_planes; int8_t chroma_xs, chroma_ys; // chroma shift (i.e. log2 of chroma pixel size) - int8_t align_x, align_y; // pixel size to get byte alignment and to get + int8_t align_x, align_y; // pixel count to get byte alignment and to get // to a pixel pos where luma & chroma aligns + // always power of 2 int8_t bytes[MP_MAX_PLANES]; // bytes per pixel (MP_IMGFLAG_BYTE_ALIGNED) int8_t bpp[MP_MAX_PLANES]; // bits per pixel int8_t plane_bits; // number of bits in use for plane 0 diff --git a/video/repack.c b/video/repack.c new file mode 100644 index 0000000000..359e32996d --- /dev/null +++ b/video/repack.c @@ -0,0 +1,1110 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */ + +#include + +#include +#include + +#include "common/common.h" +#include "repack.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "video/mp_image.h" + +enum repack_step_type { + REPACK_STEP_REPACK, + REPACK_STEP_ENDIAN, +}; + +struct repack_step { + enum repack_step_type type; + // 0=input, 1=output + struct mp_image *buf[2]; + bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer + struct mp_imgfmt_desc fmt[2]; + struct mp_image *tmp; // output buffer, if needed +}; + +struct mp_repack { + bool pack; // if false, this is for unpacking + int flags; + int imgfmt_user; // original mp format (unchanged endian) + int imgfmt_a; // original mp format (possibly packed format, + // swapped endian) + int imgfmt_b; // equivalent unpacked/planar format + struct mp_imgfmt_desc fmt_a;// ==imgfmt_a + struct mp_imgfmt_desc fmt_b;// ==imgfmt_b + + void (*repack)(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w); + + bool passthrough_y; // possible luma plane optimization for e.g. nv12 + int endian_size; // endian swap; 0=none, 2/4=swap word size + + // For packed_repack. + int components[4]; // b[n] = mp_image.planes[components[n]] + // pack: a is dst, b is src + // unpack: a is src, b is dst + void (*packed_repack_scanline)(void *a, void *b[], int w); + + // Fringe RGB/YUV. + uint8_t comp_size; + uint8_t *comp_map; + uint8_t comp_shifts[3]; + uint8_t *comp_lut; + + // REPACK_STEP_REPACK: if true, need to copy this plane + bool copy_buf[4]; + + struct repack_step steps[4]; + int num_steps; + + bool configured; +}; + +// depth = number of LSB in use +static int find_gbrp_format(int depth, int num_planes) +{ + if (num_planes != 3 && num_planes != 4) + return 0; + struct mp_regular_imgfmt desc = { + .component_type = MP_COMPONENT_TYPE_UINT, + .forced_csp = MP_CSP_RGB, + .component_size = depth > 8 ? 2 : 1, + .component_pad = depth - (depth > 8 ? 
16 : 8), + .num_planes = num_planes, + .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} }, + }; + return mp_find_regular_imgfmt(&desc); +} + +// depth = number of LSB in use +static int find_yuv_format(int depth, int num_planes) +{ + if (num_planes < 1 || num_planes > 4) + return 0; + struct mp_regular_imgfmt desc = { + .component_type = MP_COMPONENT_TYPE_UINT, + .component_size = depth > 8 ? 2 : 1, + .component_pad = depth - (depth > 8 ? 16 : 8), + .num_planes = num_planes, + .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} }, + }; + if (num_planes == 2) + desc.planes[1].components[0] = 4; + return mp_find_regular_imgfmt(&desc); +} + +// Copy one line on the plane p. +static void copy_plane(struct mp_image *dst, int dst_x, int dst_y, + struct mp_image *src, int src_x, int src_y, + int w, int p) +{ + // Number of lines on this plane. + int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1; + size_t size = mp_image_plane_bytes(dst, p, dst_x, w); + + assert(dst->fmt.bpp[p] == src->fmt.bpp[p]); + + for (int y = 0; y < h; y++) { + void *pd = mp_image_pixel_ptr(dst, p, dst_x, dst_y + y); + void *ps = mp_image_pixel_ptr(src, p, src_x, src_y + y); + memcpy(pd, ps, size); + } +} + +// Swap endian for one line. +static void swap_endian(struct mp_image *dst, int dst_x, int dst_y, + struct mp_image *src, int src_x, int src_y, + int w, int endian_size) +{ + assert(src->fmt.num_planes == dst->fmt.num_planes); + + for (int p = 0; p < dst->fmt.num_planes; p++) { + int xs = dst->fmt.xs[p]; + int bpp = dst->fmt.bytes[p]; + int words_per_pixel = bpp / endian_size; + int num_words = ((w + (1 << xs) - 1) >> xs) * words_per_pixel; + // Number of lines on this plane. 
+ int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1; + + assert(src->fmt.bytes[p] == bpp); + + for (int y = 0; y < h; y++) { + void *s = mp_image_pixel_ptr(src, p, src_x, src_y + y); + void *d = mp_image_pixel_ptr(dst, p, dst_x, dst_y + y); + switch (endian_size) { + case 2: + for (int x = 0; x < num_words; x++) + ((uint16_t *)d)[x] = av_bswap16(((uint16_t *)s)[x]); + break; + case 4: + for (int x = 0; x < num_words; x++) + ((uint32_t *)d)[x] = av_bswap32(((uint32_t *)s)[x]); + break; + default: + assert(0); + } + } + } +} + +// PA = PAck, copy planar input to single packed array +// UN = UNpack, copy packed input to planar output +// Naming convention: +// pa_/un_ prefix to identify conversion direction. +// Left (LSB, lowest byte address) -> Right (MSB, highest byte address). +// (This is unusual; MSB to LSB is more commonly used to describe formats, +// but our convention makes more sense for byte access in little endian.) +// "c" identifies a color component. +// "z" identifies known zero padding. +// "x" identifies uninitialized padding. +// A component is followed by its size in bits. +// Size can be omitted for multiple uniform components (c8c8c8 == ccc8). +// Unpackers will often use "x" for padding, because they ignore it, while +// packers will use "z" because they write zero. 
+ +#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3) \ + static void name(void *dst, void *src[], int w) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = \ + ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ + ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \ + ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) | \ + ((packed_t)((plane_t *)src[3])[x] << (sh_c3)); \ + } \ + } + +#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ + ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask); \ + } \ + } + + +#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad) \ + static void name(void *dst, void *src[], int w) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = (pad) | \ + ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ + ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \ + ((packed_t)((plane_t *)src[2])[x] << (sh_c2)); \ + } \ + } + +UN_WORD_4(un_cccc8, uint32_t, uint8_t, 0, 8, 16, 24, 0xFFu) +PA_WORD_4(pa_cccc8, uint32_t, uint8_t, 0, 8, 16, 24) +// Not sure if this is a good idea; there may be no alignment guarantee. 
+UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu) +PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48) + +#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ + } \ + } + +UN_WORD_3(un_ccc8x8, uint32_t, uint8_t, 0, 8, 16, 0xFFu) +PA_WORD_3(pa_ccc8z8, uint32_t, uint8_t, 0, 8, 16, 0) +UN_WORD_3(un_x8ccc8, uint32_t, uint8_t, 8, 16, 24, 0xFFu) +PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0) +UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu) +PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 20, 10, 0, 0) + +#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \ + static void name(void *dst, void *src[], int w) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = (pad) | \ + ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ + ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \ + } \ + } + +#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + } \ + } + +UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu) +PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0) +UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu) +PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0) + +#define PA_SEQ_3(name, comp_t) \ + static void name(void *dst, void *src[], int w) { \ + comp_t *r = dst; \ + for (int x = 0; x < w; x++) { \ + *r++ = ((comp_t *)src[0])[x]; \ + *r++ = ((comp_t *)src[1])[x]; \ + *r++ = ((comp_t *)src[2])[x]; \ + } \ + } + +#define UN_SEQ_3(name, comp_t) \ + static void name(void *src, void 
*dst[], int w) { \ + comp_t *r = src; \ + for (int x = 0; x < w; x++) { \ + ((comp_t *)dst[0])[x] = *r++; \ + ((comp_t *)dst[1])[x] = *r++; \ + ((comp_t *)dst[2])[x] = *r++; \ + } \ + } + +UN_SEQ_3(un_ccc8, uint8_t) +PA_SEQ_3(pa_ccc8, uint8_t) +UN_SEQ_3(un_ccc16, uint16_t) +PA_SEQ_3(pa_ccc16, uint16_t) + +// "regular": single packed plane, all components have same width (except padding) +struct regular_repacker { + int packed_width; // number of bits of the packed pixel + int component_width; // number of bits for a single component + int prepadding; // number of bits of LSB padding + int num_components; // number of components that can be accessed + void (*pa_scanline)(void *a, void *b[], int w); + void (*un_scanline)(void *a, void *b[], int w); +}; + +static const struct regular_repacker regular_repackers[] = { + {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8}, + {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8}, + {32, 8, 0, 4, pa_cccc8, un_cccc8}, + {64, 16, 0, 4, pa_cccc16, un_cccc16}, + {24, 8, 0, 3, pa_ccc8, un_ccc8}, + {48, 16, 0, 3, pa_ccc16, un_ccc16}, + {16, 8, 0, 2, pa_cc8, un_cc8}, + {32, 16, 0, 2, pa_cc16, un_cc16}, + {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2}, +}; + +static void packed_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint32_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + rp->packed_repack_scanline(pa, pb, w); +} + +// Tries to set a packer/unpacker for component-wise byte aligned formats. 
+static void setup_packed_packer(struct mp_repack *rp) +{ + struct mp_regular_imgfmt desc; + if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a)) + return; + + if (desc.num_planes != 1 || desc.planes[0].num_components < 2) + return; + struct mp_regular_imgfmt_plane *p = &desc.planes[0]; + + int num_real_components = 0; + bool has_alpha = false; + for (int n = 0; n < p->num_components; n++) { + if (p->components[n]) { + has_alpha |= p->components[n] == 4; + num_real_components += 1; + } else { + // padding must be in MSB or LSB + if (n != 0 && n != p->num_components - 1) + return; + } + } + + int depth = desc.component_size * 8 + MPMIN(0, desc.component_pad); + + static const int reorder_gbrp[] = {0, 3, 1, 2, 4}; + static const int reorder_yuv[] = {0, 1, 2, 3, 4}; + int planar_fmt = 0; + const int *reorder = NULL; + if (desc.forced_csp) { + if (desc.forced_csp != MP_CSP_RGB && desc.forced_csp != MP_CSP_XYZ) + return; + planar_fmt = find_gbrp_format(depth, num_real_components); + reorder = reorder_gbrp; + } else { + planar_fmt = find_yuv_format(depth, num_real_components); + reorder = reorder_yuv; + } + if (!planar_fmt) + return; + + for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { + const struct regular_repacker *pa = ®ular_repackers[i]; + + // The following may assume little endian (because some repack backends + // use word access, while the metadata here uses byte access). + + int prepad = p->components[0] ? 0 : 8; + int first_comp = p->components[0] ? 0 : 1; + void (*repack_cb)(void *pa, void *pb[], int w) = + rp->pack ? 
pa->pa_scanline : pa->un_scanline; + + if (pa->packed_width != desc.component_size * p->num_components * 8 || + pa->component_width != depth || + pa->num_components != num_real_components || + pa->prepadding != prepad || + !repack_cb) + continue; + + rp->repack = packed_repack; + rp->packed_repack_scanline = repack_cb; + rp->imgfmt_b = planar_fmt; + for (int n = 0; n < num_real_components; n++) { + // Determine permutation that maps component order between the two + // formats, with has_alpha special case (see above). + int c = reorder[p->components[first_comp + n]]; + rp->components[n] = c == 4 ? num_real_components - 1 : c - 1; + } + return; + } +} + +struct fringe_rgb_repacker { + // To avoid making a mess of IMGFMT_*, we use av formats directly. + enum AVPixelFormat avfmt; + // If true, use BGR instead of RGB. + // False: LSB - R - G - B - pad - MSB + // True: LSB - B - G - R - pad - MSB + bool rev_order; + // Size in bit for each component, strictly from LSB to MSB. + int bits[3]; + bool be; +}; + +static const struct fringe_rgb_repacker fringe_rgb_repackers[] = { + {AV_PIX_FMT_BGR4_BYTE, false, {1, 2, 1}}, + {AV_PIX_FMT_RGB4_BYTE, true, {1, 2, 1}}, + {AV_PIX_FMT_BGR8, false, {3, 3, 2}}, + {AV_PIX_FMT_RGB8, true, {2, 3, 3}}, // pixdesc desc. and doc. bug? 
+ {AV_PIX_FMT_RGB444LE, true, {4, 4, 4}}, + {AV_PIX_FMT_RGB444BE, true, {4, 4, 4}, .be = true}, + {AV_PIX_FMT_BGR444LE, false, {4, 4, 4}}, + {AV_PIX_FMT_BGR444BE, false, {4, 4, 4}, .be = true}, + {AV_PIX_FMT_BGR565LE, false, {5, 6, 5}}, + {AV_PIX_FMT_BGR565BE, false, {5, 6, 5}, .be = true}, + {AV_PIX_FMT_RGB565LE, true, {5, 6, 5}}, + {AV_PIX_FMT_RGB565BE, true, {5, 6, 5}, .be = true}, + {AV_PIX_FMT_BGR555LE, false, {5, 5, 5}}, + {AV_PIX_FMT_BGR555BE, false, {5, 5, 5}, .be = true}, + {AV_PIX_FMT_RGB555LE, true, {5, 5, 5}}, + {AV_PIX_FMT_RGB555BE, true, {5, 5, 5}, .be = true}, +}; + +#define PA_SHIFT_LUT8(name, packed_t) \ + static void name(void *dst, void *src[], int w, uint8_t *lut, \ + uint8_t s0, uint8_t s1, uint8_t s2) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = \ + (lut[((uint8_t *)src[0])[x] + 256 * 0] << s0) | \ + (lut[((uint8_t *)src[1])[x] + 256 * 1] << s1) | \ + (lut[((uint8_t *)src[2])[x] + 256 * 2] << s2); \ + } \ + } + + +#define UN_SHIFT_LUT8(name, packed_t) \ + static void name(void *src, void *dst[], int w, uint8_t *lut, \ + uint8_t s0, uint8_t s1, uint8_t s2) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((uint8_t *)dst[0])[x] = lut[((c >> s0) & 0xFF) + 256 * 0]; \ + ((uint8_t *)dst[1])[x] = lut[((c >> s1) & 0xFF) + 256 * 1]; \ + ((uint8_t *)dst[2])[x] = lut[((c >> s2) & 0xFF) + 256 * 2]; \ + } \ + } + +PA_SHIFT_LUT8(pa_shift_lut8_8, uint8_t) +PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t) +UN_SHIFT_LUT8(un_shift_lut8_8, uint8_t) +UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t) + +static void fringe_rgb_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + assert(rp->comp_size == 1 || rp->comp_size == 2); + + void (*repack)(void *pa, void *pb[], int 
w, uint8_t *lut, + uint8_t s0, uint8_t s1, uint8_t s2) = NULL; + if (rp->pack) { + repack = rp->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16; + } else { + repack = rp->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16; + } + repack(pa, pb, w, rp->comp_lut, + rp->comp_shifts[0], rp->comp_shifts[1], rp->comp_shifts[2]); +} + +static void setup_fringe_rgb_packer(struct mp_repack *rp) +{ + enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a); + + const struct fringe_rgb_repacker *fmt = NULL; + for (int n = 0; n < MP_ARRAY_SIZE(fringe_rgb_repackers); n++) { + if (fringe_rgb_repackers[n].avfmt == avfmt) { + fmt = &fringe_rgb_repackers[n]; + break; + } + } + + if (!fmt) + return; + + int depth = fmt->bits[0]; + for (int n = 0; n < 3; n++) { + if (rp->flags & REPACK_CREATE_ROUND_DOWN) { + depth = MPMIN(depth, fmt->bits[n]); + } else { + depth = MPMAX(depth, fmt->bits[n]); + } + } + if (rp->flags & REPACK_CREATE_EXPAND_8BIT) + depth = 8; + + rp->imgfmt_b = find_gbrp_format(depth, 3); + if (!rp->imgfmt_b) + return; + rp->comp_lut = talloc_array(rp, uint8_t, 256 * 3); + rp->repack = fringe_rgb_repack; + static const int c_order_rgb[] = {3, 1, 2}; + static const int c_order_bgr[] = {2, 1, 3}; + for (int n = 0; n < 3; n++) + rp->components[n] = (fmt->rev_order ? 
c_order_bgr : c_order_rgb)[n] - 1; + + int bitpos = 0; + for (int n = 0; n < 3; n++) { + int bits = fmt->bits[n]; + rp->comp_shifts[n] = bitpos; + if (rp->comp_lut) { + uint8_t *lut = rp->comp_lut + 256 * n; + uint8_t zmax = (1 << depth) - 1; + uint8_t cmax = (1 << bits) - 1; + for (int v = 0; v < 256; v++) { + if (rp->pack) { + lut[v] = (v * cmax + zmax / 2) / zmax; + } else { + lut[v] = (v & cmax) * zmax / cmax; + } + } + } + bitpos += bits; + } + + rp->comp_size = (bitpos + 7) / 8; + assert(rp->comp_size == 1 || rp->comp_size == 2); + + if (fmt->be) { + assert(rp->comp_size == 2); + rp->endian_size = 2; + } +} + +static void unpack_pal(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint8_t *src = mp_image_pixel_ptr(a, 0, a_x, a_y); + uint32_t *pal = (void *)a->planes[1]; + + uint8_t *dst[4] = {0}; + for (int p = 0; p < b->num_planes; p++) + dst[p] = mp_image_pixel_ptr(b, p, b_x, b_y); + + for (int x = 0; x < w; x++) { + uint32_t c = pal[src[x]]; + dst[0][x] = (c >> 8) & 0xFF; // G + dst[1][x] = (c >> 0) & 0xFF; // B + dst[2][x] = (c >> 16) & 0xFF; // R + dst[3][x] = (c >> 24) & 0xFF; // A + } +} + +static void bitmap_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint8_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + uint8_t *pb = mp_image_pixel_ptr(b, 0, b_x, b_y); + + if (rp->pack) { + for (unsigned x = 0; x < w; x += 8) { + uint8_t d = 0; + int max_b = MPMIN(8, w - x); + for (int bp = 0; bp < max_b; bp++) + d |= (rp->comp_lut[pb[x + bp]]) << (7 - bp); + pa[x / 8] = d; + } + } else { + for (unsigned x = 0; x < w; x += 8) { + uint8_t d = pa[x / 8]; + int max_b = MPMIN(8, w - x); + for (int bp = 0; bp < max_b; bp++) + pb[x + bp] = rp->comp_lut[d & (1 << (7 - bp))]; + } + } +} + +static void setup_misc_packer(struct mp_repack *rp) +{ + // Although it's in regular_repackers[], the generic mpv imgfmt metadata + // can't handle it 
yet. + if (rp->imgfmt_a == IMGFMT_RGB30) { + int planar_fmt = find_gbrp_format(10, 3); + if (!planar_fmt) + return; + rp->imgfmt_b = planar_fmt; + rp->repack = packed_repack; + rp->packed_repack_scanline = rp->pack ? pa_ccc10z2 : un_ccc10x2; + static int c_order[] = {3, 2, 1}; + for (int n = 0; n < 3; n++) + rp->components[n] = c_order[n] - 1; + } else if (rp->imgfmt_a == IMGFMT_PAL8 && !rp->pack) { + int grap_fmt = find_gbrp_format(8, 4); + if (!grap_fmt) + return; + rp->imgfmt_b = grap_fmt; + rp->repack = unpack_pal; + } else { + enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a); + if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) { + rp->comp_lut = talloc_array(rp, uint8_t, 256); + rp->imgfmt_b = IMGFMT_Y1; + int max = 1; + if (rp->flags & REPACK_CREATE_EXPAND_8BIT) { + rp->imgfmt_b = IMGFMT_Y8; + max = 255; + } + bool inv = avfmt == AV_PIX_FMT_MONOWHITE; + for (int n = 0; n < 256; n++) { + rp->comp_lut[n] = rp->pack ? (inv ^ (n >= (max + 1) / 2)) + : ((inv ^ !!n) ? max : 0); + } + rp->repack = bitmap_repack; + return; + } + } +} + +struct fringe_yuv422_repacker { + // To avoid making a mess of IMGFMT_*, we use av formats directly. 
+ enum AVPixelFormat avfmt; + // In bits (depth/8 rounded up gives byte size) + int8_t depth; + // Word index of each sample: {y0, y1, cb, cr} + uint8_t comp[4]; + bool be; +}; + +static const struct fringe_yuv422_repacker fringe_yuv422_repackers[] = { + {AV_PIX_FMT_YUYV422, 8, {0, 2, 1, 3}}, + {AV_PIX_FMT_UYVY422, 8, {1, 3, 0, 2}}, + {AV_PIX_FMT_YVYU422, 8, {0, 2, 3, 1}}, +#ifdef AV_PIX_FMT_Y210 + {AV_PIX_FMT_Y210LE, 10, {0, 2, 1, 3}}, + {AV_PIX_FMT_Y210BE, 10, {0, 2, 1, 3}, .be = true}, +#endif +}; + +#define PA_P422(name, comp_t) \ + static void name(void *dst, void *src[], int w, uint8_t *c) { \ + for (int x = 0; x < w; x += 2) { \ + ((comp_t *)dst)[x * 2 + c[0]] = ((comp_t *)src[0])[x + 0]; \ + ((comp_t *)dst)[x * 2 + c[1]] = ((comp_t *)src[0])[x + 1]; \ + ((comp_t *)dst)[x * 2 + c[2]] = ((comp_t *)src[1])[x >> 1]; \ + ((comp_t *)dst)[x * 2 + c[3]] = ((comp_t *)src[2])[x >> 1]; \ + } \ + } + + +#define UN_P422(name, comp_t) \ + static void name(void *src, void *dst[], int w, uint8_t *c) { \ + for (int x = 0; x < w; x += 2) { \ + ((comp_t *)dst[0])[x + 0] = ((comp_t *)src)[x * 2 + c[0]]; \ + ((comp_t *)dst[0])[x + 1] = ((comp_t *)src)[x * 2 + c[1]]; \ + ((comp_t *)dst[1])[x >> 1] = ((comp_t *)src)[x * 2 + c[2]]; \ + ((comp_t *)dst[2])[x >> 1] = ((comp_t *)src)[x * 2 + c[3]]; \ + } \ + } + +PA_P422(pa_p422_8, uint8_t) +PA_P422(pa_p422_16, uint16_t) +UN_P422(un_p422_8, uint8_t) +UN_P422(un_p422_16, uint16_t) + +static void fringe_yuv422_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) + pb[p] = mp_image_pixel_ptr(b, p, b_x, b_y); + + assert(rp->comp_size == 1 || rp->comp_size == 2); + + void (*repack)(void *a, void *b[], int w, uint8_t *c) = NULL; + if (rp->pack) { + repack = rp->comp_size == 1 ? pa_p422_8 : pa_p422_16; + } else { + repack = rp->comp_size == 1 ? 
un_p422_8 : un_p422_16; + } + repack(pa, pb, w, rp->comp_map); +} + +static void setup_fringe_yuv422_packer(struct mp_repack *rp) +{ + enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a); + + const struct fringe_yuv422_repacker *fmt = NULL; + for (int n = 0; n < MP_ARRAY_SIZE(fringe_yuv422_repackers); n++) { + if (fringe_yuv422_repackers[n].avfmt == avfmt) { + fmt = &fringe_yuv422_repackers[n]; + break; + } + } + + if (!fmt) + return; + + rp->comp_size = (fmt->depth + 7) / 8; + assert(rp->comp_size == 1 || rp->comp_size == 2); + + struct mp_regular_imgfmt yuvfmt = { + .component_type = MP_COMPONENT_TYPE_UINT, + // NB: same problem with P010 and not clearing padding. + .component_size = rp->comp_size, + .num_planes = 3, + .planes = { {1, {1}}, {1, {2}}, {1, {3}} }, + .chroma_xs = 1, + .chroma_ys = 0, + }; + rp->imgfmt_b = mp_find_regular_imgfmt(&yuvfmt); + rp->repack = fringe_yuv422_repack; + rp->comp_map = (uint8_t *)fmt->comp; + + if (fmt->be) { + assert(rp->comp_size == 2); + rp->endian_size = 2; + } +} + +static void repack_nv(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + int xs = a->fmt.chroma_xs; + + uint32_t *pa = mp_image_pixel_ptr(a, 1, a_x, a_y); + + void *pb[2]; + for (int p = 0; p < 2; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + rp->packed_repack_scanline(pa, pb, (w + (1 << xs) - 1) >> xs); +} + +static void setup_nv_packer(struct mp_repack *rp) +{ + struct mp_regular_imgfmt desc; + if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a)) + return; + + // Check for NV. + if (desc.num_planes != 2) + return; + if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1) + return; + if (desc.planes[1].num_components != 2) + return; + int cr0 = desc.planes[1].components[0]; + int cr1 = desc.planes[1].components[1]; + if (cr0 > cr1) + MPSWAP(int, cr0, cr1); + if (cr0 != 2 || cr1 != 3) + return; + + // Construct equivalent planar format. 
+ struct mp_regular_imgfmt desc2 = desc; + desc2.num_planes = 3; + desc2.planes[1].num_components = 1; + desc2.planes[1].components[0] = 2; + desc2.planes[2].num_components = 1; + desc2.planes[2].components[0] = 3; + // For P010. Strangely this concept exists only for the NV format. + if (desc2.component_pad > 0) + desc2.component_pad = 0; + + int planar_fmt = mp_find_regular_imgfmt(&desc2); + if (!planar_fmt) + return; + + for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { + const struct regular_repacker *pa = &regular_repackers[i]; + + void (*repack_cb)(void *pa, void *pb[], int w) = + rp->pack ? pa->pa_scanline : pa->un_scanline; + + if (pa->packed_width != desc.component_size * 2 * 8 || + pa->component_width != desc.component_size * 8 || + pa->num_components != 2 || + pa->prepadding != 0 || + !repack_cb) + continue; + + rp->repack = repack_nv; + rp->passthrough_y = true; + rp->packed_repack_scanline = repack_cb; + rp->imgfmt_b = planar_fmt; + rp->components[0] = desc.planes[1].components[0] - 1; + rp->components[1] = desc.planes[1].components[1] - 1; + return; + } +} + +void repack_line(struct mp_repack *rp, int dst_x, int dst_y, + int src_x, int src_y, int w) +{ + assert(rp->configured); + + struct repack_step *first = &rp->steps[0]; + struct repack_step *last = &rp->steps[rp->num_steps - 1]; + + assert(dst_x >= 0 && dst_y >= 0 && src_x >= 0 && src_y >= 0 && w >= 0); + assert(dst_x + w <= MP_ALIGN_UP(last->buf[1]->w, last->fmt[1].align_x)); + assert(src_x + w <= MP_ALIGN_UP(first->buf[1]->w, first->fmt[0].align_x)); + assert(dst_y < last->buf[1]->h); + assert(src_y < first->buf[0]->h); + assert(!(dst_x & (last->fmt[1].align_x - 1))); + assert(!(src_x & (first->fmt[0].align_x - 1))); + assert(!(w & ((1 << first->fmt[0].chroma_xs) - 1))); + assert(!(dst_y & (last->fmt[1].align_y - 1))); + assert(!(src_y & (first->fmt[0].align_y - 1))); + + for (int n = 0; n < rp->num_steps; n++) { + struct repack_step *rs = &rp->steps[n]; + + // When writing to temporary 
buffers, always write to the start (maybe + // helps with locality). + int sx = rs->user_buf[0] ? src_x : 0; + int sy = rs->user_buf[0] ? src_y : 0; + int dx = rs->user_buf[1] ? dst_x : 0; + int dy = rs->user_buf[1] ? dst_y : 0; + + struct mp_image *buf_a = rs->buf[rp->pack]; + struct mp_image *buf_b = rs->buf[!rp->pack]; + int a_x = rp->pack ? dx : sx; + int a_y = rp->pack ? dy : sy; + int b_x = rp->pack ? sx : dx; + int b_y = rp->pack ? sy : dy; + + switch (rs->type) { + case REPACK_STEP_REPACK: { + if (rp->repack) + rp->repack(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w); + + for (int p = 0; p < rs->fmt[0].num_planes; p++) { + if (rp->copy_buf[p]) + copy_plane(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w, p); + } + break; + } + case REPACK_STEP_ENDIAN: + swap_endian(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w, + rp->endian_size); + break; + } + } +} + +static bool setup_format_ne(struct mp_repack *rp) +{ + if (!rp->imgfmt_b) + setup_nv_packer(rp); + if (!rp->imgfmt_b) + setup_misc_packer(rp); + if (!rp->imgfmt_b) + setup_packed_packer(rp); + if (!rp->imgfmt_b) + setup_fringe_rgb_packer(rp); + if (!rp->imgfmt_b) + setup_fringe_yuv422_packer(rp); + if (!rp->imgfmt_b) + rp->imgfmt_b = rp->imgfmt_a; // maybe it was planar after all + + struct mp_regular_imgfmt desc; + if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_b)) + return false; + + // no weird stuff + if (desc.num_planes > 4) + return false; + + // Endian swapping. + if (rp->imgfmt_a != rp->imgfmt_user) { + struct mp_regular_imgfmt ndesc; + if (!mp_get_regular_imgfmt(&ndesc, rp->imgfmt_a) || ndesc.num_planes > 4) + return false; + rp->endian_size = ndesc.component_size; + if (rp->endian_size != 2 && rp->endian_size != 4) + return false; + } + + // Accept only true planar formats (with known components and no padding). 
+ for (int n = 0; n < desc.num_planes; n++) { + if (desc.planes[n].num_components != 1) + return false; + int c = desc.planes[n].components[0]; + if (c < 1 || c > 4) + return false; + } + + rp->fmt_a = mp_imgfmt_get_desc(rp->imgfmt_a); + rp->fmt_b = mp_imgfmt_get_desc(rp->imgfmt_b); + + // This is if we did a pack step. + + rp->steps[rp->num_steps++] = (struct repack_step) { + .type = REPACK_STEP_REPACK, + .fmt = { rp->fmt_b, rp->fmt_a }, + }; + + if (rp->endian_size) { + rp->steps[rp->num_steps++] = (struct repack_step) { + .type = REPACK_STEP_ENDIAN, + .fmt = { + rp->fmt_a, + mp_imgfmt_get_desc(rp->imgfmt_user), + }, + }; + } + + // Reverse if unpack (to reflect actual data flow) + if (!rp->pack) { + for (int n = 0; n < rp->num_steps / 2; n++) { + MPSWAP(struct repack_step, rp->steps[n], + rp->steps[rp->num_steps - 1 - n]); + } + for (int n = 0; n < rp->num_steps; n++) { + struct repack_step *rs = &rp->steps[n]; + MPSWAP(struct mp_imgfmt_desc, rs->fmt[0], rs->fmt[1]); + } + } + + for (int n = 0; n < rp->num_steps - 1; n++) + assert(rp->steps[n].fmt[1].id == rp->steps[n + 1].fmt[0].id); + + return true; +} + +static void reset_params(struct mp_repack *rp) +{ + rp->num_steps = 0; + rp->imgfmt_b = 0; + rp->repack = NULL; + rp->passthrough_y = false; + rp->endian_size = 0; + rp->packed_repack_scanline = NULL; + rp->comp_size = 0; + rp->comp_map = NULL; + talloc_free(rp->comp_lut); + rp->comp_lut = NULL; +} + +static bool setup_format(struct mp_repack *rp) +{ + reset_params(rp); + rp->imgfmt_a = rp->imgfmt_user; + if (setup_format_ne(rp)) + return true; + // Try reverse endian. 
+ reset_params(rp); + rp->imgfmt_a = mp_find_other_endian(rp->imgfmt_user); + return rp->imgfmt_a && setup_format_ne(rp); +} + +struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags) +{ + struct mp_repack *rp = talloc_zero(NULL, struct mp_repack); + rp->imgfmt_user = imgfmt; + rp->pack = pack; + rp->flags = flags; + + if (!setup_format(rp)) { + talloc_free(rp); + return NULL; + } + + return rp; +} + +int mp_repack_get_format_src(struct mp_repack *rp) +{ + return rp->steps[0].fmt[0].id; +} + +int mp_repack_get_format_dst(struct mp_repack *rp) +{ + return rp->steps[rp->num_steps - 1].fmt[1].id; +} + +int mp_repack_get_align_x(struct mp_repack *rp) +{ + // We really want the LCM between those, but since only one of them is + // packed (or they're the same format), and the chroma subsampling is the + // same for both, only the packed one matters. + return rp->fmt_a.align_x; +} + +int mp_repack_get_align_y(struct mp_repack *rp) +{ + return rp->fmt_a.align_y; // should be the same for packed/planar formats +} + +static void image_realloc(struct mp_image **img, int fmt, int w, int h) +{ + if (*img && (*img)->imgfmt == fmt && (*img)->w == w && (*img)->h == h) + return; + talloc_free(*img); + *img = mp_image_alloc(fmt, w, h); +} + +bool repack_config_buffers(struct mp_repack *rp, + int dst_flags, struct mp_image *dst, + int src_flags, struct mp_image *src, + bool *enable_passthrough) +{ + struct repack_step *rs_first = &rp->steps[0]; + struct repack_step *rs_last = &rp->steps[rp->num_steps - 1]; + + rp->configured = false; + + assert(dst && src); + + int buf_w = MPMAX(dst->w, src->w); + + assert(dst->imgfmt == rs_last->fmt[1].id); + assert(src->imgfmt == rs_first->fmt[0].id); + + // Chain/allocate buffers. 
+ + for (int n = 0; n < rp->num_steps; n++) + rp->steps[n].buf[0] = rp->steps[n].buf[1] = NULL; + + rs_first->buf[0] = src; + rs_last->buf[1] = dst; + + for (int n = 0; n < rp->num_steps; n++) { + struct repack_step *rs = &rp->steps[n]; + + if (!rs->buf[0]) { + assert(n > 0); + rs->buf[0] = rp->steps[n - 1].buf[1]; + } + + if (rs->buf[1]) + continue; + + // Note: since repack_line() can have different src/dst offsets, we + // can't do true in-place in general. + bool can_inplace = rs->type == REPACK_STEP_ENDIAN && + rs->buf[0] != src && rs->buf[0] != dst; + if (can_inplace) { + rs->buf[1] = rs->buf[0]; + continue; + } + + if (rs != rs_last) { + struct repack_step *next = &rp->steps[n + 1]; + if (next->buf[0]) { + rs->buf[1] = next->buf[0]; + continue; + } + } + + image_realloc(&rs->tmp, rs->fmt[1].id, buf_w, rs->fmt[1].align_y); + if (!rs->tmp) + return false; + talloc_steal(rp, rs->tmp); + rs->buf[1] = rs->tmp; + } + + for (int n = 0; n < rp->num_steps; n++) { + struct repack_step *rs = &rp->steps[n]; + rs->user_buf[0] = rs->buf[0] == src || rs->buf[0] == dst; + rs->user_buf[1] = rs->buf[1] == src || rs->buf[1] == dst; + } + + // If repacking is the only operation. It's also responsible for simply + // copying src to dst if absolutely no filtering is done. + bool may_passthrough = + rp->num_steps == 1 && rp->steps[0].type == REPACK_STEP_REPACK; + + for (int p = 0; p < rp->fmt_b.num_planes; p++) { + // (All repack callbacks copy, except nv12 does not copy luma.) + bool repack_copies_plane = rp->repack && !(rp->passthrough_y && p == 0); + + bool can_pt = may_passthrough && !repack_copies_plane && + enable_passthrough && enable_passthrough[p]; + + // Copy if needed, unless the repack callback does it anyway. 
+ rp->copy_buf[p] = !repack_copies_plane && !can_pt; + + if (enable_passthrough) + enable_passthrough[p] = can_pt && !rp->copy_buf[p]; + } + + if (enable_passthrough) { + for (int n = rp->fmt_b.num_planes; n < MP_MAX_PLANES; n++) + enable_passthrough[n] = false; + } + + rp->configured = true; + + return true; +} diff --git a/video/repack.h b/video/repack.h new file mode 100644 index 0000000000..fa81ca9df2 --- /dev/null +++ b/video/repack.h @@ -0,0 +1,72 @@ +#pragma once + +#include <stdbool.h> + +enum { + // This controls behavior with different bit widths per component (like + // RGB565). If ROUND_DOWN is specified, the planar format will use the min. + // bit width of all components, otherwise the transformation is lossless. + REPACK_CREATE_ROUND_DOWN = (1 << 0), + + // Expand some (not all) low bit depth fringe formats to 8 bit on unpack. + REPACK_CREATE_EXPAND_8BIT = (1 << 1), +}; + +struct mp_repack; +struct mp_image; + +// Create a repacker between any format (imgfmt parameter) and an equivalent +// planar format (that is native endian). If pack==true, imgfmt is the output, +// otherwise it is the input. The respective other input/output is the planar +// format. The planar format can be queried with mp_repack_get_format_*(). +// Note that some formats may change the "implied" colorspace (for example, +// packed xyz unpacks as rgb). +// If imgfmt is already planar, a passthrough repacker may be created. +// imgfmt: src or dst format (usually packed, non-planar, etc.) +// pack: true if imgfmt is dst, false if imgfmt is src +// flags: any of REPACK_CREATE_* flags +// returns: NULL on failure, otherwise free with talloc_free(). +struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags); + +// Return input and output formats for which rp was created. +int mp_repack_get_format_src(struct mp_repack *rp); +int mp_repack_get_format_dst(struct mp_repack *rp); + +// Return pixel alignment. 
For x, this is the lowest pixel count at which there is +// a byte boundary and a full chroma pixel (horizontal subsampling) on src/dst. +// For y, this is the pixel height of the vertical subsampling. +// Always returns a power of 2. +int mp_repack_get_align_x(struct mp_repack *rp); +int mp_repack_get_align_y(struct mp_repack *rp); + +// Repack a single line from src to dst, as set in repack_config_buffers(). +// For subsampled chroma formats, this copies as many luma/alpha rows as needed +// for a complete line (e.g. 2 luma lines, 1 chroma line for 4:2:0). +// dst_x, src_x, y must be aligned to the pixel alignment. w may be unaligned +// if at the right crop-border of the image, but must be always aligned to +// horiz. sub-sampling. y is subject to hslice. +void repack_line(struct mp_repack *rp, int dst_x, int dst_y, +                 int src_x, int src_y, int w); + +// Configure with a source and target buffer. The rp instance will keep the +// mp_image pointers and access them on repack_line() calls. Refcounting is +// not respected - the caller needs to make sure dst is always writable. +// The images can have different sizes (as repack_line() lets you use different +// target coordinates for dst/src). +// This also allocates potentially required temporary buffers. +// dst_flags: REPACK_BUF_* flags for dst +// dst: where repack_line() writes to +// src_flags: REPACK_BUF_* flags for src +// src: where repack_line() reads from +// enable_passthrough: if non-NULL, a bool array of size MP_MAX_PLANES indexed +// by plane; a true entry requests disabling copying the +// plane data to the dst plane. The function will write to +// this array whether the plane can really be passed through +// (i.e. will set array entries from true to false if pass- +// through is not possible). It writes to all MP_MAX_PLANES +// entries. If NULL, all entries are implicitly false. 
+// returns: success (fails on OOM) +bool repack_config_buffers(struct mp_repack *rp, + int dst_flags, struct mp_image *dst, + int src_flags, struct mp_image *src, + bool *enable_passthrough); diff --git a/video/zimg.c b/video/zimg.c index ae3602d297..4e7711f61a 100644 --- a/video/zimg.c +++ b/video/zimg.c @@ -25,6 +25,7 @@ #include "csputils.h" #include "options/m_config.h" #include "options/m_option.h" +#include "repack.h" #include "video/fmt-conversion.h" #include "video/img_format.h" #include "zimg.h" @@ -83,50 +84,14 @@ struct mp_zimg_repack { int num_planes; // number of planes involved unsigned zmask[4]; // zmask[mp_index] = zimg mask (using mp index!) int z_planes[4]; // z_planes[zimg_index] = mp_index (or -1) - bool pass_through_y; // luma plane optimization for e.g. nv12 - // If set, the pack/unpack callback to pass to zimg. - // Called with user==mp_zimg_repack. - zimg_filter_graph_callback repack; - - // Endian-swap (done before/after actual repacker). - int endian_size; // 0=no swapping, 2/4=word byte size to swap - int endian_items[4]; // number of words per pixel/plane - - // For packed_repack. - int components[4]; // p2[n] = mp_image.planes[components[n]] - // pack: p1 is dst, p2 is src - // unpack: p1 is src, p2 is dst - void (*packed_repack_scanline)(void *p1, void *p2[], int x0, int x1); - - // Fringe RGB/YUV. - uint8_t comp_size; - uint8_t *comp_map; - uint8_t comp_shifts[3]; - uint8_t *comp_lut; // 256 * 3 + struct mp_repack *repack; // converting to/from planar // Temporary memory for slice-wise repacking. This may be set even if repack // is not set (then it may be used to avoid alignment issues). This has // about one slice worth of data. struct mp_image *tmp; - // Temporary memory for endian swapping. This has about one slice worth - // of data; set and used only if endian swapping is used (endian_size>0). - // It's also used only for pack==false; packers do this in-place. 
- struct mp_image *tmp_endian; - - // Temporary, per-call source/target frame. - struct mp_image *mpi; - // Y coordinate of first line in mpi; usually 0 if mpi==user_mpi, or the - // start of the current slice (in the current repack cb). - // repackers should use: mpi->data[p] + mpi->stride[p] * (i - mpi_y0) - int mpi_y0; - - struct mp_image *user_mpi; - - // Also temporary, per-call. use_buf[n] == plane n uses tmp (and not mpi). - bool use_buf[4]; - int real_w, real_h; // aligned size }; @@ -243,532 +208,44 @@ void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx, mp_zimg_update_from_cmdline(ctx); // first update } -static int repack_align(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - for (int p = 0; p < r->mpi->fmt.num_planes; p++) { - if (!r->use_buf[p]) - continue; - - int bpp = r->mpi->fmt.bytes[p]; - int xs = r->mpi->fmt.xs[p]; - int ys = r->mpi->fmt.ys[p]; - // Number of lines on this plane. - int h = (1 << r->mpi->fmt.chroma_ys) - (1 << ys) + 1; - - for (int y = i; y < i + h; y++) { - void *a = r->mpi->planes[p] + - r->mpi->stride[p] * (ptrdiff_t)((y - r->mpi_y0) >> ys) + - bpp * (x0 >> xs); - void *b = r->tmp->planes[p] + - r->tmp->stride[p] * (ptrdiff_t)((y >> ys) & r->zmask[p]) + - bpp * (x0 >> xs); - size_t size = ((x1 - x0) >> xs) * bpp; - if (r->pack) { - memcpy(a, b, size); - } else { - memcpy(b, a, size); - } - } - } - - return 0; -} - -// Swap endian for one line. -static void swap_endian(struct mp_zimg_repack *r, struct mp_image *dst, int dst_y, - struct mp_image *src, int src_y, int x0, int x1) -{ - for (int p = 0; p < dst->fmt.num_planes; p++) { - int xs = dst->fmt.xs[p]; - int ys = dst->fmt.ys[p]; - int words_per_pixel = r->endian_items[p]; - int bpp = words_per_pixel * r->endian_size; - // Number of lines on this plane. 
- int h = (1 << dst->fmt.chroma_ys) - (1 << ys) + 1; - int num_words = ((x1 - x0) >> xs) * words_per_pixel; - - for (int y = 0; y < h; y++) { - void *s = src->planes[p] + - src->stride[p] * (ptrdiff_t)((y + src_y) >> ys) + - bpp * (x0 >> xs); - void *d = dst->planes[p] + - dst->stride[p] * (ptrdiff_t)((y + dst_y) >> ys) + - bpp * (x0 >> xs); - switch (r->endian_size) { - case 2: - for (int w = 0; w < num_words; w++) - ((uint16_t *)d)[w] = av_bswap16(((uint16_t *)s)[w]); - break; - case 4: - for (int w = 0; w < num_words; w++) - ((uint32_t *)d)[w] = av_bswap32(((uint32_t *)s)[w]); - break; - default: - assert(0); - } - } - } -} - -// PA = PAck, copy planar input to single packed array -// UN = UNpack, copy packed input to planar output -// Naming convention: -// pa_/un_ prefix to identify conversion direction. -// Left (LSB, lowest byte address) -> Right (MSB, highest byte address). -// (This is unusual; MSB to LSB is more commonly used to describe formats, -// but our convention makes more sense for byte access in little endian.) -// "c" identifies a color component. -// "z" identifies known zero padding. -// "x" identifies uninitialized padding. -// A component is followed by its size in bits. -// Size can be omitted for multiple uniform components (c8c8c8 == ccc8). -// Unpackers will often use "x" for padding, because they ignore it, while -// packers will use "z" because they write zero. 
- -#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = \ - ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ - ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \ - ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) | \ - ((packed_t)((plane_t *)src[3])[x] << (sh_c3)); \ - } \ - } - -#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\ - static void name(void *src, void *dst[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ - ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ - ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ - ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask); \ - } \ - } - - -#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = (pad) | \ - ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ - ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) | \ - ((packed_t)((plane_t *)src[2])[x] << (sh_c2)); \ - } \ - } - -UN_WORD_4(un_cccc8, uint32_t, uint8_t, 0, 8, 16, 24, 0xFFu) -PA_WORD_4(pa_cccc8, uint32_t, uint8_t, 0, 8, 16, 24) -// Not sure if this is a good idea; there may be no alignment guarantee. 
-UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu) -PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48) - -#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \ - static void name(void *src, void *dst[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ - ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ - ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ - } \ - } - -UN_WORD_3(un_ccc8x8, uint32_t, uint8_t, 0, 8, 16, 0xFFu) -PA_WORD_3(pa_ccc8z8, uint32_t, uint8_t, 0, 8, 16, 0) -UN_WORD_3(un_x8ccc8, uint32_t, uint8_t, 8, 16, 24, 0xFFu) -PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0) -UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu) -PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 20, 10, 0, 0) - -#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = (pad) | \ - ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ - ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \ - } \ - } - -#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \ - static void name(void *src, void *dst[], int x0, int x1) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ - ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ - } \ - } - -UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu) -PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0) -UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu) -PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0) - -#define PA_SEQ_3(name, comp_t) \ - static void name(void *dst, void *src[], int x0, int x1) { \ - comp_t *r = dst; \ - for (int x = x0; x < x1; x++) { \ - *r++ = ((comp_t *)src[0])[x]; \ - *r++ = ((comp_t *)src[1])[x]; \ - *r++ = ((comp_t *)src[2])[x]; \ - } \ - } - -#define UN_SEQ_3(name, 
comp_t) \ - static void name(void *src, void *dst[], int x0, int x1) { \ - comp_t *r = src; \ - for (int x = x0; x < x1; x++) { \ - ((comp_t *)dst[0])[x] = *r++; \ - ((comp_t *)dst[1])[x] = *r++; \ - ((comp_t *)dst[2])[x] = *r++; \ - } \ - } - -UN_SEQ_3(un_ccc8, uint8_t) -PA_SEQ_3(pa_ccc8, uint8_t) -UN_SEQ_3(un_ccc16, uint16_t) -PA_SEQ_3(pa_ccc16, uint16_t) - -// "regular": single packed plane, all components have same width (except padding) -struct regular_repacker { - int packed_width; // number of bits of the packed pixel - int component_width; // number of bits for a single component - int prepadding; // number of bits of LSB padding - int num_components; // number of components that can be accessed - void (*pa_scanline)(void *p1, void *p2[], int x0, int x1); - void (*un_scanline)(void *p1, void *p2[], int x0, int x1); -}; - -static const struct regular_repacker regular_repackers[] = { - {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8}, - {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8}, - {32, 8, 0, 4, pa_cccc8, un_cccc8}, - {64, 16, 0, 4, pa_cccc16, un_cccc16}, - {24, 8, 0, 3, pa_ccc8, un_ccc8}, - {48, 16, 0, 3, pa_ccc16, un_ccc16}, - {16, 8, 0, 2, pa_cc8, un_cc8}, - {32, 16, 0, 2, pa_cc16, un_cc16}, - {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2}, -}; - -static int packed_repack(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - uint32_t *p1 = (void *)(r->mpi->planes[0] + - r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0)); - - void *p2[4] = {0}; - for (int p = 0; p < r->num_planes; p++) { - int s = r->components[p]; - p2[p] = r->tmp->planes[s] + - r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]); - } - - r->packed_repack_scanline(p1, p2, x0, x1); - - return 0; -} - -struct fringe_rgb_repacker { - // To avoid making a mess of IMGFMT_*, we use av formats directly. - enum AVPixelFormat avfmt; - // If true, use BGR instead of RGB. 
- // False: LSB - R - G - B - pad - MSB - // True: LSB - B - G - R - pad - MSB - bool rev_order; - // Size in bit for each component, strictly from LSB to MSB. - int bits[3]; - bool be; -}; - -static const struct fringe_rgb_repacker fringe_rgb_repackers[] = { - {AV_PIX_FMT_BGR4_BYTE, false, {1, 2, 1}}, - {AV_PIX_FMT_RGB4_BYTE, true, {1, 2, 1}}, - {AV_PIX_FMT_BGR8, false, {3, 3, 2}}, - {AV_PIX_FMT_RGB8, true, {2, 3, 3}}, // pixdesc desc. and doc. bug? - {AV_PIX_FMT_RGB444LE, true, {4, 4, 4}}, - {AV_PIX_FMT_RGB444BE, true, {4, 4, 4}, .be = true}, - {AV_PIX_FMT_BGR444LE, false, {4, 4, 4}}, - {AV_PIX_FMT_BGR444BE, false, {4, 4, 4}, .be = true}, - {AV_PIX_FMT_BGR565LE, false, {5, 6, 5}}, - {AV_PIX_FMT_BGR565BE, false, {5, 6, 5}, .be = true}, - {AV_PIX_FMT_RGB565LE, true, {5, 6, 5}}, - {AV_PIX_FMT_RGB565BE, true, {5, 6, 5}, .be = true}, - {AV_PIX_FMT_BGR555LE, false, {5, 5, 5}}, - {AV_PIX_FMT_BGR555BE, false, {5, 5, 5}, .be = true}, - {AV_PIX_FMT_RGB555LE, true, {5, 5, 5}}, - {AV_PIX_FMT_RGB555BE, true, {5, 5, 5}, .be = true}, -}; - -#define PA_SHIFT_LUT8(name, packed_t) \ - static void name(void *dst, void *src[], int x0, int x1, uint8_t *lut, \ - uint8_t s0, uint8_t s1, uint8_t s2) { \ - for (int x = x0; x < x1; x++) { \ - ((packed_t *)dst)[x] = \ - (lut[((uint8_t *)src[0])[x] + 256 * 0] << s0) | \ - (lut[((uint8_t *)src[1])[x] + 256 * 1] << s1) | \ - (lut[((uint8_t *)src[2])[x] + 256 * 2] << s2); \ - } \ - } - - -#define UN_SHIFT_LUT8(name, packed_t) \ - static void name(void *src, void *dst[], int x0, int x1, uint8_t *lut, \ - uint8_t s0, uint8_t s1, uint8_t s2) { \ - for (int x = x0; x < x1; x++) { \ - packed_t c = ((packed_t *)src)[x]; \ - ((uint8_t *)dst[0])[x] = lut[((c >> s0) & 0xFF) + 256 * 0]; \ - ((uint8_t *)dst[1])[x] = lut[((c >> s1) & 0xFF) + 256 * 1]; \ - ((uint8_t *)dst[2])[x] = lut[((c >> s2) & 0xFF) + 256 * 2]; \ - } \ - } - -PA_SHIFT_LUT8(pa_shift_lut8_8, uint8_t) -PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t) -UN_SHIFT_LUT8(un_shift_lut8_8, uint8_t) 
-UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t) - -static int fringe_rgb_repack(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - void *p1 = r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0); - - void *p2[4] = {0}; - for (int p = 0; p < r->num_planes; p++) { - int s = r->components[p]; - p2[p] = r->tmp->planes[s] + - r->tmp->stride[s] * (ptrdiff_t)(i & r->zmask[s]); - } - - assert(r->comp_size == 1 || r->comp_size == 2); - - void (*repack)(void *p1, void *p2[], int x0, int x1, uint8_t *lut, - uint8_t s0, uint8_t s1, uint8_t s2) = NULL; - if (r->pack) { - repack = r->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16; - } else { - repack = r->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16; - } - repack(p1, p2, x0, x1, r->comp_lut, - r->comp_shifts[0], r->comp_shifts[1], r->comp_shifts[2]); - - return 0; -} - -static int bitmap_repack(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - uint8_t *p1 = - r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0); - uint8_t *p2 = - r->tmp->planes[0] + r->tmp->stride[0] * (ptrdiff_t)(i & r->zmask[0]); - - uint8_t swap = r->comp_size ? 0xFF : 0; - if (r->pack) { - // Supposedly zimg aligns this at least on 64 byte boundaries. Simplifies a - // lot for us. 
- assert(!(x0 & 7)); - - for (int x = x0; x < x1; x += 8) { - uint8_t d = 0; - int max_b = MPMIN(8, x1 - x); - for (int b = 0; b < max_b; b++) - d |= (!!p2[x + b]) << (7 - b); - p1[x / 8] = d ^ swap; - } - } else { - x0 &= ~0x7; - - for (int x = x0; x < x1; x += 8) { - uint8_t d = p1[x / 8] ^ swap; - int max_b = MPMIN(8, x1 - x); - for (int b = 0; b < max_b; b++) - p2[x + b] = !!(d & (1 << (7 - b))); - } - } - - return 0; -} - -static int unpack_pal(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - uint8_t *src = (void *)(r->mpi->planes[0] + - r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0)); - uint32_t *pal = (void *)r->mpi->planes[1]; - - uint8_t *dst[4] = {0}; - for (int p = 0; p < r->num_planes; p++) { - dst[p] = r->tmp->planes[p] + - r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]); - } - - for (int x = x0; x < x1; x++) { - uint32_t c = pal[src[x]]; - dst[0][x] = (c >> 8) & 0xFF; // G - dst[1][x] = (c >> 0) & 0xFF; // B - dst[2][x] = (c >> 16) & 0xFF; // R - dst[3][x] = (c >> 24) & 0xFF; // A - } - - return 0; -} - -struct fringe_yuv422_repacker { - // To avoid making a mess of IMGFMT_*, we use av formats directly. 
- enum AVPixelFormat avfmt; - // In bits (depth/8 rounded up gives byte size) - int8_t depth; - // Word index of each sample: {y0, y1, cb, cr} - uint8_t comp[4]; - bool be; -}; - -static const struct fringe_yuv422_repacker fringe_yuv422_repackers[] = { - {AV_PIX_FMT_YUYV422, 8, {0, 2, 1, 3}}, - {AV_PIX_FMT_UYVY422, 8, {1, 3, 0, 2}}, - {AV_PIX_FMT_YVYU422, 8, {0, 2, 3, 1}}, -#ifdef AV_PIX_FMT_Y210 - {AV_PIX_FMT_Y210LE, 10, {0, 2, 1, 3}}, - {AV_PIX_FMT_Y210BE, 10, {0, 2, 1, 3}, .be = true}, -#endif -}; - -#define PA_P422(name, comp_t) \ - static void name(void *dst, void *src[], int x0, int x1, uint8_t *c) { \ - for (int x = x0; x < x1; x += 2) { \ - ((comp_t *)dst)[x * 2 + c[0]] = ((comp_t *)src[0])[x + 0]; \ - ((comp_t *)dst)[x * 2 + c[1]] = ((comp_t *)src[0])[x + 1]; \ - ((comp_t *)dst)[x * 2 + c[2]] = ((comp_t *)src[1])[x >> 1]; \ - ((comp_t *)dst)[x * 2 + c[3]] = ((comp_t *)src[2])[x >> 1]; \ - } \ - } - - -#define UN_P422(name, comp_t) \ - static void name(void *src, void *dst[], int x0, int x1, uint8_t *c) { \ - for (int x = x0; x < x1; x += 2) { \ - ((comp_t *)dst[0])[x + 0] = ((comp_t *)src)[x * 2 + c[0]]; \ - ((comp_t *)dst[0])[x + 1] = ((comp_t *)src)[x * 2 + c[1]]; \ - ((comp_t *)dst[1])[x >> 1] = ((comp_t *)src)[x * 2 + c[2]]; \ - ((comp_t *)dst[2])[x >> 1] = ((comp_t *)src)[x * 2 + c[3]]; \ - } \ - } - -PA_P422(pa_p422_8, uint8_t) -PA_P422(pa_p422_16, uint16_t) -UN_P422(un_p422_8, uint8_t) -UN_P422(un_p422_16, uint16_t) - -static int fringe_yuv422_repack(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - void *p1 = r->mpi->planes[0] + r->mpi->stride[0] * (ptrdiff_t)(i - r->mpi_y0); - - void *p2[4] = {0}; - for (int p = 0; p < r->num_planes; p++) { - p2[p] = r->tmp->planes[p] + - r->tmp->stride[p] * (ptrdiff_t)(i & r->zmask[p]); - } - - assert(r->comp_size == 1 || r->comp_size == 2); - - void (*repack)(void *p1, void *p2[], int x0, int x1, uint8_t *c) = NULL; - if (r->pack) { - repack = r->comp_size == 1 ? 
pa_p422_8 : pa_p422_16; - } else { - repack = r->comp_size == 1 ? un_p422_8 : un_p422_16; - } - repack(p1, p2, x0, x1, r->comp_map); - - return 0; -} - -static int repack_nv(void *user, unsigned i, unsigned x0, unsigned x1) -{ - struct mp_zimg_repack *r = user; - - int xs = r->mpi->fmt.chroma_xs; - int ys = r->mpi->fmt.chroma_ys; - - if (r->use_buf[0]) { - // Copy Y. - int l_h = 1 << ys; - for (int y = i; y < i + l_h; y++) { - ptrdiff_t bpp = r->mpi->fmt.bytes[0]; - void *a = r->mpi->planes[0] + - r->mpi->stride[0] * (ptrdiff_t)(y - r->mpi_y0) + bpp * x0; - void *b = r->tmp->planes[0] + - r->tmp->stride[0] * (ptrdiff_t)(y & r->zmask[0]) + bpp * x0; - size_t size = (x1 - x0) * bpp; - if (r->pack) { - memcpy(a, b, size); - } else { - memcpy(b, a, size); - } - } - } - - uint32_t *p1 = (void *)(r->mpi->planes[1] + - r->mpi->stride[1] * (ptrdiff_t)((i - r->mpi_y0) >> ys)); - - void *p2[2]; - for (int p = 0; p < 2; p++) { - int s = r->components[p]; - p2[p] = r->tmp->planes[s] + - r->tmp->stride[s] * (ptrdiff_t)((i >> ys) & r->zmask[s]); - } - - r->packed_repack_scanline(p1, p2, x0 >> xs, x1 >> xs); - - return 0; -} - static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1) { struct mp_zimg_repack *r = user; - if (r->endian_size && !r->pack) { - r->mpi = r->tmp_endian; - r->mpi_y0 = i; - swap_endian(r, r->mpi, 0, r->user_mpi, i, x0, x1); - } else { - r->mpi = r->user_mpi; - r->mpi_y0 = 0; - } + // If reading is not aligned, just read slightly more data. + if (!r->pack) + x0 &= ~(unsigned)(mp_repack_get_align_x(r->repack) - 1); - if (r->repack) { - r->repack(r, i, x0, x1); - } else { - repack_align(r, i, x0, x1); - } + // mp_repack requirements and zimg guarantees. + assert(!(i & (mp_repack_get_align_y(r->repack) - 1))); + assert(!(x0 & (mp_repack_get_align_x(r->repack) - 1))); - if (r->endian_size && r->pack) - swap_endian(r, r->user_mpi, i, r->mpi, i - r->mpi_y0, x0, x1); + unsigned i_src = i & (r->pack ? 
r->zmask[0] : ZIMG_BUFFER_MAX); + unsigned i_dst = i & (r->pack ? ZIMG_BUFFER_MAX : r->zmask[0]); + + repack_line(r->repack, x0, i_dst, x0, i_src, x1 - x0); - r->mpi = NULL; return 0; } -static void wrap_buffer(struct mp_zimg_repack *r, +static bool wrap_buffer(struct mp_zimg_repack *r, zimg_image_buffer *buf, struct mp_image *mpi) { *buf = (zimg_image_buffer){ZIMG_API_VERSION}; - bool plane_aligned[4] = {0}; - for (int n = 0; n < r->num_planes; n++) { - plane_aligned[n] = !((uintptr_t)mpi->planes[n] % ZIMG_ALIGN) && - !(mpi->stride[n] % ZIMG_ALIGN); + bool direct[MP_MAX_PLANES] = {0}; + + for (int p = 0; p < mpi->num_planes; p++) { + // If alignment is good, try to avoid copy. + direct[p] = !((uintptr_t)mpi->planes[p] % ZIMG_ALIGN) && + !(mpi->stride[p] % ZIMG_ALIGN); } + if (!repack_config_buffers(r->repack, 0, r->pack ? mpi : r->tmp, + 0, r->pack ? r->tmp : mpi, direct)) + return false; + for (int n = 0; n < MP_ARRAY_SIZE(buf->plane); n++) { // Note: this is really the only place we have to care about plane // permutation (zimg_image_buffer may have a different plane order @@ -778,355 +255,67 @@ static void wrap_buffer(struct mp_zimg_repack *r, if (mplane < 0) continue; - r->use_buf[mplane] = !plane_aligned[mplane] || r->endian_size; - if (!(r->pass_through_y && mplane == 0)) - r->use_buf[mplane] |= !!r->repack; - - struct mp_image *tmpi = r->use_buf[mplane] ? r->tmp : mpi; + struct mp_image *tmpi = direct[mplane] ? mpi : r->tmp; buf->plane[n].data = tmpi->planes[mplane]; buf->plane[n].stride = tmpi->stride[mplane]; - buf->plane[n].mask = r->use_buf[mplane] ? r->zmask[mplane] - : ZIMG_BUFFER_MAX; + buf->plane[n].mask = direct[mplane] ? ZIMG_BUFFER_MAX : r->zmask[mplane]; } - r->user_mpi = mpi; + return true; } -// depth = number of LSB in use -static int find_gbrp_format(int depth, int num_planes) +// (ctx can be NULL for probing.) 
+static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, + bool pack, struct mp_image_params *user_fmt, + struct mp_zimg_context *ctx) { - if (num_planes != 3 && num_planes != 4) - return 0; - struct mp_regular_imgfmt desc = { - .component_type = MP_COMPONENT_TYPE_UINT, - .forced_csp = MP_CSP_RGB, - .component_size = depth > 8 ? 2 : 1, - .component_pad = depth - (depth > 8 ? 16 : 8), - .num_planes = num_planes, - .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} }, - }; - return mp_find_regular_imgfmt(&desc); -} + r->fmt = *user_fmt; + r->pack = pack; -// depth = number of LSB in use -static int find_gray_format(int depth, int num_planes) -{ - if (num_planes != 1 && num_planes != 2) - return 0; - struct mp_regular_imgfmt desc = { - .component_type = MP_COMPONENT_TYPE_UINT, - .component_size = depth > 8 ? 2 : 1, - .component_pad = depth - (depth > 8 ? 16 : 8), - .num_planes = num_planes, - .planes = { {1, {1}}, {1, {4}} }, - }; - return mp_find_regular_imgfmt(&desc); -} - -static void setup_fringe_rgb_packer(struct mp_zimg_repack *r, - struct mp_zimg_context *ctx) -{ - enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt); - - const struct fringe_rgb_repacker *fmt = NULL; - for (int n = 0; n < MP_ARRAY_SIZE(fringe_rgb_repackers); n++) { - if (fringe_rgb_repackers[n].avfmt == avfmt) { - fmt = &fringe_rgb_repackers[n]; - break; - } - } - - if (!fmt) - return; - - int depth = 8; - if (r->pack) { - // Dither to lowest depth - loses some precision, but result is saner. - depth = fmt->bits[0]; - for (int n = 0; n < 3; n++) - depth = MPMIN(depth, fmt->bits[n]); - } - - r->zimgfmt = find_gbrp_format(depth, 3); - if (!r->zimgfmt) - return; - if (ctx) - r->comp_lut = talloc_array(ctx, uint8_t, 256 * 3); - r->repack = fringe_rgb_repack; - static const int c_order_rgb[] = {3, 1, 2}; - static const int c_order_bgr[] = {2, 1, 3}; - for (int n = 0; n < 3; n++) - r->components[n] = (fmt->rev_order ? 
c_order_bgr : c_order_rgb)[n] - 1; - - int bitpos = 0; - for (int n = 0; n < 3; n++) { - int bits = fmt->bits[n]; - r->comp_shifts[n] = bitpos; - if (r->comp_lut) { - uint8_t *lut = r->comp_lut + 256 * n; - uint8_t zmax = (1 << depth) - 1; - uint8_t cmax = (1 << bits) - 1; - for (int v = 0; v < 256; v++) { - if (r->pack) { - lut[v] = (v * cmax + zmax / 2) / zmax; - } else { - lut[v] = (v & cmax) * zmax / cmax; - } - } - } - bitpos += bits; - } - - r->comp_size = (bitpos + 7) / 8; - assert(r->comp_size == 1 || r->comp_size == 2); - - if (fmt->be) { - assert(r->comp_size == 2); - r->endian_size = 2; - r->endian_items[0] = 1; - } -} - -static void setup_fringe_yuv422_packer(struct mp_zimg_repack *r) -{ - enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt); - - const struct fringe_yuv422_repacker *fmt = NULL; - for (int n = 0; n < MP_ARRAY_SIZE(fringe_yuv422_repackers); n++) { - if (fringe_yuv422_repackers[n].avfmt == avfmt) { - fmt = &fringe_yuv422_repackers[n]; - break; - } - } - - if (!fmt) - return; - - r->comp_size = (fmt->depth + 7) / 8; - assert(r->comp_size == 1 || r->comp_size == 2); - - struct mp_regular_imgfmt yuvfmt = { - .component_type = MP_COMPONENT_TYPE_UINT, - // NB: same problem with P010 and not clearing padding. - .component_size = r->comp_size, - .num_planes = 3, - .planes = { {1, {1}}, {1, {2}}, {1, {3}} }, - .chroma_xs = 1, - .chroma_ys = 0, - }; - r->zimgfmt = mp_find_regular_imgfmt(&yuvfmt); - r->repack = fringe_yuv422_repack; - r->comp_map = (uint8_t *)fmt->comp; - - if (fmt->be) { - assert(r->comp_size == 2); - r->endian_size = 2; - r->endian_items[0] = 4; - } -} - -static void setup_nv_packer(struct mp_zimg_repack *r) -{ - struct mp_regular_imgfmt desc; - if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) - return; - - // Check for NV. 
- if (desc.num_planes != 2) - return; - if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1) - return; - if (desc.planes[1].num_components != 2) - return; - int cr0 = desc.planes[1].components[0]; - int cr1 = desc.planes[1].components[1]; - if (cr0 > cr1) - MPSWAP(int, cr0, cr1); - if (cr0 != 2 || cr1 != 3) - return; - - // Construct equivalent planar format. - struct mp_regular_imgfmt desc2 = desc; - desc2.num_planes = 3; - desc2.planes[1].num_components = 1; - desc2.planes[1].components[0] = 2; - desc2.planes[2].num_components = 1; - desc2.planes[2].components[0] = 3; - // For P010. Strangely this concept exists only for the NV format. - if (desc2.component_pad > 0) - desc2.component_pad = 0; - - int planar_fmt = mp_find_regular_imgfmt(&desc2); - if (!planar_fmt) - return; - - for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { - const struct regular_repacker *pa = ®ular_repackers[i]; - - void (*repack_cb)(void *p1, void *p2[], int x0, int x1) = - r->pack ? pa->pa_scanline : pa->un_scanline; - - if (pa->packed_width != desc.component_size * 2 * 8 || - pa->component_width != desc.component_size * 8 || - pa->num_components != 2 || - pa->prepadding != 0 || - !repack_cb) - continue; - - r->repack = repack_nv; - r->pass_through_y = true; - r->packed_repack_scanline = repack_cb; - r->zimgfmt = planar_fmt; - r->components[0] = desc.planes[1].components[0] - 1; - r->components[1] = desc.planes[1].components[1] - 1; - return; - } -} - -static void setup_misc_packer(struct mp_zimg_repack *r) -{ - // Although it's in regular_repackers[], the generic mpv imgfmt metadata - // can't handle it yet. - if (r->zimgfmt == IMGFMT_RGB30) { - int planar_fmt = find_gbrp_format(10, 3); - if (!planar_fmt) - return; - r->zimgfmt = planar_fmt; - r->repack = packed_repack; - r->packed_repack_scanline = r->pack ? 
pa_ccc10z2 : un_ccc10x2; - static int c_order[] = {3, 2, 1}; - for (int n = 0; n < 3; n++) - r->components[n] = c_order[n] - 1; - } else if (r->zimgfmt == IMGFMT_PAL8 && !r->pack) { - int grap_fmt = find_gbrp_format(8, 4); - if (!grap_fmt) - return; - r->zimgfmt = grap_fmt; - r->repack = unpack_pal; - } else { - enum AVPixelFormat avfmt = imgfmt2pixfmt(r->zimgfmt); - if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) { - r->zimgfmt = IMGFMT_Y1; - r->repack = bitmap_repack; - r->comp_size = avfmt == AV_PIX_FMT_MONOWHITE; // abuse to pass a flag - return; - } - } -} - -// Tries to set a packer/unpacker for component-wise byte aligned RGB formats. -static void setup_regular_rgb_packer(struct mp_zimg_repack *r) -{ - struct mp_regular_imgfmt desc; - if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) - return; - - if (desc.num_planes != 1 || desc.planes[0].num_components < 2) - return; - struct mp_regular_imgfmt_plane *p = &desc.planes[0]; - - int num_real_components = 0; - bool has_alpha = false; - for (int n = 0; n < p->num_components; n++) { - if (p->components[n]) { - has_alpha |= p->components[n] == 4; - num_real_components += 1; - } else { - // padding must be in MSB or LSB - if (n != 0 && n != p->num_components - 1) - return; - } - } - - int depth = desc.component_size * 8 + MPMIN(0, desc.component_pad); - - int planar_fmt = num_real_components > 2 - ? find_gbrp_format(depth, num_real_components) - : find_gray_format(depth, num_real_components); - if (!planar_fmt) - return; - static const int reorder_gbrp[] = {0, 3, 1, 2, 4}; - static const int reorder_gray[] = {0, 1, 0, 0, 4}; - const int *reorder = num_real_components > 2 ? reorder_gbrp : reorder_gray; - - for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { - const struct regular_repacker *pa = ®ular_repackers[i]; - - // The following may assume little endian (because some repack backends - // use word access, while the metadata here uses byte access). - - int prepad = p->components[0] ? 
0 : 8; - int first_comp = p->components[0] ? 0 : 1; - void (*repack_cb)(void *p1, void *p2[], int x0, int x1) = - r->pack ? pa->pa_scanline : pa->un_scanline; - - if (pa->packed_width != desc.component_size * p->num_components * 8 || - pa->component_width != depth || - pa->num_components != num_real_components || - pa->prepadding != prepad || - !repack_cb) - continue; - - r->repack = packed_repack; - r->packed_repack_scanline = repack_cb; - r->zimgfmt = planar_fmt; - for (int n = 0; n < num_real_components; n++) { - // Determine permutation that maps component order between the two - // formats, with has_alpha special case (see above). - int c = reorder[p->components[first_comp + n]]; - r->components[n] = c == 4 ? num_real_components - 1 : c - 1; - } - return; - } -} - -// (If native_fmt!=r->fmt.imgfmt, this is the swap-endian case; native_fmt is NE.) -// (ctx can be NULL for the sake of probing.) -static bool setup_format_ne(zimg_image_format *zfmt, struct mp_zimg_repack *r, - int native_fmt, struct mp_zimg_context *ctx) -{ zimg_image_format_default(zfmt, ZIMG_API_VERSION); + int rp_flags = 0; + + // For e.g. RGB565, go to lowest depth on pack for less weird dithering. + if (r->pack) { + rp_flags |= REPACK_CREATE_ROUND_DOWN; + } else { + rp_flags |= REPACK_CREATE_EXPAND_8BIT; + } + + r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags); + if (!r->repack) + return false; + + int align_x = mp_repack_get_align_x(r->repack); + + r->zimgfmt = r->pack ? 
mp_repack_get_format_src(r->repack) + : mp_repack_get_format_dst(r->repack); + + if (ctx) { + talloc_steal(r, r->repack); + } else { + TA_FREEP(&r->repack); + } + struct mp_image_params fmt = r->fmt; mp_image_params_guess_csp(&fmt); - r->zimgfmt = native_fmt; - - if (!r->repack) - setup_nv_packer(r); - if (!r->repack) - setup_misc_packer(r); - if (!r->repack) - setup_regular_rgb_packer(r); - if (!r->repack) - setup_fringe_rgb_packer(r, ctx); - if (!r->repack) - setup_fringe_yuv422_packer(r); - struct mp_regular_imgfmt desc; if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) return false; + // Relies on zimg callbacks reading on 64 byte alignment. + if (!MP_IS_POWER_OF_2(align_x) || align_x > 64 / desc.component_size) + return false; + // no weird stuff if (desc.num_planes > 4) return false; - // Endian swapping. - if (native_fmt != fmt.imgfmt) { - struct mp_regular_imgfmt ndesc; - if (!mp_get_regular_imgfmt(&ndesc, native_fmt) || ndesc.num_planes > 4) - return false; - r->endian_size = ndesc.component_size; - if (r->endian_size != 2 && r->endian_size != 4) - return false; - for (int n = 0; n < ndesc.num_planes; n++) - r->endian_items[n] = ndesc.planes[n].num_components; - } - for (int n = 0; n < 4; n++) r->z_planes[n] = -1; - // Accept only true planar formats. for (int n = 0; n < desc.num_planes; n++) { if (desc.planes[n].num_components != 1) return false; @@ -1219,25 +408,6 @@ static bool setup_format_ne(zimg_image_format *zfmt, struct mp_zimg_repack *r, return true; } -static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, - bool pack, struct mp_image_params *fmt, - struct mp_zimg_context *ctx) -{ - struct mp_zimg_repack repack_init = { - .pack = pack, - .fmt = *fmt, - }; - *r = repack_init; - if (setup_format_ne(zfmt, r, fmt->imgfmt, ctx)) - return true; - // Try reverse endian. 
- int nimgfmt = mp_find_other_endian(fmt->imgfmt); - if (!nimgfmt) - return false; - *r = repack_init; - return setup_format_ne(zfmt, r, nimgfmt, ctx); -} - static bool allocate_buffer(struct mp_zimg_context *ctx, struct mp_zimg_repack *r) { @@ -1269,20 +439,15 @@ static bool allocate_buffer(struct mp_zimg_context *ctx, if (!r->tmp) return false; + // Note: although zimg doesn't require that the chroma plane's zmask is + // divided by the full size zmask, the repack callback requires it, + // since mp_repack can handle only proper slices. for (int n = 1; n < r->tmp->fmt.num_planes; n++) { r->zmask[n] = r->zmask[0]; if (r->zmask[0] != ZIMG_BUFFER_MAX) r->zmask[n] = r->zmask[n] >> r->tmp->fmt.ys[n]; } - if (r->endian_size && !r->pack) { - r->tmp_endian = mp_image_alloc(r->fmt.imgfmt, r->fmt.w, h); - talloc_steal(r, r->tmp_endian); - - if (!r->tmp_endian) - return false; - } - return true; } @@ -1382,8 +547,12 @@ bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst, assert(ctx->zimg_graph); zimg_image_buffer zsrc, zdst; - wrap_buffer(ctx->zimg_src, &zsrc, src); - wrap_buffer(ctx->zimg_dst, &zdst, dst); + if (!wrap_buffer(ctx->zimg_src, &zsrc, src) || + !wrap_buffer(ctx->zimg_dst, &zdst, dst)) + { + MP_ERR(ctx, "zimg repacker initialization failed.\n"); + return false; + } // An annoyance. 
zimg_image_buffer_const zsrc_c = {ZIMG_API_VERSION}; @@ -1400,9 +569,6 @@ bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst, repack_entrypoint, ctx->zimg_src, repack_entrypoint, ctx->zimg_dst); - ctx->zimg_src->user_mpi = NULL; - ctx->zimg_dst->user_mpi = NULL; - return true; } diff --git a/wscript_build.py b/wscript_build.py index 2a656a604d..93933e7371 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -402,6 +402,7 @@ def build(ctx): ( "test/json.c", "tests" ), ( "test/linked_list.c", "tests" ), ( "test/paths.c", "tests" ), + ( "test/repack.c", "tests && zimg" ), ( "test/scale_sws.c", "tests" ), ( "test/scale_test.c", "tests" ), ( "test/scale_zimg.c", "tests && zimg" ), @@ -529,6 +530,7 @@ def build(ctx): ( "video/out/win32/droptarget.c", "win32-desktop" ), ( "video/out/win_state.c"), ( "video/out/x11_common.c", "x11" ), + ( "video/repack.c" ), ( "video/sws_utils.c" ), ( "video/zimg.c", "zimg" ), ( "video/vaapi.c", "vaapi" ),