vo_opengl: refactor scaler LUT weight packing/loading

This is mostly done so we can support using textures with more components than the scaler LUTs have entries. But while we're at it, also change the way the weights are packed so that they're always sequential with no gaps. This allows us to simplify pass_sample_separated_get_weights as well.
2025-12-28 05:33:14 +00:00 · 2017-08-27 09:15:50 +02:00
parent f589a3bd78
commit 8cf5799ab1
4 changed files with 24 additions and 36 deletions
--- a/video/out/filter_kernels.c
+++ b/video/out/filter_kernels.c
@@ -142,14 +142,17 @@ static void mp_compute_weights(struct filter_kernel *filter, double f,
 }

 // Fill the given array with weights for the range [0.0, 1.0]. The array is
-// interpreted as rectangular array of count * filter->size items.
+// interpreted as a rectangular array of count rows of filter->size items each,
+// with consecutive rows starting `stride` floats apart. (For polar filters,
+// the `count` indicates the row size and filter->size/stride are ignored)
 //
 // There will be slight sampling error if these weights are used in a OpenGL
 // texture as LUT directly. The sampling point of a texel is located at its
 // center, so out_array[0] will end up at 0.5 / count instead of 0.0.
 // Correct lookup requires a linear coordinate mapping from [0.0, 1.0] to
 // [0.5 / count, 1.0 - 0.5 / count].
-void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
+void mp_compute_lut(struct filter_kernel *filter, int count, int stride,
+                    float *out_array)
 {
    if (filter->polar) {
        filter->radius_cutoff = 0.0;
@@ -165,7 +168,7 @@ void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
        // Compute a 2D array indexed by subpixel position
        for (int n = 0; n < count; n++) {
            mp_compute_weights(filter, n / (double)(count - 1),
-                               out_array + filter->size * n);
+                               out_array + stride * n);
        }
    }
 }
--- a/video/out/filter_kernels.h
+++ b/video/out/filter_kernels.h
@@ -50,6 +50,7 @@ const struct filter_kernel *mp_find_filter_kernel(const char *name);

 bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
                    double scale);
-void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array);
+void mp_compute_lut(struct filter_kernel *filter, int count, int stride,
+                    float *out_array);

 #endif /* MPLAYER_FILTER_KERNELS_H */
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -1597,23 +1597,18 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
    scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);

    int size = scaler->kernel->size;
-    int elems_per_pixel = 4;
-    if (size == 1) {
-        elems_per_pixel = 1;
-    } else if (size == 2) {
-        elems_per_pixel = 2;
-    } else if (size == 6) {
-        elems_per_pixel = 3;
-    }
-    int width = size / elems_per_pixel;
-    assert(size == width * elems_per_pixel);
-    const struct ra_format *fmt = ra_find_float16_format(p->ra, elems_per_pixel);
+    int num_components = size > 2 ? 4 : size;
+    const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
    assert(fmt);

+    int width = (size + num_components - 1) / num_components; // round up
+    int stride = width * num_components;
+    assert(size <= stride);
+
    scaler->lut_size = 1 << p->opts.scaler_lut_size;

-    float *weights = talloc_array(NULL, float, scaler->lut_size * size);
-    mp_compute_lut(scaler->kernel, scaler->lut_size, weights);
+    float *weights = talloc_array(NULL, float, scaler->lut_size * stride);
+    mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights);

    bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);

--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -41,27 +41,16 @@ static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
                                              struct scaler *scaler)
 {
    gl_sc_uniform_texture(sc, "lut", scaler->lut);
-    // Define a new variable to cache the corrected fcoord.
-    GLSLF("float fcoord_lut = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);
+    GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut_size);

    int N = scaler->kernel->size;
-    if (N == 2) {
-        GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord_lut)).rg;)
-        GLSL(float weights[2] = float[](c1.r, c1.g);)
-    } else if (N == 6) {
-        GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord_lut));)
-        GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord_lut));)
-        GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);)
-    } else {
-        GLSLF("float weights[%d];\n", N);
-        for (int n = 0; n < N / 4; n++) {
-            GLSLF("c = texture(lut, vec2(1.0 / %d.0 + %d.0 / %d.0, fcoord_lut));\n",
-                    N / 2, n, N / 4);
-            GLSLF("weights[%d] = c.r;\n", n * 4 + 0);
-            GLSLF("weights[%d] = c.g;\n", n * 4 + 1);
-            GLSLF("weights[%d] = c.b;\n", n * 4 + 2);
-            GLSLF("weights[%d] = c.a;\n", n * 4 + 3);
-        }
+    int width = (N + 3) / 4; // round up
+
+    GLSLF("float weights[%d];\n", N);
+    for (int i = 0; i < N; i++) {
+        if (i % 4 == 0)
+            GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width);
+        GLSLF("weights[%d] = c[%d];\n", i, i % 4);
    }
 }