vo_opengl: enable NNEDI3 prescaler on OpenGL ES 3.0

It turns out that both UBOs and intBitsToFloat() are supported in OpenGL ES 3.0[1][2], so enable them there, allowing the NNEDI3 prescaler to be used with a wider range of backends. Also fix some implicit int-to-float conversions so that the shader actually compiles on GLES. Tested on Linux desktop (nvidia 358.16) with the "es" sub-option. [1]: https://www.khronos.org/opengles/sdk/docs/man3/html/glGetUniformBlockIndex.xhtml [2]: https://www.khronos.org/opengles/sdk/docs/manglsl/docbook4/xhtml/intBitsToFloat.xml
2025-12-27 05:10:20 +00:00 · 2015-12-02 00:28:26 +00:00
parent 69cc002c92
commit 42a0f4d87b
4 changed files with 21 additions and 16 deletions
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -618,11 +618,11 @@ Available video output drivers are:

        ``ubo``
            Upload these weights via uniform buffer objects. This is the
-            default. (requires OpenGL 3.1)
+            default. (requires OpenGL 3.1 / GLES 3.0)

        ``shader``
            Hard code all the weights into the shader source code. (requires
-            OpenGL 3.3)
+            OpenGL 3.3 / GLES 3.0)


    ``pre-shaders=<files>``, ``post-shaders=<files>``, ``scale-shader=<file>``
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -326,6 +326,7 @@ static const struct gl_functions gl_functions[] = {
    // uniform buffer object extensions, requires OpenGL 3.1.
    {
        .ver_core = 310,
+        .ver_es_core = 300,
        .extension = "GL_ARB_uniform_buffer_object",
        .functions = (const struct gl_function[]) {
            DEF_FN(GetUniformBlockIndex),
--- a/video/out/opengl/nnedi3.c
+++ b/video/out/opengl/nnedi3.c
@@ -108,7 +108,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
        snprintf(buf, sizeof(buf), "vec4 weights[%d];",
                 neurons * (sample_count * 2 + 1));
        gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0);
-        if (gl->glsl_version < 140)
+        if (!gl->es && gl->glsl_version < 140)
            gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object");
    } else if (conf->upload == NNEDI3_UPLOAD_SHADER) {
        // Somehow necessary for hard coding approach.
@@ -139,12 +139,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,

    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x += 4) {
-            GLSLHF("samples[%d] = vec4(GET(%d, %d), GET(%d, %d),"
-                                      "GET(%d, %d), GET(%d, %d));\n",
+            GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0),"
+                                      "GET(%d.0, %d.0), GET(%d.0, %d.0));\n",
                   (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y);
        }

-    GLSLHF("float sum = 0, sumsq = 0;"
+    GLSLHF("float sum = 0.0, sumsq = 0.0;"
           "for (int i = 0; i < %d; i++) {"
               "sum += dot(samples[i], vec4(1.0));"
               "sumsq += dot(samples[i], samples[i]);"
@@ -152,11 +152,11 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,

    GLSLHF("float mstd0 = sum / %d.0;\n"
           "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n"
-           "float mstd2 = mix(0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
+           "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
           "mstd1 *= mstd2;\n",
           width * height, width * height, FLT_EPSILON);

-    GLSLHF("float vsum = 0, wsum = 0, sum1, sum2;\n");
+    GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n");

    if (conf->upload == NNEDI3_UPLOAD_SHADER) {
        GLSLH(#define T(x) intBitsToFloat(x))
@@ -166,7 +166,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
               "sum1 = exp(sum1 * mstd2 + T(w0));"
               "sum2 = sum2 * mstd2 + T(w1);"
               "wsum += sum1;"
-               "vsum += sum1*(sum2/(1+abs(sum2)));\n");
+               "vsum += sum1*(sum2/(1.0+abs(sum2)));\n");

        for (int n = 0; n < neurons; n++) {
            const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n;
@@ -191,7 +191,7 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
        GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons);

        for (int s = 0; s < 2; s++) {
-            GLSLHF("sum%d = 0;\n"
+            GLSLHF("sum%d = 0.0;\n"
                   "for (int i = 0; i < %d; i++) {"
                       "sum%d += dot(samples[i], weights[idx++]);"
                   "}\n",
@@ -201,12 +201,12 @@ void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num,
        GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]);
              sum2 = sum2 * mstd2 + weights[idx++][1];
              wsum += sum1;
-              vsum += sum1*(sum2/(1+abs(sum2)));)
+              vsum += sum1*(sum2/(1.0+abs(sum2)));)

        GLSLHF("}\n");
    }

-    GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0, 1);)
+    GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);)

    GLSLHF("}\n"); // nnedi3

--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -2483,14 +2483,18 @@ static void check_gl_features(struct gl_video *p)
    if (p->opts.prescale == 2) {
        if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) {
            // Check features for uniform buffer objects.
-            if (!p->gl->BindBufferBase || !p->gl->GetUniformBlockIndex) {
-                MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.1 required).\n");
+            if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) {
+                MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
+                        gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1");
                p->opts.prescale = 0;
            }
        } else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) {
            // Check features for hard coding approach.
-            if (p->gl->glsl_version < 330) {
-                MP_WARN(p, "Disabling NNEDI3 (OpenGL 3.3 required).\n");
+            if ((!gl->es && gl->glsl_version < 330) ||
+                (gl->es && gl->glsl_version < 300))
+            {
+                MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
+                        gl->es ? "OpenGL ES 3.0" : "OpenGL 3.3");
                p->opts.prescale = 0;
            }
        }