vo_opengl: remove nnedi3 prescaler

This commit is contained in:
Bin Jin
2016-06-10 12:22:51 +00:00
committed by wm4
parent 47d9fbd133
commit 61bc96518a
11 changed files with 1 additions and 413 deletions

1
.gitignore vendored
View File

@@ -15,7 +15,6 @@
/tags
/TAGS
/video/out/x11_icon.inc
/video/out/opengl/nnedi3_weights.inc
/demux/ebml_defs.c
/demux/ebml_types.h
/sub/osd_font.h

View File

@@ -12,9 +12,6 @@ the code to LGPLv2.1+ at a later point without asking the contributor. (This
is a safeguard for making potential relicensing of the project to LGPLv2.1+
easier.) Using a more liberal license compatible to LGPLv2.1+ is also ok.
Some GPLv3 or LGPLv3 code in mpv can be enabled by building with --enable-gpl3,
and in this case, the resulting binaries must be distributed under GPLv3.
For information about authors and contributors, consult the git log, which
contains the complete SVN and CVS history as well.

View File

@@ -596,13 +596,6 @@ Available video output drivers are:
Some parameters can be tuned with ``superxbr-sharpness`` and
``superxbr-edge-strength`` options.
``nnedi3``
An artificial neural network based deinterlacer, which can be used
to upscale images.
Extremely slow and requires a recent mid or high end graphics card
to work smoothly (as of 2015).
``prescale-passes=<1..5>``
The number of prescaler passes to apply (defaults to 1). Setting
it to 2 will perform a 4x upscaling.
@@ -621,29 +614,6 @@ Available video output drivers are:
A value less than 1.0 will disable the check.
``nnedi3-neurons=<16|32|64|128>``
Specify the number of neurons for nnedi3 prescaling (defaults to 32). The
rendering time is expected to scale linearly with the number of neurons.
``nnedi3-window=<8x4|8x6>``
Specify the size of the local sampling window for nnedi3 prescaling
(defaults to ``8x4``). The ``8x6`` window produces sharper images,
but is also slower.
``nnedi3-upload=<ubo|shader>``
Specify how to upload the NN weights to the GPU. Depending on the graphics
card, driver, shader compiler and nnedi3 settings, either method can be
the faster one.
``ubo``
Upload these weights via uniform buffer objects. This is the
default. (requires OpenGL 3.1 / GLES 3.0)
``shader``
Hard code all the weights into the shader source code. (requires
OpenGL 3.3 / GLES 3.0)
``pre-shaders=<files>``, ``post-shaders=<files>``, ``scale-shader=<file>``
Custom GLSL fragment shaders.
@@ -954,7 +924,7 @@ Available video output drivers are:
angle
Direct3D11 through the OpenGL ES translation layer ANGLE. This
supports almost everything the ``win`` backend does (if the ANGLE
build is new enough), except the ``nnedi3`` prescaler.
build is new enough).
dxinterop (experimental)
Win32, using WGL for rendering and Direct3D 9Ex for presentation.
Works on Nvidia and AMD. Newer Intel chips with the latest drivers

View File

@@ -12,7 +12,6 @@ export PYTHON=/usr/bin/python3
--check-c-compiler=gcc \
--disable-cdda \
--enable-egl-angle \
--enable-gpl3 \
--enable-jpeg \
--enable-lcms2 \
--enable-libarchive \

View File

@@ -1,248 +0,0 @@
/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*
* The shader portions may have been derived from existing LGPLv3 shaders
* (see below), possibly making this file effectively LGPLv3.
*/
#include "nnedi3.h"
#if HAVE_NNEDI
#include <assert.h>
#include <stdint.h>
#include <float.h>
#include <libavutil/bswap.h>
#include "video.h"
/*
* NNEDI3, an intra-field deinterlacer
*
* The original filter was authored by Kevin Stone (aka. tritical) and is
* licensed under GPL2 terms:
* http://bengal.missouri.edu/~kes25c/
*
* A LGPLv3 licensed OpenCL kernel was created by SEt:
* http://forum.doom9.org/showthread.php?t=169766
*
* A HLSL port further modified by madshi, Shiandow and Zach Saw could be
* found at (also LGPLv3 licensed):
* https://github.com/zachsaw/MPDN_Extensions
*
*/
/*
 * Shorthands for appending GLSL text through the shader cache. All of them
 * expect a variable named `sc` to be in scope where they are used.
 *
 * GLSL() and GLSLH() stringify their argument and append a newline; GLSLF()
 * and GLSLHF() forward a format string plus arguments. The H variants go
 * through gl_sc_hadd()/gl_sc_haddf() instead of gl_sc_add()/gl_sc_addf().
 *
 * Note that GLSL() and GLSLH() already expand to a statement ending in ';',
 * whereas GLSLF() and GLSLHF() do not.
 */
#define GLSL(x) gl_sc_add(sc, #x "\n");
#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
#define GLSLH(x) gl_sc_hadd(sc, #x "\n");
#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__)
// Default nnedi3 settings: 32 neurons, 8x4 sampling window, weights uploaded
// through a uniform buffer object.
const struct nnedi3_opts nnedi3_opts_def = {
    .upload  = NNEDI3_UPLOAD_UBO,
    .window  = 0,   // choice index of "8x4"
    .neurons = 1,   // choice index of "32"
};
// Sub-option table for the nnedi3 prescaler. The integer stored for each
// "neurons" and "window" choice is later used as an index into
// nnedi3_neurons[] and nnedi3_window_width[]/nnedi3_window_height[];
// "upload" stores one of the NNEDI3_UPLOAD_* constants.
#define OPT_BASE_STRUCT struct nnedi3_opts
const struct m_sub_options nnedi3_conf = {
.opts = (const m_option_t[]) {
OPT_CHOICE("neurons", neurons, 0,
({"16", 0},
{"32", 1},
{"64", 2},
{"128", 3})),
OPT_CHOICE("window", window, 0,
({"8x4", 0},
{"8x6", 1})),
OPT_CHOICE("upload", upload, 0,
({"ubo", NNEDI3_UPLOAD_UBO},
{"shader", NNEDI3_UPLOAD_SHADER})),
{0}
},
.size = sizeof(struct nnedi3_opts),
.defaults = &nnedi3_opts_def,
};
// Raw weight data for every supported neurons/window combination, pulled in
// from the generated nnedi3_weights.inc. The array holds 40320 4-byte values
// (40320 is the final entry of nnedi3_weight_offsets) plus one extra byte.
// NOTE(review): the extra byte is presumably the terminating NUL of the
// generated string literal -- confirm against the file2string output.
//
// The storage-class specifier comes first: placing it after a qualifier
// ("const static") is an obsolescent form in C11.
static const char nnedi3_weights[40320 * 4 + 1] =
#include "video/out/opengl/nnedi3_weights.inc"
;
// Start of each weight set inside nnedi3_weights, counted in 4-byte elements
// and indexed by (window * 4 + neurons). The ninth entry marks the end of the
// last set, so the length of set i is offsets[i + 1] - offsets[i].
const int nnedi3_weight_offsets[9] =
{0, 1088, 3264, 7616, 16320, 17920, 21120, 27520, 40320};
// Neuron count for each value of nnedi3_opts.neurons.
const int nnedi3_neurons[4] = {16, 32, 64, 128};
// Sampling window width and height for each value of nnedi3_opts.window.
const int nnedi3_window_width[2] = {8, 8};
const int nnedi3_window_height[2] = {4, 6};
const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size)
{
int idx = conf->window * 4 + conf->neurons;
const int offset = nnedi3_weight_offsets[idx];
*size = (nnedi3_weight_offsets[idx + 1] - offset) * 4;
return (const float*)(nnedi3_weights + offset * 4);
}
/*
 * Append the GLSL for one nnedi3 pass to the shader cache.
 *
 * gl:        queried for gl->es and gl->glsl_version (UBO extension check)
 * sc:        shader cache that receives the generated code
 * step:      must be 0 or 1. Step 0 writes a transform with a factor of 2 on
 *            the second axis and keeps texels whose fractional y is < 0.5;
 *            step 1 does the same for the first axis / x.
 * conf:      prescaler options, or NULL for nnedi3_opts_def
 * transform: overwritten with the gl_transform belonging to this step
 *
 * The generated code defines a GLSL function nnedi3() and assigns its result
 * to color.x. Depending on conf->upload the weights are either read from the
 * "NNEDI3_WEIGHTS" uniform block or written into the shader as integer bit
 * patterns.
 */
void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int step,
                 const struct nnedi3_opts *conf,
                 struct gl_transform *transform)
{
    assert(0 <= step && step < 2);

    if (!conf)
        conf = &nnedi3_opts_def;

    const int neurons = nnedi3_neurons[conf->neurons];
    const int width = nnedi3_window_width[conf->window];
    const int height = nnedi3_window_height[conf->window];

    const int offset = nnedi3_weight_offsets[conf->window * 4 + conf->neurons];
    // The hard-coded path prints the weights as raw 32-bit patterns, so view
    // the blob as uint32_t. The cast type must match the pointer it
    // initializes (int * and uint32_t * are not compatible pointer types).
    const uint32_t *weights = (const uint32_t *)(nnedi3_weights + offset * 4);

    GLSLF("// nnedi3 (step %d, neurons %d, window %dx%d, mode %d)\n",
          step, neurons, width, height, conf->upload);

    // This is required since each row will be encoded into vec4s
    assert(width % 4 == 0);
    const int sample_count = width * height / 4;

    if (conf->upload == NNEDI3_UPLOAD_UBO) {
        char buf[32];
        // Per neuron: two dot-product weight sets plus one vec4 of biases.
        snprintf(buf, sizeof(buf), "vec4 weights[%d];",
                 neurons * (sample_count * 2 + 1));
        gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0);
        if (!gl->es && gl->glsl_version < 140)
            gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object");
    } else if (conf->upload == NNEDI3_UPLOAD_SHADER) {
        // Somehow necessary for hard coding approach.
        GLSLH(#pragma optionNV(fastprecision on))
    }

    GLSLHF("float nnedi3() {\n");

    if (step == 0) {
        *transform = (struct gl_transform){{{1.0,0.0}, {0.0,2.0}}, {0.0,-0.5}};

        GLSLH(if ((transpose(HOOKED_rot) * fract(HOOKED_pos * HOOKED_size)).y < 0.5)
                  return HOOKED_texOff(vec2(0, 0.25)).x;)
        GLSLHF("#define GET(i, j) "
               "HOOKED_texOff(vec2((i)-(%f),(j)-(%f)+0.25)).x\n",
               width / 2.0 - 1, (height - 1) / 2.0);
    } else {
        *transform = (struct gl_transform){{{2.0,0.0}, {0.0,1.0}}, {-0.5,0.0}};

        GLSLH(if (fract(HOOKED_pos.x * HOOKED_size.x) < 0.5)
                  return HOOKED_texOff(vec2(0.25, 0)).x;)
        GLSLHF("#define GET(i, j) "
               "HOOKED_texOff(vec2((j)-(%f)+0.25,(i)-(%f))).x\n",
               (height - 1) / 2.0, width / 2.0 - 1);
    }

    // Gather the sampling window, four texels per vec4.
    GLSLHF("vec4 samples[%d];\n", sample_count);

    for (int y = 0; y < height; y++)
        for (int x = 0; x < width; x += 4) {
            GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0),"
                   "GET(%d.0, %d.0), GET(%d.0, %d.0));\n",
                   (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y);
        }

    // Mean and variance of the window, used to normalize the neuron inputs.
    GLSLHF("float sum = 0.0, sumsq = 0.0;"
           "for (int i = 0; i < %d; i++) {"
               "sum += dot(samples[i], vec4(1.0));"
               "sumsq += dot(samples[i], samples[i]);"
           "}\n", sample_count);

    GLSLHF("float mstd0 = sum / %d.0;\n"
           "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n"
           "float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n"
           "mstd1 *= mstd2;\n",
           width * height, width * height, FLT_EPSILON);

    GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n");

    if (conf->upload == NNEDI3_UPLOAD_SHADER) {
        GLSLH(#define T(x) intBitsToFloat(x))
        GLSLH(#define W(i,w0,w1,w2,w3) dot(samples[i],vec4(T(w0),T(w1),T(w2),T(w3))))

        GLSLHF("#define WS(w0,w1) "
               "sum1 = exp(sum1 * mstd2 + T(w0));"
               "sum2 = sum2 * mstd2 + T(w1);"
               "wsum += sum1;"
               "vsum += sum1*(sum2/(1.0+abs(sum2)));\n");

        for (int n = 0; n < neurons; n++) {
            // Each neuron owns (sample_count * 2 + 1) vec4s in the blob.
            const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n;
            for (int s = 0; s < 2; s++) {
                GLSLHF("sum%d", s + 1);
                for (int i = 0; i < sample_count; i++) {
                    GLSLHF("%cW(%d,%d,%d,%d,%d)", i == 0 ? '=' : '+', i,
                           (int)av_le2ne32(weights_ptr[0]),
                           (int)av_le2ne32(weights_ptr[1]),
                           (int)av_le2ne32(weights_ptr[2]),
                           (int)av_le2ne32(weights_ptr[3]));
                    weights_ptr += 4;
                }
                GLSLHF(";");
            }
            GLSLHF("WS(%d,%d);\n", (int)av_le2ne32(weights_ptr[0]),
                                   (int)av_le2ne32(weights_ptr[1]));
        }
    } else if (conf->upload == NNEDI3_UPLOAD_UBO) {
        GLSLH(int idx = 0;)

        GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons);

        for (int s = 0; s < 2; s++) {
            GLSLHF("sum%d = 0.0;\n"
                   "for (int i = 0; i < %d; i++) {"
                       "sum%d += dot(samples[i], weights[idx++]);"
                   "}\n",
                   s + 1, sample_count, s + 1);
        }

        GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]);
              sum2 = sum2 * mstd2 + weights[idx++][1];
              wsum += sum1;
              vsum += sum1*(sum2/(1.0+abs(sum2)));)

        GLSLHF("}\n");
    }

    GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);)

    GLSLHF("}\n"); // nnedi3

    GLSL(color.x = nnedi3();)
}
#else
// Built with HAVE_NNEDI off: define an empty option table so the symbol
// declared in nnedi3.h still exists.
const struct m_sub_options nnedi3_conf = {0};
/*
 * Stand-in used when nnedi3 support is compiled out: there is no weight data.
 *
 * conf: ignored.
 * size: if non-NULL, set to 0 so a caller never reads an indeterminate size.
 *
 * Always returns NULL.
 */
const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size)
{
    if (size)
        *size = 0;
    return NULL;
}
// No-op stand-in used when HAVE_NNEDI is off: it adds nothing to the shader
// cache and leaves *transform untouched.
void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int step,
const struct nnedi3_opts *conf,
struct gl_transform *transform)
{
}
#endif

View File

@@ -1,45 +0,0 @@
/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MP_GL_NNEDI3_H
#define MP_GL_NNEDI3_H
#include "config.h"
#include "common.h"
#include "utils.h"
// nnedi3 support follows the GPL3 build switch.
#define HAVE_NNEDI HAVE_GPL3
// Values for nnedi3_opts.upload.
#define NNEDI3_UPLOAD_UBO 0
#define NNEDI3_UPLOAD_SHADER 1
// Settings of the nnedi3 prescaler as filled in by the option parser.
struct nnedi3_opts {
int neurons; // choice index: 0..3 for 16, 32, 64, 128 neurons
int window; // choice index: 0 for 8x4, 1 for 8x6
int upload; // NNEDI3_UPLOAD_UBO or NNEDI3_UPLOAD_SHADER
};
// Default option values; only defined when HAVE_NNEDI is on.
extern const struct nnedi3_opts nnedi3_opts_def;
// Sub-option table (empty when HAVE_NNEDI is off).
extern const struct m_sub_options nnedi3_conf;
// Returns the weight set selected by conf and stores its size in bytes in
// *size. Returns NULL when built with HAVE_NNEDI off.
const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size);
// Adds the GLSL for one nnedi3 pass to sc. step must be 0 or 1; conf may be
// NULL to use nnedi3_opts_def; *transform is overwritten with the transform
// for that step. Does nothing when built with HAVE_NNEDI off.
void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int step,
const struct nnedi3_opts *conf,
struct gl_transform *transform);
#endif

View File

Binary file not shown.

View File

@@ -38,7 +38,6 @@
#include "osd.h"
#include "stream/stream.h"
#include "superxbr.h"
#include "nnedi3.h"
#include "video_shaders.h"
#include "user_shaders.h"
#include "video/out/filter_kernels.h"
@@ -196,8 +195,6 @@ struct gl_video {
GLuint dither_texture;
int dither_size;
GLuint nnedi3_weights_buffer;
struct gl_timer *upload_timer;
struct gl_timer *render_timer;
struct gl_timer *present_timer;
@@ -455,16 +452,12 @@ const struct m_sub_options gl_video_conf = {
OPT_CHOICE("prescale-luma", prescale_luma, 0,
({"none", PRESCALE_NONE},
{"superxbr", PRESCALE_SUPERXBR}
#if HAVE_NNEDI
, {"nnedi3", PRESCALE_NNEDI3}
#endif
)),
OPT_INTRANGE("prescale-passes",
prescale_passes, 0, 1, MAX_PRESCALE_PASSES),
OPT_FLOATRANGE("prescale-downscaling-threshold",
prescale_downscaling_threshold, 0, 0.0, 32.0),
OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0),
OPT_SUBSTRUCT("nnedi3", nnedi3_opts, nnedi3_conf, 0),
OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0),
OPT_REMOVED("approx-gamma", "this is always enabled now"),
@@ -594,9 +587,6 @@ static void uninit_rendering(struct gl_video *p)
gl->DeleteTextures(1, &p->dither_texture);
p->dither_texture = 0;
gl->DeleteBuffers(1, &p->nnedi3_weights_buffer);
p->nnedi3_weights_buffer = 0;
for (int n = 0; n < 4; n++) {
fbotex_uninit(&p->merge_fbo[n]);
fbotex_uninit(&p->scale_fbo[n]);
@@ -1520,27 +1510,6 @@ static int get_prescale_passes(struct gl_video *p)
return passes;
}
// Create and fill the uniform buffer holding the NNEDI3 weights. Does nothing
// unless UBO upload is selected and the buffer has not been created yet.
static void upload_nnedi3_weights(struct gl_video *p)
{
    if (p->opts.nnedi3_opts->upload != NNEDI3_UPLOAD_UBO)
        return;
    if (p->nnedi3_weights_buffer)
        return;

    GL *gl = p->gl;

    gl->GenBuffers(1, &p->nnedi3_weights_buffer);
    gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, p->nnedi3_weights_buffer);

    int weights_size;
    const float *weights_data =
        get_nnedi3_weights(p->opts.nnedi3_opts, &weights_size);

    MP_VERBOSE(p, "Uploading NNEDI3 weights via UBO (size=%d)\n", weights_size);

    // The GPU's endianness is unknown here; the data is uploaded as-is on
    // the assumption that it is little-endian.
    gl->BufferData(GL_UNIFORM_BUFFER, weights_size, weights_data,
                   GL_STATIC_DRAW);
}
// Returns true if two img_texs are semantically equivalent (same metadata)
static bool img_tex_equiv(struct img_tex a, struct img_tex b)
{
@@ -1594,14 +1563,6 @@ static void superxbr_hook(struct gl_video *p, struct img_tex tex,
pass_superxbr(p->sc, step, p->opts.superxbr_opts, trans);
}
// Texture hook for one nnedi3 pass. priv carries the step number that is
// forwarded to pass_nnedi3(); tex is unused.
static void nnedi3_hook(struct gl_video *p, struct img_tex tex,
                        struct gl_transform *trans, void *priv)
{
    const int pass_step = (uintptr_t)priv;

    // Upload the weights (if that is still pending) before the shader code
    // for this pass is generated.
    upload_nnedi3_weights(p);

    pass_nnedi3(p->gl, p->sc, pass_step, p->opts.nnedi3_opts, trans);
}
static void unsharp_hook(struct gl_video *p, struct img_tex tex,
struct gl_transform *trans, void *priv)
{
@@ -1849,19 +1810,6 @@ static void gl_video_setup_hooks(struct gl_video *p)
}
}
if (p->opts.prescale_luma == PRESCALE_NNEDI3) {
for (int i = 0; i < prescale_passes; i++) {
for (int step = 0; step < 2; step++) {
pass_add_hook(p, (struct tex_hook) {
.hook_tex = "LUMA",
.bind_tex = {"HOOKED"},
.hook = nnedi3_hook,
.priv = (void *)(uintptr_t)step,
});
}
}
}
if (p->opts.unsharp != 0.0) {
pass_add_hook(p, (struct tex_hook) {
.hook_tex = "MAIN",
@@ -3283,26 +3231,6 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
if (p->opts.prescale_luma == PRESCALE_NNEDI3) {
if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) {
// Check features for uniform buffer objects.
if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) {
MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1");
p->opts.prescale_luma = PRESCALE_NONE;
}
} else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) {
// Check features for hard coding approach.
if ((!gl->es && gl->glsl_version < 330) ||
(gl->es && gl->glsl_version < 300))
{
MP_WARN(p, "Disabling NNEDI3 (%s required).\n",
gl->es ? "OpenGL ES 3.0" : "OpenGL 3.3");
p->opts.prescale_luma = PRESCALE_NONE;
}
}
}
}
static void init_gl(struct gl_video *p)

View File

@@ -95,7 +95,6 @@ enum blend_subs_mode {
enum prescalers {
PRESCALE_NONE = 0,
PRESCALE_SUPERXBR,
PRESCALE_NNEDI3,
};
enum tone_mapping {
@@ -147,7 +146,6 @@ struct gl_video_opts {
int prescale_passes;
float prescale_downscaling_threshold;
struct superxbr_opts *superxbr_opts;
struct nnedi3_opts *nnedi3_opts;
struct mp_icc_opts *icc_opts;
};

View File

@@ -10,11 +10,6 @@ from waftools.checks.custom import *
build_options = [
{
'name': '--gpl3',
'desc': 'GPL3 license',
'default': 'disable',
'func': check_true
}, {
'name': '--cplayer',
'desc': 'mpv CLI player',
'default': 'enable',

View File

@@ -67,10 +67,6 @@ def build(ctx):
source = "sub/osd_font.otf",
target = "sub/osd_font.h")
ctx.file2string(
source = "video/out/opengl/nnedi3_weights.bin",
target = "video/out/opengl/nnedi3_weights.inc")
lua_files = ["defaults.lua", "assdraw.lua", "options.lua", "osc.lua",
"ytdl_hook.lua"]
for fn in lua_files:
@@ -356,7 +352,6 @@ def build(ctx):
( "video/out/opengl/hwdec_osx.c", "videotoolbox-gl" ),
( "video/out/opengl/hwdec_vdpau.c", "vdpau-gl-x11" ),
( "video/out/opengl/lcms.c", "gl" ),
( "video/out/opengl/nnedi3.c", "gl" ),
( "video/out/opengl/osd.c", "gl" ),
( "video/out/opengl/superxbr.c", "gl" ),
( "video/out/opengl/user_shaders.c", "gl" ),