misc/language: parse hi/sdh/cc tags from external subtitle tracks

This also adds a `hearing-impaired` argument to the `sub-add` command.

Fixes: #14805
This commit is contained in:
Kacper Michajłow
2025-03-06 19:59:37 +01:00
parent 415b70dc7b
commit 7df3f442ea
9 changed files with 87 additions and 36 deletions

View File

@@ -0,0 +1 @@
add `hearing-impaired` argument to `sub-add` command

View File

@@ -297,10 +297,16 @@ done:
return best_score;
}
bstr mp_guess_lang_from_filename(bstr name, int *lang_start)
bstr mp_guess_lang_from_filename(bstr name, int *lang_start, bool *hearing_impaired)
{
name = bstr_strip(bstr_strip_ext(name));
if (lang_start)
*lang_start = -1;
if (hearing_impaired)
*hearing_impaired = false;
if (name.len < 2)
return (bstr){0};
@@ -318,12 +324,32 @@ bstr mp_guess_lang_from_filename(bstr name, int *lang_start)
i--;
}
bool *hi = hearing_impaired ? hearing_impaired : &(bool){0};
bool checked_hi = false;
while (true) {
while (i >= 0 && mp_isalpha(name.start[i])) {
lang_length++;
i--;
}
if (i >= 0 && lang_length >= 2 && !checked_hi && name.start[i] == delimiter) {
checked_hi = true;
static const char *const suffixes[] = { "sdh", "hi", "cc" };
bstr tag = { name.start + i + 1, lang_length };
for (int n = 0; n < MP_ARRAY_SIZE(suffixes); n++) {
if (!bstrcasecmp0(tag, suffixes[n])) {
*hi = true;
break;
}
}
if (*hi) {
lang_length = 0;
i -= (delimiter != '.') ? 2 : 1;
continue;
}
}
// According to
// https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags
// subtags after the first are composed of 1 to 8 letters.

View File

@@ -25,6 +25,6 @@
// Result numerically higher => better match. 0 == no match.
int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);
bstr mp_guess_lang_from_filename(bstr name, int *lang_start);
bstr mp_guess_lang_from_filename(bstr name, int *lang_start, bool *hearing_impaired);
#endif /* MP_LANGUAGE_H */

View File

@@ -6215,8 +6215,8 @@ static void cmd_track_add(void *p)
struct mp_cmd_ctx *cmd = p;
struct MPContext *mpctx = cmd->mpctx;
int type = *(int *)cmd->priv;
bool is_albumart = type == STREAM_VIDEO &&
cmd->args[4].v.b;
bool is_albumart = type == STREAM_VIDEO && cmd->args[4].v.b;
bool hearing_impaired = type == STREAM_SUB && cmd->args[4].v.b;
if (mpctx->stop_play) {
cmd->success = false;
@@ -6236,7 +6236,7 @@ static void cmd_track_add(void *p)
}
}
int first = mp_add_external_file(mpctx, cmd->args[0].v.s, type,
cmd->abort->cancel, is_albumart);
cmd->abort->cancel, is_albumart, hearing_impaired);
if (first < 0) {
cmd->success = false;
return;
@@ -6300,9 +6300,10 @@ static void cmd_track_reload(void *p)
if (t && t->is_external && t->external_filename) {
char *filename = talloc_strdup(NULL, t->external_filename);
bool is_albumart = t->attached_picture;
bool hearing_impaired = t->hearing_impaired_track;
mp_remove_track(mpctx, t);
nt_num = mp_add_external_file(mpctx, filename, type, cmd->abort->cancel,
is_albumart);
is_albumart, hearing_impaired);
talloc_free(filename);
}
@@ -6313,8 +6314,11 @@ static void cmd_track_reload(void *p)
struct track *nt = mpctx->tracks[nt_num];
if (!nt->lang)
nt->lang = bstrto0(nt, mp_guess_lang_from_filename(bstr0(nt->external_filename), NULL));
if (!nt->lang) {
bstr lang = mp_guess_lang_from_filename(bstr0(nt->external_filename), NULL,
&nt->hearing_impaired_track);
nt->lang = bstrto0(nt, lang);
}
mp_switch_track(mpctx, nt->type, nt, 0);
print_track_list(mpctx, "Reloaded:");
@@ -7149,6 +7153,7 @@ const struct mp_cmd_def mp_cmds[] = {
.flags = MP_CMD_OPT_ARG},
{"title", OPT_STRING(v.s), .flags = MP_CMD_OPT_ARG},
{"lang", OPT_STRING(v.s), .flags = MP_CMD_OPT_ARG},
{"hearing-impaired", OPT_BOOL(v.b), .flags = MP_CMD_OPT_ARG},
},
.priv = &(const int){STREAM_SUB},
.spawn_thread = true,

View File

@@ -529,7 +529,7 @@ void mp_abort_trigger_locked(struct MPContext *mpctx,
struct mp_abort_entry *abort);
int mp_add_external_file(struct MPContext *mpctx, char *filename,
enum stream_type filter, struct mp_cancel *cancel,
bool cover_art);
bool cover_art, bool hearing_impaired);
void mark_track_selection(struct MPContext *mpctx, int order,
enum stream_type type, int value);
#define FLAG_MARK_SELECTION 1

View File

@@ -155,7 +155,8 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
bstr lang = {0};
int start = 0;
lang = mp_guess_lang_from_filename(dename, &start);
bool hearing_impaired = false;
lang = mp_guess_lang_from_filename(dename, &start, &hearing_impaired);
if (bstr_case_startswith(tmp_fname_trim, f_fname_trim)) {
if (lang.len && start == f_fname_trim.len)
prio |= 16; // exact movie name + followed by lang
@@ -200,6 +201,7 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
sub->priority = prio;
sub->fname = subpath;
sub->lang = lang.len ? bstrdup0(*slist, lang) : NULL;
sub->hearing_impaired = hearing_impaired;
} else
talloc_free(subpath);
}

View File

@@ -25,6 +25,7 @@ struct subfn {
int priority;
char *fname;
char *lang;
bool hearing_impaired;
};
struct mpv_global;

View File

@@ -826,7 +826,7 @@ bool mp_remove_track(struct MPContext *mpctx, struct track *track)
// the demuxer is changed to be slaved to mpctx->playback_abort instead.
int mp_add_external_file(struct MPContext *mpctx, char *filename,
enum stream_type filter, struct mp_cancel *cancel,
bool cover_art)
bool cover_art, bool hearing_impaired)
{
struct MPOpts *opts = mpctx->opts;
if (!filename || mp_cancel_test(cancel))
@@ -915,6 +915,7 @@ int mp_add_external_file(struct MPContext *mpctx, char *filename,
t->external_filename = mp_normalize_user_path(t, mpctx->global, filename);
t->no_default = sh->type != filter;
t->no_auto_select = t->no_default;
t->hearing_impaired_track = hearing_impaired;
// if we found video, and we are loading cover art, flag as such.
t->attached_picture = t->type == STREAM_VIDEO && cover_art;
if (first_num < 0 && (filter == STREAM_TYPE_COUNT || sh->type == filter))
@@ -945,7 +946,7 @@ static void open_external_files(struct MPContext *mpctx, char **files,
for (int n = 0; files && files[n]; n++)
// when given filter is set to video, we are loading up cover art
mp_add_external_file(mpctx, files[n], filter, mpctx->playback_abort,
filter == STREAM_VIDEO);
filter == STREAM_VIDEO, false);
talloc_free(tmp);
}
@@ -987,7 +988,7 @@ void autoload_external_files(struct MPContext *mpctx, struct mp_cancel *cancel)
// when given filter is set to video, we are loading up cover art
int first = mp_add_external_file(mpctx, e->fname, e->type, cancel,
e->type == STREAM_VIDEO);
e->type == STREAM_VIDEO, e->hearing_impaired);
if (first < 0)
goto skip;

View File

@@ -56,33 +56,48 @@ int main(void)
assert_int_equal(mp_match_lang((char*[]){NULL} , "ax") , 0);
void *ta_ctx = talloc_new(NULL);
int start; // this is actually the position of the delimiter.
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en.srt"), &start)), "en");
assert_int_equal(start, 3);
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.eng.srt"), NULL)), "eng");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.e.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.engg.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.00.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0(NULL), NULL)), "");
// Runs mp_guess_lang_from_filename() on `filename` and checks all three of
// its results in one go:
//  - the returned language tag, compared as a string with `expected_lang`
//    ("" when no language is expected),
//  - the index written through the lang_start out-parameter, compared with
//    `expected_start` (the cases below expect -1 when no language is found),
//  - the hearing-impaired out-flag, compared with `expected_hi`.
// Uses `ta_ctx` from the enclosing scope as the allocation context handed to
// bstrto0() for the temporary string copy.
#define TEST_LANG_GUESS(filename, expected_lang, expected_start, expected_hi) \
do { \
int start; \
bool hearing_impaired; \
bstr lang = mp_guess_lang_from_filename(bstr0(filename), &start, \
&hearing_impaired); \
assert_string_equal(bstrto0(ta_ctx, lang), expected_lang); \
assert_int_equal(start, expected_start); \
assert_true(hearing_impaired == expected_hi); \
} while (0)
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-US.srt"), NULL)), "en-US");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-simple.srt"), NULL)), "en-simple");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.sgn-FSL.srt"), NULL)), "sgn-FSL");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.gsw-u-sd-chzh.srt"), NULL)), "gsw-u-sd-chzh");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-US-.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-aaaaaaaaa.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-0.srt"), NULL)), "");
TEST_LANG_GUESS("foo.en.srt", "en", 3, false);
TEST_LANG_GUESS("foo.eng.srt", "eng", 3, false);
TEST_LANG_GUESS("foo.e.srt", "", -1, false);
TEST_LANG_GUESS("foo.engg.srt", "", -1, false);
TEST_LANG_GUESS("foo.00.srt", "", -1, false);
TEST_LANG_GUESS("foo.srt", "", -1, false);
TEST_LANG_GUESS(NULL, "", -1, false);
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo[en].srt"), NULL)), "en");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo[en-US].srt"), NULL)), "en-US");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo[].srt"), NULL)), "");
TEST_LANG_GUESS("foo.en-US.srt", "en-US", 3, false);
TEST_LANG_GUESS("foo.en-US.hi.srt", "en-US", 3, true);
TEST_LANG_GUESS("foo.en-US.sdh.srt", "en-US", 3, true);
TEST_LANG_GUESS("foo.en-simple.srt", "en-simple", 3, false);
TEST_LANG_GUESS("foo.sgn-FSL.srt", "sgn-FSL", 3, false);
TEST_LANG_GUESS("foo.gsw-u-sd-chzh.srt", "gsw-u-sd-chzh", 3, false);
TEST_LANG_GUESS("foo.en-.srt", "", -1, false);
TEST_LANG_GUESS("foo.en-US-.srt", "", -1, false);
TEST_LANG_GUESS("foo.en-aaaaaaaaa.srt", "", -1, false);
TEST_LANG_GUESS("foo.en-0.srt", "", -1, false);
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo(en).srt"), NULL)), "en");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo(en-US).srt"), NULL)), "en-US");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo().srt"), NULL)), "");
TEST_LANG_GUESS("foo[en].srt", "en", 3, false);
TEST_LANG_GUESS("foo[en-US].srt", "en-US", 3, false);
TEST_LANG_GUESS("foo[en-US][hi].srt", "en-US", 3, true);
TEST_LANG_GUESS("foo[en-US][sdh].srt", "en-US", 3, true);
TEST_LANG_GUESS("foo[].srt", "", -1, false);
TEST_LANG_GUESS("foo(en).srt", "en", 3, false);
TEST_LANG_GUESS("foo(en-US).srt", "en-US", 3, false);
TEST_LANG_GUESS("foo(en-US)(hi).srt", "en-US", 3, true);
TEST_LANG_GUESS("foo(en-US)(sdh).srt", "en-US", 3, true);
TEST_LANG_GUESS("foo().srt", "", -1, false);
talloc_free(ta_ctx);
}