use super::{LabelPurpose, LabelQualifier};
pub fn codec(code: &str) -> &str {
match code {
"MLP" => "TrueHD",
"AC3" | "AC" => "Dolby Digital",
"DTS" => "DTS",
"DDL" => "Dolby Digital Plus",
"WAV" => "PCM",
"atmos" => "Dolby Atmos",
_ => code,
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LangInfo {
pub code: &'static str,
pub variant: &'static str,
}
pub fn lang(text: &str) -> Option<LangInfo> {
let lower = text.to_lowercase();
for (needle, code, variant) in COMPOUND_LANGS {
if lower.contains(needle) {
return Some(LangInfo { code, variant });
}
}
for (needle, code) in BARE_LANGS {
if has_word(&lower, needle) {
return Some(LangInfo { code, variant: "" });
}
}
None
}
const COMPOUND_LANGS: &[(&str, &str, &str)] = &[
("brazilian portuguese", "por", "Brazilian"),
("euro portuguese", "por", "European"),
("european portuguese", "por", "European"),
("castilian spanish", "spa", "Castilian"),
("latin american spanish", "spa", "Latin American"),
("latin spanish", "spa", "Latin American"),
("canadian french", "fra", "Canadian"),
("parisian french", "fra", "Parisian"),
("australian english", "eng", "Australian"),
("austrailian english", "eng", "Australian"), ("british english", "eng", "British"),
("simplified chinese", "zho", "Simplified"),
("traditional chinese", "zho", "Traditional"),
("mandarin chinese", "zho", "Mandarin"),
("cantonese chinese", "zho", "Cantonese"),
];
const BARE_LANGS: &[(&str, &str)] = &[
("english", "eng"),
("spanish", "spa"),
("french", "fra"),
("german", "deu"),
("italian", "ita"),
("japanese", "jpn"),
("chinese", "zho"),
("mandarin", "zho"),
("cantonese", "zho"),
("portuguese", "por"),
("polish", "pol"),
("czech", "ces"),
("hungarian", "hun"),
("dutch", "nld"),
("korean", "kor"),
("arabic", "ara"),
("hindi", "hin"),
("turkish", "tur"),
("thai", "tha"),
("swedish", "swe"),
("norwegian", "nor"),
("danish", "dan"),
("finnish", "fin"),
("hebrew", "heb"),
("russian", "rus"),
("greek", "ell"),
("vietnamese", "vie"),
("indonesian", "ind"),
("malay", "msa"),
("ukrainian", "ukr"),
("romanian", "ron"),
("bulgarian", "bul"),
("croatian", "hrv"),
("serbian", "srp"),
("slovak", "slk"),
("slovenian", "slv"),
("estonian", "est"),
("latvian", "lav"),
("lithuanian", "lit"),
("icelandic", "isl"),
("basque", "eus"),
("catalan", "cat"),
("galician", "glg"),
];
pub fn purpose(text: &str) -> LabelPurpose {
let lower = text.to_lowercase();
if lower.contains("audio description") || lower.contains("descriptive service") {
return LabelPurpose::Descriptive;
}
if lower.contains("music only") {
return LabelPurpose::Score;
}
if has_word(&lower, "commentary") {
return LabelPurpose::Commentary;
}
if has_word(&lower, "descriptive")
|| has_word(&lower, "description")
|| has_word(&lower, "described")
{
return LabelPurpose::Descriptive;
}
if has_word(&lower, "score") {
return LabelPurpose::Score;
}
if has_word(&lower, "ime") {
return LabelPurpose::Ime;
}
LabelPurpose::Normal
}
pub fn qualifier(text: &str) -> LabelQualifier {
let lower = text.to_lowercase();
if has_word(&lower, "sdh") || has_word(&lower, "captions") {
return LabelQualifier::Sdh;
}
if lower.contains("descriptive service") || has_word(&lower, "rnib") {
return LabelQualifier::DescriptiveService;
}
if has_word(&lower, "forced") {
return LabelQualifier::Forced;
}
LabelQualifier::None
}
fn has_word(haystack: &str, needle: &str) -> bool {
if needle.is_empty() {
return false;
}
let bytes = haystack.as_bytes();
let nb = needle.as_bytes();
let mut i = 0;
while i + nb.len() <= bytes.len() {
if &bytes[i..i + nb.len()] == nb {
let before = if i == 0 { None } else { Some(bytes[i - 1]) };
let after = bytes.get(i + nb.len()).copied();
let bound = |c: Option<u8>| match c {
None => true,
Some(b) => !b.is_ascii_alphanumeric(),
};
if bound(before) && bound(after) {
return true;
}
}
i += 1;
}
false
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn codec_known_aliases() {
assert_eq!(codec("MLP"), "TrueHD");
assert_eq!(codec("AC3"), "Dolby Digital");
assert_eq!(codec("AC"), "Dolby Digital");
assert_eq!(codec("DDL"), "Dolby Digital Plus");
assert_eq!(codec("atmos"), "Dolby Atmos");
}
#[test]
fn codec_unknown_passes_through() {
assert_eq!(codec("FX9"), "FX9");
assert_eq!(codec(""), "");
}
fn li(code: &'static str, variant: &'static str) -> LangInfo {
LangInfo { code, variant }
}
#[test]
fn lang_bare_names_have_empty_variant() {
assert_eq!(lang("English"), Some(li("eng", "")));
assert_eq!(lang("english"), Some(li("eng", "")));
assert_eq!(lang("Spanish 5.1 Dolby Digital"), Some(li("spa", "")));
assert_eq!(lang("japanese"), Some(li("jpn", "")));
assert_eq!(lang("Italian"), Some(li("ita", "")));
}
#[test]
fn lang_compounds_carry_variant() {
assert_eq!(
lang("Brazilian Portuguese 5.1"),
Some(li("por", "Brazilian"))
);
assert_eq!(lang("Castilian Spanish"), Some(li("spa", "Castilian")));
assert_eq!(
lang("Canadian French Dolby Digital"),
Some(li("fra", "Canadian"))
);
assert_eq!(
lang("Latin American Spanish"),
Some(li("spa", "Latin American"))
);
assert_eq!(lang("Simplified Chinese"), Some(li("zho", "Simplified")));
assert_eq!(lang("British English"), Some(li("eng", "British")));
}
#[test]
fn lang_compounds_win_over_bare() {
assert_eq!(lang("Brazilian Portuguese").unwrap().variant, "Brazilian");
assert_eq!(lang("Canadian French").unwrap().variant, "Canadian");
}
#[test]
fn lang_unknown_returns_none() {
assert_eq!(lang("Klingon Dolby Atmos"), None);
assert_eq!(lang(""), None);
assert_eq!(lang("eng"), None); }
#[test]
fn lang_word_boundary_avoids_substring_false_positive() {
assert_eq!(lang("Audio Engineering Demo"), None);
}
#[test]
fn purpose_recognizes_commentary() {
assert_eq!(purpose("English Commentary"), LabelPurpose::Commentary);
assert_eq!(purpose("Director's Commentary"), LabelPurpose::Commentary);
}
#[test]
fn purpose_recognizes_descriptive() {
assert_eq!(
purpose("English Descriptive Audio"),
LabelPurpose::Descriptive
);
assert_eq!(purpose("Audio Description"), LabelPurpose::Descriptive);
assert_eq!(purpose("Described Video"), LabelPurpose::Descriptive);
}
#[test]
fn purpose_descriptive_service_routes_to_descriptive() {
assert_eq!(
purpose("English Descriptive Service"),
LabelPurpose::Descriptive
);
}
#[test]
fn purpose_word_boundary_avoids_commenter_false_positive() {
assert_eq!(purpose("Commenter Pro audio track"), LabelPurpose::Normal);
}
#[test]
fn purpose_recognizes_score() {
assert_eq!(purpose("Music Only"), LabelPurpose::Score);
assert_eq!(purpose("Isolated Score"), LabelPurpose::Score);
}
#[test]
fn purpose_unknown_is_normal() {
assert_eq!(purpose("English Dolby Atmos"), LabelPurpose::Normal);
assert_eq!(purpose(""), LabelPurpose::Normal);
}
#[test]
fn qualifier_recognizes_sdh() {
assert_eq!(qualifier("English SDH"), LabelQualifier::Sdh);
assert_eq!(qualifier("English Captions"), LabelQualifier::Sdh);
}
#[test]
fn qualifier_recognizes_forced() {
assert_eq!(qualifier("English Forced"), LabelQualifier::Forced);
assert_eq!(qualifier("Forced Narrative"), LabelQualifier::Forced);
}
#[test]
fn qualifier_recognizes_descriptive_service() {
assert_eq!(
qualifier("English RNIB"),
LabelQualifier::DescriptiveService
);
assert_eq!(
qualifier("English Descriptive Service"),
LabelQualifier::DescriptiveService
);
}
#[test]
fn qualifier_sdh_wins_over_forced_when_both_present() {
assert_eq!(qualifier("English Forced SDH"), LabelQualifier::Sdh);
}
#[test]
fn qualifier_unknown_is_none() {
assert_eq!(qualifier("English"), LabelQualifier::None);
assert_eq!(qualifier(""), LabelQualifier::None);
}
#[test]
fn has_word_basic() {
assert!(has_word("english forced", "english"));
assert!(has_word("english forced", "forced"));
assert!(has_word("english", "english"));
}
#[test]
fn has_word_rejects_substring() {
assert!(!has_word("engineering", "english"));
assert!(!has_word("englishman", "english"));
assert!(!has_word("aenglish", "english"));
}
#[test]
fn has_word_punctuation_boundary() {
assert!(has_word("english (sdh)", "sdh"));
assert!(has_word("commentary,extra,info", "commentary"));
}
}