subx_cli/core/
language.rs1use regex::Regex;
17use std::collections::HashMap;
18use std::path::Path;
19
/// Identifies which part of a path a detected language was derived from.
///
/// `Eq` is derived alongside `PartialEq` because the enum carries no data,
/// so equality is total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LanguageSource {
    /// The language matched one of the path's components.
    Directory,
    /// The language matched a delimited token inside the file name.
    Filename,
    /// NOTE(review): not produced by any detector visible in this file;
    /// presumably reserved for extension-based detection. Confirm before use.
    Extension,
}
30impl Default for LanguageDetector {
31 fn default() -> Self {
32 Self::new()
33 }
34}
35
36#[derive(Debug, Clone)]
38pub struct LanguageInfo {
39 pub code: String,
41 pub source: LanguageSource,
43 pub confidence: f32,
45}
46
47pub struct LanguageDetector {
49 language_codes: HashMap<String, String>,
50 directory_patterns: Vec<String>,
51 filename_patterns: Vec<Regex>,
52}
53
54impl LanguageDetector {
55 pub fn new() -> Self {
59 let mut language_codes = HashMap::new();
61 language_codes.insert("tc".to_string(), "tc".to_string());
63 language_codes.insert("繁中".to_string(), "tc".to_string()); language_codes.insert("繁體".to_string(), "tc".to_string()); language_codes.insert("cht".to_string(), "tc".to_string());
66 language_codes.insert("sc".to_string(), "sc".to_string());
68 language_codes.insert("简中".to_string(), "sc".to_string()); language_codes.insert("简体".to_string(), "sc".to_string()); language_codes.insert("chs".to_string(), "sc".to_string());
71 language_codes.insert("en".to_string(), "en".to_string());
73 language_codes.insert("英文".to_string(), "en".to_string()); language_codes.insert("english".to_string(), "en".to_string());
75 let filename_patterns = vec![
78 Regex::new(r"\.([a-z]{2,3})\.").unwrap(), Regex::new(r"_([a-z]{2,3})\.").unwrap(), Regex::new(r"-([a-z]{2,3})\.").unwrap(), ];
82
83 Self {
84 language_codes,
85 directory_patterns: vec!["tc".to_string(), "sc".to_string(), "en".to_string()],
86 filename_patterns,
87 }
88 }
89 pub fn detect_from_path(&self, path: &Path) -> Option<LanguageInfo> {
95 if let Some(lang) = self.detect_from_directory(path) {
96 return Some(lang);
97 }
98 if let Some(lang) = self.detect_from_filename(path) {
99 return Some(lang);
100 }
101 None
102 }
103
104 pub fn get_primary_language(&self, path: &Path) -> Option<String> {
110 self.detect_all_languages(path)
111 .into_iter()
112 .next()
113 .map(|lang| lang.code)
114 }
115
116 pub fn detect_all_languages(&self, path: &Path) -> Vec<LanguageInfo> {
120 let mut langs = Vec::new();
121 if let Some(dir_lang) = self.detect_from_directory(path) {
122 langs.push(dir_lang);
123 }
124 if let Some(file_lang) = self.detect_from_filename(path) {
125 langs.push(file_lang);
126 }
127 langs.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
128 langs.dedup_by(|a, b| a.code == b.code);
129 langs
130 }
131
132 fn detect_from_directory(&self, path: &Path) -> Option<LanguageInfo> {
133 for comp in path.components() {
134 if let Some(s) = comp.as_os_str().to_str() {
135 let key = s.to_lowercase();
136 if let Some(code) = self.language_codes.get(&key) {
137 return Some(LanguageInfo {
138 code: code.clone(),
139 source: LanguageSource::Directory,
140 confidence: 0.9,
141 });
142 }
143 }
144 }
145 None
146 }
147
148 fn detect_from_filename(&self, path: &Path) -> Option<LanguageInfo> {
149 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
150 for re in &self.filename_patterns {
151 if let Some(cap) = re.captures(name) {
152 if let Some(m) = cap.get(1) {
153 if let Some(code) = self.language_codes.get(m.as_str()) {
154 return Some(LanguageInfo {
155 code: code.clone(),
156 source: LanguageSource::Filename,
157 confidence: 0.8,
158 });
159 }
160 }
161 }
162 }
163 }
164 None
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Runs a fresh detector on `path` and returns its primary language code.
    fn primary(path: &str) -> Option<String> {
        LanguageDetector::new().get_primary_language(Path::new(path))
    }

    /// A directory named after a language code determines the language.
    #[test]
    fn test_directory_language_detection() {
        assert_eq!(primary("tc/subtitle.srt").as_deref(), Some("tc"));
    }

    /// A dot-delimited language token in the file name is recognised.
    #[test]
    fn test_filename_language_detection() {
        assert_eq!(primary("subtitle.sc.ass").as_deref(), Some("sc"));
    }

    /// A path without any language hint yields no result.
    #[test]
    fn test_no_language_detection() {
        assert_eq!(primary("subtitle.ass"), None);
    }
}