subx_cli/core/
language.rs1use regex::Regex;
17use std::collections::HashMap;
18use std::path::Path;
19
/// Identifies which part of a path a detected language was read from.
#[derive(Clone, Debug, PartialEq)]
pub enum LanguageSource {
    /// The language matched a path component (see `detect_from_directory`).
    Directory,
    /// The language matched a delimited token inside the file name
    /// (see `detect_from_filename`).
    Filename,
    /// Language taken from the file extension.
    ///
    /// NOTE(review): no code visible in this file constructs this variant.
    Extension,
}
30impl Default for LanguageDetector {
31 fn default() -> Self {
32 Self::new()
33 }
34}
35
36#[derive(Debug, Clone)]
38pub struct LanguageInfo {
39 pub code: String,
41 pub source: LanguageSource,
43 pub confidence: f32,
45}
46
47pub struct LanguageDetector {
49 language_codes: HashMap<String, String>,
50 directory_patterns: Vec<String>,
51 filename_patterns: Vec<Regex>,
52}
53
54impl LanguageDetector {
55 pub fn new() -> Self {
59 let mut language_codes = HashMap::new();
60 language_codes.insert("tc".to_string(), "tc".to_string());
62 language_codes.insert("繁中".to_string(), "tc".to_string());
63 language_codes.insert("繁體".to_string(), "tc".to_string());
64 language_codes.insert("cht".to_string(), "tc".to_string());
65 language_codes.insert("sc".to_string(), "sc".to_string());
67 language_codes.insert("簡中".to_string(), "sc".to_string());
68 language_codes.insert("簡體".to_string(), "sc".to_string());
69 language_codes.insert("chs".to_string(), "sc".to_string());
70 language_codes.insert("en".to_string(), "en".to_string());
72 language_codes.insert("英文".to_string(), "en".to_string());
73 language_codes.insert("english".to_string(), "en".to_string());
74 let filename_patterns = vec![
77 Regex::new(r"\.([a-z]{2,3})\.").unwrap(), Regex::new(r"_([a-z]{2,3})\.").unwrap(), Regex::new(r"-([a-z]{2,3})\.").unwrap(), ];
81
82 Self {
83 language_codes,
84 directory_patterns: vec!["tc".to_string(), "sc".to_string(), "en".to_string()],
85 filename_patterns,
86 }
87 }
88 pub fn detect_from_path(&self, path: &Path) -> Option<LanguageInfo> {
94 if let Some(lang) = self.detect_from_directory(path) {
95 return Some(lang);
96 }
97 if let Some(lang) = self.detect_from_filename(path) {
98 return Some(lang);
99 }
100 None
101 }
102
103 pub fn get_primary_language(&self, path: &Path) -> Option<String> {
109 self.detect_all_languages(path)
110 .into_iter()
111 .next()
112 .map(|lang| lang.code)
113 }
114
115 pub fn detect_all_languages(&self, path: &Path) -> Vec<LanguageInfo> {
119 let mut langs = Vec::new();
120 if let Some(dir_lang) = self.detect_from_directory(path) {
121 langs.push(dir_lang);
122 }
123 if let Some(file_lang) = self.detect_from_filename(path) {
124 langs.push(file_lang);
125 }
126 langs.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
127 langs.dedup_by(|a, b| a.code == b.code);
128 langs
129 }
130
131 fn detect_from_directory(&self, path: &Path) -> Option<LanguageInfo> {
132 for comp in path.components() {
133 if let Some(s) = comp.as_os_str().to_str() {
134 let key = s.to_lowercase();
135 if let Some(code) = self.language_codes.get(&key) {
136 return Some(LanguageInfo {
137 code: code.clone(),
138 source: LanguageSource::Directory,
139 confidence: 0.9,
140 });
141 }
142 }
143 }
144 None
145 }
146
147 fn detect_from_filename(&self, path: &Path) -> Option<LanguageInfo> {
148 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
149 for re in &self.filename_patterns {
150 if let Some(cap) = re.captures(name) {
151 if let Some(m) = cap.get(1) {
152 if let Some(code) = self.language_codes.get(m.as_str()) {
153 return Some(LanguageInfo {
154 code: code.clone(),
155 source: LanguageSource::Filename,
156 confidence: 0.8,
157 });
158 }
159 }
160 }
161 }
162 }
163 None
164 }
165}
166
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Runs a freshly built detector on `path` and returns its primary
    /// language code, if any.
    fn primary(path: &str) -> Option<String> {
        LanguageDetector::new().get_primary_language(Path::new(path))
    }

    /// A directory named after a known code determines the language.
    #[test]
    fn test_directory_language_detection() {
        assert_eq!(primary("tc/subtitle.srt").as_deref(), Some("tc"));
    }

    /// A `.code.` token inside the file name determines the language.
    #[test]
    fn test_filename_language_detection() {
        assert_eq!(primary("subtitle.sc.ass").as_deref(), Some("sc"));
    }

    /// A path without any language marker yields no result.
    #[test]
    fn test_no_language_detection() {
        assert!(primary("subtitle.ass").is_none());
    }
}