cs/parse/
key_extractor.rs

1// src/parse/key_extractor.rs
2
3use crate::error::Result;
4use std::path::Path;
5use walkdir::WalkDir;
6
7use super::json_parser::JsonParser;
8use super::translation::TranslationEntry;
9use super::yaml_parser::YamlParser;
10
/// `KeyExtractor` searches translation entries across multiple YAML
/// (`*.yml`, `*.yaml`) and JSON (`*.json`) translation files found under a
/// directory tree.
///
/// Each match is returned as a `TranslationEntry`, which carries the full
/// dot-notation key path, the matched value and the file it came from. See
/// `TranslationEntry` for the complete set of fields reported per match.
#[derive(Debug, Clone)]
pub struct KeyExtractor {
    /// File or directory names to leave out of the walk. A directory entry is
    /// skipped when its file name is exactly equal to one of these strings.
    exclusions: Vec<String>,
    /// When `true`, every file that fails to parse is reported on stderr with
    /// a warning line naming the file and the parse error.
    verbose: bool,
}
18
19impl Default for KeyExtractor {
20    fn default() -> Self {
21        Self::new()
22    }
23}
24
25impl KeyExtractor {
26    /// Create a new `KeyExtractor`.
27    pub fn new() -> Self {
28        Self {
29            exclusions: Vec::new(),
30            verbose: false,
31        }
32    }
33
34    /// Set exclusion patterns (e.g., directories or files to ignore)
35    pub fn set_exclusions(&mut self, exclusions: Vec<String>) {
36        self.exclusions = exclusions;
37    }
38
39    /// Set verbose mode for detailed error messages
40    pub fn set_verbose(&mut self, verbose: bool) {
41        self.verbose = verbose;
42    }
43
44    /// Recursively walk `base_dir` for `*.yml` (or `*.yaml`) files, parse each,
45    /// and return entries whose **value** contains `query`.
46    ///
47    /// Matching is case‑insensitive by default.
48    pub fn extract(&self, base_dir: &Path, query: &str) -> Result<Vec<TranslationEntry>> {
49        let mut matches = Vec::new();
50        let lowered = query.to_lowercase();
51        let mut skipped_files = 0;
52
53        let walker = WalkDir::new(base_dir).into_iter();
54        for entry in walker
55            .filter_entry(|e| {
56                if is_ignored(e) {
57                    return false;
58                }
59                let name = e.file_name().to_string_lossy();
60                for excl in &self.exclusions {
61                    if name == excl.as_str() {
62                        return false;
63                    }
64                }
65                true
66            })
67            .filter_map(|e| e.ok())
68            .filter(|e| e.file_type().is_file())
69        {
70            let path = entry.path();
71            if let Some(ext) = path.extension() {
72                let ext_str = ext.to_string_lossy();
73                if ext_str == "yml" || ext_str == "yaml" {
74                    match YamlParser::parse_file(path) {
75                        Ok(entries) => {
76                            use colored::Colorize;
77                            eprint!("{}", ".".green()); // Successfully parsed
78                            for e in entries {
79                                if e.value.to_lowercase().contains(&lowered) {
80                                    matches.push(e);
81                                }
82                            }
83                        }
84                        Err(e) => {
85                            use colored::Colorize;
86                            skipped_files += 1;
87                            eprint!("{}", "S".yellow()); // Skipped due to parse error
88                            if self.verbose {
89                                eprintln!(
90                                    "\nWarning: Failed to parse YAML file {}: {}",
91                                    path.display(),
92                                    e
93                                );
94                            }
95                        }
96                    }
97                } else if ext_str == "json" {
98                    match JsonParser::parse_file(path) {
99                        Ok(entries) => {
100                            use colored::Colorize;
101                            eprint!("{}", ".".green()); // Successfully parsed
102                            for e in entries {
103                                if e.value.to_lowercase().contains(&lowered) {
104                                    matches.push(e);
105                                }
106                            }
107                        }
108                        Err(e) => {
109                            use colored::Colorize;
110                            skipped_files += 1;
111                            eprint!("{}", "S".yellow()); // Skipped due to parse error
112                            if self.verbose {
113                                eprintln!(
114                                    "\nWarning: Failed to parse JSON file {}: {}",
115                                    path.display(),
116                                    e
117                                );
118                            }
119                        }
120                    }
121                }
122            }
123        }
124
125        // Print newline and summary if files were skipped
126        if skipped_files > 0 {
127            eprintln!(); // Newline after the S indicators
128            eprintln!(
129                "(Skipped {} invalid translation file{})",
130                skipped_files,
131                if skipped_files == 1 { "" } else { "s" }
132            );
133        }
134
135        Ok(matches)
136    }
137}
138
139fn is_ignored(entry: &walkdir::DirEntry) -> bool {
140    // Always allow the root directory of the search
141    if entry.depth() == 0 {
142        return false;
143    }
144
145    entry
146        .file_name()
147        .to_str()
148        .map(|s| {
149            s.starts_with('.') // Hidden files/dirs
150                || s == "node_modules"
151                || s == "target"
152                || s == "dist"
153                || s == "build"
154                || s == "vendor"
155        })
156        .unwrap_or(false)
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    use tempfile::tempdir;

    // Two YAML files each contain one value with "World"; a lowercase query
    // must find both and report the dot-notation key.
    #[test]
    fn test_key_extractor_simple() -> Result<()> {
        let dir = tempdir()?;
        let en_path = dir.path().join("en.yml");
        let fr_path = dir.path().join("fr.yml");

        // Write simple yaml files with proper format
        fs::write(
            &en_path,
            "greeting:\n  hello: \"Hello World\"\n  goodbye: \"Goodbye\"",
        )?;
        fs::write(
            &fr_path,
            "greeting:\n  hello: \"Bonjour World\"\n  goodbye: \"Au revoir\"",
        )?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "world")?;

        // Should find two entries (en and fr)
        assert_eq!(results.len(), 2);
        let keys: Vec<_> = results.iter().map(|e| e.key.clone()).collect();
        assert!(keys.contains(&"greeting.hello".to_string()));
        Ok(())
    }

    // An uppercase query must match values regardless of their casing.
    #[test]
    fn test_key_extractor_case_insensitive() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("test.yml");

        fs::write(
            &yaml_path,
            "app:\n  title: \"My Application\"\n  description: \"A great APP for everyone\"",
        )?;

        let extractor = KeyExtractor::new();

        // Test case insensitive search
        let results = extractor.extract(dir.path(), "APP")?;
        assert_eq!(results.len(), 2); // Should match both "Application" and "APP"

        let values: Vec<_> = results.iter().map(|e| e.value.clone()).collect();
        assert!(values.contains(&"My Application".to_string()));
        assert!(values.contains(&"A great APP for everyone".to_string()));

        Ok(())
    }

    // Every file in the directory is searched and each match records the
    // file it came from.
    #[test]
    fn test_key_extractor_multiple_files() -> Result<()> {
        let dir = tempdir()?;

        // Create multiple language files
        let en_path = dir.path().join("en.yml");
        let fr_path = dir.path().join("fr.yml");
        let de_path = dir.path().join("de.yml");

        fs::write(&en_path, "common:\n  action: \"Save Data\"")?;
        fs::write(&fr_path, "common:\n  action: \"Sauvegarder Data\"")?;
        fs::write(&de_path, "common:\n  action: \"Speichern Data\"")?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "data")?;

        // Should find all three files (case-insensitive)
        assert_eq!(results.len(), 3);

        let files: Vec<_> = results
            .iter()
            .map(|e| e.file.file_name().unwrap().to_string_lossy().to_string())
            .collect();
        assert!(files.contains(&"en.yml".to_string()));
        assert!(files.contains(&"fr.yml".to_string()));
        assert!(files.contains(&"de.yml".to_string()));

        Ok(())
    }

    // A value nested four levels deep is reported with its full
    // dot-separated key path; only the value is searched, so the sibling
    // "test value" does not match "deep".
    #[test]
    fn test_key_extractor_deep_nested() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("nested.yml");

        fs::write(
            &yaml_path,
            "level1:\n  level2:\n    level3:\n      deep_key: \"Deep nested value\"\n      another: \"test value\"",
        )?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "deep")?;

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].key, "level1.level2.level3.deep_key");
        assert_eq!(results[0].value, "Deep nested value");

        Ok(())
    }

    // A query that occurs in no value yields an empty result, not an error.
    #[test]
    fn test_key_extractor_no_matches() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("test.yml");

        fs::write(
            &yaml_path,
            "greeting:\n  hello: \"Hello\"\n  goodbye: \"Goodbye\"",
        )?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "nonexistent")?;

        assert_eq!(results.len(), 0);

        Ok(())
    }

    // YAML and JSON files are both searched; a file with any other
    // extension is not, even if its content would match.
    #[test]
    fn test_key_extractor_supports_json_and_yaml() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("test.yml");
        let txt_path = dir.path().join("test.txt");
        let json_path = dir.path().join("test.json");

        fs::write(&yaml_path, "key: \"test value\"")?;
        fs::write(&txt_path, "key: test value")?; // Unsupported extension: this should be ignored
        fs::write(&json_path, "{\"key\": \"test value\"}")?; // This should be parsed as JSON and matched

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "test")?;

        // Should find both YAML and JSON files
        assert_eq!(results.len(), 2);
        let extensions: Vec<_> = results
            .iter()
            .map(|e| e.file.extension().unwrap().to_string_lossy().to_string())
            .collect();
        assert!(extensions.contains(&"yml".to_string()));
        assert!(extensions.contains(&"json".to_string()));

        Ok(())
    }

    // A file that fails to parse is skipped; matches from the remaining
    // files are still returned and the call succeeds.
    #[test]
    fn test_key_extractor_malformed_file() -> Result<()> {
        let dir = tempdir()?;
        let good_path = dir.path().join("good.yml");
        let bad_path = dir.path().join("bad.yml");

        fs::write(&good_path, "key: \"value\"")?;
        fs::write(&bad_path, "key: value: invalid: yaml")?; // Malformed YAML

        let extractor = KeyExtractor::new();
        // This should NOT return an error, but just skip the bad file
        let results = extractor.extract(dir.path(), "value")?;

        // Should find the good file
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "value");

        Ok(())
    }
}
328}