cs/parse/
key_extractor.rs

1// src/parse/key_extractor.rs
2
3use crate::error::Result;
4use std::path::Path;
5use walkdir::WalkDir;
6
7use super::json_parser::JsonParser;
8use super::translation::TranslationEntry;
9use super::yaml_parser::YamlParser;
10
/// `KeyExtractor` searches translation entries across multiple translation
/// files — YAML (`*.yml`, `*.yaml`) and JSON (`*.json`) — found under a
/// directory tree.
///
/// Each match is returned as a `TranslationEntry`, which carries the full
/// dot-notation key path, the matched value and the file it came from.
#[derive(Debug, Clone)]
pub struct KeyExtractor {
    /// File or directory names to skip while walking. Each name is compared
    /// for exact equality against the final path component of every entry.
    exclusions: Vec<String>,
}
17
18impl Default for KeyExtractor {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl KeyExtractor {
25    /// Create a new `KeyExtractor`.
26    pub fn new() -> Self {
27        Self {
28            exclusions: Vec::new(),
29        }
30    }
31
32    /// Set exclusion patterns (e.g., directories or files to ignore)
33    pub fn set_exclusions(&mut self, exclusions: Vec<String>) {
34        self.exclusions = exclusions;
35    }
36
37    /// Recursively walk `base_dir` for `*.yml` (or `*.yaml`) files, parse each,
38    /// and return entries whose **value** contains `query`.
39    ///
40    /// Matching is case‑insensitive by default.
41    pub fn extract(&self, base_dir: &Path, query: &str) -> Result<Vec<TranslationEntry>> {
42        let mut matches = Vec::new();
43        let lowered = query.to_lowercase();
44
45        let walker = WalkDir::new(base_dir).into_iter();
46        for entry in walker
47            .filter_entry(|e| {
48                if is_ignored(e) {
49                    return false;
50                }
51                let name = e.file_name().to_string_lossy();
52                for excl in &self.exclusions {
53                    if name == excl.as_str() {
54                        return false;
55                    }
56                }
57                true
58            })
59            .filter_map(|e| e.ok())
60            .filter(|e| e.file_type().is_file())
61        {
62            let path = entry.path();
63            if let Some(ext) = path.extension() {
64                let ext_str = ext.to_string_lossy();
65                if ext_str == "yml" || ext_str == "yaml" {
66                    match YamlParser::parse_file(path) {
67                        Ok(entries) => {
68                            for e in entries {
69                                if e.value.to_lowercase().contains(&lowered) {
70                                    matches.push(e);
71                                }
72                            }
73                        }
74                        Err(e) => {
75                            eprintln!(
76                                "Warning: Failed to parse YAML file {}: {}",
77                                path.display(),
78                                e
79                            );
80                        }
81                    }
82                } else if ext_str == "json" {
83                    match JsonParser::parse_file(path) {
84                        Ok(entries) => {
85                            for e in entries {
86                                if e.value.to_lowercase().contains(&lowered) {
87                                    matches.push(e);
88                                }
89                            }
90                        }
91                        Err(e) => {
92                            // Only log if it's a JsonParseError or Io error, ignore others
93                            eprintln!(
94                                "Warning: Failed to parse JSON file {}: {}",
95                                path.display(),
96                                e
97                            );
98                        }
99                    }
100                }
101            }
102        }
103        Ok(matches)
104    }
105}
106
107fn is_ignored(entry: &walkdir::DirEntry) -> bool {
108    // Always allow the root directory of the search
109    if entry.depth() == 0 {
110        return false;
111    }
112
113    entry
114        .file_name()
115        .to_str()
116        .map(|s| {
117            s.starts_with('.') // Hidden files/dirs
118                || s == "node_modules"
119                || s == "target"
120                || s == "dist"
121                || s == "build"
122                || s == "vendor"
123        })
124        .unwrap_or(false)
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    use tempfile::tempdir;

    /// A query matches the same key in several locale files.
    #[test]
    fn test_key_extractor_simple() -> Result<()> {
        let dir = tempdir()?;
        let en_path = dir.path().join("en.yml");
        let fr_path = dir.path().join("fr.yml");

        // Write simple yaml files with proper format
        fs::write(
            &en_path,
            "greeting:\n  hello: \"Hello World\"\n  goodbye: \"Goodbye\"",
        )?;
        fs::write(
            &fr_path,
            "greeting:\n  hello: \"Bonjour World\"\n  goodbye: \"Au revoir\"",
        )?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "world")?;

        // Should find two entries (en and fr)
        assert_eq!(results.len(), 2);
        let keys: Vec<_> = results.iter().map(|e| e.key.clone()).collect();
        assert!(keys.contains(&"greeting.hello".to_string()));
        Ok(())
    }

    /// Matching ignores case in both the query and the values.
    #[test]
    fn test_key_extractor_case_insensitive() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("test.yml");

        fs::write(
            &yaml_path,
            "app:\n  title: \"My Application\"\n  description: \"A great APP for everyone\"",
        )?;

        let extractor = KeyExtractor::new();

        // Test case insensitive search
        let results = extractor.extract(dir.path(), "APP")?;
        assert_eq!(results.len(), 2); // Should match both "Application" and "APP"

        let values: Vec<_> = results.iter().map(|e| e.value.clone()).collect();
        assert!(values.contains(&"My Application".to_string()));
        assert!(values.contains(&"A great APP for everyone".to_string()));

        Ok(())
    }

    /// Every translation file in the directory is searched.
    #[test]
    fn test_key_extractor_multiple_files() -> Result<()> {
        let dir = tempdir()?;

        // Create multiple language files
        let en_path = dir.path().join("en.yml");
        let fr_path = dir.path().join("fr.yml");
        let de_path = dir.path().join("de.yml");

        fs::write(&en_path, "common:\n  action: \"Save Data\"")?;
        fs::write(&fr_path, "common:\n  action: \"Sauvegarder Data\"")?;
        fs::write(&de_path, "common:\n  action: \"Speichern Data\"")?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "data")?;

        // Should find all three files (case-insensitive)
        assert_eq!(results.len(), 3);

        let files: Vec<_> = results
            .iter()
            .map(|e| e.file.file_name().unwrap().to_string_lossy().to_string())
            .collect();
        assert!(files.contains(&"en.yml".to_string()));
        assert!(files.contains(&"fr.yml".to_string()));
        assert!(files.contains(&"de.yml".to_string()));

        Ok(())
    }

    /// Nested mappings are reported with their full dot-notation key path.
    #[test]
    fn test_key_extractor_deep_nested() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("nested.yml");

        fs::write(
            &yaml_path,
            "level1:\n  level2:\n    level3:\n      deep_key: \"Deep nested value\"\n      another: \"test value\"",
        )?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "deep")?;

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].key, "level1.level2.level3.deep_key");
        assert_eq!(results[0].value, "Deep nested value");

        Ok(())
    }

    /// A query that matches nothing yields an empty result, not an error.
    #[test]
    fn test_key_extractor_no_matches() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("test.yml");

        fs::write(
            &yaml_path,
            "greeting:\n  hello: \"Hello\"\n  goodbye: \"Goodbye\"",
        )?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "nonexistent")?;

        assert_eq!(results.len(), 0);

        Ok(())
    }

    /// YAML and JSON files are searched; other extensions are skipped.
    #[test]
    fn test_key_extractor_supports_json_and_yaml() -> Result<()> {
        let dir = tempdir()?;
        let yaml_path = dir.path().join("test.yml");
        let txt_path = dir.path().join("test.txt");
        let json_path = dir.path().join("test.json");

        fs::write(&yaml_path, "key: \"test value\"")?;
        fs::write(&txt_path, "key: test value")?; // Unsupported extension: should be ignored
        fs::write(&json_path, "{\"key\": \"test value\"}")?; // JSON is supported: should be matched

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "test")?;

        // Should find both YAML and JSON files
        assert_eq!(results.len(), 2);
        let extensions: Vec<_> = results
            .iter()
            .map(|e| e.file.extension().unwrap().to_string_lossy().to_string())
            .collect();
        assert!(extensions.contains(&"yml".to_string()));
        assert!(extensions.contains(&"json".to_string()));

        Ok(())
    }

    /// A file that fails to parse is skipped without failing the search.
    #[test]
    fn test_key_extractor_malformed_file() -> Result<()> {
        let dir = tempdir()?;
        let good_path = dir.path().join("good.yml");
        let bad_path = dir.path().join("bad.yml");

        fs::write(&good_path, "key: \"value\"")?;
        fs::write(&bad_path, "key: value: invalid: yaml")?; // Malformed YAML

        let extractor = KeyExtractor::new();
        // This should NOT return an error, but just skip the bad file
        let results = extractor.extract(dir.path(), "value")?;

        // Should find the good file
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "value");

        Ok(())
    }

    /// Hidden directories and well-known dependency directories below the
    /// search root are never descended into.
    #[test]
    fn test_key_extractor_skips_ignored_directories() -> Result<()> {
        let dir = tempdir()?;
        let hidden_dir = dir.path().join(".hidden");
        let deps_dir = dir.path().join("node_modules");
        fs::create_dir(&hidden_dir)?;
        fs::create_dir(&deps_dir)?;

        fs::write(dir.path().join("visible.yml"), "key: \"test value\"")?;
        fs::write(hidden_dir.join("hidden.yml"), "key: \"test value\"")?;
        fs::write(deps_dir.join("dep.yml"), "key: \"test value\"")?;

        let extractor = KeyExtractor::new();
        let results = extractor.extract(dir.path(), "test")?;

        // Only the file outside the ignored directories is searched
        assert_eq!(results.len(), 1);
        let name = results[0]
            .file
            .file_name()
            .unwrap()
            .to_string_lossy()
            .to_string();
        assert_eq!(name, "visible.yml");

        Ok(())
    }

    /// Names passed to `set_exclusions` remove both matching files and
    /// matching directories (with everything below them) from the search.
    #[test]
    fn test_key_extractor_respects_exclusions() -> Result<()> {
        let dir = tempdir()?;
        let legacy_dir = dir.path().join("legacy");
        fs::create_dir(&legacy_dir)?;

        fs::write(dir.path().join("keep.yml"), "key: \"test value\"")?;
        fs::write(dir.path().join("skip.yml"), "key: \"test value\"")?;
        fs::write(legacy_dir.join("old.yml"), "key: \"test value\"")?;

        let mut extractor = KeyExtractor::new();
        extractor.set_exclusions(vec!["skip.yml".to_string(), "legacy".to_string()]);
        let results = extractor.extract(dir.path(), "test")?;

        // Only the non-excluded file remains
        assert_eq!(results.len(), 1);
        let name = results[0]
            .file
            .file_name()
            .unwrap()
            .to_string_lossy()
            .to_string();
        assert_eq!(name, "keep.yml");

        Ok(())
    }
}