cs/parse/
key_extractor.rs

1// src/parse/key_extractor.rs
2
3use crate::error::Result;
4use std::path::Path;
5use walkdir::WalkDir;
6
7use super::json_parser::JsonParser;
8use super::translation::TranslationEntry;
9use super::yaml_parser::YamlParser;
10
/// `KeyExtractor` searches translation entries across multiple translation
/// files (YAML and JSON) found under a directory tree.
///
/// Each match is returned as a `TranslationEntry`, which carries the full
/// dot-notation key path, the matched value and the file it came from.
///
/// The type holds no state, so it is cheap to create, copy and share.
#[derive(Debug, Clone, Copy, Default)]
pub struct KeyExtractor;
21
22impl KeyExtractor {
23    /// Create a new `KeyExtractor`.
24    pub fn new() -> Self {
25        Self
26    }
27
28    /// Recursively walk `base_dir` for `*.yml` (or `*.yaml`) files, parse each,
29    /// and return entries whose **value** contains `query`.
30    ///
31    /// Matching is case‑insensitive by default.
32    pub fn extract(&self, base_dir: &Path, query: &str) -> Result<Vec<TranslationEntry>> {
33        let mut matches = Vec::new();
34        let lowered = query.to_lowercase();
35
36        for entry in WalkDir::new(base_dir)
37            .into_iter()
38            .filter_map(|e| e.ok())
39            .filter(|e| e.file_type().is_file())
40        {
41            let path = entry.path();
42            if let Some(ext) = path.extension() {
43                let ext_str = ext.to_string_lossy();
44                if ext_str == "yml" || ext_str == "yaml" {
45                    match YamlParser::parse_file(path) {
46                        Ok(entries) => {
47                            for e in entries {
48                                if e.value.to_lowercase().contains(&lowered) {
49                                    matches.push(e);
50                                }
51                            }
52                        }
53                        Err(e) => {
54                            eprintln!(
55                                "Warning: Failed to parse YAML file {}: {}",
56                                path.display(),
57                                e
58                            );
59                        }
60                    }
61                } else if ext_str == "json" {
62                    match JsonParser::parse_file(path) {
63                        Ok(entries) => {
64                            for e in entries {
65                                if e.value.to_lowercase().contains(&lowered) {
66                                    matches.push(e);
67                                }
68                            }
69                        }
70                        Err(e) => {
71                            eprintln!(
72                                "Warning: Failed to parse JSON file {}: {}",
73                                path.display(),
74                                e
75                            );
76                        }
77                    }
78                }
79            }
80        }
81        Ok(matches)
82    }
83}
84
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    use tempfile::tempdir;

    /// Write every `(file name, contents)` pair into `dir`.
    fn write_fixtures(dir: &Path, fixtures: &[(&str, &str)]) -> Result<()> {
        for &(name, contents) in fixtures {
            fs::write(dir.join(name), contents)?;
        }
        Ok(())
    }

    /// A value present in two locale files is reported once per file.
    #[test]
    fn test_key_extractor_simple() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[
                (
                    "en.yml",
                    "greeting:\n  hello: \"Hello World\"\n  goodbye: \"Goodbye\"",
                ),
                (
                    "fr.yml",
                    "greeting:\n  hello: \"Bonjour World\"\n  goodbye: \"Au revoir\"",
                ),
            ],
        )?;

        let results = KeyExtractor::new().extract(dir.path(), "world")?;

        // One hit per locale file (en and fr).
        assert_eq!(results.len(), 2);
        assert!(results.iter().any(|e| e.key == "greeting.hello"));
        Ok(())
    }

    /// The query's letter case does not affect which values match.
    #[test]
    fn test_key_extractor_case_insensitive() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[(
                "test.yml",
                "app:\n  title: \"My Application\"\n  description: \"A great APP for everyone\"",
            )],
        )?;

        // An upper-case query must hit both "Application" and "APP".
        let results = KeyExtractor::new().extract(dir.path(), "APP")?;

        assert_eq!(results.len(), 2);
        assert!(results.iter().any(|e| e.value == "My Application"));
        assert!(results.iter().any(|e| e.value == "A great APP for everyone"));
        Ok(())
    }

    /// Every translation file in the directory is searched.
    #[test]
    fn test_key_extractor_multiple_files() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[
                ("en.yml", "common:\n  action: \"Save Data\""),
                ("fr.yml", "common:\n  action: \"Sauvegarder Data\""),
                ("de.yml", "common:\n  action: \"Speichern Data\""),
            ],
        )?;

        let results = KeyExtractor::new().extract(dir.path(), "data")?;

        // All three files contain "Data", matched case-insensitively.
        assert_eq!(results.len(), 3);

        let files: Vec<String> = results
            .iter()
            .map(|e| e.file.file_name().unwrap().to_string_lossy().to_string())
            .collect();
        assert!(files.iter().any(|f| f == "en.yml"));
        assert!(files.iter().any(|f| f == "fr.yml"));
        assert!(files.iter().any(|f| f == "de.yml"));
        Ok(())
    }

    /// Nested mappings are reported with their full dot-notation key.
    #[test]
    fn test_key_extractor_deep_nested() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[(
                "nested.yml",
                "level1:\n  level2:\n    level3:\n      deep_key: \"Deep nested value\"\n      another: \"test value\"",
            )],
        )?;

        let results = KeyExtractor::new().extract(dir.path(), "deep")?;

        assert_eq!(results.len(), 1);
        let hit = &results[0];
        assert_eq!(hit.key, "level1.level2.level3.deep_key");
        assert_eq!(hit.value, "Deep nested value");
        Ok(())
    }

    /// A query that occurs in no value yields an empty result, not an error.
    #[test]
    fn test_key_extractor_no_matches() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[(
                "test.yml",
                "greeting:\n  hello: \"Hello\"\n  goodbye: \"Goodbye\"",
            )],
        )?;

        let results = KeyExtractor::new().extract(dir.path(), "nonexistent")?;

        assert!(results.is_empty());
        Ok(())
    }

    /// YAML and JSON files are both searched; other extensions are skipped.
    #[test]
    fn test_key_extractor_supports_json_and_yaml() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[
                ("test.yml", "key: \"test value\""),
                // Unsupported extension: must not contribute a result.
                ("test.txt", "key: test value"),
                // JSON is a supported format and must be picked up.
                ("test.json", "{\"key\": \"test value\"}"),
            ],
        )?;

        let results = KeyExtractor::new().extract(dir.path(), "test")?;

        // Exactly the YAML and the JSON file match; the .txt file is skipped.
        assert_eq!(results.len(), 2);

        let extensions: Vec<String> = results
            .iter()
            .map(|e| e.file.extension().unwrap().to_string_lossy().to_string())
            .collect();
        assert!(extensions.iter().any(|ext| ext == "yml"));
        assert!(extensions.iter().any(|ext| ext == "json"));
        Ok(())
    }

    /// A file that fails to parse is skipped without failing the whole search.
    #[test]
    fn test_key_extractor_malformed_file() -> Result<()> {
        let dir = tempdir()?;
        write_fixtures(
            dir.path(),
            &[
                ("good.yml", "key: \"value\""),
                // Malformed YAML.
                ("bad.yml", "key: value: invalid: yaml"),
            ],
        )?;

        // Must NOT return an error; the bad file is simply skipped.
        let results = KeyExtractor::new().extract(dir.path(), "value")?;

        // Only the well-formed file contributes a result.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "value");
        Ok(())
    }
}