// cs/parse/yaml_parser.rs

use crate::error::{Result, SearchError};
use std::collections::{HashMap, VecDeque};
use std::fs;
use std::path::{Path, PathBuf};
use yaml_rust::{Yaml, YamlLoader};

use super::translation::TranslationEntry;

/// Parser for YAML translation files.
///
/// The type carries no state; all functionality is exposed through associated
/// functions such as `YamlParser::parse_file`.
#[derive(Debug, Clone, Copy, Default)]
pub struct YamlParser;

12impl YamlParser {
13    /// Fast pre-check: does this file contain the search query?
14    /// Uses grep library for exact match before expensive YAML parsing.
15    /// Returns true if the file contains the query (case-insensitive).
16    pub fn contains_query(path: &Path, query: &str) -> Result<bool> {
17        use grep_regex::RegexMatcherBuilder;
18        use grep_searcher::sinks::UTF8;
19        use grep_searcher::SearcherBuilder;
20
21        // Build matcher for case-insensitive fixed-string search
22        let matcher = RegexMatcherBuilder::new()
23            .case_insensitive(true)
24            .fixed_strings(true) // Treat as literal string, not regex
25            .build(query)
26            .map_err(|e| {
27                SearchError::yaml_parse_error(path, format!("Failed to build matcher: {}", e))
28            })?;
29
30        // Use searcher to check if file contains the query
31        let mut searcher = SearcherBuilder::new().build();
32        let mut found = false;
33
34        searcher
35            .search_path(
36                &matcher,
37                path,
38                UTF8(|_line_num, _line_content| {
39                    found = true;
40                    Ok(false) // Stop searching after first match
41                }),
42            )
43            .map_err(|e| SearchError::yaml_parse_error(path, format!("Search failed: {}", e)))?;
44
45        Ok(found)
46    }
47
48    pub fn parse_file(path: &Path) -> Result<Vec<TranslationEntry>> {
49        Self::parse_file_with_query(path, None)
50    }
51
52    /// Parse YAML file, optionally filtering by query for better performance.
53    /// If query is provided, uses bottom-up approach: finds exact matches with grep,
54    /// then traces keys upward WITHOUT parsing the entire YAML structure.
55    pub fn parse_file_with_query(
56        path: &Path,
57        query: Option<&str>,
58    ) -> Result<Vec<TranslationEntry>> {
59        let content = fs::read_to_string(path).map_err(|e| {
60            SearchError::yaml_parse_error(path, format!("Failed to read file: {}", e))
61        })?;
62
63        // Strip ERB templates to support Rails-style YAML fixtures
64        let cleaned_content = Self::strip_erb_templates(&content);
65
66        // Parse entire file
67        let mut value_to_line: HashMap<String, usize> = HashMap::new();
68        for (line_num, line) in cleaned_content.lines().enumerate() {
69            if let Some(colon_pos) = line.find(':') {
70                let value = line[colon_pos + 1..].trim();
71                if !value.is_empty() && !value.starts_with('#') {
72                    let clean_value = value.trim_matches('"').trim_matches('\'');
73                    if !clean_value.is_empty() {
74                        value_to_line
75                            .entry(clean_value.to_string())
76                            .or_insert(line_num + 1);
77                    }
78                }
79            }
80        }
81
82        let docs = YamlLoader::load_from_str(&cleaned_content).map_err(|e| {
83            SearchError::yaml_parse_error(path, format!("Invalid YAML syntax: {}", e))
84        })?;
85
86        let mut entries = Vec::new();
87        for doc in docs {
88            Self::flatten_yaml(doc, String::new(), path, &value_to_line, &mut entries, true);
89        }
90
91        // Filter by query if provided (since bottom-up trace is disabled)
92        if let Some(q) = query {
93            let q_lower = q.to_lowercase();
94            entries.retain(|e| e.value.to_lowercase().contains(&q_lower));
95        }
96
97        Ok(entries)
98    }
99
100    /// Strip ERB templates (<%= ... %> and <% ... %>) from YAML
101    /// This enables parsing of Rails fixture files
102    fn strip_erb_templates(content: &str) -> String {
103        let mut result = String::with_capacity(content.len());
104        let mut chars = content.chars().peekable();
105
106        while let Some(ch) = chars.next() {
107            if ch == '<' {
108                if let Some(&'%') = chars.peek() {
109                    chars.next(); // consume '%'
110
111                    // Check for <%= or <%
112                    let _has_equals = if let Some(&'=') = chars.peek() {
113                        chars.next(); // consume '='
114                        true
115                    } else {
116                        false
117                    };
118
119                    // Skip until we find %>
120                    let mut prev = ' ';
121                    for c in chars.by_ref() {
122                        if prev == '%' && c == '>' {
123                            break;
124                        }
125                        if c == '\n' {
126                            result.push('\n'); // preserve newlines
127                        }
128                        prev = c;
129                    }
130
131                    // Replace ERB tag with empty string (already skipped)
132                    continue;
133                }
134            }
135
136            result.push(ch);
137        }
138
139        result
140    }
141
142    fn flatten_yaml(
143        yaml: Yaml,
144        prefix: String,
145        file_path: &Path,
146        value_to_line: &HashMap<String, usize>,
147        entries: &mut Vec<TranslationEntry>,
148        is_root: bool,
149    ) {
150        match yaml {
151            Yaml::Hash(hash) => {
152                for (key, value) in hash {
153                    if let Some(key_str) = key.as_str() {
154                        // Check if this is a locale root BEFORE building prefix
155                        let is_locale_root = is_root
156                            && prefix.is_empty()
157                            && (key_str == "en"
158                                || key_str == "fr"
159                                || key_str == "de"
160                                || key_str == "es"
161                                || key_str == "ja"
162                                || key_str == "zh");
163
164                        // For locale roots, skip the locale prefix entirely
165                        let new_prefix = if is_locale_root {
166                            String::new()
167                        } else if prefix.is_empty() {
168                            key_str.to_string()
169                        } else {
170                            format!("{}.{}", prefix, key_str)
171                        };
172
173                        // Only flatten once, not twice!
174                        Self::flatten_yaml(
175                            value,
176                            new_prefix,
177                            file_path,
178                            value_to_line,
179                            entries,
180                            false,
181                        );
182                    }
183                }
184            }
185            Yaml::String(value) => {
186                let line = value_to_line.get(&value).copied().unwrap_or(0);
187
188                entries.push(TranslationEntry {
189                    key: prefix,
190                    value,
191                    line,
192                    file: PathBuf::from(file_path),
193                });
194            }
195            Yaml::Integer(value) => {
196                let value_str = value.to_string();
197                let line = value_to_line.get(&value_str).copied().unwrap_or(0);
198
199                entries.push(TranslationEntry {
200                    key: prefix,
201                    value: value_str,
202                    line,
203                    file: PathBuf::from(file_path),
204                });
205            }
206            Yaml::Boolean(value) => {
207                let value_str = value.to_string();
208                let line = value_to_line.get(&value_str).copied().unwrap_or(0);
209
210                entries.push(TranslationEntry {
211                    key: prefix,
212                    value: value_str,
213                    line,
214                    file: PathBuf::from(file_path),
215                });
216            }
217            Yaml::Array(arr) => {
218                for (index, val) in arr.into_iter().enumerate() {
219                    let new_prefix = if prefix.is_empty() {
220                        index.to_string()
221                    } else {
222                        format!("{}.{}", prefix, index)
223                    };
224                    Self::flatten_yaml(val, new_prefix, file_path, value_to_line, entries, false);
225                }
226            }
227            _ => {
228                // Ignore other types for now
229            }
230        }
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Create a temporary file holding `contents` for the parser to read.
    fn yaml_file(contents: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents.as_bytes()).unwrap();
        file
    }

    #[test]
    fn test_parse_simple_yaml() {
        let file = yaml_file("key: value");

        let entries = YamlParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "key");
        assert_eq!(entries[0].value, "value");
        assert_eq!(entries[0].line, 1);
    }

    #[test]
    fn test_parse_nested_yaml() {
        let file = yaml_file("parent:\n  child: value");

        let entries = YamlParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "parent.child");
        assert_eq!(entries[0].value, "value");
        assert_eq!(entries[0].line, 2);
    }

    #[test]
    fn test_parse_multiple_keys() {
        // The leading blank line is intentional: it shifts every key down by one line.
        let file = yaml_file("\nkey1: value1\nkey2: value2\nnested:\n  key3: value3\n");

        let entries = YamlParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 3);

        // Look entries up by key so the test does not depend on their order.
        let expected = [
            ("key1", "value1", 2),
            ("key2", "value2", 3),
            ("nested.key3", "value3", 5),
        ];
        for (key, value, line) in expected {
            let entry = entries.iter().find(|e| e.key == key).unwrap();
            assert_eq!(entry.value, value);
            assert_eq!(entry.line, line);
        }
    }

    #[test]
    fn test_parse_yaml_array() {
        let file = yaml_file("list:\n  - item1\n  - item2");

        let entries = YamlParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 2);

        let item1 = entries.iter().find(|e| e.value == "item1").unwrap();
        assert_eq!(item1.key, "list.0");

        let item2 = entries.iter().find(|e| e.value == "item2").unwrap();
        assert_eq!(item2.key, "list.1");
    }

    // Despite the historical name, this exercises the query filter applied after a
    // full parse, plus removal of the `en` locale root from the generated key.
    #[test]
    fn test_bottom_up_trace() {
        let file = yaml_file(
            "en:\n  js:\n    user:\n      log_in: \"Log In\"\n      sign_up: \"Sign Up\"\n",
        );

        let entries = YamlParser::parse_file_with_query(file.path(), Some("Log In")).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "js.user.log_in");
        assert_eq!(entries[0].value, "Log In");
        assert_eq!(entries[0].line, 4);
    }

    #[test]
    fn test_query_is_case_insensitive() {
        let file = yaml_file(
            "en:\n  js:\n    user:\n      log_in: \"Log In\"\n      sign_up: \"Sign Up\"\n",
        );

        let hits = YamlParser::parse_file_with_query(file.path(), Some("log in")).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].key, "js.user.log_in");

        let misses = YamlParser::parse_file_with_query(file.path(), Some("missing")).unwrap();
        assert!(misses.is_empty());
    }

    #[test]
    fn test_strip_erb_templates() {
        assert_eq!(
            YamlParser::strip_erb_templates("a: <%= x %>\nb: 2"),
            "a: \nb: 2"
        );
        // Newlines inside a tag are kept so later lines keep their numbers.
        assert_eq!(
            YamlParser::strip_erb_templates("a: 1\n<% if x\n   y %>\nb: 2"),
            "a: 1\n\n\nb: 2"
        );
        assert_eq!(YamlParser::strip_erb_templates("no < tags % here"), "no < tags % here");
    }

    #[test]
    fn test_parse_erb_fixture() {
        let file = yaml_file("name: <%= 1 + 1 %>\ntitle: Hello");

        let entries = YamlParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "title");
        assert_eq!(entries[0].value, "Hello");
        assert_eq!(entries[0].line, 2);
    }
}
330}