cs/parse/
json_parser.rs

1use crate::error::{Result, SearchError};
2use serde_json::Value;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use super::translation::TranslationEntry;
7
/// Parser for JSON translation files (comments in the JSONC style are tolerated).
///
/// The type carries no state; all functionality is exposed through associated
/// functions such as [`JsonParser::parse_file`]. The derives make the marker
/// type printable and freely copyable like any other unit struct.
#[derive(Debug, Clone, Copy, Default)]
pub struct JsonParser;
10
11impl JsonParser {
12    pub fn parse_file(path: &Path) -> Result<Vec<TranslationEntry>> {
13        Self::parse_file_with_query(path, None)
14    }
15
16    /// Parse JSON file, optionally filtering by query for better performance.
17    /// If query is provided, uses bottom-up approach: finds exact matches with grep,
18    /// then traces keys upward WITHOUT parsing the entire JSON structure.
19    pub fn parse_file_with_query(
20        path: &Path,
21        query: Option<&str>,
22    ) -> Result<Vec<TranslationEntry>> {
23        let content = fs::read_to_string(path).map_err(|e| {
24            SearchError::json_parse_error(path, format!("Failed to read file: {}", e))
25        })?;
26
27        // Strip comments to support JSONC (JSON with Comments) format
28        let cleaned_content = Self::strip_json_comments(&content);
29
30        // If query is provided, use bottom-up approach
31        // FIXME: Bottom-up trace is buggy (returns leaf keys), disabled for now.
32        // if let Some(q) = query {
33        //     return Self::parse_with_bottom_up_trace(path, &cleaned_content, q);
34        // }
35
36        // No query - parse entire file
37        let root: Value = serde_json::from_str(&cleaned_content).map_err(|e| {
38            SearchError::json_parse_error(path, format!("Invalid JSON syntax: {}", e))
39        })?;
40
41        let mut entries = Vec::new();
42        Self::flatten_json(&root, String::new(), path, &mut entries);
43
44        // Filter by query if provided (since bottom-up trace is disabled)
45        if let Some(q) = query {
46            let q_lower = q.to_lowercase();
47            entries.retain(|e| e.value.to_lowercase().contains(&q_lower));
48        }
49
50        Ok(entries)
51    }
52
53    /*
54    /// Bottom-up approach: Find matching lines with grep, then trace keys upward.
55    /// This avoids parsing the entire JSON structure.
56    fn parse_with_bottom_up_trace(
57        path: &Path,
58        content: &str,
59        query: &str,
60    ) -> Result<Vec<TranslationEntry>> {
61        use grep_regex::RegexMatcherBuilder;
62        use grep_searcher::sinks::UTF8;
63        use grep_searcher::SearcherBuilder;
64
65        // Use grep to find exact line numbers with matches
66        let matcher = RegexMatcherBuilder::new()
67            .case_insensitive(true)
68            .fixed_strings(true)
69            .build(query)
70            .map_err(|e| SearchError::json_parse_error(path, format!("Matcher error: {}", e)))?;
71
72        let mut searcher = SearcherBuilder::new().line_number(true).build();
73        let mut matched_lines: Vec<(usize, String)> = Vec::new();
74
75        searcher
76            .search_path(
77                &matcher,
78                path,
79                UTF8(|line_num, line_content| {
80                    matched_lines.push((line_num as usize, line_content.to_string()));
81                    Ok(true) // Continue searching
82                }),
83            )
84            .map_err(|e| SearchError::json_parse_error(path, format!("Search error: {}", e)))?;
85
86        if matched_lines.is_empty() {
87            return Ok(Vec::new());
88        }
89
90        // For each matched line, trace the key path bottom-up
91        let lines: Vec<&str> = content.lines().collect();
92        let mut entries = Vec::new();
93
94        // Optimization: tree is non-tangled, later matches appear after earlier ones.
95        let mut cutoff_line: usize = 0;
96        let mut ancestor_cache: HashMap<usize, Vec<String>> = HashMap::new();
97
98        for (line_num, _line_content) in matched_lines {
99            if let Some(trace) =
100                Self::trace_key_from_line(&lines, line_num, path, cutoff_line, &ancestor_cache)
101            {
102                for (line_idx, prefix) in trace.parent_prefixes {
103                    ancestor_cache.entry(line_idx).or_insert(prefix);
104                }
105
106                entries.push(trace.entry);
107            }
108
109            cutoff_line = line_num;
110        }
111
112        Ok(entries)
113    }
114
    /// Backward linear scan for the parent opening brace in JSON (despite the
    /// function name, no binary search is performed).
    /// Returns (line_index, key) if found.
117    fn binary_search_parent_brace(
118        lines: &[&str],
119        end_line: usize,
120        cutoff_line: usize,
121        _ancestor_cache: &HashMap<usize, Vec<String>>,
122    ) -> Option<(usize, String)> {
123        let mut brace_depth = 0;
124
125        // First, calculate the brace depth at end_line
126        for i in ((end_line + 1)..lines.len()).take(1) {
127            for ch in lines[i].chars() {
128                match ch {
129                    '{' => brace_depth += 1,
130                    '}' => brace_depth -= 1,
131                    _ => {}
132                }
133            }
134        }
135
136        // Count braces from end_line backwards to know target depth
137        for i in (0..=end_line).rev() {
138            let line = lines[i];
139            for ch in line.chars() {
140                match ch {
141                    '}' => brace_depth += 1,
142                    '{' => brace_depth -= 1,
143                    _ => {}
144                }
145            }
146
147            // Found opening brace that increases nesting
148            if brace_depth > 0 && line.contains('{') {
149                let line_idx = i + 1;
150                if line_idx <= cutoff_line {
151                    return None; // Hit cutoff boundary
152                }
153
154                // Find the key before this brace
155                if let Some((key_line, key)) = Self::find_key_before_brace(&lines[..=i]) {
156                    return Some((key_line - 1, key)); // Return 0-indexed
157                }
158                return None;
159            }
160        }
161
162        None
163    }
164
165    /// Trace the JSON key path from a specific line number bottom-up.
166    /// Uses binary search to find parent braces efficiently.
167    fn trace_key_from_line(
168        lines: &[&str],
169        line_num: usize,
170        path: &Path,
171        cutoff_line: usize,
172        ancestor_cache: &HashMap<usize, Vec<String>>,
173    ) -> Option<TraceResult> {
174        if line_num == 0 || line_num > lines.len() {
175            return None;
176        }
177
178        let target_line = lines[line_num - 1]; // Convert to 0-indexed
179
180        // Extract the key and value from the target line
181        // JSON format: "key": "value" or "key": value
182        let colon_pos = target_line.find(':')?;
183        let key_part = target_line[..colon_pos].trim().trim_matches('"');
184        let value_part = target_line[colon_pos + 1..].trim();
185
186        // Extract value, handling trailing commas
187        let value = value_part
188            .trim_end_matches(',')
189            .trim()
190            .trim_matches('"')
191            .to_string();
192
193        // Build the key path by walking up the JSON structure using binary search
194        let mut key_parts = vec![key_part.to_string()];
195        let mut parent_lines: Vec<usize> = Vec::new();
196        let mut search_end = line_num - 1;
197
198        // Find parents by binary searching for opening braces at decreasing depths
199        while let Some((parent_idx, parent_key)) =
200            Self::binary_search_parent_brace(lines, search_end, cutoff_line, ancestor_cache)
201        {
202            let line_idx = parent_idx + 1; // Convert to 1-based
203
204            // Check if we hit cached ancestor
205            if let Some(prefix) = ancestor_cache.get(&line_idx) {
206                let mut combined = prefix.clone();
207                combined.extend(key_parts);
208                return Some(TraceResult::new(
209                    combined,
210                    value,
211                    line_num,
212                    path,
213                    parent_lines,
214                ));
215            }
216
217            key_parts.insert(0, parent_key);
218            parent_lines.push(line_idx);
219            search_end = parent_idx; // Next search ends at this parent
220
221            if parent_idx == 0 {
222                break; // Reached root
223            }
224        }
225
226        Some(TraceResult::new(
227            key_parts,
228            value,
229            line_num,
230            path,
231            parent_lines,
232        ))
233    }
234
235    /// Find the key name before an opening brace in JSON and return its line number (1-based)
236    fn find_key_before_brace(lines: &[&str]) -> Option<(usize, String)> {
237        // Walk backwards from the last line to find "key": {
238        for (idx, line) in lines.iter().enumerate().rev() {
239            let trimmed = line.trim();
240            if let Some(colon_pos) = trimmed.find(':') {
241                let key_part = trimmed[..colon_pos].trim().trim_matches('"');
242                return Some((idx + 1, key_part.to_string()));
243            }
244        }
245        None
246    }
247    */
248
/// Strip single-line (`//`) and multi-line (`/* */`) comments from JSON text.
/// This enables parsing of JSONC (JSON with Comments) files.
///
/// Comment markers inside string literals are left alone; backslash escapes
/// are tracked so an escaped quote does not end the string. Every newline
/// that was part of a comment is kept so line numbers in the output match
/// the input. A block comment is replaced by a single space rather than by
/// nothing, so the tokens on either side of it can never be fused together
/// (`[1/* x */2]` must not turn into `[12]`).
///
/// An unterminated block comment swallows the rest of the input.
fn strip_json_comments(content: &str) -> String {
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();
    let mut in_string = false;
    let mut escape_next = false;

    while let Some(ch) = chars.next() {
        // The character after a backslash inside a string is copied verbatim.
        if escape_next {
            result.push(ch);
            escape_next = false;
            continue;
        }

        match ch {
            '\\' if in_string => {
                result.push(ch);
                escape_next = true;
            }
            '"' => {
                in_string = !in_string;
                result.push(ch);
            }
            '/' if !in_string => match chars.peek().copied() {
                Some('/') => {
                    chars.next(); // consume second '/'
                    // Skip to end of line; `any` stops right after the newline.
                    if chars.by_ref().any(|c| c == '\n') {
                        result.push('\n'); // preserve newline for line counting
                    }
                }
                Some('*') => {
                    chars.next(); // consume '*'
                    // Stand-in whitespace keeps neighbouring tokens apart.
                    result.push(' ');
                    let mut prev = ' ';
                    for c in chars.by_ref() {
                        if prev == '*' && c == '/' {
                            break;
                        }
                        if c == '\n' {
                            result.push('\n'); // preserve newlines
                        }
                        prev = c;
                    }
                }
                // A lone '/' is not a comment; the parser will judge it.
                _ => result.push(ch),
            },
            _ => result.push(ch),
        }
    }

    result
}
311
312    fn flatten_json(
313        value: &Value,
314        prefix: String,
315        file_path: &Path,
316        entries: &mut Vec<TranslationEntry>,
317    ) {
318        match value {
319            Value::Object(map) => {
320                for (key, val) in map {
321                    let new_prefix = if prefix.is_empty() {
322                        key.clone()
323                    } else {
324                        format!("{}.{}", prefix, key)
325                    };
326
327                    Self::flatten_json(val, new_prefix, file_path, entries);
328                }
329            }
330            Value::String(s) => {
331                entries.push(TranslationEntry {
332                    key: prefix,
333                    value: s.clone(),
334                    line: 0, // Placeholder - serde_json doesn't provide line numbers
335                    file: PathBuf::from(file_path),
336                });
337            }
338            Value::Number(n) => {
339                entries.push(TranslationEntry {
340                    key: prefix,
341                    value: n.to_string(),
342                    line: 0,
343                    file: PathBuf::from(file_path),
344                });
345            }
346            Value::Bool(b) => {
347                entries.push(TranslationEntry {
348                    key: prefix,
349                    value: b.to_string(),
350                    line: 0,
351                    file: PathBuf::from(file_path),
352                });
353            }
354            Value::Array(arr) => {
355                for (index, val) in arr.iter().enumerate() {
356                    let new_prefix = if prefix.is_empty() {
357                        index.to_string()
358                    } else {
359                        format!("{}.{}", prefix, index)
360                    };
361                    Self::flatten_json(val, new_prefix, file_path, entries);
362                }
363            }
364            _ => {
365                // Ignore nulls for now
366            }
367        }
368    }
369}
370
371/*
372/// Result of a trace with ancestor bookkeeping so future traces can short-circuit.
373struct TraceResult {
374    entry: TranslationEntry,
375    parent_prefixes: Vec<(usize, Vec<String>)>,
376}
377
378impl TraceResult {
379    fn new(
380        key_parts: Vec<String>,
381        value: String,
382        line_num: usize,
383        path: &Path,
384        parent_lines: Vec<usize>,
385    ) -> Self {
386        let entry = TranslationEntry {
387            key: key_parts.join("."),
388            value,
389            line: line_num,
390            file: PathBuf::from(path),
391        };
392
393        let mut parent_prefixes = Vec::new();
394        for (idx, line_idx) in parent_lines.iter().rev().enumerate() {
395            let prefix_len = idx + 1;
396            if prefix_len <= key_parts.len() {
397                parent_prefixes.push((*line_idx, key_parts[..prefix_len].to_vec()));
398            }
399        }
400
401        Self {
402            entry,
403            parent_prefixes,
404        }
405    }
406}
407*/
408
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Create a temporary file holding `contents`; it is removed when the
    /// returned handle is dropped.
    fn temp_file_with(contents: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents.as_bytes()).unwrap();
        file
    }

    #[test]
    fn test_parse_simple_json() {
        let file = temp_file_with(r#"{"key": "value"}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "key");
        assert_eq!(entries[0].value, "value");
    }

    #[test]
    fn test_parse_nested_json() {
        let file = temp_file_with(r#"{"parent": {"child": "value"}}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "parent.child");
        assert_eq!(entries[0].value, "value");
    }

    #[test]
    fn test_parse_json_array() {
        let file = temp_file_with(r#"{"list": ["item1", "item2"]}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 2);

        // Array elements are keyed by their index.
        let item1 = entries.iter().find(|e| e.value == "item1").unwrap();
        assert_eq!(item1.key, "list.0");

        let item2 = entries.iter().find(|e| e.value == "item2").unwrap();
        assert_eq!(item2.key, "list.1");
    }

    #[test]
    fn test_parse_scalars_and_null() {
        let file = temp_file_with(r#"{"count": 1, "enabled": true, "missing": null}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        // Null members produce no entry; numbers and booleans are stringified.
        assert_eq!(entries.len(), 2);

        let count = entries.iter().find(|e| e.key == "count").unwrap();
        assert_eq!(count.value, "1");

        let enabled = entries.iter().find(|e| e.key == "enabled").unwrap();
        assert_eq!(enabled.value, "true");
    }

    #[test]
    fn test_parse_jsonc_with_comments() {
        let file = temp_file_with(
            "{\n  // line comment\n  \"key\": \"value\", /* block */\n  \"url\": \"http://example.com\"\n}",
        );

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 2);

        // The `//` inside the string literal must not be treated as a comment.
        let url = entries.iter().find(|e| e.key == "url").unwrap();
        assert_eq!(url.value, "http://example.com");
    }

    #[test]
    fn test_invalid_json_is_error() {
        let file = temp_file_with(r#"{"key": }"#);

        assert!(JsonParser::parse_file(file.path()).is_err());
    }

    #[test]
    fn test_bottom_up_trace_json() {
        let file = temp_file_with(
            r#"{
  "user": {
    "authentication": {
      "login": "Log In",
      "logout": "Log Out"
    }
  }
}"#,
        );

        let entries = JsonParser::parse_file_with_query(file.path(), Some("Log In")).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].value, "Log In");
        // The query path must report the full dotted key, not just the leaf
        // ("login"); a `contains("login")` check would accept either.
        assert_eq!(entries[0].key, "user.authentication.login");
    }

    #[test]
    fn test_query_filter_is_case_insensitive() {
        let file = temp_file_with(r#"{"a": {"login": "Log In", "logout": "Log Out"}}"#);

        let entries = JsonParser::parse_file_with_query(file.path(), Some("log in")).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "a.login");
    }

    #[test]
    fn test_query_without_match_returns_empty() {
        let file = temp_file_with(r#"{"a": {"login": "Log In"}}"#);

        let entries = JsonParser::parse_file_with_query(file.path(), Some("Sign Up")).unwrap();
        assert!(entries.is_empty());
    }
}
476}