cs/parse/json_parser.rs
1use crate::error::{Result, SearchError};
2use serde_json::Value;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6use super::translation::TranslationEntry;
7
/// Parser for JSON translation files.
///
/// Stateless unit type: every operation is exposed as an associated function,
/// so no instance is required to use it.
#[derive(Debug, Clone, Copy, Default)]
pub struct JsonParser;
10
11impl JsonParser {
12 pub fn parse_file(path: &Path) -> Result<Vec<TranslationEntry>> {
13 Self::parse_file_with_query(path, None)
14 }
15
16 /// Parse JSON file, optionally filtering by query for better performance.
17 /// If query is provided, uses bottom-up approach: finds exact matches with grep,
18 /// then traces keys upward WITHOUT parsing the entire JSON structure.
19 pub fn parse_file_with_query(
20 path: &Path,
21 query: Option<&str>,
22 ) -> Result<Vec<TranslationEntry>> {
23 let content = fs::read_to_string(path).map_err(|e| {
24 SearchError::json_parse_error(path, format!("Failed to read file: {}", e))
25 })?;
26
27 // Strip comments to support JSONC (JSON with Comments) format
28 let cleaned_content = Self::strip_json_comments(&content);
29
30 // If query is provided, use bottom-up approach
31 // FIXME: Bottom-up trace is buggy (returns leaf keys), disabled for now.
32 // if let Some(q) = query {
33 // return Self::parse_with_bottom_up_trace(path, &cleaned_content, q);
34 // }
35
36 // No query - parse entire file
37 let root: Value = serde_json::from_str(&cleaned_content).map_err(|e| {
38 SearchError::json_parse_error(path, format!("Invalid JSON syntax: {}", e))
39 })?;
40
41 let mut entries = Vec::new();
42 Self::flatten_json(&root, String::new(), path, &mut entries);
43
44 // Filter by query if provided (since bottom-up trace is disabled)
45 if let Some(q) = query {
46 let q_lower = q.to_lowercase();
47 entries.retain(|e| e.value.to_lowercase().contains(&q_lower));
48 }
49
50 Ok(entries)
51 }
52
53 /*
54 /// Bottom-up approach: Find matching lines with grep, then trace keys upward.
55 /// This avoids parsing the entire JSON structure.
56 fn parse_with_bottom_up_trace(
57 path: &Path,
58 content: &str,
59 query: &str,
60 ) -> Result<Vec<TranslationEntry>> {
61 use grep_regex::RegexMatcherBuilder;
62 use grep_searcher::sinks::UTF8;
63 use grep_searcher::SearcherBuilder;
64
65 // Use grep to find exact line numbers with matches
66 let matcher = RegexMatcherBuilder::new()
67 .case_insensitive(true)
68 .fixed_strings(true)
69 .build(query)
70 .map_err(|e| SearchError::json_parse_error(path, format!("Matcher error: {}", e)))?;
71
72 let mut searcher = SearcherBuilder::new().line_number(true).build();
73 let mut matched_lines: Vec<(usize, String)> = Vec::new();
74
75 searcher
76 .search_path(
77 &matcher,
78 path,
79 UTF8(|line_num, line_content| {
80 matched_lines.push((line_num as usize, line_content.to_string()));
81 Ok(true) // Continue searching
82 }),
83 )
84 .map_err(|e| SearchError::json_parse_error(path, format!("Search error: {}", e)))?;
85
86 if matched_lines.is_empty() {
87 return Ok(Vec::new());
88 }
89
90 // For each matched line, trace the key path bottom-up
91 let lines: Vec<&str> = content.lines().collect();
92 let mut entries = Vec::new();
93
94 // Optimization: tree is non-tangled, later matches appear after earlier ones.
95 let mut cutoff_line: usize = 0;
96 let mut ancestor_cache: HashMap<usize, Vec<String>> = HashMap::new();
97
98 for (line_num, _line_content) in matched_lines {
99 if let Some(trace) =
100 Self::trace_key_from_line(&lines, line_num, path, cutoff_line, &ancestor_cache)
101 {
102 for (line_idx, prefix) in trace.parent_prefixes {
103 ancestor_cache.entry(line_idx).or_insert(prefix);
104 }
105
106 entries.push(trace.entry);
107 }
108
109 cutoff_line = line_num;
110 }
111
112 Ok(entries)
113 }
114
115 /// Binary search for parent opening brace in JSON.
116 /// Returns (line_index, key) if found.
117 fn binary_search_parent_brace(
118 lines: &[&str],
119 end_line: usize,
120 cutoff_line: usize,
121 _ancestor_cache: &HashMap<usize, Vec<String>>,
122 ) -> Option<(usize, String)> {
123 let mut brace_depth = 0;
124
125 // First, calculate the brace depth at end_line
126 for i in ((end_line + 1)..lines.len()).take(1) {
127 for ch in lines[i].chars() {
128 match ch {
129 '{' => brace_depth += 1,
130 '}' => brace_depth -= 1,
131 _ => {}
132 }
133 }
134 }
135
136 // Count braces from end_line backwards to know target depth
137 for i in (0..=end_line).rev() {
138 let line = lines[i];
139 for ch in line.chars() {
140 match ch {
141 '}' => brace_depth += 1,
142 '{' => brace_depth -= 1,
143 _ => {}
144 }
145 }
146
147 // Found opening brace that increases nesting
148 if brace_depth > 0 && line.contains('{') {
149 let line_idx = i + 1;
150 if line_idx <= cutoff_line {
151 return None; // Hit cutoff boundary
152 }
153
154 // Find the key before this brace
155 if let Some((key_line, key)) = Self::find_key_before_brace(&lines[..=i]) {
156 return Some((key_line - 1, key)); // Return 0-indexed
157 }
158 return None;
159 }
160 }
161
162 None
163 }
164
165 /// Trace the JSON key path from a specific line number bottom-up.
166 /// Uses binary search to find parent braces efficiently.
167 fn trace_key_from_line(
168 lines: &[&str],
169 line_num: usize,
170 path: &Path,
171 cutoff_line: usize,
172 ancestor_cache: &HashMap<usize, Vec<String>>,
173 ) -> Option<TraceResult> {
174 if line_num == 0 || line_num > lines.len() {
175 return None;
176 }
177
178 let target_line = lines[line_num - 1]; // Convert to 0-indexed
179
180 // Extract the key and value from the target line
181 // JSON format: "key": "value" or "key": value
182 let colon_pos = target_line.find(':')?;
183 let key_part = target_line[..colon_pos].trim().trim_matches('"');
184 let value_part = target_line[colon_pos + 1..].trim();
185
186 // Extract value, handling trailing commas
187 let value = value_part
188 .trim_end_matches(',')
189 .trim()
190 .trim_matches('"')
191 .to_string();
192
193 // Build the key path by walking up the JSON structure using binary search
194 let mut key_parts = vec![key_part.to_string()];
195 let mut parent_lines: Vec<usize> = Vec::new();
196 let mut search_end = line_num - 1;
197
198 // Find parents by binary searching for opening braces at decreasing depths
199 while let Some((parent_idx, parent_key)) =
200 Self::binary_search_parent_brace(lines, search_end, cutoff_line, ancestor_cache)
201 {
202 let line_idx = parent_idx + 1; // Convert to 1-based
203
204 // Check if we hit cached ancestor
205 if let Some(prefix) = ancestor_cache.get(&line_idx) {
206 let mut combined = prefix.clone();
207 combined.extend(key_parts);
208 return Some(TraceResult::new(
209 combined,
210 value,
211 line_num,
212 path,
213 parent_lines,
214 ));
215 }
216
217 key_parts.insert(0, parent_key);
218 parent_lines.push(line_idx);
219 search_end = parent_idx; // Next search ends at this parent
220
221 if parent_idx == 0 {
222 break; // Reached root
223 }
224 }
225
226 Some(TraceResult::new(
227 key_parts,
228 value,
229 line_num,
230 path,
231 parent_lines,
232 ))
233 }
234
235 /// Find the key name before an opening brace in JSON and return its line number (1-based)
236 fn find_key_before_brace(lines: &[&str]) -> Option<(usize, String)> {
237 // Walk backwards from the last line to find "key": {
238 for (idx, line) in lines.iter().enumerate().rev() {
239 let trimmed = line.trim();
240 if let Some(colon_pos) = trimmed.find(':') {
241 let key_part = trimmed[..colon_pos].trim().trim_matches('"');
242 return Some((idx + 1, key_part.to_string()));
243 }
244 }
245 None
246 }
247 */
248
/// Strip single-line (`//`) and multi-line (`/* */`) comments from JSON text.
///
/// This enables parsing of JSONC (JSON with Comments) files with a strict JSON
/// parser. Comment markers that appear inside string literals are left alone,
/// and backslash escapes inside strings are honoured so an escaped quote does
/// not end the string.
///
/// Line structure is preserved: the newline that ends a `//` comment and every
/// newline inside a `/* */` comment are kept, so line numbers reported for the
/// stripped text match the original. Each block comment is additionally
/// replaced by a single space so the tokens on either side of it cannot fuse
/// (`1/**/2` becomes `1 2`, not `12`).
///
/// An unterminated block comment swallows the rest of the input.
fn strip_json_comments(content: &str) -> String {
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();
    let mut in_string = false;
    let mut escape_next = false;

    while let Some(ch) = chars.next() {
        if in_string {
            // Inside a string literal everything is copied verbatim; we only
            // track escapes so that `\"` does not terminate the string.
            result.push(ch);
            if escape_next {
                escape_next = false;
            } else if ch == '\\' {
                escape_next = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        match ch {
            '"' => {
                in_string = true;
                result.push(ch);
            }
            '/' if chars.peek() == Some(&'/') => {
                chars.next(); // consume second '/'
                // Skip to the end of the line; `any` also consumes the newline,
                // which is re-emitted to keep line numbers stable.
                if chars.by_ref().any(|c| c == '\n') {
                    result.push('\n');
                }
            }
            '/' if chars.peek() == Some(&'*') => {
                chars.next(); // consume '*'
                // A comment separates tokens, so leave whitespace in its place.
                result.push(' ');
                // Start with a non-'*' sentinel so that "/*/" is not mistaken
                // for a complete comment.
                let mut prev = ' ';
                for c in chars.by_ref() {
                    if prev == '*' && c == '/' {
                        break;
                    }
                    if c == '\n' {
                        result.push('\n'); // preserve newlines
                    }
                    prev = c;
                }
            }
            _ => result.push(ch),
        }
    }

    result
}
311
312 fn flatten_json(
313 value: &Value,
314 prefix: String,
315 file_path: &Path,
316 entries: &mut Vec<TranslationEntry>,
317 ) {
318 match value {
319 Value::Object(map) => {
320 for (key, val) in map {
321 let new_prefix = if prefix.is_empty() {
322 key.clone()
323 } else {
324 format!("{}.{}", prefix, key)
325 };
326
327 Self::flatten_json(val, new_prefix, file_path, entries);
328 }
329 }
330 Value::String(s) => {
331 entries.push(TranslationEntry {
332 key: prefix,
333 value: s.clone(),
334 line: 0, // Placeholder - serde_json doesn't provide line numbers
335 file: PathBuf::from(file_path),
336 });
337 }
338 Value::Number(n) => {
339 entries.push(TranslationEntry {
340 key: prefix,
341 value: n.to_string(),
342 line: 0,
343 file: PathBuf::from(file_path),
344 });
345 }
346 Value::Bool(b) => {
347 entries.push(TranslationEntry {
348 key: prefix,
349 value: b.to_string(),
350 line: 0,
351 file: PathBuf::from(file_path),
352 });
353 }
354 Value::Array(arr) => {
355 for (index, val) in arr.iter().enumerate() {
356 let new_prefix = if prefix.is_empty() {
357 index.to_string()
358 } else {
359 format!("{}.{}", prefix, index)
360 };
361 Self::flatten_json(val, new_prefix, file_path, entries);
362 }
363 }
364 _ => {
365 // Ignore nulls for now
366 }
367 }
368 }
369}
370
371/*
372/// Result of a trace with ancestor bookkeeping so future traces can short-circuit.
373struct TraceResult {
374 entry: TranslationEntry,
375 parent_prefixes: Vec<(usize, Vec<String>)>,
376}
377
378impl TraceResult {
379 fn new(
380 key_parts: Vec<String>,
381 value: String,
382 line_num: usize,
383 path: &Path,
384 parent_lines: Vec<usize>,
385 ) -> Self {
386 let entry = TranslationEntry {
387 key: key_parts.join("."),
388 value,
389 line: line_num,
390 file: PathBuf::from(path),
391 };
392
393 let mut parent_prefixes = Vec::new();
394 for (idx, line_idx) in parent_lines.iter().rev().enumerate() {
395 let prefix_len = idx + 1;
396 if prefix_len <= key_parts.len() {
397 parent_prefixes.push((*line_idx, key_parts[..prefix_len].to_vec()));
398 }
399 }
400
401 Self {
402 entry,
403 parent_prefixes,
404 }
405 }
406}
407*/
408
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Write `json` to a fresh temporary file and return its handle.
    /// The handle must stay alive for as long as the path is used.
    fn temp_json(json: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(json.as_bytes()).unwrap();
        file
    }

    #[test]
    fn test_parse_simple_json() {
        let file = temp_json(r#"{"key": "value"}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "key");
        assert_eq!(entries[0].value, "value");
    }

    #[test]
    fn test_parse_nested_json() {
        let file = temp_json(r#"{"parent": {"child": "value"}}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "parent.child");
        assert_eq!(entries[0].value, "value");
    }

    #[test]
    fn test_parse_json_array() {
        let file = temp_json(r#"{"list": ["item1", "item2"]}"#);

        let entries = JsonParser::parse_file(file.path()).unwrap();
        assert_eq!(entries.len(), 2);

        // Array elements are keyed by their index under the parent key.
        let item1 = entries.iter().find(|e| e.value == "item1").unwrap();
        assert_eq!(item1.key, "list.0");

        let item2 = entries.iter().find(|e| e.value == "item2").unwrap();
        assert_eq!(item2.key, "list.1");
    }

    // The name is historical: the bottom-up trace is disabled, so this now
    // exercises the parse-then-filter path of `parse_file_with_query`.
    #[test]
    fn test_bottom_up_trace_json() {
        let file = temp_json(
            r#"{
  "user": {
    "authentication": {
      "login": "Log In",
      "logout": "Log Out"
    }
  }
}"#,
        );

        let entries = JsonParser::parse_file_with_query(file.path(), Some("Log In")).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].value, "Log In");
        // The full dotted path must be reported, not just the leaf key.
        assert_eq!(entries[0].key, "user.authentication.login");

        // Matching on the value is case-insensitive.
        let entries = JsonParser::parse_file_with_query(file.path(), Some("log in")).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].key, "user.authentication.login");
    }
}
476}