Skip to main content

sem_core/parser/plugins/
json.rs

1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
/// Parser plugin that splits a JSON document into semantic entities.
///
/// The type is a stateless unit struct; all behavior lives in its
/// `SemanticParserPlugin` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct JsonParserPlugin;
6
7impl SemanticParserPlugin for JsonParserPlugin {
8    fn id(&self) -> &str {
9        "json"
10    }
11
12    fn extensions(&self) -> &[&str] {
13        &[".json"]
14    }
15
16    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17        // Extract top-level properties from JSON objects, plus depth-2 children
18        // for "object" entities (e.g. scripts, dependencies in package.json).
19        // We scan the source text directly to get accurate line positions,
20        // which weave needs for entity-level merge reconstruction.
21        let trimmed = content.trim();
22        if !trimmed.starts_with('{') {
23            return Vec::new();
24        }
25
26        let lines: Vec<&str> = content.lines().collect();
27        let entries = find_top_level_entries(content);
28        let closing = find_closing_brace_line(&lines);
29
30        let mut entities = Vec::new();
31        for (i, entry) in entries.iter().enumerate() {
32            let end_line = if i + 1 < entries.len() {
33                let next_start = entries[i + 1].start_line;
34                trim_trailing_blanks(&lines, entry.start_line, next_start)
35            } else {
36                trim_trailing_blanks(&lines, entry.start_line, closing)
37            };
38
39            let entity_content = lines[entry.start_line - 1..end_line]
40                .join("\n");
41
42            let value_content = extract_value_content(&entity_content);
43            let structural_hash = Some(content_hash(value_content));
44
45            let parent_id = build_entity_id(file_path, &entry.entity_type, &entry.pointer, None);
46
47            entities.push(SemanticEntity {
48                id: parent_id.clone(),
49                file_path: file_path.to_string(),
50                entity_type: entry.entity_type.clone(),
51                name: entry.key.clone(),
52                parent_id: None,
53                content_hash: content_hash(&entity_content),
54                structural_hash,
55                content: entity_content.clone(),
56                start_line: entry.start_line,
57                end_line,
58                metadata: None,
59            });
60
61            // Extract depth-2 children from "object" entities
62            if entry.entity_type == "object" {
63                let nested = find_nested_object_entries(&entity_content, entry.start_line);
64                for (j, nentry) in nested.iter().enumerate() {
65                    let child_end = if j + 1 < nested.len() {
66                        trim_trailing_blanks(&lines, nentry.start_line, nested[j + 1].start_line)
67                    } else {
68                        trim_trailing_blanks(&lines, nentry.start_line, end_line)
69                    };
70
71                    let child_content = lines[nentry.start_line - 1..child_end].join("\n");
72                    let child_value = extract_value_content(&child_content);
73
74                    entities.push(SemanticEntity {
75                        id: build_entity_id(file_path, &nentry.entity_type, &nentry.key, Some(&parent_id)),
76                        file_path: file_path.to_string(),
77                        entity_type: nentry.entity_type.clone(),
78                        name: nentry.key.clone(),
79                        parent_id: Some(parent_id.clone()),
80                        content_hash: content_hash(&child_content),
81                        structural_hash: Some(content_hash(child_value)),
82                        content: child_content,
83                        start_line: nentry.start_line,
84                        end_line: child_end,
85                        metadata: None,
86                    });
87                }
88            }
89        }
90
91        entities
92    }
93}
94
/// A key discovered while scanning JSON source text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct JsonEntry {
    /// Key text as written between the quotes; escape sequences are kept
    /// verbatim (the backslash and the escaped character are both stored).
    key: String,
    /// `/`-prefixed key with `~` and `/` escaped as `~0` and `~1` for
    /// top-level entries; left empty for nested entries.
    pointer: String,
    /// `"object"` when the value opens with `{` or `[`, otherwise `"property"`.
    entity_type: String,
    /// 1-based source line at which the entry starts.
    start_line: usize,
}
101
102/// Scan the source text to find each top-level key in the root JSON object.
103/// Returns entries with accurate start_line positions.
104fn find_top_level_entries(content: &str) -> Vec<JsonEntry> {
105    let mut entries = Vec::new();
106    let mut depth = 0;
107    let mut in_string = false;
108    let mut escape_next = false;
109    let mut line_num: usize = 1;
110
111    // State for tracking when we find a key at depth 1
112    let mut current_key: Option<String> = None;
113    let mut key_start = false;
114    let mut key_buf = String::new();
115    let mut reading_key = false;
116
117    for ch in content.chars() {
118        if ch == '\n' {
119            line_num += 1;
120            continue;
121        }
122
123        if escape_next {
124            if reading_key {
125                key_buf.push(ch);
126            }
127            escape_next = false;
128            continue;
129        }
130
131        if ch == '\\' && in_string {
132            if reading_key {
133                key_buf.push(ch);
134            }
135            escape_next = true;
136            continue;
137        }
138
139        if in_string {
140            if ch == '"' {
141                in_string = false;
142                if reading_key {
143                    reading_key = false;
144                    current_key = Some(key_buf.clone());
145                    key_buf.clear();
146                }
147            } else if reading_key {
148                key_buf.push(ch);
149            }
150            continue;
151        }
152
153        match ch {
154            '"' => {
155                in_string = true;
156                // At depth 1, a string could be a key (before ':') or value (after ':')
157                if depth == 1 && current_key.is_none() && !key_start {
158                    reading_key = true;
159                    key_buf.clear();
160                }
161            }
162            ':' => {
163                if depth == 1 {
164                    if let Some(ref key) = current_key {
165                        // Found a key: value pair at depth 1
166                        let escaped_key = key.replace('~', "~0").replace('/', "~1");
167                        let pointer = format!("/{escaped_key}");
168                        entries.push(JsonEntry {
169                            key: key.clone(),
170                            pointer,
171                            entity_type: String::new(), // filled in below
172                            start_line: line_num,
173                        });
174                        key_start = true;
175                    }
176                }
177            }
178            '{' | '[' => {
179                depth += 1;
180                if depth == 2 && key_start {
181                    // The value for this key is an object/array
182                    if let Some(entry) = entries.last_mut() {
183                        entry.entity_type = "object".to_string();
184                    }
185                }
186            }
187            '}' | ']' => {
188                depth -= 1;
189            }
190            ',' => {
191                if depth == 1 {
192                    // End of a top-level entry
193                    if let Some(entry) = entries.last_mut() {
194                        if entry.entity_type.is_empty() {
195                            entry.entity_type = "property".to_string();
196                        }
197                    }
198                    current_key = None;
199                    key_start = false;
200                }
201            }
202            _ => {}
203        }
204    }
205
206    // Handle last entry (no trailing comma)
207    if let Some(entry) = entries.last_mut() {
208        if entry.entity_type.is_empty() {
209            entry.entity_type = "property".to_string();
210        }
211    }
212
213    entries
214}
215
216/// Find keys inside a depth-1 object value within an entity's content.
217/// Returns entries with absolute line numbers computed from `base_line`.
218fn find_nested_object_entries(entity_content: &str, base_line: usize) -> Vec<JsonEntry> {
219    let mut entries = Vec::new();
220    let mut in_string = false;
221    let mut escape_next = false;
222    let mut line_num: usize = 0; // 0-based offset from base_line
223    let mut found_outer_colon = false;
224    let mut found_value_start = false;
225    let mut value_depth: usize = 0;
226    let mut current_key: Option<String> = None;
227    let mut reading_key = false;
228    let mut key_buf = String::new();
229    let mut key_start = false;
230
231    for ch in entity_content.chars() {
232        if ch == '\n' {
233            line_num += 1;
234            continue;
235        }
236
237        if escape_next {
238            if reading_key {
239                key_buf.push(ch);
240            }
241            escape_next = false;
242            continue;
243        }
244
245        if ch == '\\' && in_string {
246            if reading_key {
247                key_buf.push(ch);
248            }
249            escape_next = true;
250            continue;
251        }
252
253        if in_string {
254            if ch == '"' {
255                in_string = false;
256                if reading_key {
257                    reading_key = false;
258                    current_key = Some(key_buf.clone());
259                    key_buf.clear();
260                }
261            } else if reading_key {
262                key_buf.push(ch);
263            }
264            continue;
265        }
266
267        if !found_value_start {
268            match ch {
269                '"' => {
270                    in_string = true;
271                }
272                ':' => {
273                    found_outer_colon = true;
274                }
275                '{' if found_outer_colon => {
276                    found_value_start = true;
277                    value_depth = 1;
278                }
279                _ => {}
280            }
281            continue;
282        }
283
284        match ch {
285            '"' => {
286                in_string = true;
287                if value_depth == 1 && current_key.is_none() && !key_start {
288                    reading_key = true;
289                    key_buf.clear();
290                }
291            }
292            ':' => {
293                if value_depth == 1 {
294                    if let Some(ref key) = current_key {
295                        entries.push(JsonEntry {
296                            key: key.clone(),
297                            pointer: String::new(),
298                            entity_type: "property".to_string(),
299                            start_line: base_line + line_num,
300                        });
301                        key_start = true;
302                    }
303                }
304            }
305            '{' | '[' => {
306                value_depth += 1;
307            }
308            '}' | ']' => {
309                value_depth -= 1;
310                if value_depth == 0 {
311                    break;
312                }
313            }
314            ',' => {
315                if value_depth == 1 {
316                    current_key = None;
317                    key_start = false;
318                }
319            }
320            _ => {}
321        }
322    }
323
324    entries
325}
326
/// Return the value part of a `"key": value` entity string.
///
/// The text after the first `:` that lies outside a quoted string is trimmed,
/// stripped of trailing commas, and trimmed again. Dropping the key lets a
/// renamed key with an unchanged value produce the same structural_hash, so it
/// is detected as a rename rather than delete + add. When no such `:` exists
/// the input is returned unchanged.
fn extract_value_content(content: &str) -> &str {
    let mut inside_quotes = false;
    let mut skip_next = false;

    // Locate the first colon outside a string, honoring backslash escapes
    // inside strings.
    let separator = content.char_indices().find(|&(_, c)| {
        if skip_next {
            skip_next = false;
            return false;
        }
        match c {
            '\\' if inside_quotes => {
                skip_next = true;
                false
            }
            '"' => {
                inside_quotes = !inside_quotes;
                false
            }
            ':' => !inside_quotes,
            _ => false,
        }
    });

    match separator {
        Some((idx, _)) => content[idx + 1..].trim().trim_end_matches(',').trim(),
        None => content,
    }
}
352
/// Locate the line (1-based) holding the root object's closing `}`.
///
/// This is the last line whose trimmed text is exactly `}`; when no line
/// qualifies, the total number of lines is returned instead.
fn find_closing_brace_line(lines: &[&str]) -> usize {
    lines
        .iter()
        .rposition(|text| text.trim() == "}")
        .map_or(lines.len(), |idx| idx + 1)
}
362
/// Compute the end line (1-based, inclusive) of an entry that begins on line
/// `start` and is followed by something beginning on line `next_start`.
///
/// Starting from the line just before `next_start`, walks backwards over
/// lines that are blank or consist solely of `,`, never going above `start`.
///
/// The result always lies in `start..=max(start, lines.len())`:
/// - if `next_start` is on or before `start` (e.g. two entries share a line),
///   `start` is returned so the entry still covers its own line instead of
///   yielding an inverted range;
/// - a `next_start` of 0 or one past the end of `lines` is clamped rather than
///   underflowing or indexing out of bounds.
fn trim_trailing_blanks(lines: &[&str], start: usize, next_start: usize) -> usize {
    let mut end = next_start.saturating_sub(1).min(lines.len()).max(start);
    // Inside the loop `start < end <= lines.len()`, so `end - 1` is a valid index.
    while end > start {
        let text = lines[end - 1].trim();
        if text.is_empty() || text == "," {
            end -= 1;
        } else {
            break;
        }
    }
    end
}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::change::ChangeType;
    use crate::model::identity::match_entities;

    /// Line ranges and parent links for a package.json-like document with one
    /// nested object.
    #[test]
    fn test_json_line_positions() {
        let source = r#"{
  "name": "my-app",
  "version": "1.0.0",
  "scripts": {
    "build": "tsc",
    "test": "jest"
  },
  "description": "a test app"
}
"#;
        let entities = JsonParserPlugin.extract_entities(source, "package.json");

        // (name, start_line, end_line) in emission order; the children of
        // "scripts" come right after their parent.
        let expected: [(&str, usize, usize); 6] = [
            ("name", 2, 2),
            ("version", 3, 3),
            ("scripts", 4, 7),
            ("build", 5, 5),
            ("test", 6, 6),
            ("description", 8, 8),
        ];
        assert_eq!(entities.len(), expected.len());
        for (entity, &(name, start, end)) in entities.iter().zip(expected.iter()) {
            assert_eq!(entity.name, name);
            assert_eq!(entity.start_line, start, "start_line of {name}");
            assert_eq!(entity.end_line, end, "end_line of {name}");
        }

        assert!(entities[0].parent_id.is_none());
        assert_eq!(entities[2].entity_type, "object");

        // Depth-2 children of "scripts" point back at it.
        let scripts_id = entities[2].id.as_str();
        for child in &entities[3..5] {
            assert_eq!(child.parent_id.as_deref(), Some(scripts_id));
        }
    }

    /// A renamed scalar key is reported as a single rename by the matcher.
    #[test]
    fn test_rename_detected_end_to_end() {
        let parser = JsonParserPlugin;
        let old_entities = parser.extract_entities("{\n  \"timeout\": 30\n}\n", "config.json");
        let new_entities =
            parser.extract_entities("{\n  \"request_timeout\": 30\n}\n", "config.json");

        let outcome = match_entities(&old_entities, &new_entities, "config.json", None, None, None);

        assert_eq!(outcome.changes.len(), 1);
        let change = &outcome.changes[0];
        assert_eq!(change.change_type, ChangeType::Renamed);
        assert_eq!(change.entity_name, "request_timeout");
    }

    /// Renaming a scalar key changes the content hash but not the structural hash.
    #[test]
    fn test_renamed_scalar_property_shares_structural_hash() {
        let parser = JsonParserPlugin;
        let old_entities = parser.extract_entities("{\n  \"timeout\": 30\n}\n", "config.json");
        let new_entities =
            parser.extract_entities("{\n  \"request_timeout\": 30\n}\n", "config.json");

        assert_eq!(old_entities.len(), 1);
        assert_eq!(new_entities.len(), 1);

        let (old, new) = (&old_entities[0], &new_entities[0]);
        // The key name is part of the content, so the content hash differs...
        assert_ne!(old.content_hash, new.content_hash);
        // ...while the value is unchanged, so the structural hash matches.
        assert_eq!(old.structural_hash, new.structural_hash);
    }

    /// Renaming a key whose value is an object keeps the structural hash too.
    #[test]
    fn test_renamed_object_property_shares_structural_hash() {
        let parser = JsonParserPlugin;
        let old_entities = parser.extract_entities(
            "{\n  \"config\": {\n    \"port\": 8080\n  }\n}\n",
            "config.json",
        );
        let new_entities = parser.extract_entities(
            "{\n  \"settings\": {\n    \"port\": 8080\n  }\n}\n",
            "config.json",
        );

        // One parent plus one child ("port") on each side.
        assert_eq!(old_entities.len(), 2);
        assert_eq!(new_entities.len(), 2);

        let (old_parent, new_parent) = (&old_entities[0], &new_entities[0]);
        assert_ne!(old_parent.content_hash, new_parent.content_hash);
        assert_eq!(old_parent.structural_hash, new_parent.structural_hash);
    }
}
472}