Skip to main content

sem_core/parser/plugins/
json.rs

1use crate::model::entity::SemanticEntity;
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
5pub struct JsonParserPlugin;
6
7impl SemanticParserPlugin for JsonParserPlugin {
8    fn id(&self) -> &str {
9        "json"
10    }
11
12    fn extensions(&self) -> &[&str] {
13        &[".json"]
14    }
15
16    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17        if !content.trim().starts_with('{') {
18            return Vec::new();
19        }
20        extract_entries(content, file_path)
21    }
22}
23
24struct Frame {
25    content: String,
26    entries: Vec<JsonEntry>,
27    cursor: usize,
28    line_offset: usize,
29    parent_pointer: Option<String>,
30    parent_entity_id: Option<String>,
31}
32
33/// Iterative walk of the JSON tree, emitting entities in DFS pre-order.
34/// Frames track a cursor through their entries; encountering an
35/// object-valued entry pushes both the parent frame (resumed after) and the
36/// child frame (visited next), so children appear before later siblings.
37fn extract_entries(content: &str, file_path: &str) -> Vec<SemanticEntity> {
38    let mut entities = Vec::new();
39    let root_entries = find_top_level_entries(content);
40    let mut worklist: Vec<Frame> = vec![Frame {
41        content: content.to_string(),
42        entries: root_entries,
43        cursor: 0,
44        line_offset: 1,
45        parent_pointer: None,
46        parent_entity_id: None,
47    }];
48
49    while let Some(mut frame) = worklist.pop() {
50        let lines: Vec<&str> = frame.content.lines().collect();
51        let closing = find_closing_brace_line(&lines);
52
53        while frame.cursor < frame.entries.len() {
54            let i = frame.cursor;
55            frame.cursor += 1;
56            let entry = &frame.entries[i];
57            let next_boundary = frame.entries.get(i + 1).map(|e| e.start_line).unwrap_or(closing);
58            let end_line = trim_trailing_blanks(&lines, entry.start_line, next_boundary);
59
60            let entity_content = lines[entry.start_line - 1..end_line].join("\n");
61            let value_content = extract_value_content(&entity_content);
62
63            let pointer = match &frame.parent_pointer {
64                Some(pp) => format!("{pp}{}", entry.pointer),
65                None => entry.pointer.clone(),
66            };
67            let entity_id = format!("{}::{}", file_path, pointer);
68            let abs_start = frame.line_offset + entry.start_line - 1;
69            let abs_end = frame.line_offset + end_line - 1;
70
71            entities.push(SemanticEntity {
72                id: entity_id.clone(),
73                file_path: file_path.to_string(),
74                entity_type: entry.entity_type.clone(),
75                name: entry.key.clone(),
76                parent_id: frame.parent_entity_id.clone(),
77                content_hash: content_hash(&entity_content),
78                structural_hash: Some(content_hash(value_content)),
79                content: entity_content.clone(),
80                start_line: abs_start,
81                end_line: abs_end,
82                metadata: None,
83            });
84
85            if entry.entity_type == "object" {
86                if let Some(obj_str) = extract_object_value(&entity_content) {
87                    let obj_line_in_entity = find_value_start_line(&entity_content);
88                    let child = Frame {
89                        content: obj_str.to_string(),
90                        entries: find_top_level_entries(obj_str),
91                        cursor: 0,
92                        line_offset: abs_start + obj_line_in_entity - 1,
93                        parent_pointer: Some(pointer),
94                        parent_entity_id: Some(entity_id),
95                    };
96                    worklist.push(frame);
97                    worklist.push(child);
98                    break;
99                }
100            }
101        }
102    }
103
104    entities
105}
106
/// Returns the object literal that forms the value of a `"key": { ... }`
/// entity string.
///
/// For input such as `  "scripts": {\n    "build": "tsc"\n  }` the result is
/// the slice from the value's opening `{` through its matching `}` inclusive.
/// Returns `None` when there is no `:` outside a string, no `{` after that
/// colon, or the object is never closed.
fn extract_object_value(content: &str) -> Option<&str> {
    let mut in_quotes = false;
    let mut skip_next = false;

    // Step 1: the first `:` that is not inside a string separates key from value.
    let colon = content.char_indices().find_map(|(idx, c)| {
        if skip_next {
            skip_next = false;
            return None;
        }
        match c {
            '\\' if in_quotes => skip_next = true,
            '"' => in_quotes = !in_quotes,
            ':' if !in_quotes => return Some(idx),
            _ => {}
        }
        None
    })?;

    // Step 2: the value starts at the first `{` after the colon.
    let object_start = colon + 1 + content[colon + 1..].find('{')?;
    let object = &content[object_start..];

    // Step 3: scan for the matching `}`. Braces and brackets are counted
    // separately so a `}` only ends the scan when no array is still open.
    let mut open_braces = 0usize;
    let mut open_brackets = 0usize;
    in_quotes = false;
    skip_next = false;

    for (offset, c) in object.char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }
        if in_quotes {
            match c {
                '\\' => skip_next = true,
                '"' => in_quotes = false,
                _ => {}
            }
            continue;
        }
        match c {
            '"' => in_quotes = true,
            '{' => open_braces += 1,
            '[' => open_brackets += 1,
            ']' => open_brackets = open_brackets.saturating_sub(1),
            '}' => {
                open_braces = open_braces.saturating_sub(1);
                if open_braces == 0 && open_brackets == 0 {
                    return Some(&object[..=offset]);
                }
            }
            _ => {}
        }
    }
    None
}
176
/// Returns the 1-based line, counted from the start of `content`, holding the
/// first `{` that follows the key/value colon.
///
/// Falls back to `1` when no such brace is found.
fn find_value_start_line(content: &str) -> usize {
    let mut line_no = 1usize;
    let mut in_quotes = false;
    let mut skip_next = false;
    let mut seen_colon = false;

    for c in content.chars() {
        match c {
            // Newlines are counted before any other state is consulted.
            '\n' => line_no += 1,
            _ if skip_next => skip_next = false,
            '\\' if in_quotes => skip_next = true,
            '"' => in_quotes = !in_quotes,
            ':' if !in_quotes => seen_colon = true,
            '{' if seen_colon => return line_no,
            _ => {}
        }
    }
    1
}
212
/// A key found at the top level of a JSON object, as located by
/// `find_top_level_entries`.
struct JsonEntry {
    /// Key text exactly as written between the quotes.
    key: String,
    /// `/`-prefixed pointer segment for the key, with `~` and `/` escaped.
    pointer: String,
    /// Kind of value: `"object"`, `"array"` or `"property"`.
    entity_type: String,
    /// 1-based line of the entry, relative to the content passed in.
    start_line: usize,
}
219
220/// Scan the source text to find each top-level key in the root JSON object.
221/// Returns entries with accurate start_line positions (1-based, relative to `content`).
222fn find_top_level_entries(content: &str) -> Vec<JsonEntry> {
223    let mut entries = Vec::new();
224    let mut depth = 0;
225    let mut in_string = false;
226    let mut escape_next = false;
227    let mut line_num: usize = 1;
228
229    let mut current_key: Option<String> = None;
230    let mut key_start = false;
231    let mut key_buf = String::new();
232    let mut reading_key = false;
233
234    for ch in content.chars() {
235        if ch == '\n' {
236            line_num += 1;
237            continue;
238        }
239
240        if escape_next {
241            if reading_key {
242                key_buf.push(ch);
243            }
244            escape_next = false;
245            continue;
246        }
247
248        if ch == '\\' && in_string {
249            if reading_key {
250                key_buf.push(ch);
251            }
252            escape_next = true;
253            continue;
254        }
255
256        if in_string {
257            if ch == '"' {
258                in_string = false;
259                if reading_key {
260                    reading_key = false;
261                    current_key = Some(key_buf.clone());
262                    key_buf.clear();
263                }
264            } else if reading_key {
265                key_buf.push(ch);
266            }
267            continue;
268        }
269
270        match ch {
271            '"' => {
272                in_string = true;
273                if depth == 1 && current_key.is_none() && !key_start {
274                    reading_key = true;
275                    key_buf.clear();
276                }
277            }
278            ':' => {
279                if depth == 1 {
280                    if let Some(ref key) = current_key {
281                        let escaped_key = key.replace('~', "~0").replace('/', "~1");
282                        let pointer = format!("/{escaped_key}");
283                        entries.push(JsonEntry {
284                            key: key.clone(),
285                            pointer,
286                            entity_type: String::new(),
287                            start_line: line_num,
288                        });
289                        key_start = true;
290                    }
291                }
292            }
293            '{' | '[' => {
294                depth += 1;
295                if depth == 2 && key_start {
296                    if let Some(entry) = entries.last_mut() {
297                        entry.entity_type = if ch == '{' { "object" } else { "array" }.to_string();
298                    }
299                }
300            }
301            '}' | ']' => {
302                depth -= 1;
303            }
304            ',' => {
305                if depth == 1 {
306                    if let Some(entry) = entries.last_mut() {
307                        if entry.entity_type.is_empty() {
308                            entry.entity_type = "property".to_string();
309                        }
310                    }
311                    current_key = None;
312                    key_start = false;
313                }
314            }
315            _ => {}
316        }
317    }
318
319    if let Some(entry) = entries.last_mut() {
320        if entry.entity_type.is_empty() {
321            entry.entity_type = "property".to_string();
322        }
323    }
324
325    entries
326}
327
/// Returns the value part of a `"key": value` entity string.
///
/// Everything up to and including the first `:` outside a string is dropped,
/// then surrounding whitespace and any trailing commas are trimmed. Because
/// the key is excluded, two entries with different keys but the same value
/// yield the same text, which is what lets a key rename be recognised via
/// `structural_hash`. When no separator is found, `content` is returned
/// unchanged.
fn extract_value_content(content: &str) -> &str {
    let mut in_quotes = false;
    let mut skip_next = false;

    let separator = content.char_indices().find_map(|(pos, c)| {
        if skip_next {
            skip_next = false;
            return None;
        }
        match c {
            '\\' if in_quotes => skip_next = true,
            '"' => in_quotes = !in_quotes,
            ':' if !in_quotes => return Some(pos),
            _ => {}
        }
        None
    });

    match separator {
        Some(pos) => content[pos + 1..].trim().trim_end_matches(',').trim(),
        None => content,
    }
}
353
/// Returns the 1-based number of the last line that consists solely of `}`
/// (ignoring surrounding whitespace), taken to be the object's closing brace.
///
/// When no such line exists the total line count is returned instead.
fn find_closing_brace_line(lines: &[&str]) -> usize {
    lines
        .iter()
        .rposition(|text| text.trim() == "}")
        .map_or(lines.len(), |idx| idx + 1)
}
363
/// Computes the last line (1-based, inclusive) of the entry that begins on
/// line `start`, given that the following entry or the closing brace begins
/// on line `next_start`.
///
/// Starting from the line before `next_start`, blank lines and lines holding
/// only a comma are skipped backwards, but never past `start` itself.
///
/// The result is always at least `start - 1`, so `lines[start - 1..end]` is a
/// valid (possibly empty) range even when `next_start` does not lie after
/// `start` — for example when the closing-brace heuristic picked a line above
/// the final entry. It is also capped at `lines.len()`, and `next_start == 0`
/// no longer underflows.
fn trim_trailing_blanks(lines: &[&str], start: usize, next_start: usize) -> usize {
    // Smallest value that still gives the caller a well-formed slice range.
    let floor = start.saturating_sub(1);
    let mut end = next_start.saturating_sub(1).min(lines.len()).max(floor);

    while end > start {
        let trimmed = lines[end - 1].trim();
        if trimmed.is_empty() || trimmed == "," {
            end -= 1;
        } else {
            break;
        }
    }
    end
}
378
379#[cfg(test)]
380mod tests {
381    use super::*;
382    use crate::git::types::{FileChange, FileStatus};
383    use crate::model::change::{ChangeType, SemanticChange};
384    use crate::parser::differ::compute_semantic_diff;
385    use crate::parser::registry::ParserRegistry;
386
387    /// Run the full pipeline and drop orphan changes (which represent line-level
388    /// noise outside entity spans like the root `{` `}` brackets).
389    fn json_diff(before: &str, after: &str) -> Vec<SemanticChange> {
390        let mut registry = ParserRegistry::new();
391        registry.register(Box::new(JsonParserPlugin));
392        let changes = vec![FileChange {
393            file_path: "test.json".to_string(),
394            status: FileStatus::Modified,
395            old_file_path: None,
396            before_content: Some(before.to_string()),
397            after_content: Some(after.to_string()),
398        }];
399        compute_semantic_diff(&changes, &registry, None, None)
400            .changes
401            .into_iter()
402            .filter(|c| c.entity_type != "orphan")
403            .collect()
404    }
405
406    fn names(changes: &[SemanticChange]) -> Vec<(String, ChangeType)> {
407        changes.iter().map(|c| (c.entity_name.clone(), c.change_type)).collect()
408    }
409
410    fn find_change<'a>(changes: &'a [SemanticChange], name: &str, kind: ChangeType) -> &'a SemanticChange {
411        changes.iter().find(|c| c.entity_name == name && c.change_type == kind)
412            .unwrap_or_else(|| panic!("expected {:?} {} in changes; got: {:?}", kind, name, names(changes)))
413    }
414
415    #[test]
416    fn test_json_line_positions() {
417        let content = r#"{
418  "name": "my-app",
419  "version": "1.0.0",
420  "scripts": {
421    "build": "tsc",
422    "test": "jest"
423  },
424  "description": "a test app"
425}
426"#;
427        let plugin = JsonParserPlugin;
428        let entities = plugin.extract_entities(content, "package.json");
429
430        // Top-level entities
431        let top: Vec<_> = entities.iter().filter(|e| e.parent_id.is_none()).collect();
432        assert_eq!(top.len(), 4);
433
434        assert_eq!(top[0].name, "name");
435        assert_eq!(top[0].start_line, 2);
436        assert_eq!(top[0].end_line, 2);
437
438        assert_eq!(top[1].name, "version");
439        assert_eq!(top[1].start_line, 3);
440        assert_eq!(top[1].end_line, 3);
441
442        assert_eq!(top[2].name, "scripts");
443        assert_eq!(top[2].entity_type, "object");
444        assert_eq!(top[2].start_line, 4);
445        assert_eq!(top[2].end_line, 7);
446
447        assert_eq!(top[3].name, "description");
448        assert_eq!(top[3].start_line, 8);
449        assert_eq!(top[3].end_line, 8);
450    }
451
452    #[test]
453    fn test_nested_entities_extracted() {
454        let content = r#"{
455  "scripts": {
456    "build": "tsc",
457    "test": "jest"
458  }
459}
460"#;
461        let plugin = JsonParserPlugin;
462        let entities = plugin.extract_entities(content, "package.json");
463
464        // Should have "scripts" (top-level) + "build" and "test" (nested)
465        assert_eq!(entities.len(), 3);
466
467        let scripts = entities.iter().find(|e| e.name == "scripts").unwrap();
468        assert!(scripts.parent_id.is_none());
469
470        let build = entities.iter().find(|e| e.name == "build").unwrap();
471        assert_eq!(build.parent_id, Some(scripts.id.clone()));
472        assert_eq!(build.start_line, 3);
473
474        let test = entities.iter().find(|e| e.name == "test").unwrap();
475        assert_eq!(test.parent_id, Some(scripts.id.clone()));
476        assert_eq!(test.start_line, 4);
477    }
478
479    // ─────────────────────────────────────────────────────────────────────────
480    //  Top-level scalars
481    // ─────────────────────────────────────────────────────────────────────────
482
483    #[test]
484    fn scalar_value_change_reports_modified() {
485        let changes = json_diff(
486            "{\n  \"name\": \"foo\"\n}",
487            "{\n  \"name\": \"bar\"\n}",
488        );
489        assert_eq!(names(&changes), vec![("name".into(), ChangeType::Modified)]);
490        assert_eq!(changes[0].parent_name, None);
491    }
492
493    #[test]
494    fn scalar_added_to_empty_object_reports_only_the_scalar() {
495        let changes = json_diff("{}", "{\n  \"name\": \"foo\"\n}");
496        assert_eq!(names(&changes), vec![("name".into(), ChangeType::Added)]);
497    }
498
499    #[test]
500    fn scalar_deleted_from_object_reports_only_the_scalar() {
501        let changes = json_diff("{\n  \"name\": \"foo\"\n}", "{}");
502        assert_eq!(names(&changes), vec![("name".into(), ChangeType::Deleted)]);
503    }
504
505    #[test]
506    fn scalar_key_renamed_with_unchanged_value_reports_renamed() {
507        let changes = json_diff(
508            "{\n  \"timeout\": 30\n}",
509            "{\n  \"testTimeout\": 30\n}",
510        );
511        assert_eq!(changes.len(), 1);
512        assert_eq!(changes[0].change_type, ChangeType::Renamed);
513        assert_eq!(changes[0].entity_name, "testTimeout");
514        assert_eq!(changes[0].old_entity_name.as_deref(), Some("timeout"));
515    }
516
517    // ─────────────────────────────────────────────────────────────────────────
518    //  Parent suppression — object containers don't surface when children change
519    // ─────────────────────────────────────────────────────────────────────────
520
521    #[test]
522    fn child_modified_inside_object_only_child_reported() {
523        let changes = json_diff(
524            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
525            "{\n  \"scripts\": {\n    \"build\": \"webpack\"\n  }\n}",
526        );
527        assert!(!changes.iter().any(|c| c.entity_name == "scripts"),
528            "scripts should be suppressed; got: {:?}", names(&changes));
529        let build = find_change(&changes, "build", ChangeType::Modified);
530        assert_eq!(build.parent_name.as_deref(), Some("scripts"));
531    }
532
533    #[test]
534    fn child_added_inside_object_only_child_reported() {
535        let changes = json_diff(
536            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
537            "{\n  \"scripts\": {\n    \"build\": \"tsc\",\n    \"test\": \"jest\"\n  }\n}",
538        );
539        assert!(!changes.iter().any(|c| c.entity_name == "scripts" && c.change_type == ChangeType::Modified),
540            "scripts should be suppressed; got: {:?}", names(&changes));
541        let test = find_change(&changes, "test", ChangeType::Added);
542        assert_eq!(test.parent_name.as_deref(), Some("scripts"));
543    }
544
545    #[test]
546    fn child_deleted_inside_object_only_child_reported() {
547        let changes = json_diff(
548            "{\n  \"scripts\": {\n    \"build\": \"tsc\",\n    \"test\": \"jest\"\n  }\n}",
549            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
550        );
551        assert!(!changes.iter().any(|c| c.entity_name == "scripts" && c.change_type == ChangeType::Modified),
552            "scripts should be suppressed; got: {:?}", names(&changes));
553        let test = find_change(&changes, "test", ChangeType::Deleted);
554        assert_eq!(test.parent_name.as_deref(), Some("scripts"));
555    }
556
557    #[test]
558    fn whole_object_added_only_leaf_children_reported() {
559        let changes = json_diff(
560            "{}",
561            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
562        );
563        assert!(!changes.iter().any(|c| c.entity_name == "scripts"),
564            "scripts (container) should be suppressed; got: {:?}", names(&changes));
565        let build = find_change(&changes, "build", ChangeType::Added);
566        assert_eq!(build.parent_name.as_deref(), Some("scripts"));
567    }
568
569    #[test]
570    fn whole_object_deleted_only_leaf_children_reported() {
571        let changes = json_diff(
572            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
573            "{}",
574        );
575        assert!(!changes.iter().any(|c| c.entity_name == "scripts"),
576            "scripts (container) should be suppressed; got: {:?}", names(&changes));
577        find_change(&changes, "build", ChangeType::Deleted);
578    }
579
580    // ─────────────────────────────────────────────────────────────────────────
581    //  Deep nesting — full ancestor chain in parent_name
582    // ─────────────────────────────────────────────────────────────────────────
583
584    #[test]
585    fn deep_nested_value_change_reports_only_the_leaf_with_full_chain() {
586        let before = r#"{
587  "jest": {
588    "config": {
589      "testTimeout": 5000
590    }
591  }
592}"#;
593        let after = r#"{
594  "jest": {
595    "config": {
596      "testTimeout": 10000
597    }
598  }
599}"#;
600        let changes = json_diff(before, after);
601        assert_eq!(names(&changes), vec![("testTimeout".into(), ChangeType::Modified)]);
602        assert_eq!(changes[0].parent_name.as_deref(), Some("jest::config"));
603    }
604
605    #[test]
606    fn empty_string_key_ancestor_is_skipped_in_parent_name() {
607        // package-lock.json uses "" as a key for the root project.
608        // Walking the parent chain for a deeply-nested change must not emit
609        // the empty name (would render as "::::") in the displayed path.
610        let before = r#"{
611  "packages": {
612    "": {
613      "dependencies": {
614        "jose": "^6.1.3"
615      }
616    }
617  }
618}"#;
619        let after = r#"{
620  "packages": {
621    "": {
622      "dependencies": {
623        "jose": "^6.1.4"
624      }
625    }
626  }
627}"#;
628        let changes = json_diff(before, after);
629        let jose = find_change(&changes, "jose", ChangeType::Modified);
630        // The empty-string key ancestor is dropped from the displayed chain.
631        assert_eq!(jose.parent_name.as_deref(), Some("packages::dependencies"));
632    }
633
634    // ─────────────────────────────────────────────────────────────────────────
635    //  Renames at the object level
636    // ─────────────────────────────────────────────────────────────────────────
637
638    #[test]
639    fn nested_scalar_rename_with_unchanged_value() {
640        // Same value → structural_hash matches → Renamed.
641        let before = r#"{
642  "scripts": {
643    "run": "node .",
644    "test": "jest"
645  }
646}"#;
647        let after = r#"{
648  "scripts": {
649    "start": "node .",
650    "test": "jest"
651  }
652}"#;
653        let changes = json_diff(before, after);
654        let renames: Vec<_> = changes.iter().filter(|c| c.change_type == ChangeType::Renamed).collect();
655        assert_eq!(renames.len(), 1);
656        assert_eq!(renames[0].entity_name, "start");
657        assert_eq!(renames[0].old_entity_name.as_deref(), Some("run"));
658        assert_eq!(renames[0].parent_name.as_deref(), Some("scripts"));
659    }
660
661    #[test]
662    fn parent_object_renamed_unchanged_child_move_suppressed() {
663        // scripts → tasks, dev unchanged: only the parent rename is reported.
664        let before = "{\n  \"scripts\": {\n    \"dev\": \"vite\"\n  }\n}\n";
665        let after = "{\n  \"tasks\": {\n    \"dev\": \"vite\"\n  }\n}\n";
666        let changes = json_diff(before, after);
667        let tasks = find_change(&changes, "tasks", ChangeType::Renamed);
668        assert_eq!(tasks.old_entity_name.as_deref(), Some("scripts"));
669        assert!(!changes.iter().any(|c| c.entity_name == "dev"),
670            "child 'dev' should be suppressed (only moved due to parent rename); got: {:?}", names(&changes));
671    }
672
673    #[test]
674    fn parent_object_renamed_and_child_renamed_only_child_surfaces() {
675        // scripts → tasks AND dev → develop. Parent rename cannot be detected
676        // because the renamed child key changes the parent's structural_hash.
677        // The child move alone conveys the move + rename via:
678        //   parent_name="tasks", old_entity_name="dev", old_parent_id=<scripts>
679        let before = "{\n  \"scripts\": {\n    \"dev\": \"vite\"\n  }\n}\n";
680        let after = "{\n  \"tasks\": {\n    \"develop\": \"vite\"\n  }\n}\n";
681        let changes = json_diff(before, after);
682        assert_eq!(names(&changes), vec![("develop".into(), ChangeType::Moved)]);
683        let develop = &changes[0];
684        assert_eq!(develop.old_entity_name.as_deref(), Some("dev"));
685        assert_eq!(develop.parent_name.as_deref(), Some("tasks"));
686        assert!(develop.old_parent_id.is_some(), "child Moved should carry old_parent_id");
687    }
688
689    // ─────────────────────────────────────────────────────────────────────────
690    //  Type transitions — scalar ↔ object
691    // ─────────────────────────────────────────────────────────────────────────
692
693    #[test]
694    fn scalar_to_object_transition_reports_modified_plus_new_children_added() {
695        let changes = json_diff(
696            "{\n  \"build\": \"tsc\"\n}",
697            "{\n  \"build\": {\n    \"command\": \"tsc\"\n  }\n}",
698        );
699        let build = find_change(&changes, "build", ChangeType::Modified);
700        assert_eq!(build.entity_type, "object", "after type should reflect new value");
701        let command = find_change(&changes, "command", ChangeType::Added);
702        assert_eq!(command.parent_name.as_deref(), Some("build"));
703    }
704
705    #[test]
706    fn object_to_scalar_transition_reports_modified_plus_old_children_deleted() {
707        let changes = json_diff(
708            "{\n  \"config\": {\n    \"watch\": true\n  }\n}",
709            "{\n  \"config\": \"auto\"\n}",
710        );
711        let config = find_change(&changes, "config", ChangeType::Modified);
712        assert_eq!(config.entity_type, "property", "after type should reflect new value");
713        find_change(&changes, "watch", ChangeType::Deleted);
714    }
715
716    // ─────────────────────────────────────────────────────────────────────────
717    //  Arrays — opaque (no recursion into elements)
718    // ─────────────────────────────────────────────────────────────────────────
719
720    #[test]
721    fn array_modified_reports_only_the_array_key() {
722        let changes = json_diff(
723            "{\n  \"deps\": [\"react\", \"vue\"]\n}",
724            "{\n  \"deps\": [\"react\", \"vue\", \"lodash\"]\n}",
725        );
726        assert_eq!(names(&changes), vec![("deps".into(), ChangeType::Modified)]);
727    }
728
729    #[test]
730    fn array_renamed_when_contents_unchanged() {
731        let changes = json_diff(
732            "{\n  \"deps\": [\"react\", \"vue\"]\n}",
733            "{\n  \"dependencies\": [\"react\", \"vue\"]\n}",
734        );
735        assert_eq!(changes.len(), 1);
736        assert_eq!(changes[0].change_type, ChangeType::Renamed);
737        assert_eq!(changes[0].entity_name, "dependencies");
738    }
739
740    #[test]
741    fn array_element_keys_are_not_tracked_as_entities() {
742        let before = r#"{
743  "deps": [
744    {"name": "react"},
745    {"name": "vue"}
746  ]
747}"#;
748        let after = r#"{
749  "deps": [
750    {"package": "react"},
751    {"name": "vue"}
752  ]
753}"#;
754        let changes = json_diff(before, after);
755        assert_eq!(names(&changes), vec![("deps".into(), ChangeType::Modified)],
756            "array elements have no stable identity; only the array key should change");
757    }
758
759    // ─────────────────────────────────────────────────────────────────────────
760    //  Null and empty values
761    // ─────────────────────────────────────────────────────────────────────────
762
763    #[test]
764    fn null_to_string_value_reports_modified() {
765        let changes = json_diff(
766            "{\n  \"key\": null\n}",
767            "{\n  \"key\": \"value\"\n}",
768        );
769        assert_eq!(names(&changes), vec![("key".into(), ChangeType::Modified)]);
770    }
771
772    #[test]
773    fn empty_object_gains_child_reports_both_parent_and_child() {
774        // The precision guard keeps `key` Modified — its declaration shape
775        // changed from `{}` to `{...}`.
776        let changes = json_diff(
777            "{\n  \"key\": {}\n}",
778            "{\n  \"key\": {\n    \"build\": \"tsc\"\n  }\n}",
779        );
780        let key = find_change(&changes, "key", ChangeType::Modified);
781        assert_eq!(key.parent_name, None);
782        let build = find_change(&changes, "build", ChangeType::Added);
783        assert_eq!(build.parent_name.as_deref(), Some("key"));
784    }
785
786    // ─────────────────────────────────────────────────────────────────────────
787    //  Entity ID format — file::pointer (no entity_type)
788    // ─────────────────────────────────────────────────────────────────────────
789
790    #[test]
791    fn entity_id_for_nested_property_uses_full_pointer_only() {
792        let changes = json_diff(
793            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
794            "{\n  \"scripts\": {\n    \"build\": \"webpack\"\n  }\n}",
795        );
796        let build = find_change(&changes, "build", ChangeType::Modified);
797        assert_eq!(build.entity_id, "test.json::/scripts/build");
798    }
799
800
801    // ─────────────────────────────────────────────────────────────────────────
802    //  Phase 3 fuzzy matching
803    // ─────────────────────────────────────────────────────────────────────────
804
805    #[test]
806    fn fuzzy_rename_detected_when_value_mostly_unchanged() {
807        // config → settings: key rename (Phase 1 & 2 miss).
808        // testTimeout 30 → 60: small value change rules out structural_hash.
809        // Many siblings unchanged → Jaccard > 0.8 → Phase 3 catches it.
810        let before = r#"{
811  "config": {
812    "host": "localhost",
813    "protocol": "https",
814    "retries": 3,
815    "testTimeout": 30,
816    "keepalive": true,
817    "compression": true,
818    "logging": "verbose",
819    "maxConnections": 100
820  }
821}"#;
822        let after = r#"{
823  "settings": {
824    "host": "localhost",
825    "protocol": "https",
826    "retries": 3,
827    "testTimeout": 60,
828    "keepalive": true,
829    "compression": true,
830    "logging": "verbose",
831    "maxConnections": 100
832  }
833}"#;
834        let changes = json_diff(before, after);
835        assert!(changes.iter().any(|c| c.entity_name == "settings" && c.change_type == ChangeType::Renamed),
836            "expected fuzzy rename of config → settings; got: {:?}", names(&changes));
837    }
838
839    // ─────────────────────────────────────────────────────────────────────────
840    //  Known limitations (documented in spec)
841    // ─────────────────────────────────────────────────────────────────────────
842
#[test]
fn parent_rename_with_sibling_added_surfaces_leaf_moves() {
    // The parent is renamed (scripts → tasks) AND gains a new child at the
    // same time, so its structural_hash diverges and Phase 2 misses the
    // parent rename. The unchanged child still matches by structural_hash and
    // surfaces as Moved; the parent's own Deleted/Added entries are
    // container-suppressed.
    let old_doc = r#"{
  "scripts": {
    "build": "tsc"
  }
}"#;
    let new_doc = r#"{
  "tasks": {
    "build": "tsc",
    "test": "jest"
  }
}"#;
    let diff = json_diff(old_doc, new_doc);

    // The untouched leaf is reported as Moved under the new parent and
    // remembers where it came from.
    let moved_build = find_change(&diff, "build", ChangeType::Moved);
    assert_eq!(moved_build.parent_name.as_deref(), Some("tasks"));
    assert!(moved_build.old_parent_id.is_some());

    // The brand-new sibling is a plain addition.
    find_change(&diff, "test", ChangeType::Added);

    // Neither the old nor the new parent container appears in the output.
    let parents_suppressed = diff
        .iter()
        .all(|change| change.entity_name != "scripts" && change.entity_name != "tasks");
    assert!(
        parents_suppressed,
        "parent Deleted/Added should be suppressed; got: {:?}",
        names(&diff)
    );
}
868
#[test]
fn scalar_array_transitions_report_modified_only() {
    // Arrays are opaque, so the type transition surfaces as a single
    // Modified entry with entity_type reflecting the after value.
    //
    // Each case is (before document, after document, expected entity_type).
    // Both directions of the scalar <-> array transition are covered.
    let cases = [
        ("{\n  \"deps\": \"react\"\n}", "{\n  \"deps\": [\"react\", \"vue\"]\n}", "array"),
        ("{\n  \"deps\": [\"react\", \"vue\"]\n}", "{\n  \"deps\": \"react\"\n}", "property"),
    ];
    for (before, after, after_type) in cases {
        let changes = json_diff(before, after);
        // Include the failing case in every message: without it a failure in
        // this table-driven loop does not say which direction broke.
        assert_eq!(
            names(&changes),
            vec![("deps".into(), ChangeType::Modified)],
            "before: {before:?}, after: {after:?}"
        );
        assert_eq!(
            changes[0].entity_type, after_type,
            "before: {before:?}, after: {after:?}"
        );
    }
}
883
#[test]
fn object_to_array_transition_reports_modified_plus_old_children_deleted() {
    // `deps` goes from an object to an array: the entry itself must be
    // reported as Modified with the after-side entity_type ("array"), and the
    // old object's child `react` must be reported as Deleted.
    let object_form = "{\n  \"deps\": {\n    \"react\": \"18\"\n  }\n}";
    let array_form = "{\n  \"deps\": [\"react\"]\n}";
    let diff = json_diff(object_form, array_form);

    let modified_deps = find_change(&diff, "deps", ChangeType::Modified);
    assert_eq!(modified_deps.entity_type, "array");

    find_change(&diff, "react", ChangeType::Deleted);
}
894
#[test]
fn array_to_object_transition_reports_modified_plus_new_children_added() {
    // `deps` goes from an array to an object: the entry itself must be
    // reported as Modified with the after-side entity_type ("object"), and the
    // new object's child `react` must be reported as Added under `deps`.
    let array_form = "{\n  \"deps\": [\"react\"]\n}";
    let object_form = "{\n  \"deps\": {\n    \"react\": \"18\"\n  }\n}";
    let diff = json_diff(array_form, object_form);

    let modified_deps = find_change(&diff, "deps", ChangeType::Modified);
    assert_eq!(modified_deps.entity_type, "object");

    let added_react = find_change(&diff, "react", ChangeType::Added);
    assert_eq!(added_react.parent_name.as_deref(), Some("deps"));
}
906
#[test]
fn deep_whole_section_deleted_only_leaf_reported() {
    // A two-level-deep section (jest → config → testTimeout) disappears
    // entirely. Only the leaf must be reported as Deleted, carrying the full
    // parent path; the intermediate containers must not appear at all.
    let nested_doc =
        "{\n  \"jest\": {\n    \"config\": {\n      \"testTimeout\": 5000\n    }\n  }\n}";
    let empty_doc = "{}";
    let diff = json_diff(nested_doc, empty_doc);

    let deleted_leaf = find_change(&diff, "testTimeout", ChangeType::Deleted);
    assert_eq!(deleted_leaf.parent_name.as_deref(), Some("jest::config"));

    let containers_suppressed = diff
        .iter()
        .all(|change| change.entity_name != "jest" && change.entity_name != "config");
    assert!(
        containers_suppressed,
        "intermediate containers should be suppressed; got: {:?}",
        names(&diff)
    );
}
918
#[test]
fn pointer_escapes_preserve_rfc6901_order() {
    // '~' must be escaped before '/'. Otherwise a literal '/' would become
    // '~1' and the '~' inside that would then become '~01'.
    //
    // Each case is (raw key, expected entity id). The same key holds the
    // value 1 before and 2 after, so exactly one change is expected and its
    // entity id exposes the escaped pointer segment.
    let cases = [
        ("a/b", "test.json::/a~1b"),
        ("a~b", "test.json::/a~0b"),
        ("a~/b", "test.json::/a~0~1b"),
    ];
    for (key, expected_id) in cases {
        let changes = json_diff(
            &format!("{{\n  \"{key}\": 1\n}}"),
            &format!("{{\n  \"{key}\": 2\n}}"),
        );
        // Name the failing key and show what was produced; a bare length
        // mismatch inside a table-driven loop is otherwise hard to diagnose.
        assert_eq!(
            changes.len(),
            1,
            "key {key}: expected exactly one change; got: {:?}",
            names(&changes)
        );
        assert_eq!(changes[0].entity_id, expected_id, "key {key}");
    }
}
937
938    // ─────────────────────────────────────────────────────────────────────────
939    //  Document-level edge cases
940    // ─────────────────────────────────────────────────────────────────────────
941
#[test]
fn documents_without_extractable_keys_produce_no_entities() {
    // Root-level arrays, scalars, `null` and the empty object all have no
    // keys to extract, so the plugin must return an empty entity list.
    let keyless_documents = ["[1, 2, 3]", "\"hello\"", "42", "null", "{}"];
    let parser = JsonParserPlugin;
    for input in keyless_documents {
        let entities = parser.extract_entities(input, "test.json");
        assert!(entities.is_empty(), "input: {input}");
    }
}
952
#[test]
fn malformed_input_does_not_panic() {
    // Smoke test: the extracted entities are deliberately discarded; the only
    // requirement is that extraction returns instead of panicking.
    let malformed_documents = [
        // unclosed root
        "{",
        // dangling colon
        "{\"a\":",
        // unclosed nested object
        "{\"a\": {",
        // stray ']' inside object value
        "{\"a\": {] }}",
        // mismatched brackets in array
        "{\"a\": {\"b\": [}]}",
        // multiple stray '}'
        "{\"a\": }}}}",
        // truncated mid-object
        "{\"a\": {\"b\": 1}, \"c\":",
    ];
    let parser = JsonParserPlugin;
    for document in malformed_documents {
        let _ = parser.extract_entities(document, "test.json");
    }
}
969
#[test]
fn parent_rename_with_child_value_change_falls_back_to_leaf_delete_add() {
    // The parent key changes (scripts → tasks) and its only child changes
    // value (vite → rollup). The test expects the leaf to be reported as a
    // Deleted + Added pair and no Renamed change anywhere in the diff.
    let old_doc = "{\n  \"scripts\": {\n    \"dev\": \"vite\"\n  }\n}\n";
    let new_doc = "{\n  \"tasks\": {\n    \"dev\": \"rollup\"\n  }\n}\n";
    let diff = json_diff(old_doc, new_doc);

    // Deleted is checked first, then Added.
    for expected in [ChangeType::Deleted, ChangeType::Added] {
        find_change(&diff, "dev", expected);
    }

    let nothing_renamed = diff
        .iter()
        .all(|change| change.change_type != ChangeType::Renamed);
    assert!(
        nothing_renamed,
        "rename should not be detectable; got: {:?}",
        names(&diff)
    );
}
981}