Skip to main content

sem_core/parser/plugins/
json.rs

1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
5pub struct JsonParserPlugin;
6
7impl SemanticParserPlugin for JsonParserPlugin {
8    fn id(&self) -> &str {
9        "json"
10    }
11
12    fn extensions(&self) -> &[&str] {
13        &[".json"]
14    }
15
16    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17        let trimmed = content.trim_start();
18        if trimmed.starts_with('{') {
19            return extract_entries(content, file_path, JsonContainerKind::Object);
20        }
21        if trimmed.starts_with('[') {
22            return extract_entries(content, file_path, JsonContainerKind::Array);
23        }
24        if trimmed.is_empty() {
25            return Vec::new();
26        }
27        vec![document_chunk_entity(content, file_path)]
28    }
29}
30
/// Kind of JSON container a frame walks. It selects which scanner runs on the
/// root document and which closing delimiter (`}` or `]`) is searched for when
/// locating the container's last line.
#[derive(Clone, Copy)]
enum JsonContainerKind {
    /// A `{ ... }` container; its closing delimiter is `}`.
    Object,
    /// A `[ ... ]` container; its closing delimiter is `]`.
    Array,
}
36
/// One unit of work for the iterative tree walk in `extract_entries`: a
/// container's text, the entries found in it, and how far the walk has got.
struct Frame {
    // Text of the container being walked: the whole document for the root
    // frame, or the `{ ... }` slice of an object value for a nested frame.
    content: String,
    // Entries discovered in `content`, in source order.
    entries: Vec<JsonEntry>,
    // Index into `entries` of the next entry to emit; lets a frame be resumed
    // after one of its children has been walked.
    cursor: usize,
    // 1-based line of the file on which line 1 of `content` sits; added to
    // entry-relative line numbers to obtain absolute ones.
    line_offset: usize,
    // Pointer of the enclosing entry, prepended to each entry's own pointer
    // segment. `None` for the root frame.
    parent_pointer: Option<String>,
    // Entity id recorded as `parent_id` on every entity emitted from this
    // frame. `None` for the root frame.
    parent_entity_id: Option<String>,
    // Decides which closing delimiter marks the end of `content`.
    container_kind: JsonContainerKind,
}
46
/// Iterative walk of the JSON tree, emitting entities in DFS pre-order.
/// Frames track a cursor through their entries; encountering an
/// object-valued entry pushes both the parent frame (resumed after) and the
/// child frame (visited next), so children appear before later siblings.
///
/// * `content` — the full document text.
/// * `file_path` — copied onto every entity and used as the prefix of each
///   entity id (`"{file_path}::{pointer}"`).
/// * `container_kind` — whether the root is scanned as an object or an array.
///
/// Returns the entities in emission order. Line numbers on the returned
/// entities are 1-based and relative to the whole document.
fn extract_entries(
    content: &str,
    file_path: &str,
    container_kind: JsonContainerKind,
) -> Vec<SemanticEntity> {
    let mut entities = Vec::new();
    // Only the root can be an array; nested frames are always objects.
    let root_entries = match container_kind {
        JsonContainerKind::Object => find_top_level_entries(content),
        JsonContainerKind::Array => find_top_level_array_entries(content),
    };
    let mut worklist: Vec<Frame> = vec![Frame {
        content: content.to_string(),
        entries: root_entries,
        cursor: 0,
        line_offset: 1,
        parent_pointer: None,
        parent_entity_id: None,
        container_kind,
    }];

    while let Some(mut frame) = worklist.pop() {
        // NOTE(review): these two values are recomputed every time a frame is
        // resumed after a child, not just once per frame.
        let lines: Vec<&str> = frame.content.lines().collect();
        let closing = find_closing_container_line(&lines, frame.container_kind);

        while frame.cursor < frame.entries.len() {
            let i = frame.cursor;
            // Advance before doing anything else so a resumed frame continues
            // with the entry after this one.
            frame.cursor += 1;
            let entry = &frame.entries[i];
            let (end_line, entity_content) =
                // Entries carrying an explicit byte range and end line (the
                // array scanner fills these in) are sliced directly.
                if let (Some(start_byte), Some(end_byte), Some(end_line)) = (
                    entry.content_start_byte,
                    entry.content_end_byte_exclusive,
                    entry.end_line,
                )
                {
                    let Some(entity_content) = frame
                        .content
                        .get(start_byte..end_byte)
                        .map(|content| content.to_string())
                    else {
                        // Invalid range: fail loudly in debug builds, skip the
                        // entry in release builds.
                        debug_assert!(
                            false,
                            "array entry byte range must be valid within frame content"
                        );
                        continue;
                    };
                    (
                        end_line,
                        entity_content,
                    )
                } else {
                    // Line-based span: from the entry's start line up to the
                    // line before the next entry (or before the closing
                    // delimiter line for the last entry), minus trailing
                    // blank / comma-only lines.
                    let next_boundary = frame
                        .entries
                        .get(i + 1)
                        .map(|e| e.start_line)
                        .unwrap_or(closing);
                    let end_line = trim_trailing_blanks(&lines, entry.start_line, next_boundary);
                    // Relies on `end_line >= entry.start_line - 1`; a smaller
                    // value would make this range inverted and panic.
                    let entity_content = lines[entry.start_line - 1..end_line].join("\n");
                    (end_line, entity_content)
                };
            // The value without its key feeds the structural hash.
            let value_content = extract_value_content(&entity_content);

            // Full pointer = parent pointer + this entry's own segment.
            let pointer = match &frame.parent_pointer {
                Some(pp) => format!("{pp}{}", entry.pointer),
                None => entry.pointer.clone(),
            };
            let entity_id = format!("{}::{}", file_path, pointer);
            // Convert frame-relative line numbers to document-absolute ones.
            let abs_start = frame.line_offset + entry.start_line - 1;
            let abs_end = frame.line_offset + end_line - 1;

            entities.push(SemanticEntity {
                id: entity_id.clone(),
                file_path: file_path.to_string(),
                entity_type: entry.entity_type.clone(),
                name: entry.key.clone(),
                parent_id: frame.parent_entity_id.clone(),
                content_hash: content_hash(&entity_content),
                structural_hash: Some(content_hash(value_content)),
                content: entity_content.clone(),
                start_line: abs_start,
                end_line: abs_end,
                metadata: None,
            });

            if entry.entity_type == "object" && entry.descend_into_object {
                if let Some(obj_str) = extract_object_value(&entity_content) {
                    let obj_line_in_entity = find_value_start_line(&entity_content);
                    let child = Frame {
                        content: obj_str.to_string(),
                        entries: find_top_level_entries(obj_str),
                        cursor: 0,
                        // Absolute line on which the child's opening `{` sits.
                        line_offset: abs_start + obj_line_in_entity - 1,
                        parent_pointer: Some(pointer),
                        parent_entity_id: Some(entity_id),
                        container_kind: JsonContainerKind::Object,
                    };
                    // Parent goes on the stack first so the child is popped
                    // (and fully walked) before the parent resumes.
                    worklist.push(frame);
                    worklist.push(child);
                    break;
                }
            }
        }
    }

    entities
}
157
158fn document_chunk_entity(content: &str, file_path: &str) -> SemanticEntity {
159    let line_count = content.lines().count().max(1);
160    SemanticEntity {
161        id: build_entity_id(file_path, "chunk", "(document)", None),
162        file_path: file_path.to_string(),
163        entity_type: "chunk".to_string(),
164        name: "(document)".to_string(),
165        parent_id: None,
166        content_hash: content_hash(content),
167        structural_hash: None,
168        content: content.to_string(),
169        start_line: 1,
170        end_line: line_count,
171        metadata: None,
172    }
173}
174
/// Returns the object literal that forms the value of a `"key": { ... }`
/// entity, i.e. the slice from the value's opening `{` through its matching
/// closing `}` (inclusive).
///
/// Returns `None` when there is no `:` outside a string, no `{` after that
/// colon, or the object is never closed.
fn extract_object_value(content: &str) -> Option<&str> {
    // Step 1: the key/value separator is the first `:` outside any string.
    let colon = {
        let mut quoted = false;
        let mut skip = false;
        let mut found = None;
        for (idx, c) in content.char_indices() {
            if skip {
                skip = false;
                continue;
            }
            match c {
                '\\' if quoted => skip = true,
                '"' => quoted = !quoted,
                ':' if !quoted => {
                    found = Some(idx);
                    break;
                }
                _ => {}
            }
        }
        found?
    };

    // Step 2: the value starts at the first `{` after the separator.
    let value_start = colon + 1 + content[colon + 1..].find('{')?;
    let value = &content[value_start..];

    // Step 3: find the matching `}`. Braces and brackets are counted
    // separately so a `}` only ends the value when no array is still open.
    let mut braces = 0usize;
    let mut brackets = 0usize;
    let mut quoted = false;
    let mut skip = false;
    for (idx, c) in value.char_indices() {
        if skip {
            skip = false;
            continue;
        }
        if quoted {
            match c {
                '\\' => skip = true,
                '"' => quoted = false,
                _ => {}
            }
            continue;
        }
        match c {
            '"' => quoted = true,
            '{' => braces += 1,
            '[' => brackets += 1,
            ']' => brackets = brackets.saturating_sub(1),
            '}' => {
                braces = braces.saturating_sub(1);
                if braces == 0 && brackets == 0 {
                    return Some(&value[..=idx]);
                }
            }
            _ => {}
        }
    }
    None
}
244
/// Returns the 1-based line, relative to `content`, holding the first `{`
/// that follows the key/value separator (the first `:` outside a string).
/// Falls back to `1` when no such `{` exists.
fn find_value_start_line(content: &str) -> usize {
    let mut quoted = false;
    let mut skip = false;
    let mut seen_colon = false;

    // Splitting on '\n' gives the line index for free; the scanner state is
    // deliberately carried across lines.
    for (row, text) in content.split('\n').enumerate() {
        for c in text.chars() {
            if skip {
                skip = false;
                continue;
            }
            match c {
                '\\' if quoted => skip = true,
                '"' => quoted = !quoted,
                ':' if !quoted => seen_colon = true,
                '{' if seen_colon => return row + 1,
                _ => {}
            }
        }
    }
    1
}
280
/// One object member or array element located by a scanner, positioned
/// relative to the content that was scanned.
struct JsonEntry {
    // Object key as written between its quotes (escape sequences are kept
    // verbatim), or the array index rendered as a decimal string.
    key: String,
    // Pointer segment for this entry: `/` followed by the key with `~`
    // replaced by `~0` and `/` by `~1`, or `/` followed by the array index.
    pointer: String,
    // "object", "array" or "property" for object members;
    // "object", "array" or "array_item" for array elements.
    entity_type: String,
    start_line: usize, // 1-based, relative to the content passed in
    // 1-based end line relative to the scanned content. Only the array
    // scanner fills this in; object members leave it `None`.
    end_line: Option<usize>,
    // Byte offsets are relative to the current frame content; end is exclusive.
    content_start_byte: Option<usize>,
    content_end_byte_exclusive: Option<usize>,
    // Set by the object scanner when the member's value opens with `{`;
    // array elements always leave it `false`.
    descend_into_object: bool,
}
292
293/// Scan the source text to find each top-level key in the root JSON object.
294/// Returns entries with accurate start_line positions (1-based, relative to `content`).
295fn find_top_level_entries(content: &str) -> Vec<JsonEntry> {
296    let mut entries = Vec::new();
297    let mut depth = 0;
298    let mut in_string = false;
299    let mut escape_next = false;
300    let mut line_num: usize = 1;
301
302    let mut current_key: Option<String> = None;
303    let mut key_start = false;
304    let mut key_buf = String::new();
305    let mut reading_key = false;
306
307    for ch in content.chars() {
308        if ch == '\n' {
309            line_num += 1;
310            continue;
311        }
312
313        if escape_next {
314            if reading_key {
315                key_buf.push(ch);
316            }
317            escape_next = false;
318            continue;
319        }
320
321        if ch == '\\' && in_string {
322            if reading_key {
323                key_buf.push(ch);
324            }
325            escape_next = true;
326            continue;
327        }
328
329        if in_string {
330            if ch == '"' {
331                in_string = false;
332                if reading_key {
333                    reading_key = false;
334                    current_key = Some(key_buf.clone());
335                    key_buf.clear();
336                }
337            } else if reading_key {
338                key_buf.push(ch);
339            }
340            continue;
341        }
342
343        match ch {
344            '"' => {
345                in_string = true;
346                if depth == 1 && current_key.is_none() && !key_start {
347                    reading_key = true;
348                    key_buf.clear();
349                }
350            }
351            ':' => {
352                if depth == 1 {
353                    if let Some(ref key) = current_key {
354                        let escaped_key = key.replace('~', "~0").replace('/', "~1");
355                        let pointer = format!("/{escaped_key}");
356                        entries.push(JsonEntry {
357                            key: key.clone(),
358                            pointer,
359                            entity_type: String::new(),
360                            start_line: line_num,
361                            end_line: None,
362                            content_start_byte: None,
363                            content_end_byte_exclusive: None,
364                            descend_into_object: false,
365                        });
366                        key_start = true;
367                    }
368                }
369            }
370            '{' | '[' => {
371                depth += 1;
372                if depth == 2 && key_start {
373                    if let Some(entry) = entries.last_mut() {
374                        entry.entity_type = if ch == '{' { "object" } else { "array" }.to_string();
375                        entry.descend_into_object = ch == '{';
376                    }
377                }
378            }
379            '}' | ']' => {
380                depth -= 1;
381            }
382            ',' => {
383                if depth == 1 {
384                    if let Some(entry) = entries.last_mut() {
385                        if entry.entity_type.is_empty() {
386                            entry.entity_type = "property".to_string();
387                        }
388                    }
389                    current_key = None;
390                    key_start = false;
391                }
392            }
393            _ => {}
394        }
395    }
396
397    if let Some(entry) = entries.last_mut() {
398        if entry.entity_type.is_empty() {
399            entry.entity_type = "property".to_string();
400        }
401    }
402
403    entries
404}
405
406/// Scan a root JSON array and emit each top-level index as an opaque entity.
407/// Nested object fields are intentionally not extracted here because array
408/// elements usually do not have stable identity beyond their current index.
409fn find_top_level_array_entries(content: &str) -> Vec<JsonEntry> {
410    let mut entries = Vec::new();
411    let mut depth = 0usize;
412    let mut in_string = false;
413    let mut escape_next = false;
414    let mut line_num: usize = 1;
415    let mut expecting_item = false;
416    let mut current: Option<JsonEntry> = None;
417
418    for (i, ch) in content.char_indices() {
419        if ch == '\n' {
420            line_num += 1;
421        }
422
423        if escape_next {
424            escape_next = false;
425            continue;
426        }
427        if ch == '\\' && in_string {
428            escape_next = true;
429            continue;
430        }
431        if in_string {
432            if ch == '"' {
433                in_string = false;
434            }
435            continue;
436        }
437
438        if depth == 1 && expecting_item && !ch.is_whitespace() && ch != ']' && ch != ',' {
439            let index = entries.len();
440            current = Some(JsonEntry {
441                key: index.to_string(),
442                pointer: format!("/{index}"),
443                entity_type: match ch {
444                    '{' => "object",
445                    '[' => "array",
446                    _ => "array_item",
447                }
448                .to_string(),
449                start_line: line_num,
450                end_line: None,
451                content_start_byte: Some(i),
452                content_end_byte_exclusive: None,
453                descend_into_object: false,
454            });
455            expecting_item = false;
456        }
457
458        match ch {
459            '"' => {
460                in_string = true;
461            }
462            '[' => {
463                depth += 1;
464                if depth == 1 {
465                    expecting_item = true;
466                }
467            }
468            '{' => {
469                depth += 1;
470            }
471            ']' => {
472                if depth == 1 {
473                    finish_array_entry(&mut entries, &mut current, content, i);
474                    expecting_item = false;
475                }
476                depth = depth.saturating_sub(1);
477            }
478            '}' => {
479                depth = depth.saturating_sub(1);
480            }
481            ',' => {
482                if depth == 1 {
483                    finish_array_entry(&mut entries, &mut current, content, i);
484                    expecting_item = true;
485                }
486            }
487            _ => {}
488        }
489    }
490
491    finish_array_entry(&mut entries, &mut current, content, content.len());
492
493    entries
494}
495
496fn finish_array_entry(
497    entries: &mut Vec<JsonEntry>,
498    current: &mut Option<JsonEntry>,
499    content: &str,
500    delimiter_byte: usize,
501) {
502    if let Some(mut entry) = current.take() {
503        let Some(start_byte) = entry.content_start_byte else {
504            return;
505        };
506        let end_byte = content
507            .get(..delimiter_byte)
508            .map(|prefix| prefix.trim_end().len())
509            .unwrap_or(delimiter_byte);
510        if start_byte >= end_byte {
511            debug_assert!(
512                start_byte < end_byte,
513                "array entry start byte must precede content end byte"
514            );
515            return;
516        }
517
518        entry.content_end_byte_exclusive = Some(end_byte);
519        entry.end_line = entry_end_line(content, &entry);
520        entries.push(entry);
521    }
522}
523
524fn entry_end_line(content: &str, entry: &JsonEntry) -> Option<usize> {
525    let start = entry.content_start_byte?;
526    let end = entry.content_end_byte_exclusive?;
527    Some(
528        entry.start_line
529            + content
530                .get(start..end)?
531                .trim_end()
532                .chars()
533                .filter(|ch| *ch == '\n')
534                .count(),
535    )
536}
537
/// Returns only the value part of a `"key": value` entity string.
///
/// Dropping the key means two entries with different keys but identical
/// values hash the same, which lets a key rename be recognised as a rename
/// rather than a delete plus an add. Surrounding whitespace and trailing
/// commas are removed. When there is no `:` outside a string, `content` is
/// returned unchanged.
fn extract_value_content(content: &str) -> &str {
    let mut quoted = false;
    let mut skip = false;

    // Byte offset of the first `:` that is not inside a string literal.
    let separator = content.char_indices().find_map(|(idx, c)| {
        if skip {
            skip = false;
            return None;
        }
        match c {
            '\\' if quoted => skip = true,
            '"' => quoted = !quoted,
            ':' if !quoted => return Some(idx),
            _ => {}
        }
        None
    });

    match separator {
        Some(idx) => content[idx + 1..].trim().trim_end_matches(',').trim(),
        None => content,
    }
}
563
564/// Find the line number (1-based) of the root closing delimiter.
565fn find_closing_container_line(lines: &[&str], container_kind: JsonContainerKind) -> usize {
566    let closing = match container_kind {
567        JsonContainerKind::Object => "}",
568        JsonContainerKind::Array => "]",
569    };
570    for (i, line) in lines.iter().enumerate().rev() {
571        if line.trim() == closing {
572            return i + 1;
573        }
574    }
575    lines.len()
576}
577
/// Walks backwards from the line before `next_start`, skipping blank lines
/// and lines holding only a comma, and returns the 1-based inclusive end line
/// of the entry that begins on line `start`.
///
/// * `lines` — the lines of the container being walked.
/// * `start` — 1-based line on which the entry begins.
/// * `next_start` — 1-based line of the next boundary (the next entry's start
///   line, or the container's closing line).
///
/// The result is never smaller than `start - 1`, so `lines[start - 1..end]`
/// is always a well-formed (possibly empty) range. `start - 1` is returned
/// when the boundary lies on or before the entry's own line, for example when
/// two entries share a line or when the detected closing line precedes the
/// entry.
fn trim_trailing_blanks(lines: &[&str], start: usize, next_start: usize) -> usize {
    // Saturating arithmetic: a boundary of 0 must not underflow, and a
    // boundary before the entry must not yield an inverted range.
    let floor = start.saturating_sub(1);
    let mut end = next_start.saturating_sub(1).max(floor);

    while end > start && matches!(lines[end - 1].trim(), "" | ",") {
        end -= 1;
    }
    end
}
592
593#[cfg(test)]
594mod tests {
595    use super::*;
596    use crate::git::types::{FileChange, FileStatus};
597    use crate::model::change::{ChangeType, SemanticChange};
598    use crate::parser::differ::compute_semantic_diff;
599    use crate::parser::registry::ParserRegistry;
600
601    /// Run the full pipeline and drop orphan changes (which represent line-level
602    /// noise outside entity spans like the root `{` `}` brackets).
603    fn json_diff(before: &str, after: &str) -> Vec<SemanticChange> {
604        let mut registry = ParserRegistry::new();
605        registry.register(Box::new(JsonParserPlugin));
606        let changes = vec![FileChange {
607            file_path: "test.json".to_string(),
608            status: FileStatus::Modified,
609            old_file_path: None,
610            before_content: Some(before.to_string()),
611            after_content: Some(after.to_string()),
612        }];
613        compute_semantic_diff(&changes, &registry, None, None)
614            .changes
615            .into_iter()
616            .filter(|c| c.entity_type != "orphan")
617            .collect()
618    }
619
620    fn names(changes: &[SemanticChange]) -> Vec<(String, ChangeType)> {
621        changes.iter().map(|c| (c.entity_name.clone(), c.change_type)).collect()
622    }
623
624    fn find_change<'a>(changes: &'a [SemanticChange], name: &str, kind: ChangeType) -> &'a SemanticChange {
625        changes.iter().find(|c| c.entity_name == name && c.change_type == kind)
626            .unwrap_or_else(|| panic!("expected {:?} {} in changes; got: {:?}", kind, name, names(changes)))
627    }
628
629    #[test]
630    fn test_json_line_positions() {
631        let content = r#"{
632  "name": "my-app",
633  "version": "1.0.0",
634  "scripts": {
635    "build": "tsc",
636    "test": "jest"
637  },
638  "description": "a test app"
639}
640"#;
641        let plugin = JsonParserPlugin;
642        let entities = plugin.extract_entities(content, "package.json");
643
644        // Top-level entities
645        let top: Vec<_> = entities.iter().filter(|e| e.parent_id.is_none()).collect();
646        assert_eq!(top.len(), 4);
647
648        assert_eq!(top[0].name, "name");
649        assert_eq!(top[0].start_line, 2);
650        assert_eq!(top[0].end_line, 2);
651
652        assert_eq!(top[1].name, "version");
653        assert_eq!(top[1].start_line, 3);
654        assert_eq!(top[1].end_line, 3);
655
656        assert_eq!(top[2].name, "scripts");
657        assert_eq!(top[2].entity_type, "object");
658        assert_eq!(top[2].start_line, 4);
659        assert_eq!(top[2].end_line, 7);
660
661        assert_eq!(top[3].name, "description");
662        assert_eq!(top[3].start_line, 8);
663        assert_eq!(top[3].end_line, 8);
664    }
665
666    #[test]
667    fn test_nested_entities_extracted() {
668        let content = r#"{
669  "scripts": {
670    "build": "tsc",
671    "test": "jest"
672  }
673}
674"#;
675        let plugin = JsonParserPlugin;
676        let entities = plugin.extract_entities(content, "package.json");
677
678        // Should have "scripts" (top-level) + "build" and "test" (nested)
679        assert_eq!(entities.len(), 3);
680
681        let scripts = entities.iter().find(|e| e.name == "scripts").unwrap();
682        assert!(scripts.parent_id.is_none());
683
684        let build = entities.iter().find(|e| e.name == "build").unwrap();
685        assert_eq!(build.parent_id, Some(scripts.id.clone()));
686        assert_eq!(build.start_line, 3);
687
688        let test = entities.iter().find(|e| e.name == "test").unwrap();
689        assert_eq!(test.parent_id, Some(scripts.id.clone()));
690        assert_eq!(test.start_line, 4);
691    }
692
693    // ─────────────────────────────────────────────────────────────────────────
694    //  Top-level scalars
695    // ─────────────────────────────────────────────────────────────────────────
696
697    #[test]
698    fn scalar_value_change_reports_modified() {
699        let changes = json_diff(
700            "{\n  \"name\": \"foo\"\n}",
701            "{\n  \"name\": \"bar\"\n}",
702        );
703        assert_eq!(names(&changes), vec![("name".into(), ChangeType::Modified)]);
704        assert_eq!(changes[0].parent_name, None);
705    }
706
707    #[test]
708    fn scalar_added_to_empty_object_reports_only_the_scalar() {
709        let changes = json_diff("{}", "{\n  \"name\": \"foo\"\n}");
710        assert_eq!(names(&changes), vec![("name".into(), ChangeType::Added)]);
711    }
712
713    #[test]
714    fn scalar_deleted_from_object_reports_only_the_scalar() {
715        let changes = json_diff("{\n  \"name\": \"foo\"\n}", "{}");
716        assert_eq!(names(&changes), vec![("name".into(), ChangeType::Deleted)]);
717    }
718
719    #[test]
720    fn scalar_key_renamed_with_unchanged_value_reports_renamed() {
721        let changes = json_diff(
722            "{\n  \"timeout\": 30\n}",
723            "{\n  \"testTimeout\": 30\n}",
724        );
725        assert_eq!(changes.len(), 1);
726        assert_eq!(changes[0].change_type, ChangeType::Renamed);
727        assert_eq!(changes[0].entity_name, "testTimeout");
728        assert_eq!(changes[0].old_entity_name.as_deref(), Some("timeout"));
729    }
730
731    // ─────────────────────────────────────────────────────────────────────────
732    //  Parent suppression — object containers don't surface when children change
733    // ─────────────────────────────────────────────────────────────────────────
734
735    #[test]
736    fn child_modified_inside_object_only_child_reported() {
737        let changes = json_diff(
738            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
739            "{\n  \"scripts\": {\n    \"build\": \"webpack\"\n  }\n}",
740        );
741        assert!(!changes.iter().any(|c| c.entity_name == "scripts"),
742            "scripts should be suppressed; got: {:?}", names(&changes));
743        let build = find_change(&changes, "build", ChangeType::Modified);
744        assert_eq!(build.parent_name.as_deref(), Some("scripts"));
745    }
746
747    #[test]
748    fn child_added_inside_object_only_child_reported() {
749        let changes = json_diff(
750            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
751            "{\n  \"scripts\": {\n    \"build\": \"tsc\",\n    \"test\": \"jest\"\n  }\n}",
752        );
753        assert!(!changes.iter().any(|c| c.entity_name == "scripts" && c.change_type == ChangeType::Modified),
754            "scripts should be suppressed; got: {:?}", names(&changes));
755        let test = find_change(&changes, "test", ChangeType::Added);
756        assert_eq!(test.parent_name.as_deref(), Some("scripts"));
757    }
758
759    #[test]
760    fn child_deleted_inside_object_only_child_reported() {
761        let changes = json_diff(
762            "{\n  \"scripts\": {\n    \"build\": \"tsc\",\n    \"test\": \"jest\"\n  }\n}",
763            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
764        );
765        assert!(!changes.iter().any(|c| c.entity_name == "scripts" && c.change_type == ChangeType::Modified),
766            "scripts should be suppressed; got: {:?}", names(&changes));
767        let test = find_change(&changes, "test", ChangeType::Deleted);
768        assert_eq!(test.parent_name.as_deref(), Some("scripts"));
769    }
770
771    #[test]
772    fn whole_object_added_only_leaf_children_reported() {
773        let changes = json_diff(
774            "{}",
775            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
776        );
777        assert!(!changes.iter().any(|c| c.entity_name == "scripts"),
778            "scripts (container) should be suppressed; got: {:?}", names(&changes));
779        let build = find_change(&changes, "build", ChangeType::Added);
780        assert_eq!(build.parent_name.as_deref(), Some("scripts"));
781    }
782
783    #[test]
784    fn whole_object_deleted_only_leaf_children_reported() {
785        let changes = json_diff(
786            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
787            "{}",
788        );
789        assert!(!changes.iter().any(|c| c.entity_name == "scripts"),
790            "scripts (container) should be suppressed; got: {:?}", names(&changes));
791        find_change(&changes, "build", ChangeType::Deleted);
792    }
793
794    // ─────────────────────────────────────────────────────────────────────────
795    //  Deep nesting — full ancestor chain in parent_name
796    // ─────────────────────────────────────────────────────────────────────────
797
798    #[test]
799    fn deep_nested_value_change_reports_only_the_leaf_with_full_chain() {
800        let before = r#"{
801  "jest": {
802    "config": {
803      "testTimeout": 5000
804    }
805  }
806}"#;
807        let after = r#"{
808  "jest": {
809    "config": {
810      "testTimeout": 10000
811    }
812  }
813}"#;
814        let changes = json_diff(before, after);
815        assert_eq!(names(&changes), vec![("testTimeout".into(), ChangeType::Modified)]);
816        assert_eq!(changes[0].parent_name.as_deref(), Some("jest::config"));
817    }
818
819    #[test]
820    fn empty_string_key_ancestor_is_skipped_in_parent_name() {
821        // package-lock.json uses "" as a key for the root project.
822        // Walking the parent chain for a deeply-nested change must not emit
823        // the empty name (would render as "::::") in the displayed path.
824        let before = r#"{
825  "packages": {
826    "": {
827      "dependencies": {
828        "jose": "^6.1.3"
829      }
830    }
831  }
832}"#;
833        let after = r#"{
834  "packages": {
835    "": {
836      "dependencies": {
837        "jose": "^6.1.4"
838      }
839    }
840  }
841}"#;
842        let changes = json_diff(before, after);
843        let jose = find_change(&changes, "jose", ChangeType::Modified);
844        // The empty-string key ancestor is dropped from the displayed chain.
845        assert_eq!(jose.parent_name.as_deref(), Some("packages::dependencies"));
846    }
847
848    // ─────────────────────────────────────────────────────────────────────────
849    //  Renames at the object level
850    // ─────────────────────────────────────────────────────────────────────────
851
852    #[test]
853    fn nested_scalar_rename_with_unchanged_value() {
854        // Same value → structural_hash matches → Renamed.
855        let before = r#"{
856  "scripts": {
857    "run": "node .",
858    "test": "jest"
859  }
860}"#;
861        let after = r#"{
862  "scripts": {
863    "start": "node .",
864    "test": "jest"
865  }
866}"#;
867        let changes = json_diff(before, after);
868        let renames: Vec<_> = changes.iter().filter(|c| c.change_type == ChangeType::Renamed).collect();
869        assert_eq!(renames.len(), 1);
870        assert_eq!(renames[0].entity_name, "start");
871        assert_eq!(renames[0].old_entity_name.as_deref(), Some("run"));
872        assert_eq!(renames[0].parent_name.as_deref(), Some("scripts"));
873    }
874
875    #[test]
876    fn parent_object_renamed_unchanged_child_move_suppressed() {
877        // scripts → tasks, dev unchanged: only the parent rename is reported.
878        let before = "{\n  \"scripts\": {\n    \"dev\": \"vite\"\n  }\n}\n";
879        let after = "{\n  \"tasks\": {\n    \"dev\": \"vite\"\n  }\n}\n";
880        let changes = json_diff(before, after);
881        let tasks = find_change(&changes, "tasks", ChangeType::Renamed);
882        assert_eq!(tasks.old_entity_name.as_deref(), Some("scripts"));
883        assert!(!changes.iter().any(|c| c.entity_name == "dev"),
884            "child 'dev' should be suppressed (only moved due to parent rename); got: {:?}", names(&changes));
885    }
886
887    #[test]
888    fn parent_object_renamed_and_child_renamed_only_child_surfaces() {
889        // scripts → tasks AND dev → develop. Parent rename cannot be detected
890        // because the renamed child key changes the parent's structural_hash.
891        // The child move alone conveys the move + rename via:
892        //   parent_name="tasks", old_entity_name="dev", old_parent_id=<scripts>
893        let before = "{\n  \"scripts\": {\n    \"dev\": \"vite\"\n  }\n}\n";
894        let after = "{\n  \"tasks\": {\n    \"develop\": \"vite\"\n  }\n}\n";
895        let changes = json_diff(before, after);
896        assert_eq!(names(&changes), vec![("develop".into(), ChangeType::Moved)]);
897        let develop = &changes[0];
898        assert_eq!(develop.old_entity_name.as_deref(), Some("dev"));
899        assert_eq!(develop.parent_name.as_deref(), Some("tasks"));
900        assert!(develop.old_parent_id.is_some(), "child Moved should carry old_parent_id");
901    }
902
903    // ─────────────────────────────────────────────────────────────────────────
904    //  Type transitions — scalar ↔ object
905    // ─────────────────────────────────────────────────────────────────────────
906
907    #[test]
908    fn scalar_to_object_transition_reports_modified_plus_new_children_added() {
909        let changes = json_diff(
910            "{\n  \"build\": \"tsc\"\n}",
911            "{\n  \"build\": {\n    \"command\": \"tsc\"\n  }\n}",
912        );
913        let build = find_change(&changes, "build", ChangeType::Modified);
914        assert_eq!(build.entity_type, "object", "after type should reflect new value");
915        let command = find_change(&changes, "command", ChangeType::Added);
916        assert_eq!(command.parent_name.as_deref(), Some("build"));
917    }
918
919    #[test]
920    fn object_to_scalar_transition_reports_modified_plus_old_children_deleted() {
921        let changes = json_diff(
922            "{\n  \"config\": {\n    \"watch\": true\n  }\n}",
923            "{\n  \"config\": \"auto\"\n}",
924        );
925        let config = find_change(&changes, "config", ChangeType::Modified);
926        assert_eq!(config.entity_type, "property", "after type should reflect new value");
927        find_change(&changes, "watch", ChangeType::Deleted);
928    }
929
930    // ─────────────────────────────────────────────────────────────────────────
931    //  Arrays — opaque (no recursion into elements)
932    // ─────────────────────────────────────────────────────────────────────────
933
934    #[test]
935    fn array_modified_reports_only_the_array_key() {
936        let changes = json_diff(
937            "{\n  \"deps\": [\"react\", \"vue\"]\n}",
938            "{\n  \"deps\": [\"react\", \"vue\", \"lodash\"]\n}",
939        );
940        assert_eq!(names(&changes), vec![("deps".into(), ChangeType::Modified)]);
941    }
942
943    #[test]
944    fn array_renamed_when_contents_unchanged() {
945        let changes = json_diff(
946            "{\n  \"deps\": [\"react\", \"vue\"]\n}",
947            "{\n  \"dependencies\": [\"react\", \"vue\"]\n}",
948        );
949        assert_eq!(changes.len(), 1);
950        assert_eq!(changes[0].change_type, ChangeType::Renamed);
951        assert_eq!(changes[0].entity_name, "dependencies");
952    }
953
954    #[test]
955    fn array_element_keys_are_not_tracked_as_entities() {
956        let before = r#"{
957  "deps": [
958    {"name": "react"},
959    {"name": "vue"}
960  ]
961}"#;
962        let after = r#"{
963  "deps": [
964    {"package": "react"},
965    {"name": "vue"}
966  ]
967}"#;
968        let changes = json_diff(before, after);
969        assert_eq!(names(&changes), vec![("deps".into(), ChangeType::Modified)],
970            "array elements have no stable identity; only the array key should change");
971    }
972
973    #[test]
974    fn root_array_items_are_top_level_entities() {
975        let content = r#"[
976  {"id": 1, "name": "alpha"},
977  "beta",
978  [1, 2]
979]
980"#;
981        let plugin = JsonParserPlugin;
982        let entities = plugin.extract_entities(content, "arr.json");
983
984        assert_eq!(entities.len(), 3);
985
986        assert_eq!(entities[0].id, "arr.json::/0");
987        assert_eq!(entities[0].name, "0");
988        assert_eq!(entities[0].entity_type, "object");
989        assert_eq!(entities[0].parent_id, None);
990        assert_eq!(entities[0].start_line, 2);
991        assert_eq!(entities[0].end_line, 2);
992
993        assert_eq!(entities[1].id, "arr.json::/1");
994        assert_eq!(entities[1].name, "1");
995        assert_eq!(entities[1].entity_type, "array_item");
996        assert_eq!(entities[1].start_line, 3);
997
998        assert_eq!(entities[2].id, "arr.json::/2");
999        assert_eq!(entities[2].name, "2");
1000        assert_eq!(entities[2].entity_type, "array");
1001        assert_eq!(entities[2].start_line, 4);
1002    }
1003
1004    #[test]
1005    fn compact_root_array_items_keep_separate_content() {
1006        let plugin = JsonParserPlugin;
1007        let entities = plugin.extract_entities(r#"[{"id":1},{"id":2}]"#, "arr.json");
1008
1009        assert_eq!(entities.len(), 2);
1010        assert_eq!(entities[0].id, "arr.json::/0");
1011        assert_eq!(entities[0].content, r#"{"id":1}"#);
1012        assert_eq!(entities[0].start_line, 1);
1013        assert_eq!(entities[0].end_line, 1);
1014        assert_eq!(entities[1].id, "arr.json::/1");
1015        assert_eq!(entities[1].content, r#"{"id":2}"#);
1016        assert_eq!(entities[1].start_line, 1);
1017        assert_eq!(entities[1].end_line, 1);
1018    }
1019
1020    #[test]
1021    fn root_array_nested_containers_keep_whole_item_content() {
1022        let plugin = JsonParserPlugin;
1023        let entities = plugin.extract_entities(
1024            r#"[{"id":1,"meta":{"a":true},"list":[{"b":2},3]},[{"nested":4}]]"#,
1025            "arr.json",
1026        );
1027
1028        assert_eq!(entities.len(), 2);
1029        assert_eq!(entities[0].id, "arr.json::/0");
1030        assert_eq!(entities[0].entity_type, "object");
1031        assert_eq!(
1032            entities[0].content,
1033            r#"{"id":1,"meta":{"a":true},"list":[{"b":2},3]}"#
1034        );
1035        assert_eq!(entities[1].id, "arr.json::/1");
1036        assert_eq!(entities[1].entity_type, "array");
1037        assert_eq!(entities[1].content, r#"[{"nested":4}]"#);
1038    }
1039
1040    #[test]
1041    fn compact_root_array_scalars_keep_exact_value_content() {
1042        let plugin = JsonParserPlugin;
1043        let entities = plugin.extract_entities(r#"[1,"two",[3,4]]"#, "arr.json");
1044
1045        assert_eq!(entities.len(), 3);
1046
1047        assert_eq!(entities[0].id, "arr.json::/0");
1048        assert_eq!(entities[0].content, "1");
1049        assert_eq!(entities[0].entity_type, "array_item");
1050
1051        assert_eq!(entities[1].id, "arr.json::/1");
1052        assert_eq!(entities[1].content, r#""two""#);
1053        assert_eq!(entities[1].entity_type, "array_item");
1054
1055        assert_eq!(entities[2].id, "arr.json::/2");
1056        assert_eq!(entities[2].content, "[3,4]");
1057        assert_eq!(entities[2].entity_type, "array");
1058    }
1059
1060    #[test]
1061    fn root_array_items_trim_delimiter_whitespace_from_content() {
1062        let plugin = JsonParserPlugin;
1063        let content = "[1 ,\n  {\"id\": 2}\n]\n";
1064        let entities = plugin.extract_entities(content, "arr.json");
1065
1066        assert_eq!(entities.len(), 2);
1067        assert_eq!(entities[0].content, "1");
1068        assert_eq!(entities[0].start_line, 1);
1069        assert_eq!(entities[0].end_line, 1);
1070        assert_eq!(entities[1].content, "{\"id\": 2}");
1071        assert_eq!(entities[1].start_line, 2);
1072        assert_eq!(entities[1].end_line, 2);
1073    }
1074
1075    #[test]
1076    fn root_array_item_at_eof_is_preserved_for_truncated_json() {
1077        let plugin = JsonParserPlugin;
1078        let entities = plugin.extract_entities("[1", "arr.json");
1079
1080        assert_eq!(entities.len(), 1);
1081        assert_eq!(entities[0].id, "arr.json::/0");
1082        assert_eq!(entities[0].content, "1");
1083        assert_eq!(entities[0].entity_type, "array_item");
1084    }
1085
1086    #[test]
1087    fn root_array_item_modified_reports_the_index() {
1088        let changes = json_diff(
1089            "[\n  {\"id\": 1, \"name\": \"alpha\"}\n]",
1090            "[\n  {\"id\": 1, \"name\": \"beta\"}\n]",
1091        );
1092
1093        assert_eq!(names(&changes), vec![("0".into(), ChangeType::Modified)]);
1094        assert_eq!(changes[0].entity_id, "test.json::/0");
1095        assert_eq!(changes[0].entity_type, "object");
1096    }
1097
1098    #[test]
1099    fn root_array_item_added_from_empty_array() {
1100        let changes = json_diff("[]", "[\n  {\"id\": 1}\n]");
1101
1102        assert_eq!(names(&changes), vec![("0".into(), ChangeType::Added)]);
1103        assert_eq!(changes[0].entity_id, "test.json::/0");
1104    }
1105
1106    // ─────────────────────────────────────────────────────────────────────────
1107    //  Null and empty values
1108    // ─────────────────────────────────────────────────────────────────────────
1109
1110    #[test]
1111    fn null_to_string_value_reports_modified() {
1112        let changes = json_diff(
1113            "{\n  \"key\": null\n}",
1114            "{\n  \"key\": \"value\"\n}",
1115        );
1116        assert_eq!(names(&changes), vec![("key".into(), ChangeType::Modified)]);
1117    }
1118
1119    #[test]
1120    fn empty_object_gains_child_reports_both_parent_and_child() {
1121        // The precision guard keeps `key` Modified — its declaration shape
1122        // changed from `{}` to `{...}`.
1123        let changes = json_diff(
1124            "{\n  \"key\": {}\n}",
1125            "{\n  \"key\": {\n    \"build\": \"tsc\"\n  }\n}",
1126        );
1127        let key = find_change(&changes, "key", ChangeType::Modified);
1128        assert_eq!(key.parent_name, None);
1129        let build = find_change(&changes, "build", ChangeType::Added);
1130        assert_eq!(build.parent_name.as_deref(), Some("key"));
1131    }
1132
1133    // ─────────────────────────────────────────────────────────────────────────
1134    //  Entity ID format — file::pointer (no entity_type)
1135    // ─────────────────────────────────────────────────────────────────────────
1136
1137    #[test]
1138    fn entity_id_for_nested_property_uses_full_pointer_only() {
1139        let changes = json_diff(
1140            "{\n  \"scripts\": {\n    \"build\": \"tsc\"\n  }\n}",
1141            "{\n  \"scripts\": {\n    \"build\": \"webpack\"\n  }\n}",
1142        );
1143        let build = find_change(&changes, "build", ChangeType::Modified);
1144        assert_eq!(build.entity_id, "test.json::/scripts/build");
1145    }
1146
1147
1148    // ─────────────────────────────────────────────────────────────────────────
1149    //  Phase 3 fuzzy matching
1150    // ─────────────────────────────────────────────────────────────────────────
1151
1152    #[test]
1153    fn fuzzy_rename_detected_when_value_mostly_unchanged() {
1154        // config → settings: key rename (Phase 1 & 2 miss).
1155        // testTimeout 30 → 60: small value change rules out structural_hash.
1156        // Many siblings unchanged → Jaccard > 0.8 → Phase 3 catches it.
1157        let before = r#"{
1158  "config": {
1159    "host": "localhost",
1160    "protocol": "https",
1161    "retries": 3,
1162    "testTimeout": 30,
1163    "keepalive": true,
1164    "compression": true,
1165    "logging": "verbose",
1166    "maxConnections": 100
1167  }
1168}"#;
1169        let after = r#"{
1170  "settings": {
1171    "host": "localhost",
1172    "protocol": "https",
1173    "retries": 3,
1174    "testTimeout": 60,
1175    "keepalive": true,
1176    "compression": true,
1177    "logging": "verbose",
1178    "maxConnections": 100
1179  }
1180}"#;
1181        let changes = json_diff(before, after);
1182        assert!(changes.iter().any(|c| c.entity_name == "settings" && c.change_type == ChangeType::Renamed),
1183            "expected fuzzy rename of config → settings; got: {:?}", names(&changes));
1184    }
1185
1186    // ─────────────────────────────────────────────────────────────────────────
1187    //  Known limitations (documented in spec)
1188    // ─────────────────────────────────────────────────────────────────────────
1189
1190    #[test]
1191    fn parent_rename_with_sibling_added_surfaces_leaf_moves() {
1192        // Parent renamed AND a new sibling appears: structural_hash diverges,
1193        // Phase 2 misses the parent rename. The unchanged child still matches
1194        // by structural_hash and surfaces as Moved; the parent Deleted/Added
1195        // entries are container-suppressed.
1196        let before = r#"{
1197  "scripts": {
1198    "build": "tsc"
1199  }
1200}"#;
1201        let after = r#"{
1202  "tasks": {
1203    "build": "tsc",
1204    "test": "jest"
1205  }
1206}"#;
1207        let changes = json_diff(before, after);
1208        let build = find_change(&changes, "build", ChangeType::Moved);
1209        assert_eq!(build.parent_name.as_deref(), Some("tasks"));
1210        assert!(build.old_parent_id.is_some());
1211        find_change(&changes, "test", ChangeType::Added);
1212        assert!(!changes.iter().any(|c| c.entity_name == "scripts" || c.entity_name == "tasks"),
1213            "parent Deleted/Added should be suppressed; got: {:?}", names(&changes));
1214    }
1215
1216    #[test]
1217    fn scalar_array_transitions_report_modified_only() {
1218        // Arrays are opaque, so the type transition surfaces as a single
1219        // Modified entry with entity_type reflecting the after value.
1220        let cases = [
1221            ("{\n  \"deps\": \"react\"\n}", "{\n  \"deps\": [\"react\", \"vue\"]\n}", "array"),
1222            ("{\n  \"deps\": [\"react\", \"vue\"]\n}", "{\n  \"deps\": \"react\"\n}", "property"),
1223        ];
1224        for (before, after, after_type) in cases {
1225            let changes = json_diff(before, after);
1226            assert_eq!(names(&changes), vec![("deps".into(), ChangeType::Modified)]);
1227            assert_eq!(changes[0].entity_type, after_type);
1228        }
1229    }
1230
1231    #[test]
1232    fn object_to_array_transition_reports_modified_plus_old_children_deleted() {
1233        let changes = json_diff(
1234            "{\n  \"deps\": {\n    \"react\": \"18\"\n  }\n}",
1235            "{\n  \"deps\": [\"react\"]\n}",
1236        );
1237        let deps = find_change(&changes, "deps", ChangeType::Modified);
1238        assert_eq!(deps.entity_type, "array");
1239        find_change(&changes, "react", ChangeType::Deleted);
1240    }
1241
1242    #[test]
1243    fn array_to_object_transition_reports_modified_plus_new_children_added() {
1244        let changes = json_diff(
1245            "{\n  \"deps\": [\"react\"]\n}",
1246            "{\n  \"deps\": {\n    \"react\": \"18\"\n  }\n}",
1247        );
1248        let deps = find_change(&changes, "deps", ChangeType::Modified);
1249        assert_eq!(deps.entity_type, "object");
1250        let react = find_change(&changes, "react", ChangeType::Added);
1251        assert_eq!(react.parent_name.as_deref(), Some("deps"));
1252    }
1253
1254    #[test]
1255    fn deep_whole_section_deleted_only_leaf_reported() {
1256        let changes = json_diff(
1257            "{\n  \"jest\": {\n    \"config\": {\n      \"testTimeout\": 5000\n    }\n  }\n}",
1258            "{}",
1259        );
1260        let timeout = find_change(&changes, "testTimeout", ChangeType::Deleted);
1261        assert_eq!(timeout.parent_name.as_deref(), Some("jest::config"));
1262        assert!(!changes.iter().any(|c| c.entity_name == "jest" || c.entity_name == "config"),
1263            "intermediate containers should be suppressed; got: {:?}", names(&changes));
1264    }
1265
1266    #[test]
1267    fn pointer_escapes_preserve_rfc6901_order() {
1268        // '~' must be escaped before '/'. Otherwise a literal '/' would become
1269        // '~1' and the '~' inside that would then become '~01'.
1270        let cases = [
1271            ("a/b", "test.json::/a~1b"),
1272            ("a~b", "test.json::/a~0b"),
1273            ("a~/b", "test.json::/a~0~1b"),
1274        ];
1275        for (key, expected_id) in cases {
1276            let changes = json_diff(
1277                &format!("{{\n  \"{key}\": 1\n}}"),
1278                &format!("{{\n  \"{key}\": 2\n}}"),
1279            );
1280            assert_eq!(changes.len(), 1);
1281            assert_eq!(changes[0].entity_id, expected_id, "key {key}");
1282        }
1283    }
1284
1285    // ─────────────────────────────────────────────────────────────────────────
1286    //  Document-level edge cases
1287    // ─────────────────────────────────────────────────────────────────────────
1288
1289    #[test]
1290    fn empty_object_and_array_produce_no_entities() {
1291        let plugin = JsonParserPlugin;
1292        for input in ["{}", "[]"] {
1293            assert!(
1294                plugin.extract_entities(input, "test.json").is_empty(),
1295                "input: {input}"
1296            );
1297        }
1298    }
1299
1300    #[test]
1301    fn root_scalars_produce_document_chunk() {
1302        let plugin = JsonParserPlugin;
1303        for input in ["\"hello\"", "42", "null"] {
1304            let entities = plugin.extract_entities(input, "test.json");
1305            assert_eq!(entities.len(), 1, "input: {input}");
1306            assert_eq!(entities[0].id, "test.json::chunk::(document)");
1307            assert_eq!(entities[0].entity_type, "chunk");
1308            assert_eq!(entities[0].name, "(document)");
1309            assert_eq!(entities[0].start_line, 1);
1310            assert_eq!(entities[0].end_line, 1);
1311        }
1312    }
1313
1314    #[test]
1315    fn root_scalar_change_reports_document_modified() {
1316        let changes = json_diff("42", "43");
1317
1318        assert_eq!(names(&changes), vec![("(document)".into(), ChangeType::Modified)]);
1319        assert_eq!(changes[0].entity_type, "chunk");
1320    }
1321
1322    #[test]
1323    fn malformed_input_does_not_panic() {
1324        let plugin = JsonParserPlugin;
1325        let cases = [
1326            "{",                                 // unclosed root
1327            "{\"a\":",                           // dangling colon
1328            "{\"a\": {",                         // unclosed nested object
1329            "{\"a\": {] }}",                     // stray ']' inside object value
1330            "{\"a\": {\"b\": [}]}",              // mismatched brackets in array
1331            "{\"a\": }}}}",                      // multiple stray '}'
1332            "{\"a\": {\"b\": 1}, \"c\":",        // truncated mid-object
1333        ];
1334        for input in cases {
1335            let _ = plugin.extract_entities(input, "test.json");
1336        }
1337    }
1338
1339    #[test]
1340    fn parent_rename_with_child_value_change_falls_back_to_leaf_delete_add() {
1341        let changes = json_diff(
1342            "{\n  \"scripts\": {\n    \"dev\": \"vite\"\n  }\n}\n",
1343            "{\n  \"tasks\": {\n    \"dev\": \"rollup\"\n  }\n}\n",
1344        );
1345        find_change(&changes, "dev", ChangeType::Deleted);
1346        find_change(&changes, "dev", ChangeType::Added);
1347        assert!(!changes.iter().any(|c| c.change_type == ChangeType::Renamed),
1348            "rename should not be detectable; got: {:?}", names(&changes));
1349    }
1350}