Skip to main content

sem_core/parser/plugins/
json.rs

1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
5pub struct JsonParserPlugin;
6
7impl SemanticParserPlugin for JsonParserPlugin {
8    fn id(&self) -> &str {
9        "json"
10    }
11
12    fn extensions(&self) -> &[&str] {
13        &[".json"]
14    }
15
16    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17        // Only extract top-level properties from JSON objects.
18        // We scan the source text directly to get accurate line positions,
19        // which weave needs for entity-level merge reconstruction.
20        let trimmed = content.trim();
21        if !trimmed.starts_with('{') {
22            return Vec::new();
23        }
24
25        let lines: Vec<&str> = content.lines().collect();
26        let entries = find_top_level_entries(content);
27
28        let mut entities = Vec::new();
29        for (i, entry) in entries.iter().enumerate() {
30            let end_line = if i + 1 < entries.len() {
31                // End just before the next entry starts (minus trailing blank/comma lines)
32                let next_start = entries[i + 1].start_line;
33                trim_trailing_blanks(&lines, entry.start_line, next_start)
34            } else {
35                // Last entry: end before the closing brace
36                let closing = find_closing_brace_line(&lines);
37                trim_trailing_blanks(&lines, entry.start_line, closing)
38            };
39
40            let entity_content = lines[entry.start_line - 1..end_line]
41                .join("\n");
42
43            // Compute a structural_hash over just the value (excluding the key name)
44            // so that rename detection works: "timeout": 30 → "request_timeout": 30
45            let value_content = extract_value_content(&entity_content);
46            let structural_hash = Some(content_hash(value_content));
47
48            entities.push(SemanticEntity {
49                id: build_entity_id(file_path, &entry.entity_type, &entry.pointer, None),
50                file_path: file_path.to_string(),
51                entity_type: entry.entity_type.clone(),
52                name: entry.key.clone(),
53                parent_id: None,
54                content_hash: content_hash(&entity_content),
55                structural_hash,
56                content: entity_content,
57                start_line: entry.start_line,
58                end_line,
59                metadata: None,
60            });
61        }
62
63        entities
64    }
65}
66
/// One `"key": value` member found directly inside the root JSON object.
#[derive(Debug, Clone, PartialEq, Eq)]
struct JsonEntry {
    /// Key text as written between the quotes (escape sequences kept verbatim).
    key: String,
    /// `/` followed by the key, with `~` rewritten to `~0` and `/` to `~1`.
    pointer: String,
    /// `"object"` when the value opens with `{` or `[`, otherwise `"property"`.
    entity_type: String,
    /// 1-based line on which the key's opening quote appears.
    start_line: usize,
}

/// Scan the source text to find each top-level key in the root JSON object.
///
/// Returns one entry per `"key":` seen at nesting depth 1, in source order.
/// `start_line` is the line of the key's opening quote, so a member whose `:`
/// sits on a later line than its key still starts at the key.
fn find_top_level_entries(content: &str) -> Vec<JsonEntry> {
    let mut entries: Vec<JsonEntry> = Vec::new();
    let mut depth: i32 = 0;
    let mut in_string = false;
    let mut escape_next = false;
    let mut line_num: usize = 1;

    // Key tracking at depth 1.
    let mut pending_key: Option<String> = None; // key read, waiting for its ':'
    let mut key_line: usize = 1; // line of the pending key's opening quote
    let mut key_buf = String::new();
    let mut reading_key = false;
    let mut in_value = false; // ':' seen, waiting for the ',' that ends the member

    for ch in content.chars() {
        // Newlines only advance the line counter; they never reach the state
        // machine below and are never added to a key.
        if ch == '\n' {
            line_num += 1;
            continue;
        }

        if in_string {
            if escape_next {
                // Character following a backslash: never terminates the string.
                escape_next = false;
                if reading_key {
                    key_buf.push(ch);
                }
            } else if ch == '\\' {
                escape_next = true;
                if reading_key {
                    key_buf.push(ch);
                }
            } else if ch == '"' {
                in_string = false;
                if reading_key {
                    reading_key = false;
                    pending_key = Some(std::mem::take(&mut key_buf));
                }
            } else if reading_key {
                key_buf.push(ch);
            }
            continue;
        }

        match ch {
            '"' => {
                in_string = true;
                // At depth 1, a string is a key unless we are already past the
                // ':' of the current member (then it is the value).
                if depth == 1 && pending_key.is_none() && !in_value {
                    reading_key = true;
                    key_line = line_num;
                    key_buf.clear();
                }
            }
            ':' if depth == 1 => {
                if let Some(key) = pending_key.take() {
                    let escaped_key = key.replace('~', "~0").replace('/', "~1");
                    entries.push(JsonEntry {
                        pointer: format!("/{escaped_key}"),
                        key,
                        entity_type: String::new(), // filled in below
                        start_line: key_line,
                    });
                    in_value = true;
                }
            }
            '{' | '[' => {
                depth += 1;
                if depth == 2 && in_value {
                    // The value for this key is an object/array.
                    if let Some(entry) = entries.last_mut() {
                        entry.entity_type = "object".to_string();
                    }
                }
            }
            '}' | ']' => depth -= 1,
            ',' if depth == 1 => {
                // End of a top-level member: anything not marked as an
                // object/array by now is a scalar property.
                if let Some(entry) = entries.last_mut() {
                    if entry.entity_type.is_empty() {
                        entry.entity_type = "property".to_string();
                    }
                }
                pending_key = None;
                in_value = false;
            }
            _ => {}
        }
    }

    // Handle last entry (no trailing comma).
    if let Some(entry) = entries.last_mut() {
        if entry.entity_type.is_empty() {
            entry.entity_type = "property".to_string();
        }
    }

    entries
}
187
/// Return the value part of a `"key": value` snippet.
///
/// The split point is the first `:` that is not inside a double-quoted
/// string (backslash escapes inside strings are honoured). The text after it
/// is trimmed, stripped of trailing commas, and trimmed again. Dropping the
/// key lets renamed keys with identical values share a structural hash.
/// When no such `:` exists the input is returned unchanged.
fn extract_value_content(content: &str) -> &str {
    let mut inside_quotes = false;
    let mut skip_next = false;

    let separator = content.char_indices().find_map(|(idx, c)| {
        if skip_next {
            // Character following a backslash inside a string.
            skip_next = false;
            return None;
        }
        match c {
            '\\' if inside_quotes => {
                skip_next = true;
                None
            }
            '"' => {
                inside_quotes = !inside_quotes;
                None
            }
            ':' if !inside_quotes => Some(idx),
            _ => None,
        }
    });

    match separator {
        Some(idx) => content[idx + 1..].trim().trim_end_matches(',').trim(),
        None => content,
    }
}
213
/// Locate the closing `}` of the root object.
///
/// Returns the 1-based number of the last line whose trimmed text is exactly
/// `}`; when no line matches, returns the number of lines.
fn find_closing_brace_line(lines: &[&str]) -> usize {
    lines
        .iter()
        .rposition(|text| text.trim() == "}")
        .map_or(lines.len(), |idx| idx + 1)
}
223
/// Walk backwards from `next_start` to skip trailing blank lines and lines
/// holding only a comma, returning the end line (1-based, inclusive) of the
/// entry that starts on line `start`.
///
/// The result is never smaller than `start`: when the boundary is on the same
/// line as the entry (several entries on one line, or the closing brace on the
/// entry's own line) or lies before it, the entry is taken to end on its
/// start line. A boundary past the end of `lines` is clamped to the last line.
fn trim_trailing_blanks(lines: &[&str], start: usize, next_start: usize) -> usize {
    // Candidate end is the line before the boundary, clamped into
    // [start, lines.len()]. Without the lower clamp callers received an end
    // before the start and sliced an empty or inverted range.
    let mut end = next_start.saturating_sub(1).min(lines.len()).max(start);

    // `end > start` together with the clamp above guarantees
    // 1 <= end <= lines.len(), so the index below is in bounds.
    while end > start {
        let text = lines[end - 1].trim();
        if text.is_empty() || text == "," {
            end -= 1;
        } else {
            break;
        }
    }
    end
}
238
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::change::ChangeType;
    use crate::model::identity::match_entities;

    /// Each top-level property is reported with the 1-based line span it
    /// occupies in the source text.
    #[test]
    fn test_json_line_positions() {
        let source = r#"{
  "name": "my-app",
  "version": "1.0.0",
  "scripts": {
    "build": "tsc",
    "test": "jest"
  },
  "description": "a test app"
}
"#;
        let entities = JsonParserPlugin.extract_entities(source, "package.json");

        // (name, start_line, end_line) in source order.
        let expected: [(&str, usize, usize); 4] = [
            ("name", 2, 2),
            ("version", 3, 3),
            ("scripts", 4, 7),
            ("description", 8, 8),
        ];

        assert_eq!(entities.len(), 4);
        for (entity, &(name, first, last)) in entities.iter().zip(expected.iter()) {
            assert_eq!(entity.name, name);
            assert_eq!(entity.start_line, first);
            assert_eq!(entity.end_line, last);
        }
        assert_eq!(entities[2].entity_type, "object");
    }

    /// Renaming a key while keeping its value is reported as a single rename.
    #[test]
    fn test_rename_detected_end_to_end() {
        let parser = JsonParserPlugin;
        let old_entities = parser.extract_entities("{\n  \"timeout\": 30\n}\n", "config.json");
        let new_entities =
            parser.extract_entities("{\n  \"request_timeout\": 30\n}\n", "config.json");

        let outcome = match_entities(&old_entities, &new_entities, "config.json", None, None, None);

        assert_eq!(outcome.changes.len(), 1);
        let change = &outcome.changes[0];
        assert_eq!(change.change_type, ChangeType::Renamed);
        assert_eq!(change.entity_name, "request_timeout");
    }

    /// A renamed scalar property keeps its structural hash but not its content hash.
    #[test]
    fn test_renamed_scalar_property_shares_structural_hash() {
        let parser = JsonParserPlugin;
        let old_entities = parser.extract_entities("{\n  \"timeout\": 30\n}\n", "config.json");
        let new_entities =
            parser.extract_entities("{\n  \"request_timeout\": 30\n}\n", "config.json");

        assert_eq!(old_entities.len(), 1);
        assert_eq!(new_entities.len(), 1);
        let (old, new) = (&old_entities[0], &new_entities[0]);
        // content_hash differs (key name is part of content)
        assert_ne!(old.content_hash, new.content_hash);
        // structural_hash matches (same value)
        assert_eq!(old.structural_hash, new.structural_hash);
    }

    /// A renamed object-valued property keeps its structural hash but not its content hash.
    #[test]
    fn test_renamed_object_property_shares_structural_hash() {
        let parser = JsonParserPlugin;
        let old_entities = parser.extract_entities(
            "{\n  \"config\": {\n    \"port\": 8080\n  }\n}\n",
            "config.json",
        );
        let new_entities = parser.extract_entities(
            "{\n  \"settings\": {\n    \"port\": 8080\n  }\n}\n",
            "config.json",
        );

        assert_eq!(old_entities.len(), 1);
        assert_eq!(new_entities.len(), 1);
        let (old, new) = (&old_entities[0], &new_entities[0]);
        assert_ne!(old.content_hash, new.content_hash);
        assert_eq!(old.structural_hash, new.structural_hash);
    }
}
320}