1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
5pub struct JsonParserPlugin;
6
7impl SemanticParserPlugin for JsonParserPlugin {
8 fn id(&self) -> &str {
9 "json"
10 }
11
12 fn extensions(&self) -> &[&str] {
13 &[".json"]
14 }
15
16 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17 let trimmed = content.trim();
22 if !trimmed.starts_with('{') {
23 return Vec::new();
24 }
25
26 let lines: Vec<&str> = content.lines().collect();
27 let entries = find_top_level_entries(content);
28 let closing = find_closing_brace_line(&lines);
29
30 let mut entities = Vec::new();
31 for (i, entry) in entries.iter().enumerate() {
32 let end_line = if i + 1 < entries.len() {
33 let next_start = entries[i + 1].start_line;
34 trim_trailing_blanks(&lines, entry.start_line, next_start)
35 } else {
36 trim_trailing_blanks(&lines, entry.start_line, closing)
37 };
38
39 let entity_content = lines[entry.start_line - 1..end_line]
40 .join("\n");
41
42 let value_content = extract_value_content(&entity_content);
43 let structural_hash = Some(content_hash(value_content));
44
45 let parent_id = build_entity_id(file_path, &entry.entity_type, &entry.pointer, None);
46
47 entities.push(SemanticEntity {
48 id: parent_id.clone(),
49 file_path: file_path.to_string(),
50 entity_type: entry.entity_type.clone(),
51 name: entry.key.clone(),
52 parent_id: None,
53 content_hash: content_hash(&entity_content),
54 structural_hash,
55 content: entity_content.clone(),
56 start_line: entry.start_line,
57 end_line,
58 metadata: None,
59 });
60
61 if entry.entity_type == "object" {
63 let nested = find_nested_object_entries(&entity_content, entry.start_line);
64 for (j, nentry) in nested.iter().enumerate() {
65 let child_end = if j + 1 < nested.len() {
66 trim_trailing_blanks(&lines, nentry.start_line, nested[j + 1].start_line)
67 } else {
68 trim_trailing_blanks(&lines, nentry.start_line, end_line)
69 };
70
71 let child_content = lines[nentry.start_line - 1..child_end].join("\n");
72 let child_value = extract_value_content(&child_content);
73
74 entities.push(SemanticEntity {
75 id: build_entity_id(file_path, &nentry.entity_type, &nentry.key, Some(&parent_id)),
76 file_path: file_path.to_string(),
77 entity_type: nentry.entity_type.clone(),
78 name: nentry.key.clone(),
79 parent_id: Some(parent_id.clone()),
80 content_hash: content_hash(&child_content),
81 structural_hash: Some(content_hash(child_value)),
82 content: child_content,
83 start_line: nentry.start_line,
84 end_line: child_end,
85 metadata: None,
86 });
87 }
88 }
89 }
90
91 entities
92 }
93}
94
/// A key located while scanning JSON text, before it is turned into an entity.
struct JsonEntry {
    /// Key text exactly as written between the quotes (escape sequences are kept raw).
    key: String,
    /// `/`-prefixed key with `~` written as `~0` and `/` as `~1`; left empty for
    /// entries found inside a nested object.
    pointer: String,
    /// Either `"object"` or `"property"` once scanning has finished.
    entity_type: String,
    /// 1-based line on which the colon following the key was seen.
    start_line: usize,
}
101
102fn find_top_level_entries(content: &str) -> Vec<JsonEntry> {
105 let mut entries = Vec::new();
106 let mut depth = 0;
107 let mut in_string = false;
108 let mut escape_next = false;
109 let mut line_num: usize = 1;
110
111 let mut current_key: Option<String> = None;
113 let mut key_start = false;
114 let mut key_buf = String::new();
115 let mut reading_key = false;
116
117 for ch in content.chars() {
118 if ch == '\n' {
119 line_num += 1;
120 continue;
121 }
122
123 if escape_next {
124 if reading_key {
125 key_buf.push(ch);
126 }
127 escape_next = false;
128 continue;
129 }
130
131 if ch == '\\' && in_string {
132 if reading_key {
133 key_buf.push(ch);
134 }
135 escape_next = true;
136 continue;
137 }
138
139 if in_string {
140 if ch == '"' {
141 in_string = false;
142 if reading_key {
143 reading_key = false;
144 current_key = Some(key_buf.clone());
145 key_buf.clear();
146 }
147 } else if reading_key {
148 key_buf.push(ch);
149 }
150 continue;
151 }
152
153 match ch {
154 '"' => {
155 in_string = true;
156 if depth == 1 && current_key.is_none() && !key_start {
158 reading_key = true;
159 key_buf.clear();
160 }
161 }
162 ':' => {
163 if depth == 1 {
164 if let Some(ref key) = current_key {
165 let escaped_key = key.replace('~', "~0").replace('/', "~1");
167 let pointer = format!("/{escaped_key}");
168 entries.push(JsonEntry {
169 key: key.clone(),
170 pointer,
171 entity_type: String::new(), start_line: line_num,
173 });
174 key_start = true;
175 }
176 }
177 }
178 '{' | '[' => {
179 depth += 1;
180 if depth == 2 && key_start {
181 if let Some(entry) = entries.last_mut() {
183 entry.entity_type = "object".to_string();
184 }
185 }
186 }
187 '}' | ']' => {
188 depth -= 1;
189 }
190 ',' => {
191 if depth == 1 {
192 if let Some(entry) = entries.last_mut() {
194 if entry.entity_type.is_empty() {
195 entry.entity_type = "property".to_string();
196 }
197 }
198 current_key = None;
199 key_start = false;
200 }
201 }
202 _ => {}
203 }
204 }
205
206 if let Some(entry) = entries.last_mut() {
208 if entry.entity_type.is_empty() {
209 entry.entity_type = "property".to_string();
210 }
211 }
212
213 entries
214}
215
216fn find_nested_object_entries(entity_content: &str, base_line: usize) -> Vec<JsonEntry> {
219 let mut entries = Vec::new();
220 let mut in_string = false;
221 let mut escape_next = false;
222 let mut line_num: usize = 0; let mut found_outer_colon = false;
224 let mut found_value_start = false;
225 let mut value_depth: usize = 0;
226 let mut current_key: Option<String> = None;
227 let mut reading_key = false;
228 let mut key_buf = String::new();
229 let mut key_start = false;
230
231 for ch in entity_content.chars() {
232 if ch == '\n' {
233 line_num += 1;
234 continue;
235 }
236
237 if escape_next {
238 if reading_key {
239 key_buf.push(ch);
240 }
241 escape_next = false;
242 continue;
243 }
244
245 if ch == '\\' && in_string {
246 if reading_key {
247 key_buf.push(ch);
248 }
249 escape_next = true;
250 continue;
251 }
252
253 if in_string {
254 if ch == '"' {
255 in_string = false;
256 if reading_key {
257 reading_key = false;
258 current_key = Some(key_buf.clone());
259 key_buf.clear();
260 }
261 } else if reading_key {
262 key_buf.push(ch);
263 }
264 continue;
265 }
266
267 if !found_value_start {
268 match ch {
269 '"' => {
270 in_string = true;
271 }
272 ':' => {
273 found_outer_colon = true;
274 }
275 '{' if found_outer_colon => {
276 found_value_start = true;
277 value_depth = 1;
278 }
279 _ => {}
280 }
281 continue;
282 }
283
284 match ch {
285 '"' => {
286 in_string = true;
287 if value_depth == 1 && current_key.is_none() && !key_start {
288 reading_key = true;
289 key_buf.clear();
290 }
291 }
292 ':' => {
293 if value_depth == 1 {
294 if let Some(ref key) = current_key {
295 entries.push(JsonEntry {
296 key: key.clone(),
297 pointer: String::new(),
298 entity_type: "property".to_string(),
299 start_line: base_line + line_num,
300 });
301 key_start = true;
302 }
303 }
304 }
305 '{' | '[' => {
306 value_depth += 1;
307 }
308 '}' | ']' => {
309 value_depth -= 1;
310 if value_depth == 0 {
311 break;
312 }
313 }
314 ',' => {
315 if value_depth == 1 {
316 current_key = None;
317 key_start = false;
318 }
319 }
320 _ => {}
321 }
322 }
323
324 entries
325}
326
/// Returns the value part of a `"key": value` snippet.
///
/// The value is everything after the first colon that lies outside a string
/// literal, with surrounding whitespace and any trailing commas removed. When
/// no such colon exists, `content` is returned unchanged.
fn extract_value_content(content: &str) -> &str {
    let mut inside_quotes = false;
    let mut skip_next = false;

    for (idx, c) in content.char_indices() {
        // The character right after a backslash inside a string is skipped,
        // so an escaped quote does not end the string.
        if skip_next {
            skip_next = false;
            continue;
        }

        match c {
            '\\' if inside_quotes => skip_next = true,
            '"' => inside_quotes = !inside_quotes,
            ':' if !inside_quotes => {
                let value = content[idx + 1..].trim();
                return value.trim_end_matches(',').trim();
            }
            _ => {}
        }
    }

    content
}
352
/// Returns the 1-based number of the last line that consists solely of `}`
/// (ignoring surrounding whitespace), or the number of lines when no such
/// line exists.
fn find_closing_brace_line(lines: &[&str]) -> usize {
    lines
        .iter()
        .rposition(|line| line.trim() == "}")
        .map_or(lines.len(), |index| index + 1)
}
362
/// Returns the 1-based last line of an entry that begins on line `start` and
/// is followed by something beginning on line `next_start`.
///
/// The candidate end is the line before `next_start`; trailing lines that are
/// blank or contain only a comma are then dropped, but never past `start`.
///
/// The result is never smaller than `start`. When the following entry (or the
/// closing brace) sits on the entry's own line, the entry is reported as that
/// single line instead of an empty, inverted range. This keeps callers that
/// slice `lines[start - 1..end]` from producing empty content or panicking.
///
/// `next_start` values of 0 or beyond the end of `lines` are tolerated.
fn trim_trailing_blanks(lines: &[&str], start: usize, next_start: usize) -> usize {
    // `saturating_sub` guards against underflow for `next_start == 0`; `min`
    // keeps the `lines[end - 1]` lookup below inside the slice.
    let mut end = next_start.saturating_sub(1).min(lines.len());
    while end > start {
        let text = lines[end - 1].trim();
        if text.is_empty() || text == "," {
            end -= 1;
        } else {
            break;
        }
    }
    // An entry always covers at least the line it starts on.
    end.max(start)
}
377
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::change::ChangeType;
    use crate::model::identity::match_entities;

    /// Runs the JSON plugin over `content` as if it were the file at `path`.
    fn parse(content: &str, path: &str) -> Vec<SemanticEntity> {
        JsonParserPlugin.extract_entities(content, path)
    }

    /// Top-level and nested entities are reported in document order with
    /// 1-based, inclusive line ranges, and children point at their parent.
    #[test]
    fn test_json_line_positions() {
        let content = r#"{
 "name": "my-app",
 "version": "1.0.0",
 "scripts": {
 "build": "tsc",
 "test": "jest"
 },
 "description": "a test app"
}
"#;
        let entities = parse(content, "package.json");

        // (name, start_line, end_line) for every expected entity, in order.
        let expected = [
            ("name", 2, 2),
            ("version", 3, 3),
            ("scripts", 4, 7),
            ("build", 5, 5),
            ("test", 6, 6),
            ("description", 8, 8),
        ];
        assert_eq!(entities.len(), 6);
        for (entity, &(name, start, end)) in entities.iter().zip(expected.iter()) {
            assert_eq!(entity.name, name);
            assert_eq!(entity.start_line, start);
            assert_eq!(entity.end_line, end);
        }

        assert!(entities[0].parent_id.is_none());
        assert_eq!(entities[2].entity_type, "object");

        // Both nested keys belong to the "scripts" entity.
        for child in &entities[3..5] {
            assert_eq!(child.parent_id.as_deref(), Some(&entities[2].id as &str));
        }
    }

    /// Renaming a key while keeping its value is reported as a single rename.
    #[test]
    fn test_rename_detected_end_to_end() {
        let before = parse("{\n \"timeout\": 30\n}\n", "config.json");
        let after = parse("{\n \"request_timeout\": 30\n}\n", "config.json");

        let result = match_entities(&before, &after, "config.json", None, None, None);

        assert_eq!(result.changes.len(), 1);
        let change = &result.changes[0];
        assert_eq!(change.change_type, ChangeType::Renamed);
        assert_eq!(change.entity_name, "request_timeout");
    }

    /// A renamed scalar property changes its content hash but keeps its
    /// structural hash, since the value is untouched.
    #[test]
    fn test_renamed_scalar_property_shares_structural_hash() {
        let before = parse("{\n \"timeout\": 30\n}\n", "config.json");
        let after = parse("{\n \"request_timeout\": 30\n}\n", "config.json");

        assert_eq!(before.len(), 1);
        assert_eq!(after.len(), 1);
        assert_ne!(before[0].content_hash, after[0].content_hash);
        assert_eq!(before[0].structural_hash, after[0].structural_hash);
    }

    /// The same holds for a renamed property whose value is an object.
    #[test]
    fn test_renamed_object_property_shares_structural_hash() {
        let before = parse("{\n \"config\": {\n \"port\": 8080\n }\n}\n", "config.json");
        let after = parse("{\n \"settings\": {\n \"port\": 8080\n }\n}\n", "config.json");

        // One entity for the object itself and one for its "port" child.
        assert_eq!(before.len(), 2);
        assert_eq!(after.len(), 2);
        assert_ne!(before[0].content_hash, after[0].content_hash);
        assert_eq!(before[0].structural_hash, after[0].structural_hash);
    }
}