sem_core/parser/plugins/
json.rs1use crate::model::entity::{build_entity_id, SemanticEntity};
2use crate::parser::plugin::SemanticParserPlugin;
3use crate::utils::hash::content_hash;
4
/// Parser plugin that turns the top-level keys of a JSON object into semantic entities.
///
/// The type carries no state; all behavior lives in its `SemanticParserPlugin` impl.
#[derive(Debug, Clone, Copy, Default)]
pub struct JsonParserPlugin;
6
7impl SemanticParserPlugin for JsonParserPlugin {
8 fn id(&self) -> &str {
9 "json"
10 }
11
12 fn extensions(&self) -> &[&str] {
13 &[".json"]
14 }
15
16 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
17 let trimmed = content.trim();
21 if !trimmed.starts_with('{') {
22 return Vec::new();
23 }
24
25 let lines: Vec<&str> = content.lines().collect();
26 let entries = find_top_level_entries(content);
27
28 let mut entities = Vec::new();
29 for (i, entry) in entries.iter().enumerate() {
30 let end_line = if i + 1 < entries.len() {
31 let next_start = entries[i + 1].start_line;
33 trim_trailing_blanks(&lines, entry.start_line, next_start)
34 } else {
35 let closing = find_closing_brace_line(&lines);
37 trim_trailing_blanks(&lines, entry.start_line, closing)
38 };
39
40 let entity_content = lines[entry.start_line - 1..end_line]
41 .join("\n");
42
43 let value_content = extract_value_content(&entity_content);
46 let structural_hash = Some(content_hash(value_content));
47
48 entities.push(SemanticEntity {
49 id: build_entity_id(file_path, &entry.entity_type, &entry.pointer, None),
50 file_path: file_path.to_string(),
51 entity_type: entry.entity_type.clone(),
52 name: entry.key.clone(),
53 parent_id: None,
54 content_hash: content_hash(&entity_content),
55 structural_hash,
56 content: entity_content,
57 start_line: entry.start_line,
58 end_line,
59 metadata: None,
60 });
61 }
62
63 entities
64 }
65}
66
/// A top-level key of a JSON object, as discovered by `find_top_level_entries`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct JsonEntry {
    /// Key text as it appears between the quotes (escape sequences are kept verbatim).
    key: String,
    /// `/` followed by the key with `~` rewritten to `~0` and `/` rewritten to `~1`.
    pointer: String,
    /// Either `"object"` (the value opens with `{` or `[`) or `"property"`.
    entity_type: String,
    /// 1-based line number on which the entry starts.
    start_line: usize,
}
73
74fn find_top_level_entries(content: &str) -> Vec<JsonEntry> {
77 let mut entries = Vec::new();
78 let mut depth = 0;
79 let mut in_string = false;
80 let mut escape_next = false;
81 let mut line_num: usize = 1;
82
83 let mut current_key: Option<String> = None;
85 let mut key_start = false;
86 let mut key_buf = String::new();
87 let mut reading_key = false;
88
89 for ch in content.chars() {
90 if ch == '\n' {
91 line_num += 1;
92 continue;
93 }
94
95 if escape_next {
96 if reading_key {
97 key_buf.push(ch);
98 }
99 escape_next = false;
100 continue;
101 }
102
103 if ch == '\\' && in_string {
104 if reading_key {
105 key_buf.push(ch);
106 }
107 escape_next = true;
108 continue;
109 }
110
111 if in_string {
112 if ch == '"' {
113 in_string = false;
114 if reading_key {
115 reading_key = false;
116 current_key = Some(key_buf.clone());
117 key_buf.clear();
118 }
119 } else if reading_key {
120 key_buf.push(ch);
121 }
122 continue;
123 }
124
125 match ch {
126 '"' => {
127 in_string = true;
128 if depth == 1 && current_key.is_none() && !key_start {
130 reading_key = true;
131 key_buf.clear();
132 }
133 }
134 ':' => {
135 if depth == 1 {
136 if let Some(ref key) = current_key {
137 let escaped_key = key.replace('~', "~0").replace('/', "~1");
139 let pointer = format!("/{escaped_key}");
140 entries.push(JsonEntry {
141 key: key.clone(),
142 pointer,
143 entity_type: String::new(), start_line: line_num,
145 });
146 key_start = true;
147 }
148 }
149 }
150 '{' | '[' => {
151 depth += 1;
152 if depth == 2 && key_start {
153 if let Some(entry) = entries.last_mut() {
155 entry.entity_type = "object".to_string();
156 }
157 }
158 }
159 '}' | ']' => {
160 depth -= 1;
161 }
162 ',' => {
163 if depth == 1 {
164 if let Some(entry) = entries.last_mut() {
166 if entry.entity_type.is_empty() {
167 entry.entity_type = "property".to_string();
168 }
169 }
170 current_key = None;
171 key_start = false;
172 }
173 }
174 _ => {}
175 }
176 }
177
178 if let Some(entry) = entries.last_mut() {
180 if entry.entity_type.is_empty() {
181 entry.entity_type = "property".to_string();
182 }
183 }
184
185 entries
186}
187
/// Returns the value part of a `"key": value` snippet.
///
/// Locates the first `:` that is not inside a string literal (honouring
/// backslash escapes), then returns everything after it with surrounding
/// whitespace and any trailing commas removed. When no such `:` exists the
/// input is returned unchanged.
fn extract_value_content(content: &str) -> &str {
    let mut inside_quotes = false;
    let mut skip_escaped = false;

    let separator = content.char_indices().find_map(|(idx, c)| {
        if skip_escaped {
            // This character was escaped by the preceding backslash.
            skip_escaped = false;
            return None;
        }
        match c {
            '\\' if inside_quotes => {
                skip_escaped = true;
                None
            }
            '"' => {
                inside_quotes = !inside_quotes;
                None
            }
            ':' if !inside_quotes => Some(idx),
            _ => None,
        }
    });

    match separator {
        Some(idx) => content[idx + 1..].trim().trim_end_matches(',').trim(),
        None => content,
    }
}
213
/// Returns the 1-based number of the last line whose trimmed text is exactly `}`.
///
/// Falls back to `lines.len()` when no line matches.
fn find_closing_brace_line(lines: &[&str]) -> usize {
    lines
        .iter()
        .rposition(|line| line.trim() == "}")
        .map_or(lines.len(), |idx| idx + 1)
}
223
/// Returns the 1-based number of the last meaningful line of an entry.
///
/// The entry begins on line `start`; `next_start` is the line on which the
/// following entry (or the closing brace) begins. The search starts at
/// `next_start - 1` and walks upwards past lines that are blank or consist
/// solely of `,`, never moving above `start`.
///
/// The result is always at least `start`. This covers the cases where
/// `next_start` is on the same or an earlier line than `start`, is `0`, or
/// points beyond the end of `lines`; none of them underflow or index out of
/// bounds.
fn trim_trailing_blanks(lines: &[&str], start: usize, next_start: usize) -> usize {
    let mut end = next_start
        .saturating_sub(1) // `next_start == 0` must not underflow
        .min(lines.len()) // never look past the last line
        .max(start); // an entry always covers at least its first line

    while end > start {
        let trimmed = lines[end - 1].trim();
        if trimmed.is_empty() || trimmed == "," {
            end -= 1;
        } else {
            break;
        }
    }
    end
}
238
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::change::ChangeType;
    use crate::model::identity::match_entities;

    /// Parses `content` with the JSON plugin as though it were stored at `config.json`.
    fn parse_config(content: &str) -> Vec<SemanticEntity> {
        JsonParserPlugin.extract_entities(content, "config.json")
    }

    /// Checks that the single entity found in `before_content` and the single
    /// entity found in `after_content` differ in content hash yet share the
    /// same structural hash.
    fn assert_rename_keeps_structural_hash(before_content: &str, after_content: &str) {
        let before = parse_config(before_content);
        let after = parse_config(after_content);
        assert_eq!(before.len(), 1);
        assert_eq!(after.len(), 1);
        assert_ne!(before[0].content_hash, after[0].content_hash);
        assert_eq!(before[0].structural_hash, after[0].structural_hash);
    }

    /// Every top-level key reports the line span it occupies in the file.
    #[test]
    fn test_json_line_positions() {
        let content = r#"{
  "name": "my-app",
  "version": "1.0.0",
  "scripts": {
    "build": "tsc",
    "test": "jest"
  },
  "description": "a test app"
}
"#;
        let entities = JsonParserPlugin.extract_entities(content, "package.json");

        assert_eq!(entities.len(), 4);

        let spans: Vec<(&str, usize, usize)> = entities
            .iter()
            .map(|entity| (entity.name.as_str(), entity.start_line, entity.end_line))
            .collect();
        assert_eq!(
            spans,
            vec![
                ("name", 2, 2),
                ("version", 3, 3),
                ("scripts", 4, 7),
                ("description", 8, 8),
            ]
        );
        assert_eq!(entities[2].entity_type, "object");
    }

    /// Renaming a key is reported as a single `Renamed` change by the matcher.
    #[test]
    fn test_rename_detected_end_to_end() {
        let before = parse_config("{\n  \"timeout\": 30\n}\n");
        let after = parse_config("{\n  \"request_timeout\": 30\n}\n");

        let result = match_entities(&before, &after, "config.json", None, None, None);

        assert_eq!(result.changes.len(), 1);
        let change = &result.changes[0];
        assert_eq!(change.change_type, ChangeType::Renamed);
        assert_eq!(change.entity_name, "request_timeout");
    }

    /// A renamed scalar property keeps its structural hash.
    #[test]
    fn test_renamed_scalar_property_shares_structural_hash() {
        assert_rename_keeps_structural_hash(
            "{\n  \"timeout\": 30\n}\n",
            "{\n  \"request_timeout\": 30\n}\n",
        );
    }

    /// A renamed object-valued property keeps its structural hash.
    #[test]
    fn test_renamed_object_property_shares_structural_hash() {
        assert_rename_keeps_structural_hash(
            "{\n  \"config\": {\n    \"port\": 8080\n  }\n}\n",
            "{\n  \"settings\": {\n    \"port\": 8080\n  }\n}\n",
        );
    }
}