1use super::{byte_to_line_range, line_starts, NodeKind, StructuralNode};
2use serde_json::Value;
3use std::collections::BTreeMap;
4
/// Finds the first occurrence of `needle` in `haystack` at or after byte offset `start`.
///
/// Returns the absolute byte offset (relative to the start of `haystack`) of the match.
/// Returns `None` when there is no match, and also when `start` lies past the end of
/// `haystack` or does not fall on a UTF-8 character boundary (instead of panicking).
fn find_in(haystack: &str, needle: &str, start: usize) -> Option<usize> {
    // `str::get` is the checked form of `haystack[start..]`.
    let tail = haystack.get(start..)?;
    tail.find(needle).map(|off| start + off)
}
8
/// Returns the byte offset of the first character at or after `pos` that is not JSON
/// whitespace (space, line feed, carriage return or tab).
///
/// Yields `source.len()` when only whitespace remains.
///
/// # Panics
///
/// Panics if `pos` is past the end of `source` or not on a character boundary.
fn skip_ws(source: &str, pos: usize) -> usize {
    let rest = &source[pos..];
    let trimmed = rest.trim_start_matches(|c: char| matches!(c, ' ' | '\n' | '\r' | '\t'));
    // The number of bytes trimmed off the front is exactly how far the cursor moves.
    pos + (rest.len() - trimmed.len())
}
19
/// Finds where the JSON value beginning at byte offset `start` stops.
///
/// Scans forward while tracking string literals (including backslash escapes) and the
/// nesting of `{}` / `[]` opened by the value itself. The scan stops at the first
/// delimiter that belongs to the *enclosing* container:
///
/// * a `,` at nesting depth zero, or
/// * a `}` / `]` at nesting depth zero (the enclosing container's closer).
///
/// In both cases the offset **of that delimiter** is returned, so the delimiter is never
/// part of the value's span (whitespace between the value and the delimiter is). If no
/// such delimiter exists, `source.len()` is returned.
///
/// # Panics
///
/// Panics if `start` is past the end of `source` or not on a character boundary.
fn find_value_end(source: &str, start: usize) -> usize {
    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escaped = false;

    for (off, ch) in source[start..].char_indices() {
        if in_string {
            if escaped {
                // The character following a backslash is consumed verbatim.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            '"' => in_string = true,
            '{' | '[' => depth += 1,
            '}' | ']' => {
                if depth == 0 {
                    // This closer belongs to the enclosing container, not to the value.
                    return start + off;
                }
                depth -= 1;
            }
            ',' if depth == 0 => return start + off,
            _ => {}
        }
    }
    source.len()
}
65
66#[allow(clippy::too_many_arguments)]
67fn walk_value(
68 source: &str,
69 value: &Value,
70 path: &[String],
71 pos: &mut usize,
72 file_id: u64,
73 ls: &[usize],
74 nodes: &mut Vec<StructuralNode>,
75 parent_id: Option<super::NodeId>,
76 depth: u32,
77) {
78 match value {
79 Value::Object(map) => {
80 *pos = skip_ws(source, *pos);
81 if !source[*pos..].starts_with('{') {
82 if let Some(brace) = find_in(source, "{", *pos) {
83 *pos = brace;
84 } else {
85 return;
86 }
87 }
88 *pos += 1;
89 let mut sorted: BTreeMap<&String, &Value> = BTreeMap::new();
90 for (k, v) in map {
91 sorted.insert(k, v);
92 }
93 for (key, val) in sorted {
94 *pos = skip_ws(source, *pos);
95 let quoted = format!("\"{key}\"");
96 if let Some(k_start) = find_in(source, "ed, *pos) {
97 *pos = k_start + quoted.len();
98 *pos = skip_ws(source, *pos);
99 if source[*pos..].starts_with(':') {
100 *pos += 1;
101 }
102 *pos = skip_ws(source, *pos);
103
104 let mut val_start = *pos;
105 let val_end = find_value_end(source, val_start);
106 *pos = val_end;
107 *pos = skip_ws(source, *pos);
108 if source[*pos..].starts_with(',') {
109 *pos += 1;
110 }
111
112 let mut child_path = path.to_vec();
113 child_path.push(key.clone());
114
115 let id = StructuralNode::make_id(file_id, NodeKind::JsonKey, &child_path);
116 let (line_s, line_e) = byte_to_line_range(ls, k_start, val_end);
117
118 nodes.push(StructuralNode {
119 id,
120 file_id,
121 kind: NodeKind::JsonKey,
122 label: key.clone(),
123 path: child_path.clone(),
124 byte_range: (k_start, val_end),
125 line_range: (line_s, line_e),
126 parent: parent_id,
127 depth,
128 });
129
130 walk_value(
131 source,
132 val,
133 &child_path,
134 &mut val_start,
135 file_id,
136 ls,
137 nodes,
138 Some(id),
139 depth + 1,
140 );
141 }
142 }
143 }
144 Value::Array(arr) => {
145 *pos = skip_ws(source, *pos);
146 if !source[*pos..].starts_with('[') {
147 if let Some(bracket) = find_in(source, "[", *pos) {
148 *pos = bracket;
149 } else {
150 return;
151 }
152 }
153 *pos += 1;
154 for (idx, item) in arr.iter().enumerate() {
155 *pos = skip_ws(source, *pos);
156 let mut item_start = *pos;
157 let item_end = find_value_end(source, item_start);
158 *pos = item_end;
159 *pos = skip_ws(source, *pos);
160 if source[*pos..].starts_with(',') {
161 *pos += 1;
162 }
163
164 let mut child_path = path.to_vec();
165 child_path.push(idx.to_string());
166
167 let id = StructuralNode::make_id(file_id, NodeKind::JsonKey, &child_path);
168 let (line_s, line_e) = byte_to_line_range(ls, item_start, item_end);
169
170 nodes.push(StructuralNode {
171 id,
172 file_id,
173 kind: NodeKind::JsonKey,
174 label: format!("[{idx}]"),
175 path: child_path.clone(),
176 byte_range: (item_start, item_end),
177 line_range: (line_s, line_e),
178 parent: parent_id,
179 depth,
180 });
181
182 walk_value(
183 source,
184 item,
185 &child_path,
186 &mut item_start,
187 file_id,
188 ls,
189 nodes,
190 Some(id),
191 depth + 1,
192 );
193 }
194 }
195 _ => {}
196 }
197}
198
199fn assign_parents(nodes: &mut [StructuralNode]) {
200 let n = nodes.len();
201 for i in 0..n {
202 let my_range = nodes[i].byte_range;
203 let my_depth = nodes[i].depth;
204 let my_path = nodes[i].path.clone();
205 for j in (0..i).rev() {
206 let other_range = nodes[j].byte_range;
207 if other_range.0 <= my_range.0
208 && my_range.1 <= other_range.1
209 && nodes[j].depth < my_depth
210 && my_path.starts_with(&nodes[j].path)
211 && nodes[j].path.len() + 1 == nodes[i].path.len()
212 {
213 nodes[i].parent = Some(nodes[j].id);
214 break;
215 }
216 }
217 }
218}
219
220pub fn parse(file_id: u64, source: &str) -> Vec<StructuralNode> {
222 let ls = line_starts(source);
223 let value: Value = match serde_json::from_str(source) {
224 Ok(v) => v,
225 Err(_) => return Vec::new(),
226 };
227
228 let mut nodes = Vec::new();
229 let mut pos = 0;
230 walk_value(
231 source,
232 &value,
233 &[],
234 &mut pos,
235 file_id,
236 &ls,
237 &mut nodes,
238 None,
239 0,
240 );
241 assign_parents(&mut nodes);
242 nodes
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structural::NodeKind;

    // Small document with one nested object and one array.
    const SAMPLE: &str = r#"{
  "database": {
    "host": "localhost",
    "port": 5432
  },
  "items": [1, 2, 3]
}"#;

    /// Root-level members are reported without a parent.
    #[test]
    fn extracts_top_level_keys() {
        let nodes = parse(1, SAMPLE);
        let labels: Vec<&str> = nodes
            .iter()
            .filter(|n| n.kind == NodeKind::JsonKey && n.parent.is_none())
            .map(|n| n.label.as_str())
            .collect();
        assert!(
            labels.contains(&"database"),
            "should find database key, got {labels:?}"
        );
        assert!(
            labels.contains(&"items"),
            "should find items key, got {labels:?}"
        );
    }

    /// A member of a nested object points at the member that holds the object.
    #[test]
    fn extracts_nested_key() {
        let nodes = parse(1, SAMPLE);
        let host = nodes
            .iter()
            .find(|n| n.label == "host" && n.kind == NodeKind::JsonKey)
            .unwrap();
        let parent_id = host.parent;
        assert!(parent_id.is_some(), "host should have a parent");
        let parent = nodes.iter().find(|n| Some(n.id) == parent_id).unwrap();
        assert_eq!(parent.label, "database");
    }

    /// Every array element gets a node labelled with its bracketed index.
    #[test]
    fn extracts_array_index() {
        let nodes = parse(1, SAMPLE);
        let element_count = nodes
            .iter()
            .filter(|n| matches!(n.label.as_str(), "[0]" | "[1]" | "[2]"))
            .count();
        assert_eq!(element_count, 3, "expected 3 array element nodes");
    }
}