Skip to main content

argyph_parse/structural/
json.rs

1use super::{byte_to_line_range, line_starts, NodeKind, StructuralNode};
2use serde_json::Value;
3use std::collections::BTreeMap;
4
/// Finds the first occurrence of `needle` in `haystack` at or after byte
/// offset `start`.
///
/// Returns the absolute byte offset of the match within `haystack`, or `None`
/// when there is no match. A `start` that lies past the end of `haystack` or
/// that does not fall on a UTF-8 character boundary also yields `None` rather
/// than panicking.
fn find_in(haystack: &str, needle: &str, start: usize) -> Option<usize> {
    // `get` performs the bounds and char-boundary checks that plain slicing
    // would turn into a panic.
    let tail = haystack.get(start..)?;
    tail.find(needle).map(|off| start + off)
}
8
/// Advances past JSON insignificant whitespace.
///
/// Starting at byte offset `pos`, skips any run of space, line feed, carriage
/// return and tab characters and returns the offset of the first character
/// that is none of those (or `source.len()` if only whitespace remains).
fn skip_ws(source: &str, pos: usize) -> usize {
    let tail = &source[pos..];
    tail.find(|c: char| !matches!(c, ' ' | '\n' | '\r' | '\t'))
        .map_or(source.len(), |off| pos + off)
}
19
/// Returns the byte offset just past the JSON value that begins at `start`.
///
/// The scan walks forward, tracking string literals (including backslash
/// escapes) and bracket nesting, and stops at the first delimiter that belongs
/// to the *enclosing* container: a `,` at nesting depth zero, or a `}` / `]`
/// that has no matching opener inside the value. The returned offset is one
/// past the last non-whitespace byte of the value, so it includes neither the
/// enclosing delimiter nor any whitespace in front of it.
///
/// If no delimiter is found the value is taken to run to the end of the input
/// (minus trailing whitespace). If `start` is not a valid slice offset for
/// `source`, `source.len()` is returned.
fn find_value_end(source: &str, start: usize) -> usize {
    let rest = match source.get(start..) {
        Some(rest) => rest,
        None => return source.len(),
    };

    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escaped = false;
    // One past the last non-whitespace byte seen so far.
    let mut end = start;

    for (off, ch) in rest.char_indices() {
        let next = start + off + ch.len_utf8();

        if in_string {
            if escaped {
                // The character after a backslash is consumed verbatim.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            end = next;
            continue;
        }

        match ch {
            '"' => in_string = true,
            '{' | '[' => depth += 1,
            '}' | ']' => {
                if depth == 0 {
                    // Closer of the enclosing container: not part of the value.
                    return end;
                }
                depth -= 1;
            }
            ',' if depth == 0 => return end,
            // Whitespace outside strings never extends the value.
            ' ' | '\n' | '\r' | '\t' => continue,
            _ => {}
        }
        end = next;
    }
    end
}
65
66#[allow(clippy::too_many_arguments)]
67fn walk_value(
68    source: &str,
69    value: &Value,
70    path: &[String],
71    pos: &mut usize,
72    file_id: u64,
73    ls: &[usize],
74    nodes: &mut Vec<StructuralNode>,
75    parent_id: Option<super::NodeId>,
76    depth: u32,
77) {
78    match value {
79        Value::Object(map) => {
80            *pos = skip_ws(source, *pos);
81            if !source[*pos..].starts_with('{') {
82                if let Some(brace) = find_in(source, "{", *pos) {
83                    *pos = brace;
84                } else {
85                    return;
86                }
87            }
88            *pos += 1;
89            let mut sorted: BTreeMap<&String, &Value> = BTreeMap::new();
90            for (k, v) in map {
91                sorted.insert(k, v);
92            }
93            for (key, val) in sorted {
94                *pos = skip_ws(source, *pos);
95                let quoted = format!("\"{key}\"");
96                if let Some(k_start) = find_in(source, &quoted, *pos) {
97                    *pos = k_start + quoted.len();
98                    *pos = skip_ws(source, *pos);
99                    if source[*pos..].starts_with(':') {
100                        *pos += 1;
101                    }
102                    *pos = skip_ws(source, *pos);
103
104                    let mut val_start = *pos;
105                    let val_end = find_value_end(source, val_start);
106                    *pos = val_end;
107                    *pos = skip_ws(source, *pos);
108                    if source[*pos..].starts_with(',') {
109                        *pos += 1;
110                    }
111
112                    let mut child_path = path.to_vec();
113                    child_path.push(key.clone());
114
115                    let id = StructuralNode::make_id(file_id, NodeKind::JsonKey, &child_path);
116                    let (line_s, line_e) = byte_to_line_range(ls, k_start, val_end);
117
118                    nodes.push(StructuralNode {
119                        id,
120                        file_id,
121                        kind: NodeKind::JsonKey,
122                        label: key.clone(),
123                        path: child_path.clone(),
124                        byte_range: (k_start, val_end),
125                        line_range: (line_s, line_e),
126                        parent: parent_id,
127                        depth,
128                    });
129
130                    walk_value(
131                        source,
132                        val,
133                        &child_path,
134                        &mut val_start,
135                        file_id,
136                        ls,
137                        nodes,
138                        Some(id),
139                        depth + 1,
140                    );
141                }
142            }
143        }
144        Value::Array(arr) => {
145            *pos = skip_ws(source, *pos);
146            if !source[*pos..].starts_with('[') {
147                if let Some(bracket) = find_in(source, "[", *pos) {
148                    *pos = bracket;
149                } else {
150                    return;
151                }
152            }
153            *pos += 1;
154            for (idx, item) in arr.iter().enumerate() {
155                *pos = skip_ws(source, *pos);
156                let mut item_start = *pos;
157                let item_end = find_value_end(source, item_start);
158                *pos = item_end;
159                *pos = skip_ws(source, *pos);
160                if source[*pos..].starts_with(',') {
161                    *pos += 1;
162                }
163
164                let mut child_path = path.to_vec();
165                child_path.push(idx.to_string());
166
167                let id = StructuralNode::make_id(file_id, NodeKind::JsonKey, &child_path);
168                let (line_s, line_e) = byte_to_line_range(ls, item_start, item_end);
169
170                nodes.push(StructuralNode {
171                    id,
172                    file_id,
173                    kind: NodeKind::JsonKey,
174                    label: format!("[{idx}]"),
175                    path: child_path.clone(),
176                    byte_range: (item_start, item_end),
177                    line_range: (line_s, line_e),
178                    parent: parent_id,
179                    depth,
180                });
181
182                walk_value(
183                    source,
184                    item,
185                    &child_path,
186                    &mut item_start,
187                    file_id,
188                    ls,
189                    nodes,
190                    Some(id),
191                    depth + 1,
192                );
193            }
194        }
195        _ => {}
196    }
197}
198
199fn assign_parents(nodes: &mut [StructuralNode]) {
200    let n = nodes.len();
201    for i in 0..n {
202        let my_range = nodes[i].byte_range;
203        let my_depth = nodes[i].depth;
204        let my_path = nodes[i].path.clone();
205        for j in (0..i).rev() {
206            let other_range = nodes[j].byte_range;
207            if other_range.0 <= my_range.0
208                && my_range.1 <= other_range.1
209                && nodes[j].depth < my_depth
210                && my_path.starts_with(&nodes[j].path)
211                && nodes[j].path.len() + 1 == nodes[i].path.len()
212            {
213                nodes[i].parent = Some(nodes[j].id);
214                break;
215            }
216        }
217    }
218}
219
220/// Parse a JSON source into structural nodes.
221pub fn parse(file_id: u64, source: &str) -> Vec<StructuralNode> {
222    let ls = line_starts(source);
223    let value: Value = match serde_json::from_str(source) {
224        Ok(v) => v,
225        Err(_) => return Vec::new(),
226    };
227
228    let mut nodes = Vec::new();
229    let mut pos = 0;
230    walk_value(
231        source,
232        &value,
233        &[],
234        &mut pos,
235        file_id,
236        &ls,
237        &mut nodes,
238        None,
239        0,
240    );
241    assign_parents(&mut nodes);
242    nodes
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structural::NodeKind;

    // Small document with a nested object and a top-level array member.
    const SAMPLE: &str = r#"{
  "database": {
    "host": "localhost",
    "port": 5432
  },
  "items": [1, 2, 3]
}"#;

    /// Members of the root object are reported as parentless `JsonKey` nodes.
    #[test]
    fn extracts_top_level_keys() {
        let nodes = parse(1, SAMPLE);
        let labels: Vec<&str> = nodes
            .iter()
            .filter(|n| n.kind == NodeKind::JsonKey && n.parent.is_none())
            .map(|n| n.label.as_str())
            .collect();
        assert!(
            labels.contains(&"database"),
            "should find database key, got {labels:?}"
        );
        assert!(
            labels.contains(&"items"),
            "should find items key, got {labels:?}"
        );
    }

    /// A key inside a nested object points at the enclosing member as parent.
    #[test]
    fn extracts_nested_key() {
        let nodes = parse(1, SAMPLE);
        let host = nodes
            .iter()
            .find(|n| n.kind == NodeKind::JsonKey && n.label == "host")
            .unwrap();
        assert!(host.parent.is_some(), "host should have a parent");
        let parent_id = host.parent.unwrap();
        let parent = nodes.iter().find(|n| n.id == parent_id).unwrap();
        assert_eq!(parent.label, "database");
    }

    /// Every array element gets its own node labelled with its index.
    #[test]
    fn extracts_array_index() {
        let nodes = parse(1, SAMPLE);
        let array_nodes: Vec<&StructuralNode> = nodes
            .iter()
            .filter(|n| matches!(n.label.as_str(), "[0]" | "[1]" | "[2]"))
            .collect();
        assert_eq!(array_nodes.len(), 3, "expected 3 array element nodes");
    }
}
293            .filter(|n| n.label == "[0]" || n.label == "[1]" || n.label == "[2]")
294            .collect();
295        assert_eq!(array_nodes.len(), 3, "expected 3 array element nodes");
296    }
297}