Skip to main content

argyph_parse/structural/
yaml.rs

1use super::{byte_to_line_range, line_starts, NodeKind, StructuralNode};
2use serde_yaml::Value;
3
/// Counts the space characters that open the line containing `pos`.
///
/// Only the text between the start of that line and `pos` is inspected, so a
/// `pos` sitting at the very start of a line (or inside its leading spaces)
/// yields the number of spaces seen so far, not the full indentation.
///
/// # Panics
///
/// Panics if `pos` is past the end of `source` or not on a character boundary,
/// because the source is sliced at `pos`.
fn indent_of(source: &str, pos: usize) -> usize {
    let head = &source[..pos];
    // Everything after the last newline before `pos` is the current line's prefix.
    let current_line = match head.rfind('\n') {
        Some(newline) => &head[newline + 1..],
        None => head,
    };
    current_line.len() - current_line.trim_start_matches(' ').len()
}
9
10fn find_yaml_sibling(source: &str, key: &str, start: usize) -> Option<usize> {
11    let indent = indent_of(source, start);
12    let rest = &source[start..];
13    let mut pos = 0;
14    for line in rest.lines() {
15        let line_indent = line.chars().take_while(|c| *c == ' ').count();
16        if line_indent == indent {
17            let trimmed = line.trim_start();
18            if let Some(colon_pos) = trimmed.find(':') {
19                let candidate = trimmed[..colon_pos].trim_end();
20                if candidate == key {
21                    return Some(start + pos + (line.len() - trimmed.len()));
22                }
23            }
24        }
25        pos += line.len() + 1;
26        if pos >= rest.len() {
27            break;
28        }
29    }
30    None
31}
32
33#[allow(clippy::too_many_arguments)]
34fn walk_mapping(
35    source: &str,
36    mapping: &serde_yaml::Mapping,
37    path: &[String],
38    start_scan: usize,
39    file_id: u64,
40    ls: &[usize],
41    nodes: &mut Vec<StructuralNode>,
42    parent_id: Option<super::NodeId>,
43    depth: u32,
44) {
45    let mut scan_pos = start_scan;
46
47    for (key_val, val) in mapping {
48        let key = match key_val {
49            Value::String(s) => s.clone(),
50            other => format!("{other:?}"),
51        };
52
53        let key_start = find_yaml_sibling(source, &key, scan_pos).unwrap_or(scan_pos);
54        scan_pos = key_start + key.len();
55
56        let val_end = match val {
57            Value::Mapping(_) => {
58                let after_colon = &source[scan_pos..];
59                if let Some(nl) = after_colon.find('\n') {
60                    scan_pos + nl + 1
61                } else {
62                    source.len()
63                }
64            }
65            _ => {
66                let rest = &source[scan_pos..];
67                rest.find('\n').map_or(source.len(), |nl| scan_pos + nl)
68            }
69        };
70
71        let mut child_path = path.to_vec();
72        child_path.push(key.clone());
73
74        let id = StructuralNode::make_id(file_id, NodeKind::YamlKey, &child_path);
75        let (line_s, line_e) = byte_to_line_range(ls, key_start, val_end);
76
77        nodes.push(StructuralNode {
78            id,
79            file_id,
80            kind: NodeKind::YamlKey,
81            label: key.clone(),
82            path: child_path.clone(),
83            byte_range: (key_start, val_end),
84            line_range: (line_s, line_e),
85            parent: parent_id,
86            depth,
87        });
88
89        if let Value::Mapping(inner) = val {
90            let inner_start = val_end;
91            walk_mapping(
92                source,
93                inner,
94                &child_path,
95                inner_start,
96                file_id,
97                ls,
98                nodes,
99                Some(id),
100                depth + 1,
101            );
102        }
103
104        scan_pos = val_end;
105    }
106}
107
108fn assign_parents(nodes: &mut [StructuralNode]) {
109    let n = nodes.len();
110    for i in 0..n {
111        let my_range = nodes[i].byte_range;
112        let my_depth = nodes[i].depth;
113        for j in (0..i).rev() {
114            let other_range = nodes[j].byte_range;
115            if other_range.0 <= my_range.0
116                && my_range.1 <= other_range.1
117                && nodes[j].depth < my_depth
118            {
119                nodes[i].parent = Some(nodes[j].id);
120                break;
121            }
122        }
123    }
124}
125
126/// Parse a YAML source into structural nodes.
127pub fn parse(file_id: u64, source: &str) -> Vec<StructuralNode> {
128    let ls = line_starts(source);
129    let value: Value = match serde_yaml::from_str(source) {
130        Ok(v) => v,
131        Err(_) => return Vec::new(),
132    };
133
134    let mut nodes = Vec::new();
135    if let Value::Mapping(map) = &value {
136        walk_mapping(source, map, &[], 0, file_id, &ls, &mut nodes, None, 0);
137    }
138    assign_parents(&mut nodes);
139    nodes
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structural::NodeKind;

    const SAMPLE: &str = "server:\n  host: localhost\n  port: 8080\nlogging:\n  level: info\n";

    /// Returns the label of the parent of the YAML key node named `label`.
    ///
    /// Panics when the node is missing, has no parent, or its parent id does
    /// not resolve to a node in `nodes`.
    fn parent_label_of<'a>(nodes: &'a [StructuralNode], label: &str) -> &'a str {
        let child = nodes
            .iter()
            .find(|n| n.label == label && n.kind == NodeKind::YamlKey)
            .unwrap();
        assert!(child.parent.is_some());
        let parent_id = child.parent.unwrap();
        let parent = nodes.iter().find(|n| n.id == parent_id).unwrap();
        parent.label.as_str()
    }

    #[test]
    fn extracts_top_level_keys() {
        let nodes = parse(1, SAMPLE);
        // Top-level keys are the YAML key nodes without a parent.
        let labels: Vec<&str> = nodes
            .iter()
            .filter(|n| n.kind == NodeKind::YamlKey && n.parent.is_none())
            .map(|n| n.label.as_str())
            .collect();
        assert!(
            labels.contains(&"server"),
            "should find server, got {labels:?}"
        );
        assert!(
            labels.contains(&"logging"),
            "should find logging, got {labels:?}"
        );
    }

    #[test]
    fn extracts_nested_keys() {
        let nodes = parse(1, SAMPLE);
        assert_eq!(parent_label_of(&nodes, "host"), "server");
        assert_eq!(parent_label_of(&nodes, "level"), "logging");
    }
}