Skip to main content

argyph_parse/structural/
toml_parser.rs

1use super::{byte_to_line_range, line_starts, NodeKind, StructuralNode};
2
3#[allow(dead_code, unused_imports)]
4use toml::Spanned;
5
/// Locates the first assignment to `key` at or after byte offset `start`.
///
/// Returns the absolute byte span `(key_start, key_end)` of the key text, or
/// `None` when no assignment is found, when `key` is empty, or when `start`
/// is past the end of `source` / not on a char boundary.
///
/// A textual occurrence of `key` only counts when it sits in key position:
/// * it is the first non-whitespace text on its line, or it directly follows
///   `{` or `,` on a line without `#` (an inline-table member);
/// * it is followed by optional spaces/tabs and then `=`.
///
/// This rejects longer keys that merely end in `key` (`export` vs `port`),
/// dotted segments (`a.port`), commented-out assignments, and text inside
/// ordinary string values, while still accepting `key=value` and aligned
/// `key     = value` layouts. Quoted keys (`"key" = …`) are not recognised.
fn find_toml_key_span(source: &str, key: &str, start: usize) -> Option<(usize, usize)> {
    // An empty key cannot be located textually (and would match everywhere).
    if key.is_empty() {
        return None;
    }
    // `get` instead of indexing: an out-of-range or mid-char `start` is "not found".
    let rest = source.get(start..)?;

    rest.match_indices(key).find_map(|(off, _)| {
        let abs = start + off;
        let end = abs + key.len();

        // Look at the whole line, even if `start` fell in the middle of it.
        let line_start = source[..abs].rfind('\n').map_or(0, |nl| nl + 1);
        let lead = source[line_start..abs].trim();
        let in_key_position = lead.is_empty()
            || (!lead.contains('#') && (lead.ends_with('{') || lead.ends_with(',')));

        let assigns = source[end..]
            .trim_start_matches(|c: char| c == ' ' || c == '\t')
            .starts_with('=');

        if in_key_position && assigns {
            Some((abs, end))
        } else {
            None
        }
    })
}
15
/// Finds the first line at or after byte offset `start` whose content, after
/// leading whitespace, begins with `prefix`.
///
/// Returns the absolute byte offset of the start of that line (including any
/// leading whitespace), or `None` if no line matches or `start` is past the
/// end of `source` / not on a char boundary.
fn find_line_starting_with(source: &str, prefix: &str, start: usize) -> Option<usize> {
    let rest = source.get(start..)?;
    let mut offset = start;
    // `split_inclusive` keeps each line's terminator, so `line.len()` is the
    // exact number of bytes consumed for both `\n` and `\r\n` endings.
    for line in rest.split_inclusive('\n') {
        if line.trim_start().starts_with(prefix) {
            return Some(offset);
        }
        offset += line.len();
    }
    None
}
30
/// Returns the byte offset of the next `[table]` header line at or after
/// `start`, or `source.len()` when there is none.
///
/// A header line is one whose first non-whitespace character is `[` but which
/// does not begin with `[[`; array-of-tables headers are therefore not treated
/// as boundaries. The returned offset is the start of the line, including any
/// leading whitespace. A `start` past the end of `source` (or not on a char
/// boundary) yields `source.len()`.
fn next_section_or_eof(source: &str, start: usize) -> usize {
    let rest = match source.get(start..) {
        Some(rest) => rest,
        None => return source.len(),
    };
    let mut offset = start;
    // Lines keep their terminator so the running offset stays exact for CRLF input.
    for line in rest.split_inclusive('\n') {
        let trimmed = line.trim_start();
        if trimmed.starts_with('[') && !trimmed.starts_with("[[") {
            return offset;
        }
        offset += line.len();
    }
    source.len()
}
46
47fn assign_parents(nodes: &mut [StructuralNode]) {
48    for i in 0..nodes.len() {
49        let my_range = nodes[i].byte_range;
50        let my_depth = nodes[i].depth;
51        for j in (0..i).rev() {
52            let other_range = nodes[j].byte_range;
53            if other_range.0 <= my_range.0
54                && my_range.1 <= other_range.1
55                && nodes[j].depth < my_depth
56            {
57                nodes[i].parent = Some(nodes[j].id);
58                break;
59            }
60        }
61    }
62}
63
64/// Parse a TOML source into structural nodes.
65pub fn parse(file_id: u64, source: &str) -> Vec<StructuralNode> {
66    let ls = line_starts(source);
67    let table: toml::Table = match toml::from_str(source) {
68        Ok(t) => t,
69        Err(_) => return Vec::new(),
70    };
71
72    let mut nodes = Vec::new();
73    let mut scan_pos = 0usize;
74
75    let mut section_order: Vec<(String, usize, usize)> = Vec::new();
76    {
77        let rest = source;
78        let mut pos = 0;
79        for line in rest.lines() {
80            let trimmed = line.trim_start();
81            if trimmed.starts_with('[') && !trimmed.starts_with("[[") {
82                let section_name = trimmed
83                    .trim_start_matches('[')
84                    .trim_end_matches(']')
85                    .trim()
86                    .to_string();
87                let start = source[..pos].rfind('\n').map_or(0, |n| n + 1);
88                section_order.push((section_name, start, pos));
89            }
90            pos += line.len() + 1;
91            if pos >= source.len() {
92                break;
93            }
94        }
95    }
96
97    for (key, value) in &table {
98        if value.is_table() {
99            let section_start =
100                find_line_starting_with(source, &format!("[{key}]"), 0).unwrap_or(scan_pos);
101            let section_end = next_section_or_eof(source, section_start + 1);
102
103            let path = vec![key.clone()];
104            let id = StructuralNode::make_id(file_id, NodeKind::TomlKey, &path);
105            let (line_s, line_e) = byte_to_line_range(&ls, section_start, section_end);
106
107            nodes.push(StructuralNode {
108                id,
109                file_id,
110                kind: NodeKind::TomlKey,
111                label: key.clone(),
112                path: path.clone(),
113                byte_range: (section_start, section_end),
114                line_range: (line_s, line_e),
115                parent: None,
116                depth: 0,
117            });
118
119            if let Some(inner) = value.as_table() {
120                let inner_scan = section_start + key.len() + 3;
121                let mut inner_pos = inner_scan;
122                for (ik, _iv) in inner {
123                    if let Some((k_start, k_end)) = find_toml_key_span(source, ik, inner_pos) {
124                        let line_end = source[k_end..]
125                            .find('\n')
126                            .map_or(source.len(), |off| k_end + off);
127                        let mut child_path = path.clone();
128                        child_path.push(ik.clone());
129
130                        let cid = StructuralNode::make_id(file_id, NodeKind::TomlKey, &child_path);
131                        let (cl_s, cl_e) = byte_to_line_range(&ls, k_start, line_end);
132                        nodes.push(StructuralNode {
133                            id: cid,
134                            file_id,
135                            kind: NodeKind::TomlKey,
136                            label: ik.clone(),
137                            path: child_path,
138                            byte_range: (k_start, line_end),
139                            line_range: (cl_s, cl_e),
140                            parent: None,
141                            depth: 1,
142                        });
143                        inner_pos = line_end + 1;
144                    }
145                }
146            }
147        } else if let Some((k_start, k_end)) = find_toml_key_span(source, key, scan_pos) {
148            let line_end = source[k_end..]
149                .find('\n')
150                .map_or(source.len(), |off| k_end + off);
151            let path = vec![key.clone()];
152            let id = StructuralNode::make_id(file_id, NodeKind::TomlKey, &path);
153            let (line_s, line_e) = byte_to_line_range(&ls, k_start, line_end);
154
155            nodes.push(StructuralNode {
156                id,
157                file_id,
158                kind: NodeKind::TomlKey,
159                label: key.clone(),
160                path,
161                byte_range: (k_start, line_end),
162                line_range: (line_s, line_e),
163                parent: None,
164                depth: 0,
165            });
166            scan_pos = line_end + 1;
167        }
168    }
169
170    assign_parents(&mut nodes);
171    nodes
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structural::NodeKind;

    /// Fixture: one bare top-level key followed by two `[section]` tables.
    const SAMPLE: &str =
        "title = \"x\"\n\n[server]\nhost = \"localhost\"\nport = 8080\n\n[logging]\nlevel = \"info\"\n";

    /// Returns the first `TomlKey` node labelled `label`; panics if there is none.
    fn key_node<'a>(nodes: &'a [StructuralNode], label: &str) -> &'a StructuralNode {
        nodes
            .iter()
            .find(|node| node.label == label && node.kind == NodeKind::TomlKey)
            .unwrap()
    }

    #[test]
    fn extracts_top_level_bare_key() {
        let nodes = parse(1, SAMPLE);
        match nodes.iter().find(|node| node.label == "title") {
            Some(title) => assert_eq!(title.kind, NodeKind::TomlKey),
            None => panic!("should find bare key 'title'"),
        }
    }

    #[test]
    fn extracts_section_and_keys() {
        let nodes = parse(1, SAMPLE);

        // (section, entry expected inside it, failure message)
        let expectations = [
            ("server", "host", "host should be within server section"),
            ("logging", "level", "level should be within logging section"),
        ];

        for (section_label, entry_label, message) in expectations {
            let section = key_node(&nodes, section_label);
            let entry = key_node(&nodes, entry_label);
            assert!(
                entry.parent.is_some() || entry.byte_range.0 >= section.byte_range.0,
                "{message}"
            );
        }
    }
}
218}