// xidl_parser/parser.rs

1mod doc_comments;
2mod normalize;
3
4use crate::error::ParserResult;
5use std::collections::{HashMap, HashSet};
6use tree_sitter::{InputEdit, Node, Parser, Point, Tree};
7
/// Mutable state threaded through the conversion of syntax-tree nodes into
/// typed values.
///
/// Derives `Debug` so a context can be printed in logs and test failures.
#[derive(Debug)]
pub struct ParseContext<'a> {
    /// Raw bytes of the text that node byte offsets index into.
    pub source: &'a [u8],
    /// String-to-string symbol table; empty when built by `ParseContext::new`.
    pub symbols: HashMap<String, String>,
    /// Start byte offsets of nodes whose doc comment has already been taken.
    doc_consumed: HashSet<usize>,
}
13
14pub trait IncludeResolver {
15    fn resolve(&mut self, parent_path: Option<&str>, path: &str) -> ParserResult<(String, String)>;
16}
17
18struct NoopIncludeResolver;
19impl IncludeResolver for NoopIncludeResolver {
20    fn resolve(
21        &mut self,
22        _parent_path: Option<&str>,
23        _path: &str,
24    ) -> ParserResult<(String, String)> {
25        Err(crate::error::ParseError::Message(
26            "Include resolution not supported in this context".to_string(),
27        ))
28    }
29}
30
31impl<'a> ParseContext<'a> {
32    pub fn new(source: &'a [u8]) -> Self {
33        Self {
34            source,
35            symbols: HashMap::new(),
36            doc_consumed: HashSet::new(),
37        }
38    }
39
40    pub fn node_text(&self, node: &Node) -> ParserResult<&str> {
41        Ok(node.utf8_text(self.source)?)
42    }
43
44    pub fn take_doc_comment(&mut self, node: &Node) -> Option<String> {
45        let start = node.start_byte();
46        if self.doc_consumed.contains(&start) {
47            return None;
48        }
49        let doc = doc_comments::extract(self.source, start);
50        if doc.is_some() {
51            self.doc_consumed.insert(start);
52        }
53        doc
54    }
55}
56
57pub trait FromTreeSitter<'a>: Sized {
58    fn from_node(node: Node<'a>, context: &mut ParseContext<'a>) -> ParserResult<Self>;
59}
60
61impl<'a> FromTreeSitter<'a> for String {
62    fn from_node(node: Node<'a>, context: &mut ParseContext<'a>) -> ParserResult<Self> {
63        Ok(context.node_text(&node)?.to_string())
64    }
65}
66
67impl<'a, T> FromTreeSitter<'a> for Box<T>
68where
69    T: FromTreeSitter<'a>,
70{
71    fn from_node(node: Node<'a>, context: &mut ParseContext<'a>) -> ParserResult<Self> {
72        Ok(Box::new(T::from_node(node, context)?))
73    }
74}
75
76pub fn parser_text(text: &str) -> ParserResult<crate::typed_ast::Specification> {
77    parser_text_with_resolver(text, None, &mut NoopIncludeResolver)
78}
79
80pub fn parser_text_with_resolver(
81    text: &str,
82    initial_path: Option<&str>,
83    resolver: &mut dyn IncludeResolver,
84) -> ParserResult<crate::typed_ast::Specification> {
85    use crate::typed_ast::Specification;
86
87    let mut parser = tree_sitter::Parser::new();
88    parser.set_language(&tree_sitter_idl::language()).unwrap();
89
90    let mut source = text.to_string();
91    let mut tree_source = normalize::source(text).into_owned();
92
93    let mut tree = parser.parse(&tree_source, None).ok_or_else(|| {
94        crate::error::ParseError::TreeSitterError("Failed to parse text".to_string())
95    })?;
96
97    if let Some(path) = initial_path {
98        let mut include_stack = Vec::new();
99        let source_len = tree_source.len();
100        expand_includes(
101            &mut source,
102            &mut tree_source,
103            &mut tree,
104            &mut parser,
105            resolver,
106            &mut include_stack,
107            path.to_string(),
108            0,
109            source_len,
110        )?;
111    }
112
113    let root_node = tree.root_node();
114    if root_node.has_error() {
115        return Err(crate::error::ParseError::TreeSitterError(
116            "Failed to parse text".to_string(),
117        ));
118    }
119    let mut context = ParseContext::new(source.as_bytes());
120
121    Specification::from_node(root_node, &mut context)
122}
123
124#[allow(clippy::too_many_arguments)]
125fn expand_includes(
126    source: &mut String,
127    tree_source: &mut String,
128    tree: &mut Tree,
129    parser: &mut Parser,
130    resolver: &mut dyn IncludeResolver,
131    include_stack: &mut Vec<String>,
132    current_path: String,
133    start_offset: usize,
134    mut end_offset: usize,
135) -> ParserResult<usize> {
136    if include_stack.contains(&current_path) {
137        let chain = include_stack.join(" -> ");
138        return Err(crate::error::ParseError::Message(format!(
139            "cyclic include detected: {} -> {}",
140            chain, current_path
141        )));
142    }
143    include_stack.push(current_path.clone());
144
145    let mut search_start = start_offset;
146    while let Some(node) = find_first_include_in_range(tree.root_node(), search_start, end_offset) {
147        let node_start = node.start_byte();
148        let node_end = node.end_byte();
149        let node_start_pos = node.start_position();
150        let node_end_pos = node.end_position();
151
152        let path_str = extract_include_path(node, tree_source)?;
153        let (actual_path, content) = resolver.resolve(Some(&current_path), &path_str)?;
154
155        let normalized_content = normalize::source(&content);
156        let new_len = normalized_content.len();
157
158        // Edit tree
159        let edit = InputEdit {
160            start_byte: node_start,
161            old_end_byte: node_end,
162            new_end_byte: node_start + new_len,
163            start_position: node_start_pos,
164            old_end_position: node_end_pos,
165            new_end_position: get_pos_after(node_start_pos, &normalized_content),
166        };
167        tree.edit(&edit);
168
169        // Update source and tree_source
170        source.replace_range(node_start..node_end, &content);
171        tree_source.replace_range(node_start..node_end, &normalized_content);
172
173        // Re-parse
174        *tree = parser.parse(&*tree_source, Some(tree)).ok_or_else(|| {
175            crate::error::ParseError::TreeSitterError("Failed to re-parse text".to_string())
176        })?;
177
178        if tree.root_node().has_error()
179            && has_error_in_range(tree.root_node(), node_start, node_start + new_len)
180        {
181            return Err(crate::error::ParseError::Message(format!(
182                "failed to parse include '{}'",
183                path_str
184            )));
185        }
186
187        // Recurse
188        let expanded_len = expand_includes(
189            source,
190            tree_source,
191            tree,
192            parser,
193            resolver,
194            include_stack,
195            actual_path,
196            node_start,
197            node_start + new_len,
198        )?;
199
200        let diff = (expanded_len as isize) - ((node_end - node_start) as isize);
201        end_offset = ((end_offset as isize) + diff) as usize;
202        search_start = node_start + expanded_len;
203    }
204
205    include_stack.pop();
206    Ok(end_offset - start_offset)
207}
208
209fn find_first_include_in_range(node: Node, start: usize, end: usize) -> Option<Node> {
210    if node.start_byte() >= end || node.end_byte() <= start {
211        return None;
212    }
213
214    if node.kind() == "preproc_include" && node.start_byte() >= start && node.end_byte() <= end {
215        return Some(node);
216    }
217
218    let mut cursor = node.walk();
219    for child in node.children(&mut cursor) {
220        if let Some(found) = find_first_include_in_range(child, start, end) {
221            return Some(found);
222        }
223    }
224    None
225}
226
227fn has_error_in_range(node: Node, start: usize, end: usize) -> bool {
228    if node.start_byte() >= end || node.end_byte() <= start {
229        return false;
230    }
231    if node.is_error() || node.is_missing() {
232        return true;
233    }
234    let mut cursor = node.walk();
235    for child in node.children(&mut cursor) {
236        if has_error_in_range(child, start, end) {
237            return true;
238        }
239    }
240    false
241}
242
243fn get_pos_after(start: Point, text: &str) -> Point {
244    let mut row = start.row;
245    let mut col = start.column;
246    for b in text.as_bytes() {
247        if *b == b'\n' {
248            row += 1;
249            col = 0;
250        } else {
251            col += 1;
252        }
253    }
254    Point { row, column: col }
255}
256
257fn extract_include_path(node: Node, source: &str) -> ParserResult<String> {
258    let mut cursor = node.walk();
259    for child in node.children(&mut cursor) {
260        match child.kind() {
261            "string_literal" => {
262                let text = child.utf8_text(source.as_bytes())?;
263                return Ok(text
264                    .trim_matches(|c| c == '"' || c == '<' || c == '>')
265                    .to_string());
266            }
267            "system_lib_string" => {
268                let text = child.utf8_text(source.as_bytes())?;
269                return Err(crate::error::ParseError::Message(format!(
270                    "unsupported include path syntax {}; only string literal includes are supported",
271                    text
272                )));
273            }
274            "identifier" => {
275                let text = child.utf8_text(source.as_bytes())?;
276                return Err(crate::error::ParseError::Message(format!(
277                    "unsupported include identifier '{}'; only string literal includes are supported",
278                    text
279                )));
280            }
281            _ => {}
282        }
283    }
284    Err(crate::error::ParseError::UnexpectedNode(
285        "missing include path".to_string(),
286    ))
287}
288
289pub fn normalize_source_for_tree_sitter(text: &str) -> std::borrow::Cow<'_, str> {
290    normalize::source(text)
291}
292
293#[cfg(test)]
294mod tests;