1mod doc_comments;
2mod normalize;
3
4use crate::error::ParserResult;
5use std::collections::{HashMap, HashSet};
6use tree_sitter::{InputEdit, Node, Parser, Point, Tree};
7
/// Mutable state threaded through every [`FromTreeSitter`] conversion of one parse.
pub struct ParseContext<'a> {
    /// Bytes of the text that syntax-tree nodes are sliced from.
    pub source: &'a [u8],
    /// String-to-string table. It is created empty by [`ParseContext::new`];
    /// whoever fills and reads it lives outside the visible part of this module.
    pub symbols: HashMap<String, String>,
    /// Start byte offsets of nodes whose doc comment has already been handed out.
    doc_consumed: HashSet<usize>,
}
13
14pub trait IncludeResolver {
15 fn resolve(&mut self, parent_path: Option<&str>, path: &str) -> ParserResult<(String, String)>;
16}
17
18struct NoopIncludeResolver;
19impl IncludeResolver for NoopIncludeResolver {
20 fn resolve(
21 &mut self,
22 _parent_path: Option<&str>,
23 _path: &str,
24 ) -> ParserResult<(String, String)> {
25 Err(crate::error::ParseError::Message(
26 "Include resolution not supported in this context".to_string(),
27 ))
28 }
29}
30
31impl<'a> ParseContext<'a> {
32 pub fn new(source: &'a [u8]) -> Self {
33 Self {
34 source,
35 symbols: HashMap::new(),
36 doc_consumed: HashSet::new(),
37 }
38 }
39
40 pub fn node_text(&self, node: &Node) -> ParserResult<&str> {
41 Ok(node.utf8_text(self.source)?)
42 }
43
44 pub fn take_doc_comment(&mut self, node: &Node) -> Option<String> {
45 let start = node.start_byte();
46 if self.doc_consumed.contains(&start) {
47 return None;
48 }
49 let doc = doc_comments::extract(self.source, start);
50 if doc.is_some() {
51 self.doc_consumed.insert(start);
52 }
53 doc
54 }
55}
56
57pub trait FromTreeSitter<'a>: Sized {
58 fn from_node(node: Node<'a>, context: &mut ParseContext<'a>) -> ParserResult<Self>;
59}
60
61impl<'a> FromTreeSitter<'a> for String {
62 fn from_node(node: Node<'a>, context: &mut ParseContext<'a>) -> ParserResult<Self> {
63 Ok(context.node_text(&node)?.to_string())
64 }
65}
66
67impl<'a, T> FromTreeSitter<'a> for Box<T>
68where
69 T: FromTreeSitter<'a>,
70{
71 fn from_node(node: Node<'a>, context: &mut ParseContext<'a>) -> ParserResult<Self> {
72 Ok(Box::new(T::from_node(node, context)?))
73 }
74}
75
76pub fn parser_text(text: &str) -> ParserResult<crate::typed_ast::Specification> {
77 parser_text_with_resolver(text, None, &mut NoopIncludeResolver)
78}
79
80pub fn parser_text_with_resolver(
81 text: &str,
82 initial_path: Option<&str>,
83 resolver: &mut dyn IncludeResolver,
84) -> ParserResult<crate::typed_ast::Specification> {
85 use crate::typed_ast::Specification;
86
87 let mut parser = tree_sitter::Parser::new();
88 parser.set_language(&tree_sitter_idl::language()).unwrap();
89
90 let mut source = text.to_string();
91 let mut tree_source = normalize::source(text).into_owned();
92
93 let mut tree = parser.parse(&tree_source, None).ok_or_else(|| {
94 crate::error::ParseError::TreeSitterError("Failed to parse text".to_string())
95 })?;
96
97 if let Some(path) = initial_path {
98 let mut include_stack = Vec::new();
99 let source_len = tree_source.len();
100 expand_includes(
101 &mut source,
102 &mut tree_source,
103 &mut tree,
104 &mut parser,
105 resolver,
106 &mut include_stack,
107 path.to_string(),
108 0,
109 source_len,
110 )?;
111 }
112
113 let root_node = tree.root_node();
114 if root_node.has_error() {
115 return Err(crate::error::ParseError::TreeSitterError(
116 "Failed to parse text".to_string(),
117 ));
118 }
119 let mut context = ParseContext::new(source.as_bytes());
120
121 Specification::from_node(root_node, &mut context)
122}
123
124#[allow(clippy::too_many_arguments)]
125fn expand_includes(
126 source: &mut String,
127 tree_source: &mut String,
128 tree: &mut Tree,
129 parser: &mut Parser,
130 resolver: &mut dyn IncludeResolver,
131 include_stack: &mut Vec<String>,
132 current_path: String,
133 start_offset: usize,
134 mut end_offset: usize,
135) -> ParserResult<usize> {
136 if include_stack.contains(¤t_path) {
137 let chain = include_stack.join(" -> ");
138 return Err(crate::error::ParseError::Message(format!(
139 "cyclic include detected: {} -> {}",
140 chain, current_path
141 )));
142 }
143 include_stack.push(current_path.clone());
144
145 let mut search_start = start_offset;
146 while let Some(node) = find_first_include_in_range(tree.root_node(), search_start, end_offset) {
147 let node_start = node.start_byte();
148 let node_end = node.end_byte();
149 let node_start_pos = node.start_position();
150 let node_end_pos = node.end_position();
151
152 let path_str = extract_include_path(node, tree_source)?;
153 let (actual_path, content) = resolver.resolve(Some(¤t_path), &path_str)?;
154
155 let normalized_content = normalize::source(&content);
156 let new_len = normalized_content.len();
157
158 let edit = InputEdit {
160 start_byte: node_start,
161 old_end_byte: node_end,
162 new_end_byte: node_start + new_len,
163 start_position: node_start_pos,
164 old_end_position: node_end_pos,
165 new_end_position: get_pos_after(node_start_pos, &normalized_content),
166 };
167 tree.edit(&edit);
168
169 source.replace_range(node_start..node_end, &content);
171 tree_source.replace_range(node_start..node_end, &normalized_content);
172
173 *tree = parser.parse(&*tree_source, Some(tree)).ok_or_else(|| {
175 crate::error::ParseError::TreeSitterError("Failed to re-parse text".to_string())
176 })?;
177
178 if tree.root_node().has_error()
179 && has_error_in_range(tree.root_node(), node_start, node_start + new_len)
180 {
181 return Err(crate::error::ParseError::Message(format!(
182 "failed to parse include '{}'",
183 path_str
184 )));
185 }
186
187 let expanded_len = expand_includes(
189 source,
190 tree_source,
191 tree,
192 parser,
193 resolver,
194 include_stack,
195 actual_path,
196 node_start,
197 node_start + new_len,
198 )?;
199
200 let diff = (expanded_len as isize) - ((node_end - node_start) as isize);
201 end_offset = ((end_offset as isize) + diff) as usize;
202 search_start = node_start + expanded_len;
203 }
204
205 include_stack.pop();
206 Ok(end_offset - start_offset)
207}
208
209fn find_first_include_in_range(node: Node, start: usize, end: usize) -> Option<Node> {
210 if node.start_byte() >= end || node.end_byte() <= start {
211 return None;
212 }
213
214 if node.kind() == "preproc_include" && node.start_byte() >= start && node.end_byte() <= end {
215 return Some(node);
216 }
217
218 let mut cursor = node.walk();
219 for child in node.children(&mut cursor) {
220 if let Some(found) = find_first_include_in_range(child, start, end) {
221 return Some(found);
222 }
223 }
224 None
225}
226
227fn has_error_in_range(node: Node, start: usize, end: usize) -> bool {
228 if node.start_byte() >= end || node.end_byte() <= start {
229 return false;
230 }
231 if node.is_error() || node.is_missing() {
232 return true;
233 }
234 let mut cursor = node.walk();
235 for child in node.children(&mut cursor) {
236 if has_error_in_range(child, start, end) {
237 return true;
238 }
239 }
240 false
241}
242
243fn get_pos_after(start: Point, text: &str) -> Point {
244 let mut row = start.row;
245 let mut col = start.column;
246 for b in text.as_bytes() {
247 if *b == b'\n' {
248 row += 1;
249 col = 0;
250 } else {
251 col += 1;
252 }
253 }
254 Point { row, column: col }
255}
256
257fn extract_include_path(node: Node, source: &str) -> ParserResult<String> {
258 let mut cursor = node.walk();
259 for child in node.children(&mut cursor) {
260 match child.kind() {
261 "string_literal" => {
262 let text = child.utf8_text(source.as_bytes())?;
263 return Ok(text
264 .trim_matches(|c| c == '"' || c == '<' || c == '>')
265 .to_string());
266 }
267 "system_lib_string" => {
268 let text = child.utf8_text(source.as_bytes())?;
269 return Err(crate::error::ParseError::Message(format!(
270 "unsupported include path syntax {}; only string literal includes are supported",
271 text
272 )));
273 }
274 "identifier" => {
275 let text = child.utf8_text(source.as_bytes())?;
276 return Err(crate::error::ParseError::Message(format!(
277 "unsupported include identifier '{}'; only string literal includes are supported",
278 text
279 )));
280 }
281 _ => {}
282 }
283 }
284 Err(crate::error::ParseError::UnexpectedNode(
285 "missing include path".to_string(),
286 ))
287}
288
289pub fn normalize_source_for_tree_sitter(text: &str) -> std::borrow::Cow<'_, str> {
290 normalize::source(text)
291}
292
293#[cfg(test)]
294mod tests;