1use std::{
4 collections::{HashMap, HashSet},
5 ops::Range,
6 path::{Path, PathBuf},
7 sync::Arc,
8};
9
10use super::source::{Source, SourceLoadError, SourceLoader, SourceResolver};
11use super::{FileId, ParseTree, Parser, SourceList, SourceMap};
12use crate::{
13 token_tree::{
14 typed::{self, AstNode as _},
15 AstSink,
16 },
17 Diagnostic, DiagnosticSet, GlyphMap, Node,
18};
19
/// Maximum nesting of `include` statements before `IncludeGraph::validate`
/// reports an error instead of recursing further.
const MAX_INCLUDE_DEPTH: usize = 50;
21
/// The result of parsing a root source file and, recursively, the files
/// it includes.
#[derive(Debug)]
pub(crate) struct ParseContext {
    // id of the root (entry-point) source file
    root_id: FileId,
    // all loaded sources, shared so the final tree can reference them
    sources: Arc<SourceList>,
    // per-file parse output: the syntax node plus any diagnostics
    parsed_files: HashMap<FileId, (Node, Vec<Diagnostic>)>,
    // which files each file includes, and where the statements occur
    graph: IncludeGraph,
}
62
/// A map from each file to the files it includes.
///
/// Each edge records the included file's id and the byte range of the
/// `include(..)` statement within the including file.
#[derive(Clone, Debug, Default)]
struct IncludeGraph {
    nodes: HashMap<FileId, Vec<(FileId, Range<usize>)>>,
}
73
/// A wrapper around a typed `include` statement node from the AST.
pub struct IncludeStatement(pub(crate) typed::Include);
76
/// A problem with an include statement, found while validating the graph.
struct IncludeError {
    // the file containing the offending include statement
    file: FileId,
    // index of the statement within that file's include edge list
    statement_idx: usize,
    // byte range of the statement, used when reporting diagnostics
    range: Range<usize>,
    kind: IncludeErrorKind,
}
84
/// The kinds of include problems we detect.
enum IncludeErrorKind {
    /// A file that, directly or transitively, includes itself.
    Cycle,
    /// Includes nested deeper than `MAX_INCLUDE_DEPTH`.
    // NOTE(review): variant name is a typo for `TooDeep`; left as-is because
    // renaming it would touch every use site.
    ToDeep,
}
89
90impl IncludeStatement {
91 fn path(&self) -> &str {
95 &self.0.path().text
96 }
97
98 fn stmt_range(&self) -> Range<usize> {
100 self.0.range()
101 }
102
103 fn path_range(&self) -> Range<usize> {
105 self.0.path().range()
106 }
107}
108
109impl ParseContext {
110 pub(crate) fn parse(
120 path: PathBuf,
121 glyph_map: Option<&GlyphMap>,
122 resolver: Box<dyn SourceResolver>,
123 ) -> Result<Self, SourceLoadError> {
124 let mut sources = SourceLoader::new(resolver);
125 let root_id = sources.source_for_path(&path, None)?;
126 let mut queue = vec![root_id];
127 let mut parsed_files = HashMap::new();
128 let mut includes = IncludeGraph::default();
129
130 while let Some(id) = queue.pop() {
131 if parsed_files.contains_key(&id) {
133 continue;
134 }
135 let source = sources.get(&id).unwrap();
136 let (node, mut errors, include_stmts) = parse_src(source, glyph_map);
137 errors.iter_mut().for_each(|e| e.message.file = id);
138
139 parsed_files.insert(source.id(), (node, errors));
140 if include_stmts.is_empty() {
141 continue;
142 }
143
144 let source_id = source.id();
146
147 for include in &include_stmts {
148 match sources.source_for_path(Path::new(include.path()), Some(source_id)) {
149 Ok(included_id) => {
150 includes.add_edge(id, (included_id, include.stmt_range()));
151 queue.push(included_id);
152 }
153 Err(e) => {
154 let range = include.path_range();
155 parsed_files.get_mut(&id).unwrap().1.push(Diagnostic::error(
156 id,
157 range,
158 e.to_string(),
159 ));
160 }
161 }
162 }
163 }
164
165 Ok(ParseContext {
166 root_id,
167 sources: sources.into_inner(),
168 parsed_files,
169 graph: includes,
170 })
171 }
172
173 pub(crate) fn root_id(&self) -> FileId {
174 self.root_id
175 }
176
177 pub(crate) fn generate_parse_tree(self) -> (ParseTree, DiagnosticSet) {
181 let mut all_errors = self
182 .parsed_files
183 .iter()
184 .flat_map(|(_, (_, errs))| errs.iter())
185 .cloned()
186 .collect::<Vec<_>>();
187 let include_errors = self.graph.validate(self.root_id());
188 for IncludeError {
190 file, range, kind, ..
191 } in &include_errors
192 {
193 let message = match kind {
195 IncludeErrorKind::Cycle => "cyclical include statement",
196 IncludeErrorKind::ToDeep => "exceded maximum include depth",
197 };
198 all_errors.push(Diagnostic::error(*file, range.clone(), message));
199 }
200
201 let mut map = SourceMap::default();
202 let mut root = self.generate_recurse(self.root_id(), &include_errors, &mut map, 0);
203 let needs_update_positions = self.parsed_files.len() > 1;
204 drop(self.parsed_files);
207 if needs_update_positions {
208 root.update_positions_from_root();
209 }
210
211 let diagnostics = DiagnosticSet {
212 messages: all_errors,
213 sources: self.sources.clone(),
214 max_to_print: usize::MAX,
215 };
216
217 (
218 ParseTree {
219 root,
220 map: Arc::new(map),
221 sources: self.sources,
222 },
223 diagnostics,
224 )
225 }
226
227 fn generate_recurse(
229 &self,
230 id: FileId,
231 skip: &[IncludeError],
232 source_map: &mut SourceMap,
233 offset: usize,
234 ) -> Node {
235 let this_node = self.parsed_files[&id].0.clone();
236 let self_len = this_node.text_len();
237 let mut self_pos = 0;
238 let mut global_pos = offset;
239 let this_node = match self.graph.includes_for_file(id) {
240 Some(includes) => {
241 let mut edits = Vec::with_capacity(includes.len());
242
243 for (i, (child_id, stmt)) in includes.iter().enumerate() {
244 if skip
245 .iter()
246 .any(|err| err.file == id && err.statement_idx == i)
247 {
248 continue;
249 }
250 let pre_len = stmt.start - self_pos;
252 let pre_range = global_pos..global_pos + pre_len;
253 source_map.add_entry(pre_range, (id, self_pos));
254 self_pos = stmt.end;
255 global_pos += pre_len;
256 let child_node = self.generate_recurse(*child_id, skip, source_map, global_pos);
257 global_pos += child_node.text_len();
258 edits.push((stmt.clone(), child_node));
259 }
260 this_node.edit(edits, true)
261 }
262 None => this_node,
263 };
264 let remain_len = self_len - self_pos;
266 let remaining_range = global_pos..global_pos + remain_len;
267 source_map.add_entry(remaining_range, (id, self_pos));
268 this_node
269 }
270}
271
impl IncludeGraph {
    /// Record that `from` includes `to.0`, via a statement at range `to.1`.
    fn add_edge(&mut self, from: FileId, to: (FileId, Range<usize>)) {
        self.nodes.entry(from).or_default().push(to);
    }

    /// The includes declared in `file`, in insertion order, if any.
    fn includes_for_file(&self, file: FileId) -> Option<&[(FileId, Range<usize>)]> {
        self.nodes.get(&file).map(|f| f.as_slice())
    }

    /// Walk the graph from `root`, reporting include cycles and over-deep
    /// nesting.
    ///
    /// This is an iterative depth-first traversal: each stack entry holds
    /// (file, its edge list, index of the next edge to visit).
    fn validate(&self, root: FileId) -> Vec<IncludeError> {
        let edges = match self.nodes.get(&root) {
            None => return Vec::new(),
            Some(edges) => edges,
        };

        let mut stack = vec![(root, edges, 0_usize)];
        // files whose edges have already been queued; each file is expanded
        // at most once, even if it is included from several places
        let mut seen = HashSet::new();
        let mut bad_edges = Vec::new();

        while let Some((node, edges, cur_edge)) = stack.pop() {
            if let Some((child, stmt)) = edges.get(cur_edge) {
                // re-push this node so we resume with its next edge later
                stack.push((node, edges, cur_edge + 1));
                // the stack length approximates the include depth here
                if stack.len() >= MAX_INCLUDE_DEPTH - 1 {
                    bad_edges.push(IncludeError {
                        file: node,
                        statement_idx: cur_edge,
                        range: stmt.clone(),
                        kind: IncludeErrorKind::ToDeep,
                    });
                    continue;
                }

                if seen.insert(*child) {
                    // first time we see this child: descend into its edges
                    if let Some(child_edges) = self.nodes.get(child) {
                        stack.push((*child, child_edges, 0));
                    }
                } else if stack.iter().any(|(ancestor, _, _)| ancestor == child) {
                    // already-seen child that is one of our ancestors: a cycle
                    bad_edges.push(IncludeError {
                        file: node,
                        statement_idx: cur_edge,
                        range: stmt.clone(),
                        kind: IncludeErrorKind::Cycle,
                    });
                }
            }
        }
        bad_edges
    }
}
329
330pub(crate) fn parse_src(
332 src: &Source,
333 glyph_map: Option<&GlyphMap>,
334) -> (Node, Vec<Diagnostic>, Vec<IncludeStatement>) {
335 let mut sink = AstSink::new(src.text(), src.id(), glyph_map);
336 {
337 let mut parser = Parser::new(src.text(), &mut sink);
338 super::grammar::root(&mut parser);
339 }
340 sink.finish()
341}
342
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        token_tree::{typed, TreeBuilder},
        Kind,
    };

    /// Generate `N` fresh, distinct file ids for use in tests.
    fn make_ids<const N: usize>() -> [FileId; N] {
        let mut result = [FileId::CURRENT_FILE; N];
        result.iter_mut().for_each(|id| *id = FileId::next());
        result
    }

    #[test]
    fn cycle_detection() {
        let [a, b, c, d] = make_ids();
        // build a single `include(file.fea);` statement, reused for every edge
        let statement = {
            let mut builder = TreeBuilder::default();
            builder.start_node(Kind::IncludeNode);
            builder.token(Kind::IncludeKw, "include");
            builder.token(Kind::LParen, "(");
            builder.token(Kind::Path, "file.fea");
            // fixed: closing paren was previously built as `Kind::LParen`
            builder.token(Kind::RParen, ")");
            builder.token(Kind::Semi, ";");
            builder.finish_node(false, None);
            builder.finish()
        };
        let statement = typed::Include::cast(&statement.into()).unwrap();
        let mut graph = IncludeGraph::default();
        graph.add_edge(a, (b, statement.range()));
        graph.add_edge(b, (c, statement.range()));
        graph.add_edge(c, (d, statement.range()));
        // d includes b, closing the cycle b -> c -> d -> b
        graph.add_edge(d, (b, statement.range()));

        let result = graph.validate(a);
        assert_eq!(result[0].file, d);
        assert_eq!(result[0].range, 0..18);
    }

    #[test]
    fn skip_cycle_in_build() {
        // "a" and "bb" include each other; the cycle edge should be skipped
        // during assembly and reported as a single error.
        let parse = ParseContext::parse(
            "a".into(),
            None,
            Box::new(|path: &Path| match path.to_str().unwrap() {
                "a" => Ok("include(bb);".into()),
                "bb" => Ok("include(a);".into()),
                _ => Err(SourceLoadError::new(
                    path.to_owned(),
                    std::io::Error::new(std::io::ErrorKind::NotFound, "oh no"),
                )),
            }),
        )
        .unwrap();
        let (resolved, errs) = parse.generate_parse_tree();
        assert_eq!(errs.len(), 1);
        // "bb"'s contents were spliced into "a" exactly once
        assert_eq!(resolved.root.text_len(), "include(bb);".len());
    }

    #[test]
    fn assembly_basic() {
        let file_a = "\
        include(b);\n\
        # hmm\n\
        include(c);";
        let file_b = "languagesystem dflt DFLT;\n";
        let file_c = "feature kern {\n pos a b 20;\n } kern;";

        let b_len = file_b.len();
        let c_len = file_c.len();

        let parse = ParseContext::parse(
            "file_a".into(),
            None,
            Box::new(|path: &Path| match path.to_str().unwrap() {
                "file_a" => Ok(file_a.into()),
                "b" => Ok(file_b.into()),
                "c" => Ok(file_c.into()),
                _ => Err(SourceLoadError::new(
                    path.into(),
                    std::io::Error::new(std::io::ErrorKind::NotFound, "oh no"),
                )),
            }),
        )
        .unwrap();

        let a_id = parse.sources.id_for_path("file_a").unwrap();
        let b_id = parse.sources.id_for_path("b").unwrap();
        let c_id = parse.sources.id_for_path("c").unwrap();

        let (resolved, errs) = parse.generate_parse_tree();
        assert!(errs.is_empty(), "{errs:?}");
        let top_level_nodes = resolved
            .root
            .iter_children()
            .filter_map(|n| n.as_node())
            .collect::<Vec<_>>();
        let inter_node_len = "\n# hmm\n".len();
        assert_eq!(top_level_nodes.len(), 2);
        assert_eq!(top_level_nodes[0].kind(), Kind::LanguageSystemNode);
        // the languagesystem node does not include the trailing newline
        assert_eq!(top_level_nodes[0].range(), 0..b_len - 1);
        let node_2_start = b_len + inter_node_len;
        assert_eq!(
            top_level_nodes[1].range(),
            node_2_start..node_2_start + c_len,
        );
        assert_eq!(top_level_nodes[1].kind(), Kind::FeatureNode);

        // global ranges resolve back to positions in the original files
        assert_eq!(resolved.map.resolve_range(10..15), (b_id, 10..15));
        assert_eq!(resolved.map.resolve_range(29..33), (a_id, 14..18));
        assert_eq!(resolved.map.resolve_range(49..52), (c_id, 16..19));
    }
}