use std::{fmt::Write, io::Write as _};

use std::{ops::Range, sync::Arc};

use smol_str::SmolStr;

use crate::parse::{FileId, IncludeStatement};
use crate::{GlyphMap, Level, diagnostic::Diagnostic};

use self::cursor::Cursor;
use typed::AstNode as _;

mod cursor;
mod edit;
mod rewrite;
mod stack;
mod token;
pub mod typed;

use rewrite::ReparseCtx;
pub use token::Kind;

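/// A node in the parse tree, containing zero or more child nodes and tokens.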
#[derive(PartialEq, Eq, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Node {
    /// The kind of this node.
    kind: Kind,
    /// The absolute position (byte offset) of this node in the source text.
    abs_pos: u32,
    /// The length of the source text covered by this node.
    text_len: u32,
    /// `true` if a parse error was reported while this node was the current node.
    pub error: bool,
    /// The node's children, behind an `Arc` so that clones are cheap.
    children: Arc<Vec<NodeOrToken>>,
}

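/// A leaf token in the parse tree: a kind, the source text it covers, and its
/// absolute position in the source.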
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Token {
    pub kind: Kind,
    abs_pos: u32,
    pub text: SmolStr,
}

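/// Either a [`Node`] or a [`Token`]: a single child in the parse tree.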
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum NodeOrToken {
    Node(Node),
    Token(Token),
}

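/// Incrementally builds a tree of nodes and tokens during parsing.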
#[derive(Clone, Debug, Default)]
pub(crate) struct TreeBuilder {
    // A stack of (kind, index) pairs: the kind of each node currently being
    // built, and the index in `children` where that node's children begin.
    parents: Vec<(Kind, usize)>,
    children: Vec<NodeOrToken>,
}

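/// Receives tokens and node boundaries from the parser and assembles them into
/// a tree, collecting diagnostics and keeping track of any `include` statements
/// encountered along the way.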
pub(crate) struct AstSink<'a> {
    text: &'a str,
    file_id: FileId,
    /// The current byte offset into `text`.
    text_pos: usize,
    builder: TreeBuilder,
    /// A scratch buffer, reused when a just-finished node needs to be reparsed.
    reparse_buf: Vec<NodeOrToken>,
    glyph_map: Option<&'a GlyphMap>,
    errors: Vec<Diagnostic>,
    include_statement_count: usize,
    cur_node_contains_error: bool,
}

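/// An iterator over the direct children of a [`Node`].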
#[derive(Default)]
pub struct ChildIter<'a>(Option<Cursor<'a>>);

impl<'a> AstSink<'a> {
    pub fn new(text: &'a str, file_id: FileId, glyph_map: Option<&'a GlyphMap>) -> Self {
        AstSink {
            file_id,
            text,
            text_pos: 0,
            builder: TreeBuilder::default(),
            glyph_map,
            errors: Vec::new(),
            cur_node_contains_error: false,
            include_statement_count: 0,
            reparse_buf: Default::default(),
        }
    }

    pub(crate) fn token(&mut self, kind: Kind, len: usize) {
        let token_text = &self.text[self.text_pos..self.text_pos + len];
        let to_add = self.validate_token(kind, token_text);
        self.builder.push_raw(to_add);
        self.text_pos += len;
    }

    pub(crate) fn start_node(&mut self, kind: Kind) {
        self.builder.start_node(kind);
    }

    pub(crate) fn finish_node(&mut self, kind: Option<Kind>) {
        let cur_kind = kind
            .or_else(|| self.builder.parents.last().map(|x| x.0))
            .unwrap();
        let kind = self.maybe_rewrite_current_node(cur_kind).or(kind);
        self.builder.finish_node(self.cur_node_contains_error, kind);
        self.cur_node_contains_error = false;
        if self.builder.children.last().map(|n| n.kind()) == Some(Kind::IncludeNode) {
            self.include_statement_count += 1;
        }
    }

    pub(crate) fn current_node_has_error(&self) -> bool {
        self.cur_node_contains_error
    }

    pub(crate) fn error(&mut self, mut error: Diagnostic) {
        let is_hard_error = error.level == Level::Error;
        error.message.file = self.file_id;
        self.errors.push(error);
        self.cur_node_contains_error = is_hard_error;
    }

    pub fn finish(self) -> (Node, Vec<Diagnostic>, Vec<IncludeStatement>) {
        let mut node = self.builder.finish();
        node.update_positions_from_root();
        let mut includes = Vec::new();
        if self.include_statement_count > 0 {
            node.find_include_nodes(&mut includes, self.include_statement_count);
        }
        (node, self.errors, includes)
    }

    #[cfg(test)]
    pub fn errors(&self) -> &[Diagnostic] {
        &self.errors
    }

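    /// Check a token against the glyph map, if one is present.
    ///
    /// A `GlyphNameOrRange` token is resolved to a `GlyphName` token if it is
    /// in the glyph map, or to a `GlyphRange` node if it can be split into two
    /// known glyphs around a hyphen; otherwise an error is reported and the
    /// token is passed through unchanged.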
    fn validate_token(&mut self, kind: Kind, text: &str) -> NodeOrToken {
        if kind == Kind::GlyphNameOrRange
            && let Some(map) = self.glyph_map
        {
            if map.contains(text) {
                return Token::new(Kind::GlyphName, text.into()).into();
            }
            match try_split_range(text, map) {
                Ok(node) => return node.into(),
                Err(message) => {
                    let range = self.text_pos..self.text_pos + text.len();
                    self.error(Diagnostic::error(FileId::CURRENT_FILE, range, message));
                }
            }
        }
        Token::new(kind, text.into()).into()
    }

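    /// Determine whether the node that is about to be finished needs to be
    /// reparsed, and if so rewrite it and return its new kind.
    ///
    /// Nodes flagged `GsubNodeNeedsRewrite` or `GposNodeNeedsRewrite` are
    /// reparsed as contextual substitution or positioning rules; nodes that
    /// contain errors are left untouched.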
    fn maybe_rewrite_current_node(&mut self, cur_kind: Kind) -> Option<Kind> {
        match cur_kind {
            _ if self.cur_node_contains_error => None,
            Kind::GsubNodeNeedsRewrite => {
                Some(self.rewrite_current_node(rewrite::reparse_contextual_sub_rule))
            }
            Kind::GposNodeNeedsRewrite => {
                Some(self.rewrite_current_node(rewrite::reparse_contextual_pos_rule))
            }
            _ => None,
        }
    }

    fn rewrite_current_node(&mut self, rewrite_fn: impl FnOnce(&mut ReparseCtx) -> Kind) -> Kind {
        assert!(self.reparse_buf.is_empty());
        self.builder.move_current_children(&mut self.reparse_buf);
        let mut buf = std::mem::take(&mut self.reparse_buf);
        let items_start_offset: usize = buf.iter().map(NodeOrToken::text_len).sum();

        let mut reparse_ctx = ReparseCtx {
            in_buf: &mut buf,
            text_pos: self.text_pos - items_start_offset,
            sink: self,
        };
        let new_kind = rewrite_fn(&mut reparse_ctx);
        assert!(
            reparse_ctx.in_buf.is_empty(),
            "rewrite finished with unhandled items"
        );
        buf.clear();
        std::mem::swap(&mut self.reparse_buf, &mut buf);
        new_kind
    }

    fn push_raw(&mut self, n: NodeOrToken) {
        self.builder.push_raw(n);
    }
}

impl Node {
    fn new(kind: Kind, children: Vec<NodeOrToken>, error: bool) -> Self {
        let text_len = children.iter().map(|x| x.text_len() as u32).sum();
        Node {
            kind,
            text_len,
            abs_pos: 0,
            children: children.into(),
            error,
        }
    }

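    /// Recompute the absolute position of every node and token in this tree,
    /// treating `self` as the root (position 0).
    ///
    /// Positions are not known while the tree is being built, so they are
    /// filled in once building is finished.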
    pub(crate) fn update_positions_from_root(&mut self) {
        self.update_positions_recurse(0)
    }

    fn update_positions_recurse(&mut self, mut pos: usize) {
        self.abs_pos = pos as _;
        let children = Arc::make_mut(&mut self.children);

        for child in children {
            child.update_positions(pos);
            pos += child.text_len();
        }
    }

    pub(crate) fn cursor(&self) -> Cursor<'_> {
        Cursor::new(self)
    }

    pub fn iter_tokens(&self) -> impl Iterator<Item = &Token> {
        let mut cursor = self.cursor();
        std::iter::from_fn(move || cursor.next_token())
    }

    pub fn iter_children(&self) -> ChildIter<'_> {
        ChildIter(Some(self.cursor()))
    }

    pub fn kind(&self) -> Kind {
        self.kind
    }

    pub fn text_len(&self) -> usize {
        self.text_len as usize
    }

    pub fn range(&self) -> Range<usize> {
        let start = self.abs_pos as usize;
        start..start + (self.text_len as usize)
    }

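    /// Return a new tree with the given replacements applied.
    ///
    /// Each edit pairs a range in the current tree with the node that should
    /// replace it; the actual splicing is handled by `edit::apply_edits`.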
    pub(crate) fn edit(&self, edits: Vec<(Range<usize>, Node)>, skip_parent: bool) -> Node {
        edit::apply_edits(self, edits, skip_parent)
    }

    fn find_include_nodes(&self, collect: &mut Vec<IncludeStatement>, num: usize) {
        for item in self.iter_children() {
            if let Some(node) = item.as_node() {
                if let Some(include) = typed::Include::cast(item) {
                    collect.push(IncludeStatement {
                        stmt: include,
                        scope: self.kind,
                    });
                    if collect.len() == num {
                        return;
                    }
                } else {
                    node.find_include_nodes(collect, num);
                }
            }
        }
    }

    #[doc(hidden)]
    pub fn debug_print_structure(&self, include_tokens: bool) {
        let mut cursor = self.cursor();
        while let Some(thing) = cursor.current() {
            match thing {
                NodeOrToken::Node(node) => {
                    let depth = cursor.depth();
                    let _ = writeln!(
                        std::io::stderr(),
                        "{}{} ({}..{})",
                        &crate::util::SPACES[..depth * 2],
                        node.kind,
                        cursor.pos(),
                        cursor.pos() + node.text_len()
                    );
                }
                NodeOrToken::Token(t) if include_tokens => eprint!("{}", t.as_str()),
                _ => (),
            }
            cursor.advance();
        }
    }

    #[doc(hidden)]
    pub fn simple_parse_tree(&self) -> String {
        let mut result = String::new();
        self.parse_tree_impl(0, &mut result).unwrap();
        result
    }

    fn parse_tree_impl(&self, depth: usize, buf: &mut String) -> std::fmt::Result {
        use crate::util::SPACES;
        let mut pos = self.abs_pos;
        writeln!(
            buf,
            "{}{}@[{}; {})",
            &SPACES[..depth * 2],
            self.kind,
            pos,
            pos + self.text_len
        )?;
        let depth = depth + 1;
        for child in self.iter_children() {
            match child {
                NodeOrToken::Token(Token { kind, text, .. }) => {
                    let spaces = &SPACES[..depth * 2];
                    write!(buf, "{spaces}{kind}@{pos}")?;
                    if kind.is_trivia() {
                        writeln!(buf, " \"{}\"", text.escape_debug())?;
                    } else {
                        writeln!(buf, " \"{text}\"")?;
                    }
                    pos += text.len() as u32;
                }
                NodeOrToken::Node(node) => {
                    // `depth` has already been incremented for children above.
                    node.parse_tree_impl(depth, buf)?;
                    pos += node.text_len;
                }
            }
        }
        Ok(())
    }
}

impl<'a> Iterator for ChildIter<'a> {
    type Item = &'a NodeOrToken;

    fn next(&mut self) -> Option<Self::Item> {
        let current = self.0.as_ref()?.current();
        self.0.as_mut()?.step_over();
        current
    }
}

impl TreeBuilder {
    pub(crate) fn start_node(&mut self, kind: Kind) {
        let len = self.children.len();
        self.parents.push((kind, len));
    }

    pub(crate) fn token(&mut self, kind: Kind, text: impl Into<SmolStr>) {
        let token = Token::new(kind, text.into());
        self.push_raw(token.into());
    }

    fn push_raw(&mut self, item: NodeOrToken) {
        self.children.push(item)
    }

    fn move_current_children(&mut self, to_buf: &mut Vec<NodeOrToken>) {
        if let Some(idx) = self.parents.last().map(|(_, idx)| idx).copied() {
            to_buf.extend(self.children.drain(idx..));
        }
    }

    pub(crate) fn finish_node(&mut self, error: bool, new_kind: Option<Kind>) {
        let (kind, first_child) = self.parents.pop().unwrap();
        let kind = new_kind.unwrap_or(kind);
        let node = Node::new(kind, self.children.split_off(first_child), error);
        self.push_raw(node.into());
    }

    pub(crate) fn finish(mut self) -> Node {
        assert_eq!(self.children.len(), 1);
        self.children.pop().unwrap().into_node().unwrap()
    }
}

impl NodeOrToken {
    fn update_positions(&mut self, pos: usize) {
        match self {
            NodeOrToken::Token(t) => t.abs_pos = pos as _,
            NodeOrToken::Node(n) => n.update_positions_recurse(pos),
        }
    }

    pub fn is_token(&self) -> bool {
        matches!(self, NodeOrToken::Token(_))
    }

    pub fn token_text(&self) -> Option<&str> {
        self.as_token().map(Token::as_str)
    }

    pub fn kind(&self) -> Kind {
        match self {
            NodeOrToken::Node(n) => n.kind,
            NodeOrToken::Token(t) => t.kind,
        }
    }

    pub fn is_glyph_or_glyph_class(&self) -> bool {
        matches!(
            self.kind(),
            Kind::GlyphName | Kind::Cid | Kind::GlyphClass | Kind::NamedGlyphClass
        )
    }

    pub fn range(&self) -> Range<usize> {
        match self {
            NodeOrToken::Token(t) => t.range(),
            NodeOrToken::Node(n) => n.range(),
        }
    }

    pub fn text_len(&self) -> usize {
        match self {
            NodeOrToken::Node(n) => n.text_len as usize,
            NodeOrToken::Token(t) => t.text.len(),
        }
    }

    pub fn into_node(self) -> Option<Node> {
        match self {
            NodeOrToken::Node(node) => Some(node),
            NodeOrToken::Token(_) => None,
        }
    }

    pub fn as_node(&self) -> Option<&Node> {
        match self {
            NodeOrToken::Node(node) => Some(node),
            NodeOrToken::Token(_) => None,
        }
    }

    pub fn as_token(&self) -> Option<&Token> {
        match self {
            NodeOrToken::Node(_) => None,
            NodeOrToken::Token(token) => Some(token),
        }
    }
}

impl From<Node> for NodeOrToken {
    fn from(src: Node) -> NodeOrToken {
        NodeOrToken::Node(src)
    }
}

impl From<Token> for NodeOrToken {
    fn from(src: Token) -> NodeOrToken {
        NodeOrToken::Token(src)
    }
}

impl Token {
    fn new(kind: Kind, text: SmolStr) -> Self {
        Token {
            kind,
            text,
            abs_pos: 0,
        }
    }

    pub fn as_str(&self) -> &str {
        &self.text
    }

    pub fn range(&self) -> Range<usize> {
        self.abs_pos as usize..self.abs_pos as usize + self.text.len()
    }
}

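/// Try to interpret `text` as a glyph range of the form `first-last`.
///
/// Returns a `GlyphRange` node if `text` can be split around exactly one hyphen
/// such that both halves are known glyphs; returns an error message if no such
/// split exists, or if more than one split is possible (ambiguous).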
fn try_split_range(text: &str, glyph_map: &GlyphMap) -> Result<Node, String> {
    let mut solution = None;

    for idx in text
        .bytes()
        .enumerate()
        .filter_map(|(idx, b)| (b == b'-').then_some(idx))
    {
        let (head, tail) = text.split_at(idx);
        if glyph_map.contains(head)
            && glyph_map.contains(tail.trim_start_matches('-'))
            && let Some(prev_idx) = solution.replace(idx)
        {
            let (head1, tail1) = text.split_at(prev_idx);
            let (head2, tail2) = text.split_at(idx);
            let message = format!(
                "the name '{}' contains multiple possible glyph ranges ({} to {} and {} to {}). Please insert spaces around the '-' to clarify your intent.",
                text,
                head1,
                tail1.trim_start_matches('-'),
                head2,
                tail2.trim_start_matches('-')
            );
            return Err(message);
        }
    }

    solution
        .map(|idx| {
            let mut builder = TreeBuilder::default();
            builder.start_node(Kind::GlyphRange);
            let (head, tail) = text.split_at(idx);
            builder.token(Kind::GlyphName, head);
            builder.token(Kind::Hyphen, "-");
            builder.token(Kind::GlyphName, tail.trim_start_matches('-'));
            builder.finish_node(false, None);
            builder.finish()
        })
        .ok_or_else(|| format!("'{text}' is neither a known glyph nor a range of known glyphs"))
}

impl Node {
    fn debug_impl(&self, f: &mut std::fmt::Formatter, depth: usize) -> std::fmt::Result {
        use crate::util::SPACES;

        let ws = &SPACES[..depth * 2];
        write!(
            f,
            "\n{ws}{}: abs {} len {} children {}",
            self.kind,
            self.abs_pos,
            self.text_len,
            self.children.len()
        )?;
        let ws = &SPACES[..(depth + 1) * 2];
        for child in self.iter_children() {
            match child {
                NodeOrToken::Token(t) => write!(f, "\n{}'{}' {}", ws, t.text, t.kind)?,
                NodeOrToken::Node(n) => n.debug_impl(f, depth + 1)?,
            }
        }
        Ok(())
    }
}

impl std::fmt::Debug for Node {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.debug_impl(f, 0)
    }
}

#[cfg(test)]
mod tests {

    use super::*;
    static SAMPLE_FEA: &str = include_str!("../test-data/fonttools-tests/mini.fea");

    #[test]
    fn token_iter() {
        let (ast, _errs) = crate::parse::parse_string(SAMPLE_FEA);
        let reconstruct = ast
            .root()
            .iter_tokens()
            .map(Token::as_str)
            .collect::<String>();
        crate::assert_eq_str!(SAMPLE_FEA, reconstruct);
    }
}