use std::{fmt::Write, io::Write as _, ops::Range, sync::Arc};

use smol_str::SmolStr;

use crate::parse::{FileId, IncludeStatement};
use crate::{diagnostic::Diagnostic, GlyphMap, Level};

use self::cursor::Cursor;
use typed::AstNode as _;

mod cursor;
mod edit;
mod rewrite;
mod stack;
mod token;
pub mod typed;

use rewrite::ReparseCtx;
pub use token::Kind;

#[derive(PartialEq, Eq, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
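/// A node in the parse tree.
///
/// A node has a [`Kind`] and a collection of child nodes and tokens; its text
/// is the concatenation of its children's text.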
pub struct Node {
    kind: Kind,

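    /// The absolute position of this node in the source text.
    ///
    /// This is not known when the node is constructed; it is assigned once
    /// the full tree has been built (see `update_positions_from_root`).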
    abs_pos: u32,
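    /// The length of this node's text (the sum of the lengths of its children).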
    text_len: u32,
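    /// `true` if a hard parse error was encountered while building this node.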
    pub error: bool,
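    /// The node's children, stored in an `Arc` so that clones are cheap.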
    children: Arc<Vec<NodeOrToken>>,
}

#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
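/// A single token: a [`Kind`] plus the corresponding source text.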
pub struct Token {
    pub kind: Kind,
    abs_pos: u32,
    pub text: SmolStr,
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
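/// Either a [`Node`] or a [`Token`], the two kinds of item in the tree.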
pub enum NodeOrToken {
    Node(Node),
    Token(Token),
}

#[derive(Clone, Debug, Default)]
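/// Builds up a tree of nodes from a stream of tokens and start/finish events.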
pub(crate) struct TreeBuilder {
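    // The kind of each node currently being built, paired with the index in
    // `children` where that node's children start.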
    parents: Vec<(Kind, usize)>,
    children: Vec<NodeOrToken>,
}

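/// Receives tokens and node events from the parser and assembles the tree,
/// collecting any diagnostics along the way.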
pub(crate) struct AstSink<'a> {
    text: &'a str,
    file_id: FileId,
    text_pos: usize,
    builder: TreeBuilder,
    reparse_buf: Vec<NodeOrToken>,
    glyph_map: Option<&'a GlyphMap>,
    errors: Vec<Diagnostic>,
    include_statement_count: usize,
    cur_node_contains_error: bool,
}

#[derive(Default)]
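/// An iterator over the direct children of a node.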
pub struct ChildIter<'a>(Option<Cursor<'a>>);

impl<'a> AstSink<'a> {
    pub fn new(text: &'a str, file_id: FileId, glyph_map: Option<&'a GlyphMap>) -> Self {
        AstSink {
            file_id,
            text,
            text_pos: 0,
            builder: TreeBuilder::default(),
            glyph_map,
            errors: Vec::new(),
            cur_node_contains_error: false,
            include_statement_count: 0,
            reparse_buf: Default::default(),
        }
    }

    pub(crate) fn token(&mut self, kind: Kind, len: usize) {
        let token_text = &self.text[self.text_pos..self.text_pos + len];
        let to_add = self.validate_token(kind, token_text);
        self.builder.push_raw(to_add);
        self.text_pos += len;
    }

    pub(crate) fn start_node(&mut self, kind: Kind) {
        self.builder.start_node(kind);
    }

    pub(crate) fn finish_node(&mut self, kind: Option<Kind>) {
        let cur_kind = kind
            .or_else(|| self.builder.parents.last().map(|x| x.0))
            .unwrap();
        let kind = self.maybe_rewrite_current_node(cur_kind).or(kind);
        self.builder.finish_node(self.cur_node_contains_error, kind);
        self.cur_node_contains_error = false;
        if self.builder.children.last().map(|n| n.kind()) == Some(Kind::IncludeNode) {
            self.include_statement_count += 1;
        }
    }

    pub(crate) fn current_node_has_error(&self) -> bool {
        self.cur_node_contains_error
    }

    pub(crate) fn error(&mut self, mut error: Diagnostic) {
        let is_hard_error = error.level == Level::Error;
        error.message.file = self.file_id;
        self.errors.push(error);
        self.cur_node_contains_error = is_hard_error;
    }

    pub fn finish(self) -> (Node, Vec<Diagnostic>, Vec<IncludeStatement>) {
        let mut node = self.builder.finish();
        node.update_positions_from_root();
        let mut includes = Vec::new();
        if self.include_statement_count > 0 {
            node.find_include_nodes(&mut includes, self.include_statement_count);
        }
        (node, self.errors, includes)
    }

    #[cfg(test)]
    pub fn errors(&self) -> &[Diagnostic] {
        &self.errors
    }

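    /// Check a raw token before adding it to the tree.
    ///
    /// If a glyph map is available, tokens lexed as `GlyphNameOrRange` are
    /// resolved here into either a glyph name token or a glyph range node;
    /// if neither works, a diagnostic is recorded and the raw token is kept.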
    fn validate_token(&mut self, kind: Kind, text: &str) -> NodeOrToken {
        if kind == Kind::GlyphNameOrRange {
            if let Some(map) = self.glyph_map {
                if map.contains(text) {
                    return Token::new(Kind::GlyphName, text.into()).into();
                }
                match try_split_range(text, map) {
                    Ok(node) => return node.into(),
                    Err(message) => {
                        let range = self.text_pos..self.text_pos + text.len();
                        self.error(Diagnostic::error(FileId::CURRENT_FILE, range, message));
                    }
                }
            }
        }
        Token::new(kind, text.into()).into()
    }

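    /// Reparse the current node if its contents could not be fully classified
    /// on the first pass.
    ///
    /// Contextual GSUB/GPOS rules are first parsed into a provisional
    /// `*NeedsRewrite` node; once the node is complete we reparse its children
    /// to determine the actual rule kind. Nodes containing errors are left alone.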
    fn maybe_rewrite_current_node(&mut self, cur_kind: Kind) -> Option<Kind> {
        match cur_kind {
            _ if self.cur_node_contains_error => None,
            Kind::GsubNodeNeedsRewrite => {
                Some(self.rewrite_current_node(rewrite::reparse_contextual_sub_rule))
            }
            Kind::GposNodeNeedsRewrite => {
                Some(self.rewrite_current_node(rewrite::reparse_contextual_pos_rule))
            }
            _ => None,
        }
    }

    fn rewrite_current_node(&mut self, rewrite_fn: impl FnOnce(&mut ReparseCtx) -> Kind) -> Kind {
        assert!(self.reparse_buf.is_empty());
        self.builder.move_current_children(&mut self.reparse_buf);
        let mut buf = std::mem::take(&mut self.reparse_buf);
        let items_start_offset: usize = buf.iter().map(NodeOrToken::text_len).sum();

        let mut reparse_ctx = ReparseCtx {
            in_buf: &mut buf,
            text_pos: self.text_pos - items_start_offset,
            sink: self,
        };
        let new_kind = rewrite_fn(&mut reparse_ctx);
        assert!(
            reparse_ctx.in_buf.is_empty(),
            "rewrite finished with unhandled items"
        );
        buf.clear();
        std::mem::swap(&mut self.reparse_buf, &mut buf);
        new_kind
    }

    fn push_raw(&mut self, n: NodeOrToken) {
        self.builder.push_raw(n);
    }
}

impl Node {
    fn new(kind: Kind, children: Vec<NodeOrToken>, error: bool) -> Self {
        let text_len = children.iter().map(|x| x.text_len() as u32).sum();
        Node {
            kind,
            text_len,
            abs_pos: 0,
            children: children.into(),
            error,
        }
    }

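    /// Assign absolute positions to every node and token in the tree.
    ///
    /// Positions are not known while the tree is being built, so after parsing
    /// we walk the finished tree once from the root, giving each item its
    /// offset from the start of the source.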
    pub(crate) fn update_positions_from_root(&mut self) {
        self.update_positions_recurse(0)
    }

    fn update_positions_recurse(&mut self, mut pos: usize) {
        self.abs_pos = pos as _;
        let children = Arc::make_mut(&mut self.children);

        for child in children {
            child.update_positions(pos);
            pos += child.text_len();
        }
    }

    pub(crate) fn cursor(&self) -> Cursor {
        Cursor::new(self)
    }

    pub fn iter_tokens(&self) -> impl Iterator<Item = &Token> {
        let mut cursor = self.cursor();
        std::iter::from_fn(move || cursor.next_token())
    }

    pub fn iter_children(&self) -> ChildIter {
        ChildIter(Some(self.cursor()))
    }

    pub fn kind(&self) -> Kind {
        self.kind
    }

    pub fn text_len(&self) -> usize {
        self.text_len as usize
    }

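    /// The absolute byte range of this node in the source text.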
    pub fn range(&self) -> Range<usize> {
        let start = self.abs_pos as usize;
        start..start + (self.text_len as usize)
    }

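    /// Return a copy of this tree with the given edits applied.
    ///
    /// Each edit replaces a range with a new node; see `edit::apply_edits`
    /// for the details.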
    pub(crate) fn edit(&self, edits: Vec<(Range<usize>, Node)>, skip_parent: bool) -> Node {
        edit::apply_edits(self, edits, skip_parent)
    }

    fn find_include_nodes(&self, collect: &mut Vec<IncludeStatement>, num: usize) {
        for item in self.iter_children() {
            if let Some(node) = item.as_node() {
                if let Some(include) = typed::Include::cast(item) {
                    collect.push(IncludeStatement(include));
                    if collect.len() == num {
                        return;
                    }
                } else {
                    node.find_include_nodes(collect, num);
                }
            }
        }
    }

    #[doc(hidden)]
    pub fn debug_print_structure(&self, include_tokens: bool) {
        let mut cursor = self.cursor();
        while let Some(thing) = cursor.current() {
            match thing {
                NodeOrToken::Node(node) => {
                    let depth = cursor.depth();
                    let _ = writeln!(
                        std::io::stderr(),
                        "{}{} ({}..{})",
                        &crate::util::SPACES[..depth * 2],
                        node.kind,
                        cursor.pos(),
                        cursor.pos() + node.text_len()
                    );
                }
                NodeOrToken::Token(t) if include_tokens => eprint!("{}", t.as_str()),
                _ => (),
            }
            cursor.advance();
        }
    }

    #[doc(hidden)]
    pub fn simple_parse_tree(&self) -> String {
        let mut result = String::new();
        self.parse_tree_impl(0, &mut result).unwrap();
        result
    }

    fn parse_tree_impl(&self, depth: usize, buf: &mut String) -> std::fmt::Result {
        use crate::util::SPACES;
        let mut pos = self.abs_pos;
        writeln!(
            buf,
            "{}{}@[{}; {})",
            &SPACES[..depth * 2],
            self.kind,
            pos,
            pos + self.text_len
        )?;
        let depth = depth + 1;
        for child in self.iter_children() {
            match child {
                NodeOrToken::Token(Token { kind, text, .. }) => {
                    let spaces = &SPACES[..depth * 2];
                    write!(buf, "{}{}@{}", spaces, kind, pos)?;
                    if kind.is_trivia() {
                        writeln!(buf, " \"{}\"", text.escape_debug())?;
                    } else {
                        writeln!(buf, " \"{}\"", text)?;
                    }
                    pos += text.len() as u32;
                }
                NodeOrToken::Node(node) => {
                    // `depth` has already been incremented for this node's children
                    node.parse_tree_impl(depth, buf)?;
                    pos += node.text_len;
                }
            }
        }
        Ok(())
    }
}

impl<'a> Iterator for ChildIter<'a> {
    type Item = &'a NodeOrToken;

    fn next(&mut self) -> Option<Self::Item> {
        let current = self.0.as_ref()?.current();
        self.0.as_mut()?.step_over();
        current
    }
}

impl TreeBuilder {
    pub(crate) fn start_node(&mut self, kind: Kind) {
        let len = self.children.len();
        self.parents.push((kind, len));
    }

    pub(crate) fn token(&mut self, kind: Kind, text: impl Into<SmolStr>) {
        let token = Token::new(kind, text.into());
        self.push_raw(token.into());
    }

    fn push_raw(&mut self, item: NodeOrToken) {
        self.children.push(item)
    }

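    /// Drain the children of the node currently being built into `to_buf`.
    ///
    /// This is used when a just-parsed node needs to be reparsed
    /// (see `AstSink::rewrite_current_node`).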
    fn move_current_children(&mut self, to_buf: &mut Vec<NodeOrToken>) {
        if let Some(idx) = self.parents.last().map(|(_, idx)| idx).copied() {
            to_buf.extend(self.children.drain(idx..));
        }
    }

    pub(crate) fn finish_node(&mut self, error: bool, new_kind: Option<Kind>) {
        let (kind, first_child) = self.parents.pop().unwrap();
        let kind = new_kind.unwrap_or(kind);
        let node = Node::new(kind, self.children.split_off(first_child), error);
        self.push_raw(node.into());
    }

    pub(crate) fn finish(mut self) -> Node {
        assert_eq!(self.children.len(), 1);
        self.children.pop().unwrap().into_node().unwrap()
    }
}

impl NodeOrToken {
    fn update_positions(&mut self, pos: usize) {
        match self {
            NodeOrToken::Token(t) => t.abs_pos = pos as _,
            NodeOrToken::Node(n) => n.update_positions_recurse(pos),
        }
    }

    pub fn is_token(&self) -> bool {
        matches!(self, NodeOrToken::Token(_))
    }

    pub fn token_text(&self) -> Option<&str> {
        self.as_token().map(Token::as_str)
    }

    pub fn kind(&self) -> Kind {
        match self {
            NodeOrToken::Node(n) => n.kind,
            NodeOrToken::Token(t) => t.kind,
        }
    }

    pub fn is_glyph_or_glyph_class(&self) -> bool {
        matches!(
            self.kind(),
            Kind::GlyphName | Kind::Cid | Kind::GlyphClass | Kind::NamedGlyphClass
        )
    }

    pub fn range(&self) -> Range<usize> {
        match self {
            NodeOrToken::Token(t) => t.range(),
            NodeOrToken::Node(n) => n.range(),
        }
    }

    pub fn text_len(&self) -> usize {
        match self {
            NodeOrToken::Node(n) => n.text_len as usize,
            NodeOrToken::Token(t) => t.text.len(),
        }
    }

    pub fn into_node(self) -> Option<Node> {
        match self {
            NodeOrToken::Node(node) => Some(node),
            NodeOrToken::Token(_) => None,
        }
    }

    pub fn as_node(&self) -> Option<&Node> {
        match self {
            NodeOrToken::Node(node) => Some(node),
            NodeOrToken::Token(_) => None,
        }
    }

    pub fn as_token(&self) -> Option<&Token> {
        match self {
            NodeOrToken::Node(_) => None,
            NodeOrToken::Token(token) => Some(token),
        }
    }
}

impl From<Node> for NodeOrToken {
    fn from(src: Node) -> NodeOrToken {
        NodeOrToken::Node(src)
    }
}

impl From<Token> for NodeOrToken {
    fn from(src: Token) -> NodeOrToken {
        NodeOrToken::Token(src)
    }
}

impl Token {
    fn new(kind: Kind, text: SmolStr) -> Self {
        Token {
            kind,
            text,
            abs_pos: 0,
        }
    }

    pub fn as_str(&self) -> &str {
        &self.text
    }

    pub fn range(&self) -> Range<usize> {
        self.abs_pos as usize..self.abs_pos as usize + self.text.len()
    }
}

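/// Try to interpret `text` as a glyph range of the form "a-b".
///
/// Returns a `GlyphRange` node if exactly one split produces two known glyph
/// names, and an error message if no split works or the split is ambiguous.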
fn try_split_range(text: &str, glyph_map: &GlyphMap) -> Result<Node, String> {
    let mut solution = None;

    for idx in text
        .bytes()
        .enumerate()
        .filter_map(|(idx, b)| (b == b'-').then_some(idx))
    {
        let (head, tail) = text.split_at(idx);
        if glyph_map.contains(head) && glyph_map.contains(tail.trim_start_matches('-')) {
            if let Some(prev_idx) = solution.replace(idx) {
                let (head1, tail1) = text.split_at(prev_idx);
                let (head2, tail2) = text.split_at(idx);
                let message = format!(
                    "the name '{}' contains multiple possible glyph ranges ({} to {} and {} to {}). Please insert spaces around the '-' to clarify your intent.",
                    text,
                    head1,
                    tail1.trim_start_matches('-'),
                    head2,
                    tail2.trim_start_matches('-')
                );
                return Err(message);
            }
        }
    }

    solution
        .map(|idx| {
            let mut builder = TreeBuilder::default();
            builder.start_node(Kind::GlyphRange);
            let (head, tail) = text.split_at(idx);
            builder.token(Kind::GlyphName, head);
            builder.token(Kind::Hyphen, "-");
            builder.token(Kind::GlyphName, tail.trim_start_matches('-'));
            builder.finish_node(false, None);
            builder.finish()
        })
        .ok_or_else(|| {
            format!(
590 "'{}' is neither a known glyph or a range of known glyphs",
                text
            )
        })
}

impl Node {
    fn debug_impl(&self, f: &mut std::fmt::Formatter, depth: usize) -> std::fmt::Result {
        use crate::util::SPACES;

        let ws = &SPACES[..depth * 2];
        write!(
            f,
            "\n{ws}{}: abs {} len {} children {}",
            self.kind,
            self.abs_pos,
            self.text_len,
            self.children.len()
        )?;
        let ws = &SPACES[..(depth + 1) * 2];
        for child in self.iter_children() {
            match child {
                NodeOrToken::Token(t) => write!(f, "\n{}'{}' {}", ws, t.text, t.kind)?,
                NodeOrToken::Node(n) => n.debug_impl(f, depth + 1)?,
            }
        }
        Ok(())
    }
}

impl std::fmt::Debug for Node {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.debug_impl(f, 0)
    }
}

#[cfg(test)]
mod tests {

    use super::*;
    static SAMPLE_FEA: &str = include_str!("../test-data/fonttools-tests/mini.fea");

    #[test]
    fn token_iter() {
        let (ast, _errs) = crate::parse::parse_string(SAMPLE_FEA);
        let reconstruct = ast
            .root()
            .iter_tokens()
            .map(Token::as_str)
            .collect::<String>();
        crate::assert_eq_str!(SAMPLE_FEA, reconstruct);
    }
}