use std::{fmt::Write, io::Write as _};
use std::{ops::Range, sync::Arc};
use smol_str::SmolStr;
use crate::parse::{FileId, IncludeStatement};
use crate::{diagnostic::Diagnostic, GlyphMap, Level};
use self::cursor::Cursor;
use typed::AstNode as _;
mod cursor;
mod edit;
mod rewrite;
mod stack;
mod token;
pub mod typed;
use rewrite::ReparseCtx;
pub use token::Kind;
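/// A non-terminal node in the parse tree: a kind, a position and length in
/// the source text, an error flag, and a list of child nodes and tokens.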
#[derive(PartialEq, Eq, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Node {
kind: Kind,
abs_pos: u32,
text_len: u32,
pub error: bool,
children: Arc<Vec<NodeOrToken>>,
}
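/// A terminal token: a span of source text tagged with a [`Kind`].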
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Token {
pub kind: Kind,
abs_pos: u32,
pub text: SmolStr,
}
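/// Either a [`Node`] or a [`Token`].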
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum NodeOrToken {
Node(Node),
Token(Token),
}
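/// Incrementally assembles the tree.
///
/// `parents` holds one `(kind, index)` entry per in-progress node, where
/// `index` is the position in `children` of that node's first child.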
#[derive(Clone, Debug, Default)]
pub(crate) struct TreeBuilder {
parents: Vec<(Kind, usize)>,
children: Vec<NodeOrToken>,
}
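/// Receives tokens and node boundaries from the parser and builds the tree,
/// collecting any diagnostics reported along the way.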
pub(crate) struct AstSink<'a> {
text: &'a str,
file_id: FileId,
text_pos: usize,
builder: TreeBuilder,
reparse_buf: Vec<NodeOrToken>,
glyph_map: Option<&'a GlyphMap>,
errors: Vec<Diagnostic>,
include_statement_count: usize,
cur_node_contains_error: bool,
}
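/// An iterator over the direct children of a [`Node`].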
#[derive(Default)]
pub struct ChildIter<'a>(Option<Cursor<'a>>);
impl<'a> AstSink<'a> {
pub fn new(text: &'a str, file_id: FileId, glyph_map: Option<&'a GlyphMap>) -> Self {
AstSink {
file_id,
text,
text_pos: 0,
builder: TreeBuilder::default(),
glyph_map,
errors: Vec::new(),
cur_node_contains_error: false,
include_statement_count: 0,
reparse_buf: Default::default(),
}
}
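/// Add a token of `kind` covering the next `len` bytes of the source text.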
pub(crate) fn token(&mut self, kind: Kind, len: usize) {
let token_text = &self.text[self.text_pos..self.text_pos + len];
let to_add = self.validate_token(kind, token_text);
self.builder.push_raw(to_add);
self.text_pos += len;
}
pub(crate) fn start_node(&mut self, kind: Kind) {
self.builder.start_node(kind);
}
pub(crate) fn finish_node(&mut self, kind: Option<Kind>) {
let cur_kind = kind
.or_else(|| self.builder.parents.last().map(|x| x.0))
.unwrap();
let kind = self.maybe_rewrite_current_node(cur_kind).or(kind);
self.builder.finish_node(self.cur_node_contains_error, kind);
self.cur_node_contains_error = false;
if self.builder.children.last().map(|n| n.kind()) == Some(Kind::IncludeNode) {
self.include_statement_count += 1;
}
}
pub(crate) fn current_node_has_error(&self) -> bool {
self.cur_node_contains_error
}
pub(crate) fn error(&mut self, mut error: Diagnostic) {
let is_hard_error = error.level == Level::Error;
error.message.file = self.file_id;
self.errors.push(error);
self.cur_node_contains_error = is_hard_error;
}
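/// Consume the sink, returning the root node, any diagnostics, and any
/// include statements that were encountered.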
pub fn finish(self) -> (Node, Vec<Diagnostic>, Vec<IncludeStatement>) {
let mut node = self.builder.finish();
node.update_positions_from_root();
let mut includes = Vec::new();
if self.include_statement_count > 0 {
node.find_include_nodes(&mut includes, self.include_statement_count);
}
(node, self.errors, includes)
}
#[cfg(test)]
pub fn errors(&self) -> &[Diagnostic] {
&self.errors
}
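/// Resolve a `GlyphNameOrRange` token against the glyph map, if one was
/// provided: a known name becomes a `GlyphName` token, otherwise we try to
/// split it into a glyph range, reporting an error if that also fails.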
fn validate_token(&mut self, kind: Kind, text: &str) -> NodeOrToken {
if kind == Kind::GlyphNameOrRange {
if let Some(map) = self.glyph_map {
if map.contains(text) {
return Token::new(Kind::GlyphName, text.into()).into();
}
match try_split_range(text, map) {
Ok(node) => return node.into(),
Err(message) => {
let range = self.text_pos..self.text_pos + text.len();
self.error(Diagnostic::error(FileId::CURRENT_FILE, range, message));
}
}
}
}
Token::new(kind, text.into()).into()
}
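/// If the node being finished is marked as needing a rewrite (contextual
/// GSUB/GPOS rules), reparse its children to determine its final kind.
/// Nodes that already contain errors are left as-is.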
fn maybe_rewrite_current_node(&mut self, cur_kind: Kind) -> Option<Kind> {
match cur_kind {
_ if self.cur_node_contains_error => None,
Kind::GsubNodeNeedsRewrite => {
Some(self.rewrite_current_node(rewrite::reparse_contextual_sub_rule))
}
Kind::GposNodeNeedsRewrite => {
Some(self.rewrite_current_node(rewrite::reparse_contextual_pos_rule))
}
_ => None,
}
}
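/// Move the current node's children into a scratch buffer, let `rewrite_fn`
/// reparse them, and return the kind it chooses. The buffer is swapped back
/// afterwards so its allocation can be reused.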
fn rewrite_current_node(&mut self, rewrite_fn: impl FnOnce(&mut ReparseCtx) -> Kind) -> Kind {
assert!(self.reparse_buf.is_empty());
self.builder.move_current_children(&mut self.reparse_buf);
let mut buf = std::mem::take(&mut self.reparse_buf);
let items_start_offset: usize = buf.iter().map(NodeOrToken::text_len).sum();
let mut reparse_ctx = ReparseCtx {
in_buf: &mut buf,
text_pos: self.text_pos - items_start_offset,
sink: self,
};
let new_kind = rewrite_fn(&mut reparse_ctx);
assert!(
reparse_ctx.in_buf.is_empty(),
"rewrite finished with unhandled items"
);
buf.clear();
std::mem::swap(&mut self.reparse_buf, &mut buf);
new_kind
}
fn push_raw(&mut self, n: NodeOrToken) {
self.builder.push_raw(n);
}
}
impl Node {
fn new(kind: Kind, children: Vec<NodeOrToken>, error: bool) -> Self {
let text_len = children.iter().map(|x| x.text_len() as u32).sum();
Node {
kind,
text_len,
abs_pos: 0,
children: children.into(),
error,
}
}
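/// Nodes are built without absolute positions; once the tree is complete we
/// walk it from the root, assigning each node and token its offset from the
/// start of the source.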
pub(crate) fn update_positions_from_root(&mut self) {
self.update_positions_recurse(0)
}
fn update_positions_recurse(&mut self, mut pos: usize) {
self.abs_pos = pos as _;
let children = Arc::make_mut(&mut self.children);
for child in children {
child.update_positions(pos);
pos += child.text_len();
}
}
pub(crate) fn cursor(&self) -> Cursor<'_> {
Cursor::new(self)
}
pub fn iter_tokens(&self) -> impl Iterator<Item = &Token> {
let mut cursor = self.cursor();
std::iter::from_fn(move || cursor.next_token())
}
pub fn iter_children(&self) -> ChildIter<'_> {
ChildIter(Some(self.cursor()))
}
pub fn kind(&self) -> Kind {
self.kind
}
pub fn text_len(&self) -> usize {
self.text_len as usize
}
pub fn range(&self) -> Range<usize> {
let start = self.abs_pos as usize;
start..start + (self.text_len as usize)
}
pub(crate) fn edit(&self, edits: Vec<(Range<usize>, Node)>, skip_parent: bool) -> Node {
edit::apply_edits(self, edits, skip_parent)
}
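/// Recursively collect include statements, stopping once `num` have been found.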
fn find_include_nodes(&self, collect: &mut Vec<IncludeStatement>, num: usize) {
for item in self.iter_children() {
if let Some(node) = item.as_node() {
if let Some(include) = typed::Include::cast(item) {
collect.push(IncludeStatement(include));
if collect.len() == num {
return;
}
} else {
node.find_include_nodes(collect, num);
}
}
}
}
#[doc(hidden)]
pub fn debug_print_structure(&self, include_tokens: bool) {
let mut cursor = self.cursor();
while let Some(thing) = cursor.current() {
match thing {
NodeOrToken::Node(node) => {
let depth = cursor.depth();
let _ = writeln!(
std::io::stderr(),
"{}{} ({}..{})",
&crate::util::SPACES[..depth * 2],
node.kind,
cursor.pos(),
cursor.pos() + node.text_len()
);
}
NodeOrToken::Token(t) if include_tokens => eprint!("{}", t.as_str()),
_ => (),
}
cursor.advance();
}
}
#[doc(hidden)]
pub fn simple_parse_tree(&self) -> String {
let mut result = String::new();
self.parse_tree_impl(0, &mut result).unwrap();
result
}
fn parse_tree_impl(&self, depth: usize, buf: &mut String) -> std::fmt::Result {
use crate::util::SPACES;
let mut pos = self.abs_pos;
writeln!(
buf,
"{}{}@[{}; {})",
&SPACES[..depth * 2],
self.kind,
pos,
pos + self.text_len
)?;
let depth = depth + 1;
for child in self.iter_children() {
match child {
NodeOrToken::Token(Token { kind, text, .. }) => {
let spaces = &SPACES[..depth * 2];
write!(buf, "{spaces}{kind}@{pos}")?;
if kind.is_trivia() {
writeln!(buf, " \"{}\"", text.escape_debug())?;
} else {
writeln!(buf, " \"{text}\"")?;
}
pos += text.len() as u32;
}
NodeOrToken::Node(node) => {
node.parse_tree_impl(depth, buf)?;
pos += node.text_len;
}
}
}
Ok(())
}
}
impl<'a> Iterator for ChildIter<'a> {
type Item = &'a NodeOrToken;
fn next(&mut self) -> Option<Self::Item> {
let current = self.0.as_ref()?.current();
self.0.as_mut()?.step_over();
current
}
}
impl TreeBuilder {
pub(crate) fn start_node(&mut self, kind: Kind) {
let len = self.children.len();
self.parents.push((kind, len));
}
pub(crate) fn token(&mut self, kind: Kind, text: impl Into<SmolStr>) {
let token = Token::new(kind, text.into());
self.push_raw(token.into());
}
fn push_raw(&mut self, item: NodeOrToken) {
self.children.push(item)
}
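/// Drain the children of the node currently being built into `to_buf`.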
fn move_current_children(&mut self, to_buf: &mut Vec<NodeOrToken>) {
if let Some(idx) = self.parents.last().map(|(_, idx)| idx).copied() {
to_buf.extend(self.children.drain(idx..));
}
}
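/// Close the node opened by the matching `start_node` call, optionally
/// replacing its kind with `new_kind`.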
pub(crate) fn finish_node(&mut self, error: bool, new_kind: Option<Kind>) {
let (kind, first_child) = self.parents.pop().unwrap();
let kind = new_kind.unwrap_or(kind);
let node = Node::new(kind, self.children.split_off(first_child), error);
self.push_raw(node.into());
}
pub(crate) fn finish(mut self) -> Node {
assert_eq!(self.children.len(), 1);
self.children.pop().unwrap().into_node().unwrap()
}
}
impl NodeOrToken {
fn update_positions(&mut self, pos: usize) {
match self {
NodeOrToken::Token(t) => t.abs_pos = pos as _,
NodeOrToken::Node(n) => n.update_positions_recurse(pos),
}
}
pub fn is_token(&self) -> bool {
matches!(self, NodeOrToken::Token(_))
}
pub fn token_text(&self) -> Option<&str> {
self.as_token().map(Token::as_str)
}
pub fn kind(&self) -> Kind {
match self {
NodeOrToken::Node(n) => n.kind,
NodeOrToken::Token(t) => t.kind,
}
}
pub fn is_glyph_or_glyph_class(&self) -> bool {
matches!(
self.kind(),
Kind::GlyphName | Kind::Cid | Kind::GlyphClass | Kind::NamedGlyphClass
)
}
pub fn range(&self) -> Range<usize> {
match self {
NodeOrToken::Token(t) => t.range(),
NodeOrToken::Node(n) => n.range(),
}
}
pub fn text_len(&self) -> usize {
match self {
NodeOrToken::Node(n) => n.text_len as usize,
NodeOrToken::Token(t) => t.text.len(),
}
}
pub fn into_node(self) -> Option<Node> {
match self {
NodeOrToken::Node(node) => Some(node),
NodeOrToken::Token(_) => None,
}
}
pub fn as_node(&self) -> Option<&Node> {
match self {
NodeOrToken::Node(node) => Some(node),
NodeOrToken::Token(_) => None,
}
}
pub fn as_token(&self) -> Option<&Token> {
match self {
NodeOrToken::Node(_) => None,
NodeOrToken::Token(token) => Some(token),
}
}
}
impl From<Node> for NodeOrToken {
fn from(src: Node) -> NodeOrToken {
NodeOrToken::Node(src)
}
}
impl From<Token> for NodeOrToken {
fn from(src: Token) -> NodeOrToken {
NodeOrToken::Token(src)
}
}
impl Token {
fn new(kind: Kind, text: SmolStr) -> Self {
Token {
kind,
text,
abs_pos: 0,
}
}
pub fn as_str(&self) -> &str {
&self.text
}
pub fn range(&self) -> Range<usize> {
self.abs_pos as usize..self.abs_pos as usize + self.text.len()
}
}
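/// Try to interpret `text` as a glyph range (`first-last`), treating each
/// hyphen as a possible split point. Errors if no split (or more than one
/// split) produces two known glyph names.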
fn try_split_range(text: &str, glyph_map: &GlyphMap) -> Result<Node, String> {
let mut solution = None;
for idx in text
.bytes()
.enumerate()
.filter_map(|(idx, b)| (b == b'-').then_some(idx))
{
let (head, tail) = text.split_at(idx);
if glyph_map.contains(head) && glyph_map.contains(tail.trim_start_matches('-')) {
if let Some(prev_idx) = solution.replace(idx) {
let (head1, tail1) = text.split_at(prev_idx);
let (head2, tail2) = text.split_at(idx);
let message = format!("the name '{}' contains multiple possible glyph ranges ({} to {} and {} to {}). Please insert spaces around the '-' to clarify your intent.", text, head1, tail1.trim_start_matches('-'), head2, tail2.trim_start_matches('-'));
return Err(message);
}
}
}
solution
.map(|idx| {
let mut builder = TreeBuilder::default();
builder.start_node(Kind::GlyphRange);
let (head, tail) = text.split_at(idx);
builder.token(Kind::GlyphName, head);
builder.token(Kind::Hyphen, "-");
builder.token(Kind::GlyphName, tail.trim_start_matches('-'));
builder.finish_node(false, None);
builder.finish()
})
.ok_or_else(|| format!("'{text}' is neither a known glyph nor a range of known glyphs"))
}
impl Node {
fn debug_impl(&self, f: &mut std::fmt::Formatter, depth: usize) -> std::fmt::Result {
use crate::util::SPACES;
let ws = &SPACES[..depth * 2];
write!(
f,
"\n{ws}{}: abs {} len {} children {}",
self.kind,
self.abs_pos,
self.text_len,
self.children.len()
)?;
let ws = &SPACES[..(depth + 1) * 2];
for child in self.iter_children() {
match child {
NodeOrToken::Token(t) => write!(f, "\n{}'{}' {}", ws, t.text, t.kind)?,
NodeOrToken::Node(n) => n.debug_impl(f, depth + 1)?,
}
}
Ok(())
}
}
impl std::fmt::Debug for Node {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
self.debug_impl(f, 0)
}
}
#[cfg(test)]
mod tests {
use super::*;
static SAMPLE_FEA: &str = include_str!("../test-data/fonttools-tests/mini.fea");
#[test]
fn token_iter() {
let (ast, _errs) = crate::parse::parse_string(SAMPLE_FEA);
let reconstruct = ast
.root()
.iter_tokens()
.map(Token::as_str)
.collect::<String>();
crate::assert_eq_str!(SAMPLE_FEA, reconstruct);
}
}