tex_engine/tex/
token.rs

1/*!
2    [`Token`]s are the basic "characters" a TeX Engine operates on.
3
4    There are two kinds of [`Token`]s:
5    - A *Character Token* is a pair of a character (of type `Char:`[`CharType`]) and its [`CategoryCode`],
6        represented by [`BaseToken::Char`].
7    - A *Control Sequence Token* is a [`TeXStr`] (a string of characters) and is represented by [`BaseToken::CS`],
8        usually generated by `\` followed by a string of letters (or a single non-letter character).
9 */
10
11use std::fmt::{Debug, Display, Formatter};
12use crate::tex::catcodes::CategoryCode;
13use crate::tex::commands::Command;
14use crate::utils::Ptr;
15use crate::utils::strings::{CharType, TeXStr};
16
17/// A [`BaseToken`] bundles the actually TeX-relevant data of a [`Token`], which is cloned often
18/// and required by all [`Token`] implementations
19#[derive(Clone,PartialEq)]
20pub enum BaseToken<C:CharType> {
21    /// A control sequence token with the provided name
22    CS(TeXStr<C>),
23    /// An active character token with the provided character
24    Char(C, CategoryCode)
25}
26impl<C:CharType> Display for BaseToken<C> {
27    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
28        match self {
29            BaseToken::Char(c, _) => write!(f, "{}", c.char_str()),
30            BaseToken::CS(n) => write!(f, "\\{}", n)
31        }
32    }
33}
34impl<C:CharType> Debug for BaseToken<C> {
35    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
36        use ansi_term::Colour::*;
37        use CategoryCode::*;
38        match self {
39            BaseToken::Char(c, cc) => write!(f, "'{}'", match cc {
40                BeginGroup => Green.bold().paint(c.char_str()),
41                EndGroup => Green.paint(c.char_str()),
42                Active => Red.paint(c.char_str()),
43                Space => ansi_term::Style::new().paint(c.char_str()),
44                Parameter => Yellow.paint(c.char_str()),
45                AlignmentTab => Blue.paint(c.char_str()),
46                MathShift => Purple.paint(c.char_str()),
47                Subscript => Cyan.paint(c.char_str()),
48                Superscript => Cyan.bold().paint(c.char_str()),
49                Letter => White.bold().paint(c.char_str()),
50                Comment => White.paint(c.char_str()),
51                _ => ansi_term::Style::new().paint(c.char_str())
52            }),
53            BaseToken::CS(n) => write!(f, "{}{}", Red.paint("\\"), Red.paint(n.to_string()))
54        }
55    }
56}
57
58pub trait Token:PartialEq+Clone+Display+Debug+'static{
59    type Char:CharType;
60    /// The actual TeX-relevant data of the [`Token`]
61    fn base(&self) -> &BaseToken<Self::Char>;
62    /// The [`CategoryCode`] of the [`Token`]
63    fn catcode(&self) -> CategoryCode {
64        match self.base() {
65            BaseToken::Char(_, cat) => *cat,
66            BaseToken::CS(_) => CategoryCode::Escape
67        }
68    }
69    /// Construct a new [`Token`] from a [`BaseToken`]
70    fn new(base:BaseToken<Self::Char>,sourceref:Option<(Ptr<String>,(usize,usize),(usize,usize))>) -> Self;
71    /// Clone this [`Token`] with a new [`SourceReference`] of type [`SourceReference::Expansion`]
72    /// (if the [`Token`] is a [`TokenWithSourceref`]).
73    fn with_ref(&self, token:&Ptr<Self>, cmd:&Ptr<Command<Self>>) -> Self;
74    fn sourceref_trace(&self) -> Option<String>;
75
76    /// Parse a string into a list of [`Token`]s of [`CategoryCode::Other`]
77    fn from_str(s:String) -> Vec<Self>;
78}
79
80impl<C:CharType> Token for BaseToken<C> {
81    type Char = C;
82    fn base(&self) -> &BaseToken<C> { self }
83    fn new(base:BaseToken<C>,sourceref:Option<(Ptr<String>,(usize,usize),(usize,usize))>) -> Self { base }
84    fn with_ref(&self, _: &Ptr<Self>, _: &Ptr<Command<Self>>) -> Self {
85        self.clone()
86    }
87    fn sourceref_trace(&self) -> Option<String> { None }
88    fn from_str(s:String) -> Vec<Self> {
89        let mut ret = Vec::with_capacity(s.len());
90        let mut iter = s.as_bytes().to_vec().into_iter();
91        while let Some(c) = C::from_u8_iter(&mut iter) {
92            ret.push(BaseToken::Char(c, CategoryCode::Other))
93        }
94        ret
95    }
96}
97
98/// A list of [`Token`]s
99pub struct TokenList<T:Token>(pub Vec<T>);
100impl<T:Token> Into<TokenList<T>> for Vec<T> {
101    fn into(self) -> TokenList<T> { TokenList(self) }
102}
103impl<T:Token> Into<Vec<T>> for TokenList<T> {
104    fn into(self) -> Vec<T> { self.0 }
105}
106impl<T:Token> Display for TokenList<T> {
107    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
108        for t in &self.0 {
109            write!(f, "{}", t.base())?;
110        }
111        Ok(())
112    }
113}
114impl<T:Token> Debug for TokenList<T> {
115    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
116        for t in &self.0 {
117            write!(f, "{:?}", t.base())?;
118        }
119        Ok(())
120    }
121}
122
123/// A [`Token`] bundling a [`BaseToken`] with a [`SourceReference`].
124#[derive(Clone)]
125pub struct TokenWithSourceref<C:CharType>{
126    /// The actual TeX-relevant data of the [`Token`]
127    pub base: BaseToken<C>,
128    /// The [`SourceReference`] of the [`Token`]
129    pub sourceref: Option<SourceReference<Self>>
130}
131impl<C:CharType> Token for TokenWithSourceref<C> {
132    type Char = C;
133    fn base(&self) -> &BaseToken<C> { &self.base }
134    fn new(base:BaseToken<C>,sourceref:Option<(Ptr<String>,(usize,usize),(usize,usize))>) -> Self {
135        Self { base, sourceref: sourceref.map(|tr| SourceReference::File {
136            file: tr.0,
137            start: tr.1,
138            end: tr.2
139        }) }
140    }
141    fn with_ref(&self, token:&Ptr<Self>, cmd:&Ptr<Command<Self>>) -> Self {
142        Self {
143            base: self.base.clone(),
144            sourceref: Some(SourceReference::Expansion {token:token.clone(),cmd:cmd.clone()})
145        }
146    }
147    fn sourceref_trace(&self) -> Option<String> {
148        match &self.sourceref {
149            Some(SourceReference::File {file, start,end}) => Some(format!("File {} at {}:{} - {}:{}", file, start.0,start.1,end.0,end.1)),
150            Some(SourceReference::Expansion {token, ..}) => {
151                let mut trace = format!("Expanded from {}",token);
152                match token.sourceref_trace() {
153                    Some(s) => {
154                        trace.push_str("\n - ");
155                        trace.push_str(&s)
156                    },
157                    None => ()
158                }
159                Some(trace)
160            },
161            None => None
162        }
163    }
164    fn from_str(s: String) -> Vec<Self> {
165        BaseToken::from_str(s).into_iter().map(|b| Self { base: b, sourceref: None }).collect()
166    }
167}
168impl<C:CharType> PartialEq for TokenWithSourceref<C> {
169    fn eq(&self, other: &Self) -> bool { self.base == other.base }
170}
171impl<C:CharType> Display for TokenWithSourceref<C> {
172    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Display::fmt(&self.base,f) }
173}
174impl<C:CharType> Debug for TokenWithSourceref<C> {
175    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
176        match self.sourceref {
177            Some(ref s) => write!(f, "{:?} at {:?}", self.base, s),
178            None => write!(f, "{:?}", self.base)
179        }
180    }
181}
182
183
184/// A [`SourceReference`] allows tracking the origin of a [`Token`]. It is either a file reference
185/// or a reference to an expansion: The result of expanding a control sequence token.
186#[derive(Clone)]
187pub enum SourceReference<T:Token> {
188    /// A reference to a file, with the file name and the start and end position of the
189    /// [`Token`] in the file.
190    File{file: Ptr<String>,start:(usize,usize),end:(usize,usize)},
191    /// A reference to an expansion, with the [`Token`] that was expanded via [`Command`].
192    Expansion{token: Ptr<T>,cmd:Ptr<Command<T>>}
193}
194impl<T:Token> Debug for SourceReference<T> {
195    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
196        match self {
197            SourceReference::File{file,start,end} => write!(f, "File {}; {:?} - {:?}", file, start, end),
198            SourceReference::Expansion{token,cmd} => write!(f, "Expansion of {} via {:?}", token, cmd)
199        }
200    }
201}