untex/
token.rs

1#![warn(missing_docs)]
2
3use crate::CharStream;
4use ansi_term::{Colour, Style};
5use lazy_static::lazy_static;
6use std::fmt;
7
/// Enumerates all the possible atoms that can be found in a TeX file.
///
/// The enum carries no data, so it is cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
    /// A commented part.
    Comment,
    /// A linebreak, optionally followed by any number of tabs or spaces.
    Linebreak,
    /// Anything that could be a command (please use a space after a command to properly end it).
    Command,
    /// Escaped math, delimited either by single `$ $` or double `$$ $$` dollar signs.
    Math,
    /// Anything else, that is assumed to be printed out when the TeX file is compiled into PDF.
    Text,
    /// A syntax error occurred when parsing the TeX file.
    Error,
}
24
25lazy_static! {
26    pub static ref text_style: Style = Style::new();
27    pub static ref linebreak_style: Style = Style::new().on(Colour::Red);
28    pub static ref command_style: Colour = Colour::Blue;
29    pub static ref comment_style: Colour = Colour::Green;
30    pub static ref error_style: Style = Colour::Red.bold();
31    pub static ref math_style: Style = Colour::Green.bold();
32}
33
34/// A Token is ... TODO
35#[derive(PartialEq, Clone, Debug)]
36pub struct Token<'source> {
37    pub slice: &'source str,
38    pub kind: TokenKind,
39}
40
41impl<'source> Token<'source> {
42    pub fn new(slice: &'source str, kind: TokenKind) -> Self {
43        Self { slice, kind }
44    }
45}
46
47impl<'source> fmt::Display for Token<'source> {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        match self.kind {
50            TokenKind::Comment => write!(f, "{}", comment_style.paint(self.slice)),
51            TokenKind::Linebreak => write!(f, "{}", linebreak_style.paint(self.slice)),
52            TokenKind::Command => write!(f, "{}", command_style.paint(self.slice)),
53            TokenKind::Math => write!(f, "{}", math_style.paint(self.slice)),
54            TokenKind::Text => write!(f, "{}", text_style.paint(self.slice)),
55            TokenKind::Error => write!(f, "{}", error_style.paint(self.slice)),
56        }
57    }
58}
59
60/// TODO
61#[derive(Debug)]
62pub struct TokenStream<'source> {
63    char_stream: CharStream<'source>,
64    start: usize,
65    current_token_kind: TokenKind,
66}
67
68impl<'source> TokenStream<'source> {
69    pub fn new(char_stream: CharStream<'source>) -> Self {
70        Self {
71            char_stream,
72            start: 0,
73            current_token_kind: TokenKind::Error,
74        }
75    }
76
77    #[inline]
78    fn lineno(&self) -> usize {
79        self.char_stream.lineno
80    }
81
82    #[inline]
83    fn current_kind(&self) -> TokenKind {
84        TokenKind::Command
85    }
86
87    #[inline]
88    fn last_char(&self) -> Option<(usize, char)> {
89        self.char_stream.last_char
90    }
91
92    #[inline]
93    fn next_char(&mut self) -> Option<(usize, char)> {
94        self.char_stream.next()
95    }
96
97    #[inline]
98    fn current_char(&mut self) -> Option<(usize, char)> {
99        if let Some(c) = self.last_char() {
100            Some(c)
101        } else {
102            self.next_char()
103        }
104    }
105
106    #[inline]
107    fn slice(&self, start: usize, end: usize) -> &'source str {
108        &self.char_stream.source[start..end]
109    }
110
111    #[inline]
112    fn current_slice(&self) -> &'source str {
113        let end = match self.last_char() {
114            None => self.char_stream.source.len(),
115            Some((i, _)) => i,
116        };
117        self.slice(self.start, end)
118    }
119
120    #[inline]
121    fn current_token(&self) -> Token<'source> {
122        Token::new(self.current_slice(), self.current_token_kind.clone())
123    }
124}
125
126impl<'source> From<CharStream<'source>> for TokenStream<'source> {
127    fn from(char_stream: CharStream<'source>) -> TokenStream<'source> {
128        TokenStream::new(char_stream)
129    }
130}
131
132impl<'source> Iterator for TokenStream<'source> {
133    type Item = Token<'source>;
134
135    fn next(&mut self) -> Option<Self::Item> {
136        match self.current_char() {
137            None => None,
138            Some((i, c)) => {
139                self.start = i; // Start index for current Token
140                match c {
141                    '\n' => {
142                        // A linebreak is ended by anything that is not as space, a tabulate or a carriage return
143                        loop {
144                            match self.next_char() {
145                                Some((_, c)) if c == ' ' || c == '\r' || c == '\t' => continue,
146                                _ => break,
147                            }
148                        }
149                        self.current_token_kind = TokenKind::Linebreak;
150                        Some(self.current_token())
151                    }
152                    '%' => {
153                        // A comment is ended by a linebreak
154                        loop {
155                            match self.next_char() {
156                                Some((_, c)) if c == '\n' => break,
157                                None => break,
158                                _ => continue,
159                            }
160                        }
161                        self.current_token_kind = TokenKind::Comment;
162                        Some(self.current_token())
163                    }
164                    '\\' => {
165                        // A command is quite complicated...
166                        self.current_token_kind = TokenKind::Command;
167
168                        match self.next_char() {
169                            None => {
170                                self.current_token_kind = TokenKind::Error;
171                                Some(self.current_token())
172                            }
173                            Some((_, c)) => match c {
174                                'a'..='z' | 'A'..='Z' => {
175                                    // First we read the command name
176                                    loop {
177                                        match self.next_char() {
178                                            None => return Some(self.current_token()), // It was last character
179                                            Some((_, c)) => match c {
180                                                'a'..='z' | 'A'..='Z' => continue,
181                                                '{' | '[' => break,
182                                                _ => return Some(self.current_token()), // Anything else after the name ends the command
183                                            },
184                                        }
185                                    }
186
187                                    // Then we look for optional or mandatory arguments
188                                    loop {
189                                        let brac = self.last_char().unwrap().1;
190                                        match brac {
191                                            '{' | '[' => {
192                                                let mut level = 1; // Used to check if we have nested brackets // braces
193                                                loop {
194                                                    // [ + 2 = ], { + 2 = } in ascii
195                                                    let c_brac = ((brac as u8) + 2) as char;
196                                                    // So `c_brac` closes `brac`
197
198                                                    match self.next_char() {
199                                                        None => break,
200                                                        Some((_, c)) => {
201                                                            if c == brac {
202                                                                level += 1;
203                                                            } else if c == c_brac {
204                                                                level -= 1;
205                                                                if level == 0 {
206                                                                    break;
207                                                                }
208                                                            } else if c == '\\' {
209                                                                // In this case, we need to skip
210                                                                // '\{' or '\[ or ...
211                                                                if self.next_char().is_none() {
212                                                                    break;
213                                                                }
214                                                            }
215                                                        }
216                                                    }
217                                                }
218
219                                                if level != 0 {
220                                                    self.current_token_kind = TokenKind::Error;
221                                                    return Some(self.current_token());
222                                                }
223
224                                                if self.next_char().is_none() {
225                                                    break;
226                                                }
227                                            }
228                                            _ => break,
229                                        }
230                                    }
231                                    Some(self.current_token())
232                                }
233                                _ => {
234                                    // '\' is just used tp escape character
235                                    self.next_char();
236                                    self.next_char();
237                                    Some(self.current_token())
238                                }
239                            },
240                        }
241                    }
242                    '$' => {
243                        // A math escaped env is either surrounded by one or two dollar signs
244                        self.current_token_kind = TokenKind::Math;
245
246                        match self.next_char() {
247                            None => {
248                                self.current_token_kind = TokenKind::Error;
249                                return Some(self.current_token());
250                            }
251                            Some((_, c)) => {
252                                // Lookin for next dollar sign
253                                loop {
254                                    match self.next_char() {
255                                        Some((_, ch)) if ch == '$' => {
256                                            self.next_char();
257                                            break;
258                                        }
259                                        None => {
260                                            self.current_token_kind = TokenKind::Error;
261                                            return Some(self.current_token());
262                                        }
263                                        _ => continue,
264                                    }
265                                }
266
267                                // Need double dollars
268                                if c == '$' {
269                                    match self.current_char() {
270                                        Some((_, ch)) if ch == '$' => {
271                                            self.next_char();
272                                        }
273                                        _ => {
274                                            self.current_token_kind = TokenKind::Error;
275                                            return Some(self.current_token());
276                                        }
277                                    }
278                                }
279                            }
280                        }
281                        Some(self.current_token())
282                    }
283                    _ => {
284                        // A text is ended by any other starting token (Comment, ...)
285                        loop {
286                            match self.next_char() {
287                                None => break,
288                                Some((_, c)) if c == '\n' || c == '%' || c == '\\' || c == '$' => {
289                                    break
290                                }
291                                _ => continue,
292                            }
293                        }
294                        self.current_token_kind = TokenKind::Text;
295                        Some(self.current_token())
296                    }
297                }
298            }
299        }
300    }
301}