untex/token.rs
1#![warn(missing_docs)]
2
3use crate::CharStream;
4use ansi_term::{Colour, Style};
5use lazy_static::lazy_static;
6use std::fmt;
7
/// Enumerates all the possible atoms that can be found in a TeX file.
///
/// The enum carries no data, so values are cheap to copy and can be compared
/// or hashed directly.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub enum TokenKind {
    /// A commented part
    Comment,
    /// A linebreak, optionally followed by any number of tabs or spaces
    Linebreak,
    /// Anything that could be a command (please use a space after a command to properly end it)
    Command,
    /// Math escaped, either with simple `$ $` or double `$$ $$` dollar signs
    Math,
    /// Anything else, that is assumed to be printed out when the TeX file is compiled into PDF
    Text,
    /// An error occurred when parsing the TeX file (syntax error)
    Error,
}
24
lazy_static! {
    /// Style used to paint [`TokenKind::Text`] tokens: a default style with nothing set.
    pub static ref text_style: Style = Style::new();
    /// Style used to paint [`TokenKind::Linebreak`] tokens: red background, so that
    /// otherwise invisible whitespace stands out.
    pub static ref linebreak_style: Style = Style::new().on(Colour::Red);
    /// Colour used to paint [`TokenKind::Command`] tokens: blue foreground.
    pub static ref command_style: Colour = Colour::Blue;
    /// Colour used to paint [`TokenKind::Comment`] tokens: green foreground.
    pub static ref comment_style: Colour = Colour::Green;
    /// Style used to paint [`TokenKind::Error`] tokens: bold red foreground.
    pub static ref error_style: Style = Colour::Red.bold();
    /// Style used to paint [`TokenKind::Math`] tokens: bold green foreground.
    pub static ref math_style: Style = Colour::Green.bold();
}
33
/// A piece of the TeX source tagged with the kind of atom it represents.
///
/// A token only borrows from the source text (lifetime `'source`); it never
/// copies the characters it covers.
#[derive(PartialEq, Clone, Debug)]
pub struct Token<'source> {
    /// Part of the source text covered by this token.
    pub slice: &'source str,
    /// Kind of atom that `slice` was recognised as.
    pub kind: TokenKind,
}
40
41impl<'source> Token<'source> {
42 pub fn new(slice: &'source str, kind: TokenKind) -> Self {
43 Self { slice, kind }
44 }
45}
46
47impl<'source> fmt::Display for Token<'source> {
48 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49 match self.kind {
50 TokenKind::Comment => write!(f, "{}", comment_style.paint(self.slice)),
51 TokenKind::Linebreak => write!(f, "{}", linebreak_style.paint(self.slice)),
52 TokenKind::Command => write!(f, "{}", command_style.paint(self.slice)),
53 TokenKind::Math => write!(f, "{}", math_style.paint(self.slice)),
54 TokenKind::Text => write!(f, "{}", text_style.paint(self.slice)),
55 TokenKind::Error => write!(f, "{}", error_style.paint(self.slice)),
56 }
57 }
58}
59
/// An iterator that groups the characters of a [`CharStream`] into [`Token`]s.
#[derive(Debug)]
pub struct TokenStream<'source> {
    /// Underlying character stream; also used to reach the source text when slicing tokens.
    char_stream: CharStream<'source>,
    /// Index in the source at which the token being built starts.
    start: usize,
    /// Kind assigned to the token being built.
    current_token_kind: TokenKind,
}
67
68impl<'source> TokenStream<'source> {
69 pub fn new(char_stream: CharStream<'source>) -> Self {
70 Self {
71 char_stream,
72 start: 0,
73 current_token_kind: TokenKind::Error,
74 }
75 }
76
77 #[inline]
78 fn lineno(&self) -> usize {
79 self.char_stream.lineno
80 }
81
82 #[inline]
83 fn current_kind(&self) -> TokenKind {
84 TokenKind::Command
85 }
86
87 #[inline]
88 fn last_char(&self) -> Option<(usize, char)> {
89 self.char_stream.last_char
90 }
91
92 #[inline]
93 fn next_char(&mut self) -> Option<(usize, char)> {
94 self.char_stream.next()
95 }
96
97 #[inline]
98 fn current_char(&mut self) -> Option<(usize, char)> {
99 if let Some(c) = self.last_char() {
100 Some(c)
101 } else {
102 self.next_char()
103 }
104 }
105
106 #[inline]
107 fn slice(&self, start: usize, end: usize) -> &'source str {
108 &self.char_stream.source[start..end]
109 }
110
111 #[inline]
112 fn current_slice(&self) -> &'source str {
113 let end = match self.last_char() {
114 None => self.char_stream.source.len(),
115 Some((i, _)) => i,
116 };
117 self.slice(self.start, end)
118 }
119
120 #[inline]
121 fn current_token(&self) -> Token<'source> {
122 Token::new(self.current_slice(), self.current_token_kind.clone())
123 }
124}
125
126impl<'source> From<CharStream<'source>> for TokenStream<'source> {
127 fn from(char_stream: CharStream<'source>) -> TokenStream<'source> {
128 TokenStream::new(char_stream)
129 }
130}
131
132impl<'source> Iterator for TokenStream<'source> {
133 type Item = Token<'source>;
134
135 fn next(&mut self) -> Option<Self::Item> {
136 match self.current_char() {
137 None => None,
138 Some((i, c)) => {
139 self.start = i; // Start index for current Token
140 match c {
141 '\n' => {
142 // A linebreak is ended by anything that is not as space, a tabulate or a carriage return
143 loop {
144 match self.next_char() {
145 Some((_, c)) if c == ' ' || c == '\r' || c == '\t' => continue,
146 _ => break,
147 }
148 }
149 self.current_token_kind = TokenKind::Linebreak;
150 Some(self.current_token())
151 }
152 '%' => {
153 // A comment is ended by a linebreak
154 loop {
155 match self.next_char() {
156 Some((_, c)) if c == '\n' => break,
157 None => break,
158 _ => continue,
159 }
160 }
161 self.current_token_kind = TokenKind::Comment;
162 Some(self.current_token())
163 }
164 '\\' => {
165 // A command is quite complicated...
166 self.current_token_kind = TokenKind::Command;
167
168 match self.next_char() {
169 None => {
170 self.current_token_kind = TokenKind::Error;
171 Some(self.current_token())
172 }
173 Some((_, c)) => match c {
174 'a'..='z' | 'A'..='Z' => {
175 // First we read the command name
176 loop {
177 match self.next_char() {
178 None => return Some(self.current_token()), // It was last character
179 Some((_, c)) => match c {
180 'a'..='z' | 'A'..='Z' => continue,
181 '{' | '[' => break,
182 _ => return Some(self.current_token()), // Anything else after the name ends the command
183 },
184 }
185 }
186
187 // Then we look for optional or mandatory arguments
188 loop {
189 let brac = self.last_char().unwrap().1;
190 match brac {
191 '{' | '[' => {
192 let mut level = 1; // Used to check if we have nested brackets // braces
193 loop {
194 // [ + 2 = ], { + 2 = } in ascii
195 let c_brac = ((brac as u8) + 2) as char;
196 // So `c_brac` closes `brac`
197
198 match self.next_char() {
199 None => break,
200 Some((_, c)) => {
201 if c == brac {
202 level += 1;
203 } else if c == c_brac {
204 level -= 1;
205 if level == 0 {
206 break;
207 }
208 } else if c == '\\' {
209 // In this case, we need to skip
210 // '\{' or '\[ or ...
211 if self.next_char().is_none() {
212 break;
213 }
214 }
215 }
216 }
217 }
218
219 if level != 0 {
220 self.current_token_kind = TokenKind::Error;
221 return Some(self.current_token());
222 }
223
224 if self.next_char().is_none() {
225 break;
226 }
227 }
228 _ => break,
229 }
230 }
231 Some(self.current_token())
232 }
233 _ => {
234 // '\' is just used tp escape character
235 self.next_char();
236 self.next_char();
237 Some(self.current_token())
238 }
239 },
240 }
241 }
242 '$' => {
243 // A math escaped env is either surrounded by one or two dollar signs
244 self.current_token_kind = TokenKind::Math;
245
246 match self.next_char() {
247 None => {
248 self.current_token_kind = TokenKind::Error;
249 return Some(self.current_token());
250 }
251 Some((_, c)) => {
252 // Lookin for next dollar sign
253 loop {
254 match self.next_char() {
255 Some((_, ch)) if ch == '$' => {
256 self.next_char();
257 break;
258 }
259 None => {
260 self.current_token_kind = TokenKind::Error;
261 return Some(self.current_token());
262 }
263 _ => continue,
264 }
265 }
266
267 // Need double dollars
268 if c == '$' {
269 match self.current_char() {
270 Some((_, ch)) if ch == '$' => {
271 self.next_char();
272 }
273 _ => {
274 self.current_token_kind = TokenKind::Error;
275 return Some(self.current_token());
276 }
277 }
278 }
279 }
280 }
281 Some(self.current_token())
282 }
283 _ => {
284 // A text is ended by any other starting token (Comment, ...)
285 loop {
286 match self.next_char() {
287 None => break,
288 Some((_, c)) if c == '\n' || c == '%' || c == '\\' || c == '$' => {
289 break
290 }
291 _ => continue,
292 }
293 }
294 self.current_token_kind = TokenKind::Text;
295 Some(self.current_token())
296 }
297 }
298 }
299 }
300 }
301}