/// Kind of a [`Token`].
///
/// The enum carries no data, so it is cheap to copy and can be compared
/// directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) enum TokenType {
    /// Plain text.
    Put,
    /// `*...*` span.
    Bold,
    /// `**...**` span.
    Strong,
    /// `_..._` span.
    Italic,
    /// `__...__` span.
    Emphasis,
    /// `[...]` link name.
    LinkName,
    /// `(...)` link target following a link name.
    LinkDir,
    /// `{...}` attribute block.
    Attr,
    /// `~...~` span.
    Sub,
    /// `^...^` span.
    Sup,
    /// `@...@` span.
    Span,
    /// `` `...` `` span.
    Code,
    /// `~~...~~` span.
    Strike,
    /// `--...--` span.
    Under,
    /// Run of `#` at the very start of the input.
    Header,
    /// `<...>` span.
    Html,
    /// `*` list marker at the very start of the input.
    ListEl,
    /// `#. ` list marker at the very start of the input.
    NumberedListEl,
    /// Explicit line break written as `\n` (backslash followed by `n`).
    LineBreak,
    /// Not produced by `tokenize`; presumably a paragraph built by a later stage.
    Para,
    /// Not produced by `tokenize`; presumably an unordered list built by a later stage.
    UList,
    /// Not produced by `tokenize`; presumably an ordered list built by a later stage.
    OList,
    /// Not produced by `tokenize`; presumably a list container built by a later stage.
    ListBlock,
    /// `!...!` span.
    Image,
    /// Raw text opened by `\=` and closed by `=`.
    Raw,
    /// Input that starts with `|`; the rest of the text is collected verbatim.
    TableRow,
    /// Not produced by `tokenize`; presumably a table cell built by a later stage.
    TableCell,
    /// Not produced by `tokenize`; presumably a table header built by a later stage.
    TableHeader,
    /// Not produced by `tokenize`; presumably a whole table built by a later stage.
    Table,
}
33
34#[derive(Clone, Debug)]
35pub(crate) struct Token {
36 pub(crate) class: TokenType,
37 pub(crate) content: String,
38 pub(crate) subtokens: Vec<Token>,
39 pub(crate) attributes: String,
40}
41
42impl Token {
43 pub(crate) fn new() -> Self {
44 Self { class: TokenType::Put, content: String::new(), subtokens: Vec::new(), attributes: String::new() }
45 }
46 pub(crate) fn n_para() -> Self {
47 Self { class: TokenType::Para, content: String::new(), subtokens: Vec::new(), attributes: String::new() }
48 }
49 pub(crate) fn init(class: TokenType, content: String) -> Self {
50 Self { class: class, content: content, subtokens: Vec::new(), attributes: String::new() }
51 }
52 pub(crate) fn init_sub(class: TokenType, tcontent: Vec<Self>, content: String) -> Self {
53 Self { class: class, content: content, subtokens: tcontent, attributes: String::new() }
54 }
55 pub(crate) fn tokenize_content(self: &mut Self, borders: usize) {
56 self.subtokens = tokenize(&self.content[borders..self.content.len()-borders]).0;
57 }
58 pub(crate) fn tokenize_unclosed(self: &mut Self, borders: usize) {
59 self.subtokens = tokenize(&self.content[borders..self.content.len()]).0;
60 }
61}
62
63pub(crate) fn tokenize(input: &str) -> (Vec<Token>, String) {
64 let mut tokens:Vec<Token> = vec![];
65 let mut current_token: Token = Token::new();
66 let mut escaping = false;
67
68 let mut warnings = String::new();
69
70 let mut nlist_wait_space = false;
71
72 let mut strong_wait = false; for (pos, cha) in input.char_indices() {
74 if cha == '\\'{
75 if escaping {
76 escaping = false;
77 current_token.content += &cha.to_string();
78 } else {
79 escaping = true;
80 match current_token.class {
81 TokenType::Bold | TokenType::Italic | TokenType::Strong | TokenType::Emphasis | TokenType::LinkName | TokenType::Sub | TokenType::Sup | TokenType::Code | TokenType::Span | TokenType::Under | TokenType::Strike => current_token.content += &cha.to_string(),
82 _ => (),
83 }
84 }
85 } else {
86 match current_token.class {
87 TokenType::Put => {
88 match cha {
89 '*' => {
90 if !escaping {
91 push_token(&mut tokens, ¤t_token);
92 current_token = Token::init(TokenType::Bold, cha.to_string());
93 } else { current_token.content += &cha.to_string(); }
94 },
95 '_' => {
96 if !escaping {
97 push_token(&mut tokens, ¤t_token);
98 current_token = Token::init(TokenType::Italic, cha.to_string());
99 } else { current_token.content += &cha.to_string(); }
100 },
101 '=' => {
102 if escaping {
103 push_token(&mut tokens, ¤t_token);
104 current_token = Token::init(TokenType::Raw, String::new());
105 } else { current_token.content += &cha.to_string(); }
106 },
107 '[' => {
108 if !escaping {
109 push_token(&mut tokens, ¤t_token);
110 current_token = Token::init(TokenType::LinkName, cha.to_string());
111 } else { current_token.content += &cha.to_string(); }
112 },
113 '~' => {
114 if !escaping {
115 push_token(&mut tokens, ¤t_token);
116 current_token = Token::init(TokenType::Sub, cha.to_string());
117 } else { current_token.content += &cha.to_string(); }
118 },
119 '^' => {
120 if !escaping {
121 push_token(&mut tokens, ¤t_token);
122 current_token = Token::init(TokenType::Sup, cha.to_string());
123 } else { current_token.content += &cha.to_string(); }
124 },
125 '!' => {
126 if !escaping {
127 push_token(&mut tokens, ¤t_token);
128 current_token = Token::init(TokenType::Image, cha.to_string());
129 } else { current_token.content += &cha.to_string(); }
130 },
131 '`' => {
132 if !escaping {
133 push_token(&mut tokens, ¤t_token);
134 current_token = Token::init(TokenType::Code, cha.to_string());
135 } else { current_token.content += &cha.to_string(); }
136 },
137 '@' => {
138 if !escaping {
139 push_token(&mut tokens, ¤t_token);
140 current_token = Token::init(TokenType::Span, cha.to_string());
141 } else { current_token.content += &cha.to_string(); }
142 },
143 '-' => {
144 if !escaping {
145 push_token(&mut tokens, ¤t_token);
146 current_token = Token::init(TokenType::Under, cha.to_string());
147 } else { current_token.content += &cha.to_string(); }
148 },
149 '#' => {
150 if pos == 0 { current_token = Token::init(TokenType::Header, cha.to_string()); }
151 else { current_token.content += &cha.to_string(); }
152 },
153 '<' => {
154 if !escaping {
155 push_token(&mut tokens, ¤t_token);
156 current_token = Token::init(TokenType::Html, cha.to_string());
157 } else { current_token.content += &cha.to_string(); }
158 },
159 '(' => {
160 if !escaping {
161 match tokens.last() {
162 None => current_token.content += &cha.to_string(),
163 Some(last_token) => {
164 match last_token.class {
165 TokenType::LinkName => current_token = Token::init(TokenType::LinkDir, cha.to_string()),
166 _ => current_token.content += &cha.to_string(),
167 }
168 }
169 }
170 } else { current_token.content += &cha.to_string(); }
171 },
172 '{' => {
173 if !escaping {
174 push_token(&mut tokens, ¤t_token);
175 match tokens.last() {
176 None => current_token = Token::init(TokenType::Attr, cha.to_string()),
177 Some(last_token) => {
178 match last_token.class {
179 TokenType::Put => {
180 tokens.pop();
181 current_token.content += &cha.to_string();
182 },
183 _ => current_token = Token::init(TokenType::Attr, cha.to_string()),
184 }
185 }
186 }
187 } else { current_token.content += &cha.to_string(); }
188 },
189 '|' => {
190 if !escaping {
191 if tokens.is_empty() && current_token.content.is_empty() {
192 push_token(&mut tokens, ¤t_token);
193 current_token = Token::init(TokenType::TableRow, String::new());
194 } else { current_token.content += &cha.to_string() }
195 } else { current_token.content += &cha.to_string() }
196 }
197 'n' => {
198 if escaping {
199 push_token(&mut tokens, ¤t_token);
200 current_token = Token::init(TokenType::LineBreak, String::from("BR"));
201 push_token(&mut tokens, ¤t_token);
202 current_token = Token::new();
203 } else {
204 current_token.content += &cha.to_string();
205 }
206 },
207 _ => current_token.content += &cha.to_string(),
208 }
209 },
210 TokenType::TableRow => current_token.content += &cha.to_string(),
211 TokenType::Raw => {
212 match cha {
213 '=' => {
214 if !escaping {
215 push_token(&mut tokens, ¤t_token);
216 current_token = Token::new();
217 }
218 },
219 _ => current_token.content += &cha.to_string()
220 }
221 }
222 TokenType::Bold => {
223 current_token.content += &cha.to_string();
224 match cha {
225 '*' => {
226 if current_token.content == "**" && !escaping { current_token.class = TokenType::Strong; }
227 else if !escaping {
228 current_token.tokenize_content(1);
229 push_token(&mut tokens, ¤t_token);
230 current_token = Token::new();
231 }
232 },
233 ' ' => {
234 if current_token.content == "* " && !escaping {
235 if pos != 1 { current_token.class = TokenType::Put; }
236 else {
237 current_token.class = TokenType::ListEl;
238 push_token(&mut tokens, ¤t_token);
239 current_token = Token::new();
240 }
241 }
242 },
243 _ => (),
244 }
245 },
246 TokenType::Strong => {
247 current_token.content += &cha.to_string();
248 match cha {
249 '*' => {
250 if !(pos == 2 && current_token.content == "***") {
251 if !escaping && !strong_wait { strong_wait = true; }
252 else if !escaping && strong_wait {
253 current_token.tokenize_content(2);
254 push_token(&mut tokens, ¤t_token);
255 current_token = Token::new();
256 strong_wait = false;
257 } else { strong_wait = false; }
258 } else {
259 current_token.class = TokenType::ListEl;
260 }
261 },
262 ' ' => {
263 if current_token.content == "** " && !escaping {
264 if pos != 2 { current_token.class = TokenType::Put; }
265 else {
266 current_token.class = TokenType::ListEl;
267 push_token(&mut tokens, ¤t_token);
268 current_token = Token::new();
269 }
270 }
271 },
272 _ => (),
273 }
274 },
275 TokenType::ListEl => {
276 current_token.content += &cha.to_string();
277 match cha {
278 '*' => (),
279 ' ' => {
280 push_token(&mut tokens, ¤t_token);
281 current_token = Token::new();
282 },
283 _ => current_token.class = TokenType::Put,
284 }
285 }
286 TokenType::Italic => {
287 current_token.content += &cha.to_string();
288 match cha {
289 '_' => {
290 if current_token.content == "__" && !escaping { current_token.class = TokenType::Emphasis; }
291 else if !escaping {
292 current_token.tokenize_content(1);
293 push_token(&mut tokens, ¤t_token);
294 current_token = Token::new();
295 }
296 },
297 ' ' => if current_token.content == "_ " && !escaping { current_token.class = TokenType::Put },
298 _ => (),
299 }
300 },
301 TokenType::Emphasis => {
302 current_token.content += &cha.to_string();
303 match cha {
304 '_' => {
305 if !escaping && !strong_wait { strong_wait = true; }
306 else if !escaping && strong_wait {
307 current_token.tokenize_content(2);
308 push_token(&mut tokens, ¤t_token);
309 current_token = Token::new();
310 strong_wait = false;
311 } else { strong_wait = false; }
312 },
313 ' ' => if current_token.content == "__ " && !escaping { current_token.class = TokenType::Put },
314 _ => (),
315 }
316 },
317 TokenType::Sub => {
318 current_token.content += &cha.to_string();
319 match cha {
320 '~' => {
321 if !escaping {
322 if current_token.content == "~~" {
323 current_token.class = TokenType::Strike;
324 } else {
325 current_token.tokenize_content(1);
326 push_token(&mut tokens, ¤t_token);
327 current_token = Token::new();
328 }
329 }
330 },
331 ' ' => if current_token.content == "~ " && !escaping { current_token.class = TokenType::Put },
332 _ => (),
333 }
334 },
335 TokenType::Image => {
336 current_token.content += &cha.to_string();
337 match cha {
338 '!' => {
339 if !escaping {
340 if current_token.content == "!!" {
341 current_token.class = TokenType::Put;
342 } else {
343 push_token(&mut tokens, ¤t_token);
344 current_token = Token::new();
345 }
346 }
347 },
348 ' ' => {
349 if !escaping {
350 if current_token.content == "! " {
351 current_token.class = TokenType::Put;
352 }
353 }
354 },
355 _ => (),
356 }
357 },
358 TokenType::Sup => {
359 current_token.content += &cha.to_string();
360 match cha {
361 '^' => {
362 if !escaping {
363 current_token.tokenize_content(1);
364 push_token(&mut tokens, ¤t_token);
365 current_token = Token::new();
366 }
367 },
368 ' ' => if current_token.content == "^ " && !escaping { current_token.class = TokenType::Put },
369 _ => (),
370 }
371 },
372 TokenType::Span => {
373 current_token.content += &cha.to_string();
374 match cha {
375 '@' => {
376 if !escaping {
377 current_token.tokenize_content(1);
378 push_token(&mut tokens, ¤t_token);
379 current_token = Token::new();
380 }
381 },
382 ' ' => if current_token.content == "@ " && !escaping { current_token.class = TokenType::Put },
383 _ => (),
384 }
385 },
386 TokenType::Code => {
387 current_token.content += &cha.to_string();
388 match cha {
389 '`' => {
390 if !escaping {
391 current_token.tokenize_content(1);
392 push_token(&mut tokens, ¤t_token);
393 current_token = Token::new();
394 }
395 },
396 _ => (),
397 }
398 },
399 TokenType::Strike => {
400 current_token.content += &cha.to_string();
401 match cha {
402 '~' => {
403 if !escaping && !strong_wait { strong_wait = true; }
404 else if !escaping && strong_wait {
405 current_token.tokenize_content(2);
406 push_token(&mut tokens, ¤t_token);
407 current_token = Token::new();
408 strong_wait = false;
409 } else { strong_wait = false; }
410 },
411 ' ' => if current_token.content == "~~ " && !escaping { current_token.class = TokenType::Put },
412 _ => (),
413 }
414 },
415 TokenType::Under => {
416 if current_token.content == "-" {
417 current_token.content += &cha.to_string();
418 match cha {
419 '-' => (),
420 _ => current_token.class = TokenType::Put,
421 }
422 }
423 else {
424 current_token.content += &cha.to_string();
425 match cha {
426 '-' => {
427 if !escaping && !strong_wait { strong_wait = true; }
428 else if !escaping && strong_wait {
429 current_token.tokenize_content(2);
430 push_token(&mut tokens, ¤t_token);
431 current_token = Token::new();
432 strong_wait = false;
433 } else { strong_wait = false; }
434 },
435 ' ' => if current_token.content == "-- " && !escaping { current_token.class = TokenType::Put },
436 _ => (),
437 }
438 }
439 },
440 TokenType::Html => {
441 current_token.content += &cha.to_string();
442 match cha {
443 '>' => {
444 if !escaping {
445 current_token.tokenize_content(1);
446 push_token(&mut tokens, ¤t_token);
447 current_token = Token::new();
448 }
449 },
450 ' ' => if current_token.content == "< " && !escaping { current_token.class = TokenType::Put },
451 _ => (),
452 }
453 },
454 TokenType::LinkName => {
455 current_token.content += &cha.to_string();
456 match cha {
457 ']' => {
458 if !escaping {
459 current_token.tokenize_content(1);
460 push_token(&mut tokens, ¤t_token);
461 current_token = Token::new();
462 }
463 },
464 _ => (),
465 }
466 },
467 TokenType::LinkDir => {
468 current_token.content += &cha.to_string();
469 match cha {
470 ')' => {
471 if !escaping {
472 push_token(&mut tokens, ¤t_token);
473 current_token = Token::new();
474 }
475 },
476 _ => (),
477 }
478 },
479 TokenType::Attr => {
480 current_token.content += &cha.to_string();
481 match cha {
482 '}' => {
483 if !escaping {
484 match tokens.last_mut() {
485 None => {
486 push_token(&mut tokens, ¤t_token);
487 },
488 Some(last_token) => {
489 last_token.attributes = current_token.content.clone();
490 },
491 }
492 current_token = Token::new();
493 }
494 },
495 _ => (),
496 }
497 },
498 TokenType::Header => {
499 match cha {
500 '#' => {
501 nlist_wait_space = false;
502 current_token.content += &cha.to_string();
503 },
504 '{' => {
505 nlist_wait_space = false;
506 push_token(&mut tokens, ¤t_token);
507 current_token = Token::init(TokenType::Attr, cha.to_string());
508 },
509 ' ' => {
510 if !nlist_wait_space {
511 push_token(&mut tokens, ¤t_token);
512 current_token = Token::new();
513 } else {
514 current_token.content += " ";
515 current_token.class = TokenType::NumberedListEl;
516 push_token(&mut tokens, ¤t_token);
517 current_token = Token::new();
518 }
519 nlist_wait_space = false;
520 },
521 '.' => {
522 if !nlist_wait_space { nlist_wait_space = true; }
523 else {
524 push_token(&mut tokens, ¤t_token);
525 current_token = Token::new();
526 current_token.content += &cha.to_string();
527 nlist_wait_space = false;
528 }
529 }
530 _ => {
531 nlist_wait_space = false;
532 push_token(&mut tokens, ¤t_token);
533 current_token = Token::new();
534 current_token.content += &cha.to_string();
535 }
536 }
537 },
538 _ => panic!("Reached undefined token type {:?}", current_token.class),
539 }
540 }
541 if escaping && cha != '\\' { escaping = false; }
542 }
543 if !current_token.content.is_empty() {
544 match current_token.class {
545 TokenType::Put | TokenType::TableRow => (),
546 _ => warnings += &format!("WARNING: Unclosed {:?} token at {}\n", current_token.class, current_token.content),
547 }
548 match current_token.class {
549 TokenType::Bold | TokenType::Italic | TokenType::Sub | TokenType::Sup | TokenType::LinkName | TokenType::LinkDir | TokenType::Attr | TokenType::Image | TokenType::Html | TokenType::Code | TokenType::Span => {
550 push_token(&mut tokens, &Token::init(TokenType::Put, current_token.content[0..1].to_string()));
551 current_token.tokenize_unclosed(1);
552 tokens.append(&mut current_token.subtokens);
553 },
554 TokenType::Strong | TokenType::Emphasis | TokenType::Strike => {
555 push_token(&mut tokens, &Token::init(TokenType::Put, current_token.content[0..2].to_string()));
556 current_token.tokenize_unclosed(2);
557 tokens.append(&mut current_token.subtokens);
558 },
559 TokenType::Under => {
560 if current_token.content == "-" {
561 push_token(&mut tokens, &Token::init(TokenType::Put, current_token.content));
562 tokens.append(&mut current_token.subtokens);
563 } else {
564 push_token(&mut tokens, &Token::init(TokenType::Put, current_token.content[0..2].to_string()));
565 current_token.tokenize_unclosed(2);
566 tokens.append(&mut current_token.subtokens);
567 }
568 },
569 TokenType::Put | TokenType::TableRow => push_token(&mut tokens, ¤t_token),
570 _ => {
571 push_token(&mut tokens, ¤t_token);
572 warnings += "The unclosing of the last token was impossible to handle for Kami, so the raw text has been outputted. Please contact the project maintainer about this.\n";
573 }
574 }
575 }
576 (tokens, warnings)
577}
578
579pub(crate) fn push_token(list: &mut Vec<Token>, token: &Token) {
580 if token.content != "" || !token.subtokens.is_empty() { list.push(token.clone()); }
581}
582
583