mdxt/inline/math/
parse.rs

1use super::entity::{Entity, matrix::try_parse_matrix, parse_raw_data};
2use super::validate::is_valid;
3use super::{ZERO_ARG_FUNCTIONS, ONE_ARG_FUNCTIONS, TWO_ARG_FUNCTIONS, THREE_ARG_FUNCTIONS, FIVE_ARG_FUNCTIONS};
4use crate::utils::{get_curly_brace_end_index, into_v32, is_alphabet, remove_whitespaces};
5
6pub fn md_to_math(content: &[u32]) -> Vec<Entity> {
7
8    let mut last_index = 0;
9    let mut curr_index = 0;
10    let mut is_reading_alphabets = false;
11    let mut result = vec![];
12
13    while curr_index < content.len() {
14
15        if is_alphabet(&content[curr_index]) && !is_reading_alphabets {
16
17            if last_index < curr_index {
18                let string = remove_whitespaces(&content[last_index..curr_index]);
19
20                if !string.is_empty() {
21
22                    for entity in parse_raw_data(&string) {
23                        result.push(entity);
24                    }
25
26                }
27
28            }
29
30            last_index = curr_index;
31            is_reading_alphabets = true;
32        }
33
34        else if !is_alphabet(&content[curr_index]) && is_reading_alphabets {
35            let curr_word = &content[last_index..curr_index];
36            let (arguments, end_index) = get_arguments(content, curr_index);
37
38            if is_valid(curr_word, &arguments) {
39                result.push(parse(curr_word, &arguments));
40                curr_index = end_index;
41                last_index = end_index + 1;
42            }
43
44            is_reading_alphabets = false;
45        }
46
47        curr_index += 1;
48    }
49
50    if last_index < curr_index {
51        curr_index = curr_index.min(content.len());
52
53        if is_reading_alphabets {
54            let curr_word = &content[last_index..curr_index];
55            let (arguments, _) = get_arguments(content, curr_index);
56
57            if is_valid(curr_word, &arguments) {
58                result.push(parse(curr_word, &arguments));
59            }
60
61            else {
62                let string = remove_whitespaces(&content[last_index..curr_index]);
63
64                if !string.is_empty() {
65
66                    for entity in parse_raw_data(&string) {
67                        result.push(entity);
68                    }
69
70                }
71
72            }
73
74        }
75
76        else {
77            let string = remove_whitespaces(&content[last_index..curr_index]);
78
79            if !string.is_empty() {
80
81                for entity in parse_raw_data(&string) {
82                    result.push(entity);
83                }
84
85            }
86
87        }
88
89    }
90
91    result
92}
93
94pub fn parse(word: &[u32], arguments: &Vec<Vec<u32>>) -> Entity {
95
96    if is_space(word) {
97        Entity::Space(word.len() - 4)
98    }
99
100    // "mat"
101    else if word == &[109, 97, 116] {
102        match try_parse_matrix(arguments) {
103            Ok(elements) => Entity::new_matrix(elements),
104            Err(e) => Entity::RawString(into_v32(&format!("Error: {e}"))),
105        }
106    }
107
108    else if ZERO_ARG_FUNCTIONS.contains(word) && arguments.is_empty() {
109
110        if *word == into_v32("br") {
111            Entity::new_br()
112        }
113
114        // This branch is generated by a python code. see `parse.py`
115        else if word[0] == 'A' as u32 { Entity::new_character(913) }
116
117        else if word[0] == 'B' as u32 { Entity::new_character(914) }
118
119        else if word[0] == 'C' as u32 { Entity::new_character(935) }
120
121        else if word[0] == 'D' as u32 { Entity::new_character(916) }
122
123        else if word[0] == 'E' as u32 {
124
125            if word[1] == 'p' as u32 { Entity::new_character(917) }
126
127            else { Entity::new_character(919) }
128
129        }
130
131        else if word[0] == 'G' as u32 { Entity::new_character(915) }
132
133        else if word[0] == 'I' as u32 { Entity::new_character(921) }
134
135        else if word[0] == 'K' as u32 { Entity::new_character(922) }
136
137        else if word[0] == 'L' as u32 { Entity::new_character(923) }
138
139        else if word[0] == 'M' as u32 { Entity::new_character(924) }
140
141        else if word[0] == 'N' as u32 { Entity::new_character(925) }
142
143        else if word[0] == 'O' as u32 {
144
145            if word[2] == 'e' as u32 { Entity::new_character(937) }
146
147            else { Entity::new_character(927) }
148
149        }
150
151        else if word[0] == 'P' as u32 {
152
153            if word[1] == 'h' as u32 { Entity::new_character(934) }
154
155            else if word[1] == 'i' as u32 { Entity::new_character(928) }
156
157            else { Entity::new_character(936) }
158
159        }
160
161        else if word[0] == 'R' as u32 { Entity::new_character(929) }
162
163        else if word[0] == 'S' as u32 { Entity::new_character(931) }
164
165        else if word[0] == 'T' as u32 {
166
167            if word[1] == 'a' as u32 { Entity::new_character(932) }
168
169            else { Entity::new_character(920) }
170
171        }
172
173        else if word[0] == 'U' as u32 { Entity::new_character(933) }
174
175        else if word[0] == 'X' as u32 { Entity::new_character(926) }
176
177        else if word[0] == 'Z' as u32 { Entity::new_character(918) }
178
179        else if word[0] == 'a' as u32 {
180
181            if word[1] == 'l' as u32 { Entity::new_character(945) }
182
183            else if word[1] == 'n' as u32 { Entity::new_character(8743) }
184
185            else { Entity::new_character(8776) }
186
187        }
188
189        else if word[0] == 'b' as u32 {
190
191            if word[1] == 'e' as u32 {
192
193                if word[2] == 'c' as u32 { Entity::new_character(8757) }
194
195                else { Entity::new_character(946) }
196
197            }
198
199            else { Entity::new_character(8729) }
200
201        }
202
203        else if word[0] == 'c' as u32 {
204
205            if word[1] == 'a' as u32 { Entity::new_character(8745) }
206
207            else if word[1] == 'h' as u32 { Entity::new_character(967) }
208
209            else if word[1] == 'i' as u32 { Entity::new_character(8728) }
210
211            else { Entity::new_character(8746) }
212
213        }
214
215        else if word[0] == 'd' as u32 {
216
217            if word[1] == 'e' as u32 { Entity::new_character(948) }
218
219            else {
220
221                if word[2] == 't' as u32 { Entity::new_character(8901) }
222
223                else { Entity::new_character(8595) }
224
225            }
226
227        }
228
229        else if word[0] == 'e' as u32 {
230
231            if word[1] == 'm' as u32 { Entity::new_character(8709) }
232
233            else if word[1] == 'p' as u32 { Entity::new_character(949) }
234
235            else if word[1] == 'q' as u32 { Entity::new_character(8801) }
236
237            else if word[1] == 't' as u32 { Entity::new_character(951) }
238
239            else { Entity::new_character(8707) }
240
241        }
242
243        else if word[0] == 'f' as u32 { Entity::new_character(8704) }
244
245        else if word[0] == 'g' as u32 {
246
247            if word[1] == 'a' as u32 { Entity::new_character(947) }
248
249            else if word[1] == 'e' as u32 {
250
251                if word.len() == 2 { Entity::new_character(8805) }
252
253                else { Entity::new_character(8805) }
254
255            }
256
257            else if word[1] == 'g' as u32 { Entity::new_character(8811) }
258
259            else { Entity::new_character(62) }
260
261        }
262
263        else if word[0] == 'i' as u32 {
264
265            if word[1] == 'n' as u32 {
266
267                if word.len() == 2 { Entity::new_character(8712) }
268
269                else {
270
271                    if word.len() == 3 { Entity::new_character(8734) }
272
273                    else if word[3] == 'i' as u32 { Entity::new_character(8734) }
274
275                    else { Entity::new_character(8734) }
276
277                }
278
279            }
280
281            else { Entity::new_character(953) }
282
283        }
284
285        else if word[0] == 'k' as u32 { Entity::new_character(954) }
286
287        else if word[0] == 'l' as u32 {
288
289            if word[1] == 'a' as u32 { Entity::new_character(955) }
290
291            else if word[1] == 'c' as u32 { Entity::new_character(123) }
292
293            else if word[1] == 'e' as u32 {
294
295                if word.len() == 2 { Entity::new_character(8804) }
296
297                else if word[2] == 'f' as u32 { Entity::new_character(8592) }
298
299                else { Entity::new_character(8804) }
300
301            }
302
303            else if word[1] == 'l' as u32 { Entity::new_character(8810) }
304
305            else { Entity::new_character(60) }
306
307        }
308
309        else if word[0] == 'm' as u32 {
310
311            if word[1] == 'p' as u32 { Entity::new_character(8723) }
312
313            else { Entity::new_character(956) }
314
315        }
316
317        else if word[0] == 'n' as u32 {
318
319            if word[1] == 'a' as u32 { Entity::new_character(8711) }
320
321            else if word[1] == 'e' as u32 {
322
323                if word.len() == 2 { Entity::new_character(8800) }
324
325                else {
326
327                    if word.len() == 3 { Entity::new_character(8800) }
328
329                    else { Entity::new_character(8802) }
330
331                }
332
333            }
334
335            else if word[1] == 'i' as u32 { Entity::new_character(8715) }
336
337            else if word[1] == 'o' as u32 {
338
339                if word[3] == 'i' as u32 { Entity::new_character(8713) }
340
341                else { Entity::new_character(8716) }
342
343            }
344
345            else if word[1] == 's' as u32 {
346
347                if word[3] == 'b' as u32 {
348
349                    if word.len() == 4 { Entity::new_character(8836) }
350
351                    else { Entity::new_character(8840) }
352
353                }
354
355                else {
356
357                    if word.len() == 4 { Entity::new_character(8837) }
358
359                    else { Entity::new_character(8841) }
360
361                }
362
363            }
364
365            else {
366
367                if word.len() == 2 { Entity::new_character(957) }
368
369                else { Entity::new_character(8709) }
370
371            }
372
373        }
374
375        else if word[0] == 'o' as u32 {
376
377            if word[1] == 'd' as u32 {
378
379                if word[2] == 'i' as u32 { Entity::new_character(8856) }
380
381                else { Entity::new_character(8857) }
382
383            }
384
385            else if word[1] == 'm' as u32 {
386
387                if word[2] == 'e' as u32 { Entity::new_character(969) }
388
389                else {
390
391                    if word[3] == 'c' as u32 { Entity::new_character(959) }
392
393                    else { Entity::new_character(8854) }
394
395                }
396
397            }
398
399            else if word[1] == 'p' as u32 { Entity::new_character(8853) }
400
401            else if word[1] == 'r' as u32 { Entity::new_character(8744) }
402
403            else { Entity::new_character(8855) }
404
405        }
406
407        else if word[0] == 'p' as u32 {
408
409            if word[1] == 'a' as u32 { Entity::new_character(8706) }
410
411            else if word[1] == 'h' as u32 { Entity::new_character(966) }
412
413            else if word[1] == 'i' as u32 { Entity::new_character(960) }
414
415            else if word[1] == 'm' as u32 { Entity::new_character(177) }
416
417            else if word[1] == 'r' as u32 { Entity::new_character(8733) }
418
419            else { Entity::new_character(968) }
420
421        }
422
423        else if word[0] == 'q' as u32 { Entity::new_character(8718) }
424
425        else if word[0] == 'r' as u32 {
426
427            if word[1] == 'c' as u32 { Entity::new_character(125) }
428
429            else if word[1] == 'h' as u32 { Entity::new_character(961) }
430
431            else { Entity::new_character(8594) }
432
433        }
434
435        else if word[0] == 's' as u32 {
436
437            if word[1] == 'i' as u32 {
438
439                if word[2] == 'g' as u32 { Entity::new_character(963) }
440
441                else { Entity::new_character(8771) }
442
443            }
444
445            else if word[1] == 't' as u32 { Entity::new_character(8902) }
446
447            else {
448
449                if word[2] == 'b' as u32 {
450
451                    if word.len() == 3 { Entity::new_character(8834) }
452
453                    else { Entity::new_character(8838) }
454
455                }
456
457                else {
458
459                    if word.len() == 3 { Entity::new_character(8835) }
460
461                    else { Entity::new_character(8839) }
462
463                }
464
465            }
466
467        }
468
469        else if word[0] == 't' as u32 {
470
471            if word[1] == 'a' as u32 { Entity::new_character(964) }
472
473            else if word[1] == 'h' as u32 {
474
475                if word[3] == 'r' as u32 { Entity::new_character(8756) }
476
477                else { Entity::new_character(952) }
478
479            }
480
481            else if word[1] == 'i' as u32 { Entity::new_character(215) }
482
483            else { Entity::new_character(8710) }
484
485        }
486
487        else if word[0] == 'u' as u32 {
488
489            if word[2] == 'a' as u32 { Entity::new_character(8593) }
490
491            else { Entity::new_character(965) }
492
493        }
494
495        else if word[0] == 'x' as u32 { Entity::new_character(958) }
496
497        else { Entity::new_character(950) }
498
499    }
500
501    else if ONE_ARG_FUNCTIONS.contains(word) && arguments.len() == 1 {
502
503        if *word == into_v32("sqrt") {
504            Entity::new_root(vec![], md_to_math(&arguments[0]))
505        }
506
507        else if *word == into_v32("text") {
508            Entity::RawString(arguments[0].clone())
509        }
510
511        else if *word == into_v32("lim") || *word == into_v32("limit") {
512            Entity::new_underover(
513                vec![Entity::new_identifier(into_v32("lim"))],
514                md_to_math(&arguments[0]),
515                vec![],
516                false
517            )
518        }
519
520        else {
521            let operator = if *word == into_v32("hat") {
522                '^' as u32
523            } else if *word == into_v32("bar") {
524                '-' as u32
525            } else if *word == into_v32("dot") {
526                8901
527            } else if *word == into_v32("tilde") {
528                '~' as u32
529            } else if *word == into_v32("vec") {
530                8594
531            } else {
532                unreachable!()
533            };
534
535            Entity::new_underover(
536                md_to_math(&arguments[0]),
537                vec![],
538                vec![Entity::new_character(operator)],
539                false
540            )
541        }
542
543    }
544
545    else if TWO_ARG_FUNCTIONS.contains(word) && arguments.len() == 2 {
546
547        if *word == into_v32("sqrt") || *word == into_v32("root") {
548            Entity::new_root(md_to_math(&arguments[0]), md_to_math(&arguments[1]))
549        }
550
551        else if *word == into_v32("frac") {
552            Entity::new_fraction(
553                md_to_math(&arguments[0]),
554                md_to_math(&arguments[1]),
555                false,
556                false
557            )
558        }
559
560        else if *word == into_v32("cfrac") {
561            Entity::new_fraction(
562                md_to_math(&arguments[0]),
563                md_to_math(&arguments[1]),
564                true,
565                false
566            )
567        }
568
569        else if *word == into_v32("bincoeff") {
570            Entity::new_fraction(
571                md_to_math(&arguments[0]),
572                md_to_math(&arguments[1]),
573                false,
574                true
575            )
576        }
577
578        else if *word == into_v32("sub") {
579            Entity::new_script(
580                md_to_math(&arguments[0]),
581                vec![],
582                vec![],
583                vec![],
584                md_to_math(&arguments[1])
585            )
586        }
587
588        else if *word == into_v32("sup") {
589            Entity::new_script(
590                md_to_math(&arguments[0]),
591                vec![],
592                md_to_math(&arguments[1]),
593                vec![],
594                vec![]
595            )
596        }
597
598        else {
599            let operator = if *word == into_v32("sum") {
600                "∑"
601            } else if *word == into_v32("prod") {
602                "∏"
603            } else if *word == into_v32("int") {
604                "∫"
605            } else if *word == into_v32("iint") {
606                "∬"
607            } else if *word == into_v32("iiint") {
608                "∭"
609            } else if *word == into_v32("oint") {
610                "∮"
611            } else {
612                unreachable!()
613            };
614
615            Entity::new_underover(
616                vec![Entity::new_operator(into_v32(operator))],
617                md_to_math(&arguments[0]),
618                md_to_math(&arguments[1]),
619                true
620            )
621        }
622
623    }
624
625    else if THREE_ARG_FUNCTIONS.contains(word) && arguments.len() == 3 {
626
627        if *word == into_v32("subsup") {
628            Entity::new_script(
629                md_to_math(&arguments[0]),
630                vec![],
631                md_to_math(&arguments[2]),
632                vec![],
633                md_to_math(&arguments[1]),
634            )
635        }
636
637        else {
638            unreachable!()
639        }
640
641    }
642
643    else if FIVE_ARG_FUNCTIONS.contains(word) && arguments.len() == 5 {
644
645        if *word == into_v32("multiscript") {
646            Entity::new_script(
647                md_to_math(&arguments[0]),
648                md_to_math(&arguments[1]),
649                md_to_math(&arguments[2]),
650                md_to_math(&arguments[3]),
651                md_to_math(&arguments[4]),
652            )
653        }
654
655        else {
656            unreachable!()
657        }
658
659    }
660
661    else {
662        unreachable!()
663    }
664
665}
666
667pub fn get_arguments(content: &[u32], mut index: usize) -> (Vec<Vec<u32>>, usize) {  // (Vec<argument>, end_index)
668
669    let mut result = vec![];
670
671    if index >= content.len() {
672        return (result, index);
673    }
674
675    loop {
676
677        match content.get(index) {
678            Some(c) if *c == ' ' as u32 || *c == '\n' as u32 => {
679                index += 1;
680                continue;
681            }
682            _ => {}
683        }
684
685        if content.get(index) == Some(&('{' as u32)) {
686            let arg_end_index = match get_curly_brace_end_index(content, index) {
687                Some(end_index) => end_index,
688                None => {
689                    return (result, index - 1);
690                }
691            };
692
693            result.push(content[(index + 1)..arg_end_index].to_vec());
694            index = arg_end_index + 1;
695        }
696
697        else {
698            return (result, index - 1);
699        }
700
701    }
702
703}
704
/// Returns `true` when `word` is `"space"` with any number of extra `'s'`
/// characters in front of it: `space`, `sspace`, `ssspace`, ...
pub fn is_space(word: &[u32]) -> bool {
    // "space"
    const SUFFIX: [u32; 5] = [115, 112, 97, 99, 101];

    word.strip_suffix(&SUFFIX)
        .map_or(false, |prefix| prefix.iter().all(|&c| c == 's' as u32))
}
711
#[cfg(test)]
mod testbench {
    use super::{get_arguments, is_space};
    use crate::utils::into_v32;

    /// Checks both the extracted arguments and the reported end index.
    #[test]
    fn get_arguments_test() {
        // Each row: (input, start_index, expected arguments, expected end_index)
        let rows = vec![
            ("a{b}{c}", 1, vec!["b", "c"], 6),
            ("a {b}{c}", 1, vec!["b", "c"], 7),
            ("a {b}{c}", 2, vec!["b", "c"], 7),
            ("a {b} {c}", 1, vec!["b", "c"], 8),
            ("a {b} {c}", 2, vec!["b", "c"], 8),
            ("a {b} {c", 1, vec!["b"], 5),
            ("a{b{d}}{c}", 1, vec!["b{d}", "c"], 9),
            ("", 0, vec![], 0),
        ];

        for (input, start_index, expected_arguments, expected_end) in rows {
            let expected_arguments: Vec<Vec<u32>> = expected_arguments
                .iter()
                .map(|argument| into_v32(argument))
                .collect();

            assert_eq!(
                get_arguments(&into_v32(input), start_index),
                (expected_arguments, expected_end),
            );
        }

    }

    /// `space` may be preceded by any number of `s`, and by nothing else.
    #[test]
    fn space_test() {
        let accepted = ["space", "sspace", "ssspace", "sssssssssssssssssspace"];
        let rejected = ["pace", "espace", "sespace", ""];

        for word in accepted {
            assert!(is_space(&into_v32(word)));
        }

        for word in rejected {
            assert!(!is_space(&into_v32(word)));
        }
    }

}