1use super::entity::{Entity, matrix::try_parse_matrix, parse_raw_data};
2use super::validate::is_valid;
3use super::{ZERO_ARG_FUNCTIONS, ONE_ARG_FUNCTIONS, TWO_ARG_FUNCTIONS, THREE_ARG_FUNCTIONS, FIVE_ARG_FUNCTIONS};
4use crate::utils::{get_curly_brace_end_index, into_v32, is_alphabet, remove_whitespaces};
5
6pub fn md_to_math(content: &[u32]) -> Vec<Entity> {
7
8 let mut last_index = 0;
9 let mut curr_index = 0;
10 let mut is_reading_alphabets = false;
11 let mut result = vec![];
12
13 while curr_index < content.len() {
14
15 if is_alphabet(&content[curr_index]) && !is_reading_alphabets {
16
17 if last_index < curr_index {
18 let string = remove_whitespaces(&content[last_index..curr_index]);
19
20 if !string.is_empty() {
21
22 for entity in parse_raw_data(&string) {
23 result.push(entity);
24 }
25
26 }
27
28 }
29
30 last_index = curr_index;
31 is_reading_alphabets = true;
32 }
33
34 else if !is_alphabet(&content[curr_index]) && is_reading_alphabets {
35 let curr_word = &content[last_index..curr_index];
36 let (arguments, end_index) = get_arguments(content, curr_index);
37
38 if is_valid(curr_word, &arguments) {
39 result.push(parse(curr_word, &arguments));
40 curr_index = end_index;
41 last_index = end_index + 1;
42 }
43
44 is_reading_alphabets = false;
45 }
46
47 curr_index += 1;
48 }
49
50 if last_index < curr_index {
51 curr_index = curr_index.min(content.len());
52
53 if is_reading_alphabets {
54 let curr_word = &content[last_index..curr_index];
55 let (arguments, _) = get_arguments(content, curr_index);
56
57 if is_valid(curr_word, &arguments) {
58 result.push(parse(curr_word, &arguments));
59 }
60
61 else {
62 let string = remove_whitespaces(&content[last_index..curr_index]);
63
64 if !string.is_empty() {
65
66 for entity in parse_raw_data(&string) {
67 result.push(entity);
68 }
69
70 }
71
72 }
73
74 }
75
76 else {
77 let string = remove_whitespaces(&content[last_index..curr_index]);
78
79 if !string.is_empty() {
80
81 for entity in parse_raw_data(&string) {
82 result.push(entity);
83 }
84
85 }
86
87 }
88
89 }
90
91 result
92}
93
94pub fn parse(word: &[u32], arguments: &Vec<Vec<u32>>) -> Entity {
95
96 if is_space(word) {
97 Entity::Space(word.len() - 4)
98 }
99
100 else if word == &[109, 97, 116] {
102 match try_parse_matrix(arguments) {
103 Ok(elements) => Entity::new_matrix(elements),
104 Err(e) => Entity::RawString(into_v32(&format!("Error: {e}"))),
105 }
106 }
107
108 else if ZERO_ARG_FUNCTIONS.contains(word) && arguments.is_empty() {
109
110 if *word == into_v32("br") {
111 Entity::new_br()
112 }
113
114 else if word[0] == 'A' as u32 { Entity::new_character(913) }
116
117 else if word[0] == 'B' as u32 { Entity::new_character(914) }
118
119 else if word[0] == 'C' as u32 { Entity::new_character(935) }
120
121 else if word[0] == 'D' as u32 { Entity::new_character(916) }
122
123 else if word[0] == 'E' as u32 {
124
125 if word[1] == 'p' as u32 { Entity::new_character(917) }
126
127 else { Entity::new_character(919) }
128
129 }
130
131 else if word[0] == 'G' as u32 { Entity::new_character(915) }
132
133 else if word[0] == 'I' as u32 { Entity::new_character(921) }
134
135 else if word[0] == 'K' as u32 { Entity::new_character(922) }
136
137 else if word[0] == 'L' as u32 { Entity::new_character(923) }
138
139 else if word[0] == 'M' as u32 { Entity::new_character(924) }
140
141 else if word[0] == 'N' as u32 { Entity::new_character(925) }
142
143 else if word[0] == 'O' as u32 {
144
145 if word[2] == 'e' as u32 { Entity::new_character(937) }
146
147 else { Entity::new_character(927) }
148
149 }
150
151 else if word[0] == 'P' as u32 {
152
153 if word[1] == 'h' as u32 { Entity::new_character(934) }
154
155 else if word[1] == 'i' as u32 { Entity::new_character(928) }
156
157 else { Entity::new_character(936) }
158
159 }
160
161 else if word[0] == 'R' as u32 { Entity::new_character(929) }
162
163 else if word[0] == 'S' as u32 { Entity::new_character(931) }
164
165 else if word[0] == 'T' as u32 {
166
167 if word[1] == 'a' as u32 { Entity::new_character(932) }
168
169 else { Entity::new_character(920) }
170
171 }
172
173 else if word[0] == 'U' as u32 { Entity::new_character(933) }
174
175 else if word[0] == 'X' as u32 { Entity::new_character(926) }
176
177 else if word[0] == 'Z' as u32 { Entity::new_character(918) }
178
179 else if word[0] == 'a' as u32 {
180
181 if word[1] == 'l' as u32 { Entity::new_character(945) }
182
183 else if word[1] == 'n' as u32 { Entity::new_character(8743) }
184
185 else { Entity::new_character(8776) }
186
187 }
188
189 else if word[0] == 'b' as u32 {
190
191 if word[1] == 'e' as u32 {
192
193 if word[2] == 'c' as u32 { Entity::new_character(8757) }
194
195 else { Entity::new_character(946) }
196
197 }
198
199 else { Entity::new_character(8729) }
200
201 }
202
203 else if word[0] == 'c' as u32 {
204
205 if word[1] == 'a' as u32 { Entity::new_character(8745) }
206
207 else if word[1] == 'h' as u32 { Entity::new_character(967) }
208
209 else if word[1] == 'i' as u32 { Entity::new_character(8728) }
210
211 else { Entity::new_character(8746) }
212
213 }
214
215 else if word[0] == 'd' as u32 {
216
217 if word[1] == 'e' as u32 { Entity::new_character(948) }
218
219 else {
220
221 if word[2] == 't' as u32 { Entity::new_character(8901) }
222
223 else { Entity::new_character(8595) }
224
225 }
226
227 }
228
229 else if word[0] == 'e' as u32 {
230
231 if word[1] == 'm' as u32 { Entity::new_character(8709) }
232
233 else if word[1] == 'p' as u32 { Entity::new_character(949) }
234
235 else if word[1] == 'q' as u32 { Entity::new_character(8801) }
236
237 else if word[1] == 't' as u32 { Entity::new_character(951) }
238
239 else { Entity::new_character(8707) }
240
241 }
242
243 else if word[0] == 'f' as u32 { Entity::new_character(8704) }
244
245 else if word[0] == 'g' as u32 {
246
247 if word[1] == 'a' as u32 { Entity::new_character(947) }
248
249 else if word[1] == 'e' as u32 {
250
251 if word.len() == 2 { Entity::new_character(8805) }
252
253 else { Entity::new_character(8805) }
254
255 }
256
257 else if word[1] == 'g' as u32 { Entity::new_character(8811) }
258
259 else { Entity::new_character(62) }
260
261 }
262
263 else if word[0] == 'i' as u32 {
264
265 if word[1] == 'n' as u32 {
266
267 if word.len() == 2 { Entity::new_character(8712) }
268
269 else {
270
271 if word.len() == 3 { Entity::new_character(8734) }
272
273 else if word[3] == 'i' as u32 { Entity::new_character(8734) }
274
275 else { Entity::new_character(8734) }
276
277 }
278
279 }
280
281 else { Entity::new_character(953) }
282
283 }
284
285 else if word[0] == 'k' as u32 { Entity::new_character(954) }
286
287 else if word[0] == 'l' as u32 {
288
289 if word[1] == 'a' as u32 { Entity::new_character(955) }
290
291 else if word[1] == 'c' as u32 { Entity::new_character(123) }
292
293 else if word[1] == 'e' as u32 {
294
295 if word.len() == 2 { Entity::new_character(8804) }
296
297 else if word[2] == 'f' as u32 { Entity::new_character(8592) }
298
299 else { Entity::new_character(8804) }
300
301 }
302
303 else if word[1] == 'l' as u32 { Entity::new_character(8810) }
304
305 else { Entity::new_character(60) }
306
307 }
308
309 else if word[0] == 'm' as u32 {
310
311 if word[1] == 'p' as u32 { Entity::new_character(8723) }
312
313 else { Entity::new_character(956) }
314
315 }
316
317 else if word[0] == 'n' as u32 {
318
319 if word[1] == 'a' as u32 { Entity::new_character(8711) }
320
321 else if word[1] == 'e' as u32 {
322
323 if word.len() == 2 { Entity::new_character(8800) }
324
325 else {
326
327 if word.len() == 3 { Entity::new_character(8800) }
328
329 else { Entity::new_character(8802) }
330
331 }
332
333 }
334
335 else if word[1] == 'i' as u32 { Entity::new_character(8715) }
336
337 else if word[1] == 'o' as u32 {
338
339 if word[3] == 'i' as u32 { Entity::new_character(8713) }
340
341 else { Entity::new_character(8716) }
342
343 }
344
345 else if word[1] == 's' as u32 {
346
347 if word[3] == 'b' as u32 {
348
349 if word.len() == 4 { Entity::new_character(8836) }
350
351 else { Entity::new_character(8840) }
352
353 }
354
355 else {
356
357 if word.len() == 4 { Entity::new_character(8837) }
358
359 else { Entity::new_character(8841) }
360
361 }
362
363 }
364
365 else {
366
367 if word.len() == 2 { Entity::new_character(957) }
368
369 else { Entity::new_character(8709) }
370
371 }
372
373 }
374
375 else if word[0] == 'o' as u32 {
376
377 if word[1] == 'd' as u32 {
378
379 if word[2] == 'i' as u32 { Entity::new_character(8856) }
380
381 else { Entity::new_character(8857) }
382
383 }
384
385 else if word[1] == 'm' as u32 {
386
387 if word[2] == 'e' as u32 { Entity::new_character(969) }
388
389 else {
390
391 if word[3] == 'c' as u32 { Entity::new_character(959) }
392
393 else { Entity::new_character(8854) }
394
395 }
396
397 }
398
399 else if word[1] == 'p' as u32 { Entity::new_character(8853) }
400
401 else if word[1] == 'r' as u32 { Entity::new_character(8744) }
402
403 else { Entity::new_character(8855) }
404
405 }
406
407 else if word[0] == 'p' as u32 {
408
409 if word[1] == 'a' as u32 { Entity::new_character(8706) }
410
411 else if word[1] == 'h' as u32 { Entity::new_character(966) }
412
413 else if word[1] == 'i' as u32 { Entity::new_character(960) }
414
415 else if word[1] == 'm' as u32 { Entity::new_character(177) }
416
417 else if word[1] == 'r' as u32 { Entity::new_character(8733) }
418
419 else { Entity::new_character(968) }
420
421 }
422
423 else if word[0] == 'q' as u32 { Entity::new_character(8718) }
424
425 else if word[0] == 'r' as u32 {
426
427 if word[1] == 'c' as u32 { Entity::new_character(125) }
428
429 else if word[1] == 'h' as u32 { Entity::new_character(961) }
430
431 else { Entity::new_character(8594) }
432
433 }
434
435 else if word[0] == 's' as u32 {
436
437 if word[1] == 'i' as u32 {
438
439 if word[2] == 'g' as u32 { Entity::new_character(963) }
440
441 else { Entity::new_character(8771) }
442
443 }
444
445 else if word[1] == 't' as u32 { Entity::new_character(8902) }
446
447 else {
448
449 if word[2] == 'b' as u32 {
450
451 if word.len() == 3 { Entity::new_character(8834) }
452
453 else { Entity::new_character(8838) }
454
455 }
456
457 else {
458
459 if word.len() == 3 { Entity::new_character(8835) }
460
461 else { Entity::new_character(8839) }
462
463 }
464
465 }
466
467 }
468
469 else if word[0] == 't' as u32 {
470
471 if word[1] == 'a' as u32 { Entity::new_character(964) }
472
473 else if word[1] == 'h' as u32 {
474
475 if word[3] == 'r' as u32 { Entity::new_character(8756) }
476
477 else { Entity::new_character(952) }
478
479 }
480
481 else if word[1] == 'i' as u32 { Entity::new_character(215) }
482
483 else { Entity::new_character(8710) }
484
485 }
486
487 else if word[0] == 'u' as u32 {
488
489 if word[2] == 'a' as u32 { Entity::new_character(8593) }
490
491 else { Entity::new_character(965) }
492
493 }
494
495 else if word[0] == 'x' as u32 { Entity::new_character(958) }
496
497 else { Entity::new_character(950) }
498
499 }
500
501 else if ONE_ARG_FUNCTIONS.contains(word) && arguments.len() == 1 {
502
503 if *word == into_v32("sqrt") {
504 Entity::new_root(vec![], md_to_math(&arguments[0]))
505 }
506
507 else if *word == into_v32("text") {
508 Entity::RawString(arguments[0].clone())
509 }
510
511 else if *word == into_v32("lim") || *word == into_v32("limit") {
512 Entity::new_underover(
513 vec![Entity::new_identifier(into_v32("lim"))],
514 md_to_math(&arguments[0]),
515 vec![],
516 false
517 )
518 }
519
520 else {
521 let operator = if *word == into_v32("hat") {
522 '^' as u32
523 } else if *word == into_v32("bar") {
524 '-' as u32
525 } else if *word == into_v32("dot") {
526 8901
527 } else if *word == into_v32("tilde") {
528 '~' as u32
529 } else if *word == into_v32("vec") {
530 8594
531 } else {
532 unreachable!()
533 };
534
535 Entity::new_underover(
536 md_to_math(&arguments[0]),
537 vec![],
538 vec![Entity::new_character(operator)],
539 false
540 )
541 }
542
543 }
544
545 else if TWO_ARG_FUNCTIONS.contains(word) && arguments.len() == 2 {
546
547 if *word == into_v32("sqrt") || *word == into_v32("root") {
548 Entity::new_root(md_to_math(&arguments[0]), md_to_math(&arguments[1]))
549 }
550
551 else if *word == into_v32("frac") {
552 Entity::new_fraction(
553 md_to_math(&arguments[0]),
554 md_to_math(&arguments[1]),
555 false,
556 false
557 )
558 }
559
560 else if *word == into_v32("cfrac") {
561 Entity::new_fraction(
562 md_to_math(&arguments[0]),
563 md_to_math(&arguments[1]),
564 true,
565 false
566 )
567 }
568
569 else if *word == into_v32("bincoeff") {
570 Entity::new_fraction(
571 md_to_math(&arguments[0]),
572 md_to_math(&arguments[1]),
573 false,
574 true
575 )
576 }
577
578 else if *word == into_v32("sub") {
579 Entity::new_script(
580 md_to_math(&arguments[0]),
581 vec![],
582 vec![],
583 vec![],
584 md_to_math(&arguments[1])
585 )
586 }
587
588 else if *word == into_v32("sup") {
589 Entity::new_script(
590 md_to_math(&arguments[0]),
591 vec![],
592 md_to_math(&arguments[1]),
593 vec![],
594 vec![]
595 )
596 }
597
598 else {
599 let operator = if *word == into_v32("sum") {
600 "∑"
601 } else if *word == into_v32("prod") {
602 "∏"
603 } else if *word == into_v32("int") {
604 "∫"
605 } else if *word == into_v32("iint") {
606 "∬"
607 } else if *word == into_v32("iiint") {
608 "∭"
609 } else if *word == into_v32("oint") {
610 "∮"
611 } else {
612 unreachable!()
613 };
614
615 Entity::new_underover(
616 vec![Entity::new_operator(into_v32(operator))],
617 md_to_math(&arguments[0]),
618 md_to_math(&arguments[1]),
619 true
620 )
621 }
622
623 }
624
625 else if THREE_ARG_FUNCTIONS.contains(word) && arguments.len() == 3 {
626
627 if *word == into_v32("subsup") {
628 Entity::new_script(
629 md_to_math(&arguments[0]),
630 vec![],
631 md_to_math(&arguments[2]),
632 vec![],
633 md_to_math(&arguments[1]),
634 )
635 }
636
637 else {
638 unreachable!()
639 }
640
641 }
642
643 else if FIVE_ARG_FUNCTIONS.contains(word) && arguments.len() == 5 {
644
645 if *word == into_v32("multiscript") {
646 Entity::new_script(
647 md_to_math(&arguments[0]),
648 md_to_math(&arguments[1]),
649 md_to_math(&arguments[2]),
650 md_to_math(&arguments[3]),
651 md_to_math(&arguments[4]),
652 )
653 }
654
655 else {
656 unreachable!()
657 }
658
659 }
660
661 else {
662 unreachable!()
663 }
664
665}
666
667pub fn get_arguments(content: &[u32], mut index: usize) -> (Vec<Vec<u32>>, usize) { let mut result = vec![];
670
671 if index >= content.len() {
672 return (result, index);
673 }
674
675 loop {
676
677 match content.get(index) {
678 Some(c) if *c == ' ' as u32 || *c == '\n' as u32 => {
679 index += 1;
680 continue;
681 }
682 _ => {}
683 }
684
685 if content.get(index) == Some(&('{' as u32)) {
686 let arg_end_index = match get_curly_brace_end_index(content, index) {
687 Some(end_index) => end_index,
688 None => {
689 return (result, index - 1);
690 }
691 };
692
693 result.push(content[(index + 1)..arg_end_index].to_vec());
694 index = arg_end_index + 1;
695 }
696
697 else {
698 return (result, index - 1);
699 }
700
701 }
702
703}
704
/// Tells whether `word` is a spacing keyword: the letters `space` preceded by
/// zero or more additional `s` characters (`space`, `sspace`, `ssspace`, ...).
pub fn is_space(word: &[u32]) -> bool {
    // The code points of "space".
    const SUFFIX: [u32; 5] = [115, 112, 97, 99, 101];

    if word.len() < SUFFIX.len() {
        return false;
    }

    let boundary = word.len() - SUFFIX.len();
    let (padding, tail) = (&word[..boundary], &word[boundary..]);

    tail == &SUFFIX[..] && padding.iter().all(|&c| c == 's' as u32)
}
711
#[cfg(test)]
mod testbench {
    use super::{get_arguments, is_space};
    use crate::utils::into_v32;

    /// Runs `get_arguments` on `input` from `start_index` and checks both the
    /// collected arguments and the returned end index.
    fn assert_arguments(
        input: &str,
        start_index: usize,
        expected_arguments: &[&str],
        expected_end_index: usize,
    ) {
        let expected_arguments: Vec<Vec<u32>> = expected_arguments
            .iter()
            .map(|argument| into_v32(argument))
            .collect();

        assert_eq!(
            get_arguments(&into_v32(input), start_index),
            (expected_arguments, expected_end_index)
        );
    }

    #[test]
    fn get_arguments_test() {
        // Arguments directly after the word, and separated by blanks.
        assert_arguments("a{b}{c}", 1, &["b", "c"], 6);
        assert_arguments("a {b}{c}", 1, &["b", "c"], 7);
        assert_arguments("a {b}{c}", 2, &["b", "c"], 7);
        assert_arguments("a {b} {c}", 1, &["b", "c"], 8);
        assert_arguments("a {b} {c}", 2, &["b", "c"], 8);

        // An unclosed group ends the collection.
        assert_arguments("a {b} {c", 1, &["b"], 5);

        // Nested braces stay inside the argument.
        assert_arguments("a{b{d}}{c}", 1, &["b{d}", "c"], 9);

        // Empty input.
        assert_arguments("", 0, &[], 0);
    }

    #[test]
    fn space_test() {
        for accepted in ["space", "sspace", "ssspace", "sssssssssssssssssspace"] {
            assert!(is_space(&into_v32(accepted)));
        }

        for rejected in ["pace", "espace", "sespace", ""] {
            assert!(!is_space(&into_v32(rejected)));
        }
    }
}