1use core::fmt::{Display, Formatter};
7use melodium_engine::designer::Reference;
8use regex::Regex;
9use std::str;
10use std::sync::Arc;
11
12#[derive(Debug, Clone, Hash, PartialEq, Eq)]
16pub struct Word {
17 pub text: String,
19 pub kind: Option<Kind>,
21 pub position: Position,
23}
24
25impl Default for Word {
26 fn default() -> Self {
27 Word {
28 text: String::new(),
29 kind: None,
30 position: Position::default(),
31 }
32 }
33}
34
/// Location of a word inside a script.
///
/// The default value has every field set to `0`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Position {
    /// Byte offset from the very beginning of the script.
    pub absolute_position: usize,
    /// Line holding the position; `get_words` numbers lines starting at 1.
    pub line_number: usize,
    /// Byte offset from the beginning of the line.
    pub line_position: usize,
}
48
49#[derive(Default, Debug, Clone, Hash, PartialEq, Eq)]
50pub struct PositionnedString {
51 pub string: String,
52 pub position: Position,
53}
54
55impl Reference for PositionnedString {}
56
57impl PositionnedString {
58 pub fn remove_indent(&mut self) {
59 let mut prefix = None;
60 for line in self.string.lines() {
61 let trimmed_line = line.trim_start();
62 if !trimmed_line.is_empty() {
63 let whitespaces = line.split_at(line.find(trimmed_line).unwrap()).0;
64 prefix = Some(whitespaces.to_string());
65 break;
66 }
67 }
68
69 if let Some(prefix) = prefix {
70 let mut less_indented_string = String::new();
71 for line in self.string.lines() {
72 less_indented_string.push_str(line.strip_prefix(&prefix).unwrap_or(line));
73 less_indented_string.push_str("\n");
74 }
75 self.string = less_indented_string;
76 }
77 }
78
79 pub fn into_ref(&self) -> Arc<dyn Reference> {
80 Arc::new(self.clone())
81 }
82}
83
84impl From<&Word> for PositionnedString {
85 fn from(word: &Word) -> Self {
86 Self {
87 string: word.text.clone(),
88 position: word.position.clone(),
89 }
90 }
91}
92
/// Kinds of words the tokenizer is able to recognize.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Kind {
    /// Line comment (`// …`) or block comment (`/* … */`).
    Comment,
    /// Annotation, starting with `#` and running to the end of the line.
    Annotation,
    /// `(`
    OpeningParenthesis,
    /// `)`
    ClosingParenthesis,
    /// `{`
    OpeningBrace,
    /// `}`
    ClosingBrace,
    /// `[`
    OpeningBracket,
    /// `]`
    ClosingBracket,
    /// `<`
    OpeningChevron,
    /// `>`
    ClosingChevron,
    /// `=`
    Equal,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `/`
    Slash,
    /// `_`
    Underscore,
    /// `+`
    Plus,
    /// Arrow made of one or more `-` followed by `>`.
    RightArrow,
    /// Plain name.
    Name,
    /// Context reference, a name prefixed with `@`.
    Context,
    /// Function reference, a name prefixed with `|`.
    Function,
    /// Number, optionally negative and optionally with a decimal part.
    Number,
    /// String, either between double quotes or between `${` … `}`.
    String,
    /// Character, between single quotes.
    Character,
    /// Byte, written as `0x` followed by two uppercase hexadecimal digits.
    Byte,
}
151
152impl Display for Kind {
153 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
154 let str = match self {
155 Kind::Comment => "// Comment",
156 Kind::Annotation => "# Annotation",
157 Kind::OpeningParenthesis => "(",
158 Kind::ClosingParenthesis => ")",
159 Kind::OpeningBrace => "{",
160 Kind::ClosingBrace => "}",
161 Kind::OpeningBracket => "[",
162 Kind::ClosingBracket => "]",
163 Kind::OpeningChevron => "<",
164 Kind::ClosingChevron => ">",
165 Kind::Equal => "=",
166 Kind::Colon => ":",
167 Kind::Comma => ",",
168 Kind::Dot => ".",
169 Kind::Slash => "/",
170 Kind::Underscore => "_",
171 Kind::Plus => "+",
172 Kind::RightArrow => "->",
173 Kind::Name => "name",
174 Kind::Context => "context (@Context)",
175 Kind::Function => "function (|function)",
176 Kind::Number => "number",
177 Kind::String => r#"string ("string")"#,
178 Kind::Character => "character ('c')",
179 Kind::Byte => "byte (0x2A)",
180 };
181 write!(f, "{}", str)
182 }
183}
184
/// Outcome of testing whether a text starts with a word of a given kind.
///
/// The derived `Default` is the "not that kind" answer: every flag is `false`
/// and `end_at` is `0`, as the former hand-written implementation returned.
#[derive(Debug, Default)]
struct KindCheck {
    /// Whether the text starts with a word of the tested kind.
    pub is_that_kind: bool,
    /// Byte index, in the tested text, at which the word ends (exclusive).
    pub end_at: usize,
    /// Whether the word is complete and valid for its kind.
    pub is_well_formed: bool,
}
204
205pub fn get_words(script: &str) -> Result<Vec<Word>, Vec<Word>> {
211 let mut words = Vec::new();
212 let mut remaining_script = script.trim_start();
213 let mut actual_position = script.len() - remaining_script.len();
214 let mut kind_check: KindCheck;
215
216 while !remaining_script.is_empty() {
217 let kind: Option<Kind>;
218
219 if {
221 kind_check = manage_comment(remaining_script);
222 kind_check.is_that_kind
223 } {
224 kind = Some(Kind::Comment);
225 }
226 else if {
228 kind_check = manage_annotation(remaining_script);
229 kind_check.is_that_kind
230 } {
231 kind = Some(Kind::Annotation);
232 }
233 else if {
235 kind_check = manage_single_char('(', remaining_script);
236 kind_check.is_that_kind
237 } {
238 kind = Some(Kind::OpeningParenthesis);
239 }
240 else if {
242 kind_check = manage_single_char(')', remaining_script);
243 kind_check.is_that_kind
244 } {
245 kind = Some(Kind::ClosingParenthesis);
246 }
247 else if {
249 kind_check = manage_single_char('{', remaining_script);
250 kind_check.is_that_kind
251 } {
252 kind = Some(Kind::OpeningBrace);
253 }
254 else if {
256 kind_check = manage_single_char('}', remaining_script);
257 kind_check.is_that_kind
258 } {
259 kind = Some(Kind::ClosingBrace);
260 }
261 else if {
263 kind_check = manage_single_char('[', remaining_script);
264 kind_check.is_that_kind
265 } {
266 kind = Some(Kind::OpeningBracket);
267 }
268 else if {
270 kind_check = manage_single_char(']', remaining_script);
271 kind_check.is_that_kind
272 } {
273 kind = Some(Kind::ClosingBracket);
274 }
275 else if {
277 kind_check = manage_single_char('<', remaining_script);
278 kind_check.is_that_kind
279 } {
280 kind = Some(Kind::OpeningChevron);
281 }
282 else if {
284 kind_check = manage_single_char('>', remaining_script);
285 kind_check.is_that_kind
286 } {
287 kind = Some(Kind::ClosingChevron);
288 }
289 else if {
291 kind_check = manage_single_char('=', remaining_script);
292 kind_check.is_that_kind
293 } {
294 kind = Some(Kind::Equal);
295 }
296 else if {
298 kind_check = manage_single_char(':', remaining_script);
299 kind_check.is_that_kind
300 } {
301 kind = Some(Kind::Colon);
302 }
303 else if {
305 kind_check = manage_single_char(',', remaining_script);
306 kind_check.is_that_kind
307 } {
308 kind = Some(Kind::Comma);
309 }
310 else if {
312 kind_check = manage_single_char('.', remaining_script);
313 kind_check.is_that_kind
314 } {
315 kind = Some(Kind::Dot);
316 }
317 else if {
319 kind_check = manage_single_char('/', remaining_script);
320 kind_check.is_that_kind
321 } {
322 kind = Some(Kind::Slash);
323 }
324 else if {
326 kind_check = manage_single_char('_', remaining_script);
327 kind_check.is_that_kind
328 } {
329 kind = Some(Kind::Underscore);
330 }
331 else if {
333 kind_check = manage_single_char('+', remaining_script);
334 kind_check.is_that_kind
335 } {
336 kind = Some(Kind::Plus);
337 }
338 else if {
340 kind_check = manage_right_arrow(remaining_script);
341 kind_check.is_that_kind
342 } {
343 kind = Some(Kind::RightArrow);
344 }
345 else if {
347 kind_check = manage_name(remaining_script);
348 kind_check.is_that_kind
349 } {
350 kind = Some(Kind::Name);
351 }
352 else if {
354 kind_check = manage_context(remaining_script);
355 kind_check.is_that_kind
356 } {
357 kind = Some(Kind::Context);
358 }
359 else if {
361 kind_check = manage_function(remaining_script);
362 kind_check.is_that_kind
363 } {
364 kind = Some(Kind::Function);
365 }
366 else if {
368 kind_check = manage_byte(remaining_script);
369 kind_check.is_that_kind
370 } {
371 kind = Some(Kind::Byte);
372 }
373 else if {
375 kind_check = manage_number(remaining_script);
376 kind_check.is_that_kind
377 } {
378 kind = Some(Kind::Number);
379 }
380 else if {
382 kind_check = manage_string(remaining_script);
383 kind_check.is_that_kind
384 } {
385 kind = Some(Kind::String);
386 }
387 else if {
389 kind_check = manage_char(remaining_script);
390 kind_check.is_that_kind
391 } {
392 kind = Some(Kind::Character);
393 }
394 else {
396 kind_check = KindCheck {
397 is_that_kind: false,
398 end_at: 1,
399 is_well_formed: false,
400 };
401 kind = None;
402 }
403
404 if let Some(splitted_script) = remaining_script.split_at_checked(kind_check.end_at) {
405 let (line, pos_in_line) = get_line_pos(script, actual_position);
406 let word = Word {
407 text: splitted_script.0.to_string(),
408 position: Position {
409 absolute_position: actual_position,
410 line_position: pos_in_line,
411 line_number: line,
412 },
413 kind: kind,
414 };
415
416 words.push(word);
417
418 if !kind_check.is_well_formed {
419 return Err(words);
420 } else {
421 let after_word = splitted_script.1.trim_start();
422 actual_position += remaining_script.len() - after_word.len();
423 remaining_script = after_word;
424 }
425 } else {
426 return Err(words);
427 }
428 }
429
430 Ok(words)
431}
432
/// Converts a byte offset of `text` into a line number and an offset in that
/// line.
///
/// Returns `(line, offset)` where `line` starts at 1 and `offset` is the
/// number of bytes between the beginning of the line and `pos`.
///
/// # Panics
///
/// Panics if `pos` is past the end of `text` or not on a character boundary.
fn get_line_pos(text: &str, pos: usize) -> (usize, usize) {
    let (before, _) = text.split_at(pos);

    let line_number = before.matches('\n').count() + 1;
    // The line starts right after the last newline preceding `pos`, or at the
    // beginning of the text when there is none.
    let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);

    (line_number, pos - line_start)
}
451
452fn manage_comment(text: &str) -> KindCheck {
453 if text.starts_with("//") {
454 let end_of_comment = text.find('\n');
455 KindCheck {
456 is_that_kind: true,
457 end_at: end_of_comment.unwrap_or_else(|| text.len()),
458 is_well_formed: true,
459 }
460 } else if text.starts_with("/*") {
461 let end_of_comment = text.find("*/");
462 KindCheck {
463 is_that_kind: true,
464 end_at: end_of_comment.unwrap_or_else(|| text.len()) + 2,
465 is_well_formed: end_of_comment.is_some(),
466 }
467 } else {
468 KindCheck::default()
469 }
470}
471
472fn manage_annotation(text: &str) -> KindCheck {
473 if text.starts_with('#') {
474 let end_of_annotation = text.find('\n');
475 KindCheck {
476 is_that_kind: true,
477 end_at: end_of_annotation.unwrap_or_else(|| text.len()),
478 is_well_formed: true,
479 }
480 } else {
481 KindCheck::default()
482 }
483}
484
485fn manage_single_char(c: char, text: &str) -> KindCheck {
486 if text.starts_with(c) {
487 KindCheck {
488 is_that_kind: true,
489 end_at: 1,
490 is_well_formed: true,
491 }
492 } else {
493 KindCheck::default()
494 }
495}
496
497fn manage_right_arrow(text: &str) -> KindCheck {
498 lazy_static! {
499 static ref REGEX_RIGHT_ARROW: Regex = Regex::new(r"^-+>").unwrap();
500 }
501 let mat = REGEX_RIGHT_ARROW.find(text);
502 if mat.is_some() {
503 KindCheck {
504 is_that_kind: true,
505 end_at: mat.unwrap().end(),
506 is_well_formed: true,
507 }
508 } else {
509 KindCheck::default()
510 }
511}
512
513fn manage_name(text: &str) -> KindCheck {
514 lazy_static! {
515 static ref REGEX_NAME: Regex =
516 Regex::new(r"^[\p{Alphabetic}\p{M}\p{Pc}\p{Join_Control}]\w*").unwrap();
517 }
518 let mat = REGEX_NAME.find(text);
519 if mat.is_some() {
520 KindCheck {
521 is_that_kind: true,
522 end_at: mat.unwrap().end(),
523 is_well_formed: true,
524 }
525 } else {
526 KindCheck::default()
527 }
528}
529
530fn manage_context(text: &str) -> KindCheck {
531 lazy_static! {
532 static ref REGEX_CONTEXT: Regex =
533 Regex::new(r"^@[\p{Alphabetic}\p{M}\p{Pc}\p{Join_Control}]\w*").unwrap();
534 }
535 let mat = REGEX_CONTEXT.find(text);
536 if mat.is_some() {
537 KindCheck {
538 is_that_kind: true,
539 end_at: mat.unwrap().end(),
540 is_well_formed: true,
541 }
542 } else {
543 KindCheck::default()
544 }
545}
546
547fn manage_function(text: &str) -> KindCheck {
548 lazy_static! {
549 static ref REGEX_CONTEXT: Regex =
550 Regex::new(r"^\|[\p{Alphabetic}\p{M}\p{Pc}\p{Join_Control}]\w*").unwrap();
551 }
552 let mat = REGEX_CONTEXT.find(text);
553 if mat.is_some() {
554 KindCheck {
555 is_that_kind: true,
556 end_at: mat.unwrap().end(),
557 is_well_formed: true,
558 }
559 } else {
560 KindCheck::default()
561 }
562}
563
564fn manage_number(text: &str) -> KindCheck {
565 lazy_static! {
566 static ref REGEX_NUMBER: Regex = Regex::new(r"^-?[0-9]*\.?[0-9]+").unwrap();
567 }
568 let mat = REGEX_NUMBER.find(text);
569 if mat.is_some() {
570 KindCheck {
571 is_that_kind: true,
572 end_at: mat.unwrap().end(),
573 is_well_formed: true,
574 }
575 } else {
576 KindCheck::default()
577 }
578}
579
580fn manage_string(text: &str) -> KindCheck {
581 lazy_static! {
582 static ref REGEX_STRING: Regex = Regex::new(r##"^"(?:[^"\\]|\\.)*""##).unwrap();
583 }
584 if text.starts_with('"') {
585 let mat = REGEX_STRING.find(text);
586 if mat.is_some() {
587 KindCheck {
588 is_that_kind: true,
589 end_at: mat.unwrap().end(),
590 is_well_formed: true,
591 }
592 } else {
593 KindCheck {
594 is_that_kind: true,
595 end_at: text.len(),
596 is_well_formed: false,
597 }
598 }
599 } else if text.starts_with("${") {
600 let num_braces = text.chars().skip(1).take_while(|c| *c == '{').count();
601 let mut end_braces: String = "}".into();
602 for _ in 1..num_braces {
603 end_braces.push('}');
604 }
605 if let Some(end_string_position) = text.find(&end_braces) {
606 KindCheck {
607 is_that_kind: true,
608 end_at: end_string_position + num_braces,
609 is_well_formed: true,
610 }
611 } else {
612 KindCheck {
613 is_that_kind: true,
614 end_at: text.len(),
615 is_well_formed: false,
616 }
617 }
618 } else {
619 KindCheck::default()
620 }
621}
622
623fn manage_char(text: &str) -> KindCheck {
624 lazy_static! {
625 static ref REGEX_CHAR: Regex = Regex::new(r##"^'(?:[^'\\]|\.)+'"##).unwrap();
626 }
627 if text.starts_with('\'') {
628 let mat = REGEX_CHAR.find(text);
629 if mat.is_some() {
630 KindCheck {
631 is_that_kind: true,
632 end_at: mat.unwrap().end(),
633 is_well_formed: true,
634 }
635 } else {
636 KindCheck {
637 is_that_kind: true,
638 end_at: text.len(),
639 is_well_formed: false,
640 }
641 }
642 } else {
643 KindCheck::default()
644 }
645}
646
647fn manage_byte(text: &str) -> KindCheck {
648 lazy_static! {
649 static ref REGEX_BYTE: Regex = Regex::new(r##"^(?:0x[0-9A-F]{2})"##).unwrap();
650 }
651 if text.starts_with("0x") {
652 let mat = REGEX_BYTE.find(text);
653 if mat.is_some() {
654 KindCheck {
655 is_that_kind: true,
656 end_at: mat.unwrap().end(),
657 is_well_formed: true,
658 }
659 } else {
660 KindCheck {
661 is_that_kind: true,
662 end_at: text.len(),
663 is_well_formed: false,
664 }
665 }
666 } else {
667 KindCheck::default()
668 }
669}
670
#[cfg(test)]
mod tests {

    use super::*;

    /// Tokenizes `script` and tells, word after word, whether it is of the
    /// expected `kind`. Panics when the script cannot be fully tokenized.
    fn kind_flags(script: &str, kind: Kind) -> Vec<bool> {
        get_words(script)
            .unwrap()
            .iter()
            .map(|word| word.kind == Some(kind))
            .collect()
    }

    #[test]
    fn test_well_formated_comments() {
        let comments = "// A comment
 //Anoter comment
 Not_a_comment
 /*A continuous comment*/
 /* A
 * quite
 * long
 * comment
 */
 /* A shorter comment */";

        let expected = vec![true, true, false, true, true, true];

        assert_eq!(expected, kind_flags(comments, Kind::Comment));
    }

    #[test]
    fn test_well_formated_numbers() {
        let numbers = "0
 -12
 1.234
 Not_a_number
 -1.234
 -0
 00000000000000000000000000000";

        let expected = vec![true, true, true, false, true, true, true];

        assert_eq!(expected, kind_flags(numbers, Kind::Number));
    }
}