use core::fmt::{Display, Formatter};
use melodium_engine::designer::Reference;
use regex::Regex;
use std::str;
use std::sync::Arc;
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct Word {
pub text: String,
pub kind: Option<Kind>,
pub position: Position,
}
impl Default for Word {
fn default() -> Self {
Word {
text: String::new(),
kind: None,
position: Position::default(),
}
}
}
/// Location of a piece of text inside a script.
///
/// When filled in by `get_words`, all offsets are expressed in bytes.
#[derive(Default, Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Position {
/// Byte offset from the very start of the script.
pub absolute_position: usize,
/// Line number; `get_words` counts lines starting at 1.
pub line_number: usize,
/// Byte offset from the start of the line.
pub line_position: usize,
}
/// A string together with the position it was found at.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq)]
pub struct PositionnedString {
/// The text itself.
pub string: String,
/// Where the text is located.
pub position: Position,
}
// Empty implementation: lets a `PositionnedString` be handed out as
// `Arc<dyn Reference>` (see `PositionnedString::into_ref`).
impl Reference for PositionnedString {}
impl PositionnedString {
    /// Strips the indentation of the first non-blank line from every line.
    ///
    /// The leading whitespace of the first line containing a non-whitespace
    /// character is used as the prefix to remove. Lines that do not start with
    /// that exact prefix are kept untouched. When such a line exists the string
    /// is rebuilt with every line terminated by `\n`; a string made only of
    /// blank lines is left unchanged.
    pub fn remove_indent(&mut self) {
        let indent = self
            .string
            .lines()
            .find(|line| !line.trim_start().is_empty())
            .map(|line| {
                // The trimmed line is a suffix of `line`, so the length
                // difference is exactly the leading whitespace.
                let indent_len = line.len() - line.trim_start().len();
                line[..indent_len].to_owned()
            });

        let Some(indent) = indent else {
            return;
        };

        let dedented = self
            .string
            .lines()
            .map(|line| line.strip_prefix(indent.as_str()).unwrap_or(line))
            .fold(String::new(), |mut out, line| {
                out.push_str(line);
                out.push('\n');
                out
            });
        self.string = dedented;
    }

    /// Returns a shared, type-erased copy of this positioned string.
    pub fn into_ref(&self) -> Arc<dyn Reference> {
        let owned: Self = self.clone();
        Arc::new(owned)
    }
}
impl From<&Word> for PositionnedString {
fn from(word: &Word) -> Self {
Self {
string: word.text.clone(),
position: word.position.clone(),
}
}
}
/// Kinds of words recognised by `get_words`.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
pub enum Kind {
/// `//` line comment or `/* ... */` block comment.
Comment,
/// Text starting with `#`, running up to the end of the line.
Annotation,
/// `(`
OpeningParenthesis,
/// `)`
ClosingParenthesis,
/// `{`
OpeningBrace,
/// `}`
ClosingBrace,
/// `[`
OpeningBracket,
/// `]`
ClosingBracket,
/// `<`
OpeningChevron,
/// `>`
ClosingChevron,
/// `=`
Equal,
/// `:`
Colon,
/// `,`
Comma,
/// `.`
Dot,
/// `/`
Slash,
/// `_`
Underscore,
/// `+`
Plus,
/// One or more `-` followed by `>`, such as `->`.
RightArrow,
/// Identifier.
Name,
/// Identifier prefixed with `@`.
Context,
/// Identifier prefixed with `|`.
Function,
/// Numeric literal, optionally negative and/or with a decimal part.
Number,
/// `"..."` literal, or `${...}` raw literal.
String,
/// `'c'` literal.
Character,
/// `0x` followed by two uppercase hexadecimal digits, such as `0x2A`.
Byte,
}
impl Display for Kind {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
let str = match self {
Kind::Comment => "// Comment",
Kind::Annotation => "# Annotation",
Kind::OpeningParenthesis => "(",
Kind::ClosingParenthesis => ")",
Kind::OpeningBrace => "{",
Kind::ClosingBrace => "}",
Kind::OpeningBracket => "[",
Kind::ClosingBracket => "]",
Kind::OpeningChevron => "<",
Kind::ClosingChevron => ">",
Kind::Equal => "=",
Kind::Colon => ":",
Kind::Comma => ",",
Kind::Dot => ".",
Kind::Slash => "/",
Kind::Underscore => "_",
Kind::Plus => "+",
Kind::RightArrow => "->",
Kind::Name => "name",
Kind::Context => "context (@Context)",
Kind::Function => "function (|function)",
Kind::Number => "number",
Kind::String => r#"string ("string")"#,
Kind::Character => "character ('c')",
Kind::Byte => "byte (0x2A)",
};
write!(f, "{}", str)
}
}
/// Outcome of probing the start of a text for one kind of word.
///
/// The derived `Default` (`false`, `0`, `false`) means "not that kind".
#[derive(Debug, Default)]
struct KindCheck {
    /// Whether the text starts with a word of the probed kind.
    pub is_that_kind: bool,
    /// Byte offset, relative to the probed text, where the word ends.
    pub end_at: usize,
    /// Whether the word is complete and valid for its kind.
    pub is_well_formed: bool,
}
pub fn get_words(script: &str) -> Result<Vec<Word>, Vec<Word>> {
let mut words = Vec::new();
let mut remaining_script = script.trim_start();
let mut actual_position = script.len() - remaining_script.len();
let mut kind_check: KindCheck;
while !remaining_script.is_empty() {
let kind: Option<Kind>;
if {
kind_check = manage_comment(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Comment);
}
else if {
kind_check = manage_annotation(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Annotation);
}
else if {
kind_check = manage_single_char('(', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::OpeningParenthesis);
}
else if {
kind_check = manage_single_char(')', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::ClosingParenthesis);
}
else if {
kind_check = manage_single_char('{', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::OpeningBrace);
}
else if {
kind_check = manage_single_char('}', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::ClosingBrace);
}
else if {
kind_check = manage_single_char('[', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::OpeningBracket);
}
else if {
kind_check = manage_single_char(']', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::ClosingBracket);
}
else if {
kind_check = manage_single_char('<', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::OpeningChevron);
}
else if {
kind_check = manage_single_char('>', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::ClosingChevron);
}
else if {
kind_check = manage_single_char('=', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Equal);
}
else if {
kind_check = manage_single_char(':', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Colon);
}
else if {
kind_check = manage_single_char(',', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Comma);
}
else if {
kind_check = manage_single_char('.', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Dot);
}
else if {
kind_check = manage_single_char('/', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Slash);
}
else if {
kind_check = manage_single_char('_', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Underscore);
}
else if {
kind_check = manage_single_char('+', remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Plus);
}
else if {
kind_check = manage_right_arrow(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::RightArrow);
}
else if {
kind_check = manage_name(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Name);
}
else if {
kind_check = manage_context(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Context);
}
else if {
kind_check = manage_function(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Function);
}
else if {
kind_check = manage_byte(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Byte);
}
else if {
kind_check = manage_number(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Number);
}
else if {
kind_check = manage_string(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::String);
}
else if {
kind_check = manage_char(remaining_script);
kind_check.is_that_kind
} {
kind = Some(Kind::Character);
}
else {
kind_check = KindCheck {
is_that_kind: false,
end_at: 1,
is_well_formed: false,
};
kind = None;
}
if let Some(splitted_script) = remaining_script.split_at_checked(kind_check.end_at) {
let (line, pos_in_line) = get_line_pos(script, actual_position);
let word = Word {
text: splitted_script.0.to_string(),
position: Position {
absolute_position: actual_position,
line_position: pos_in_line,
line_number: line,
},
kind: kind,
};
words.push(word);
if !kind_check.is_well_formed {
return Err(words);
} else {
let after_word = splitted_script.1.trim_start();
actual_position += remaining_script.len() - after_word.len();
remaining_script = after_word;
}
} else {
return Err(words);
}
}
Ok(words)
}
/// Converts the byte offset `pos` in `text` into `(line, column)`.
///
/// The line number is 1-based; the column is the byte offset from the start
/// of that line. Panics if `pos` is past the end of `text` or not on a
/// character boundary.
fn get_line_pos(text: &str, pos: usize) -> (usize, usize) {
    let before = &text[..pos];
    let line_number = before.bytes().filter(|&byte| byte == b'\n').count() + 1;
    // The current line starts right after the last newline, if any.
    let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
    (line_number, pos - line_start)
}
/// Probes `text` for a leading comment.
///
/// * `//` comments run up to (not including) the next newline, or to the end
///   of the text; they are always well formed.
/// * `/* ... */` comments end right after the closing `*/`. The closing
///   delimiter is searched after the opening one, so `/*/` is not a complete
///   comment. When no closing delimiter exists, the word spans the rest of
///   the text and is flagged as malformed.
fn manage_comment(text: &str) -> KindCheck {
    if text.starts_with("//") {
        KindCheck {
            is_that_kind: true,
            end_at: text.find('\n').unwrap_or(text.len()),
            is_well_formed: true,
        }
    } else if let Some(body) = text.strip_prefix("/*") {
        let closing = body.find("*/");
        KindCheck {
            is_that_kind: true,
            // `closing` is relative to `body`: add both delimiter lengths to
            // get the end offset in `text`, and never point past the text.
            end_at: closing.map_or(text.len(), |idx| "/*".len() + idx + "*/".len()),
            is_well_formed: closing.is_some(),
        }
    } else {
        KindCheck::default()
    }
}
/// Probes `text` for a leading annotation: a `#` and everything up to (not
/// including) the next newline, or to the end of the text.
fn manage_annotation(text: &str) -> KindCheck {
    if !text.starts_with('#') {
        return KindCheck::default();
    }
    let line_end = text.find('\n').unwrap_or(text.len());
    KindCheck {
        is_that_kind: true,
        end_at: line_end,
        is_well_formed: true,
    }
}
/// Probes `text` for the single character `c` at its very start.
///
/// The reported end is the UTF-8 length of `c`, so the offset stays on a
/// character boundary even for non-ASCII characters.
fn manage_single_char(c: char, text: &str) -> KindCheck {
    if text.starts_with(c) {
        KindCheck {
            is_that_kind: true,
            end_at: c.len_utf8(),
            is_well_formed: true,
        }
    } else {
        KindCheck::default()
    }
}
/// Probes `text` for a leading right arrow: one or more `-` immediately
/// followed by `>`.
fn manage_right_arrow(text: &str) -> KindCheck {
    lazy_static! {
        static ref REGEX_RIGHT_ARROW: Regex = Regex::new(r"^-+>").unwrap();
    }
    match REGEX_RIGHT_ARROW.find(text) {
        Some(arrow) => KindCheck {
            is_that_kind: true,
            end_at: arrow.end(),
            is_well_formed: true,
        },
        None => KindCheck::default(),
    }
}
/// Probes `text` for a leading name: one alphabetic, mark, connector
/// punctuation or join-control character, followed by any number of word
/// characters.
fn manage_name(text: &str) -> KindCheck {
    lazy_static! {
        static ref REGEX_NAME: Regex =
            Regex::new(r"^[\p{Alphabetic}\p{M}\p{Pc}\p{Join_Control}]\w*").unwrap();
    }
    match REGEX_NAME.find(text) {
        Some(name) => KindCheck {
            is_that_kind: true,
            end_at: name.end(),
            is_well_formed: true,
        },
        None => KindCheck::default(),
    }
}
/// Probes `text` for a leading context: an `@` immediately followed by a name
/// (same character rules as for plain names).
fn manage_context(text: &str) -> KindCheck {
    lazy_static! {
        static ref REGEX_CONTEXT: Regex =
            Regex::new(r"^@[\p{Alphabetic}\p{M}\p{Pc}\p{Join_Control}]\w*").unwrap();
    }
    match REGEX_CONTEXT.find(text) {
        Some(context) => KindCheck {
            is_that_kind: true,
            end_at: context.end(),
            is_well_formed: true,
        },
        None => KindCheck::default(),
    }
}
fn manage_function(text: &str) -> KindCheck {
lazy_static! {
static ref REGEX_CONTEXT: Regex =
Regex::new(r"^\|[\p{Alphabetic}\p{M}\p{Pc}\p{Join_Control}]\w*").unwrap();
}
let mat = REGEX_CONTEXT.find(text);
if mat.is_some() {
KindCheck {
is_that_kind: true,
end_at: mat.unwrap().end(),
is_well_formed: true,
}
} else {
KindCheck::default()
}
}
/// Probes `text` for a leading number: an optional `-`, optional integer
/// digits, an optional `.`, then at least one digit.
fn manage_number(text: &str) -> KindCheck {
    lazy_static! {
        static ref REGEX_NUMBER: Regex = Regex::new(r"^-?[0-9]*\.?[0-9]+").unwrap();
    }
    match REGEX_NUMBER.find(text) {
        Some(number) => KindCheck {
            is_that_kind: true,
            end_at: number.end(),
            is_well_formed: true,
        },
        None => KindCheck::default(),
    }
}
/// Probes `text` for a leading string literal.
///
/// Two forms are recognised:
/// * `"..."`, where a backslash escapes the character that follows it;
/// * `${...}`, opened by `$` and one or more `{`, and closed by the first run
///   of as many `}` as there were opening braces.
///
/// A literal that is opened but never closed spans the rest of the text and
/// is flagged as malformed.
fn manage_string(text: &str) -> KindCheck {
    lazy_static! {
        static ref REGEX_STRING: Regex = Regex::new(r##"^"(?:[^"\\]|\\.)*""##).unwrap();
    }
    let closing_end = if text.starts_with('"') {
        REGEX_STRING.find(text).map(|literal| literal.end())
    } else if text.starts_with("${") {
        // At least one brace follows the `$`, so the closing run is never empty.
        let num_braces = text.chars().skip(1).take_while(|c| *c == '{').count();
        let closing = "}".repeat(num_braces);
        text.find(&closing).map(|start| start + num_braces)
    } else {
        return KindCheck::default();
    };
    KindCheck {
        is_that_kind: true,
        end_at: closing_end.unwrap_or(text.len()),
        is_well_formed: closing_end.is_some(),
    }
}
/// Probes `text` for a leading character literal delimited by `'`.
///
/// Inside the quotes, a backslash escapes the character that follows it, so
/// `'\''` and `'\n'` are well formed. A literal that is opened but does not
/// match spans the rest of the text and is flagged as malformed.
fn manage_char(text: &str) -> KindCheck {
    lazy_static! {
        // `\\.` is "backslash + any character", mirroring the string regex.
        // The previous `\.` only matched a literal dot, which made every
        // escaped character malformed.
        static ref REGEX_CHAR: Regex = Regex::new(r##"^'(?:[^'\\]|\\.)+'"##).unwrap();
    }
    if !text.starts_with('\'') {
        return KindCheck::default();
    }
    match REGEX_CHAR.find(text) {
        Some(literal) => KindCheck {
            is_that_kind: true,
            end_at: literal.end(),
            is_well_formed: true,
        },
        None => KindCheck {
            is_that_kind: true,
            end_at: text.len(),
            is_well_formed: false,
        },
    }
}
/// Probes `text` for a leading byte literal: `0x` followed by exactly two
/// hexadecimal digits (`0-9`, uppercase `A-F`).
///
/// Text starting with `0x` that does not match spans the rest of the text and
/// is flagged as malformed.
fn manage_byte(text: &str) -> KindCheck {
    lazy_static! {
        static ref REGEX_BYTE: Regex = Regex::new(r##"^(?:0x[0-9A-F]{2})"##).unwrap();
    }
    if !text.starts_with("0x") {
        return KindCheck::default();
    }
    match REGEX_BYTE.find(text) {
        Some(byte) => KindCheck {
            is_that_kind: true,
            end_at: byte.end(),
            is_well_formed: true,
        },
        None => KindCheck {
            is_that_kind: true,
            end_at: text.len(),
            is_well_formed: false,
        },
    }
}
// Unit tests for the word splitter. Each test lexes a multi-line script and
// checks, word by word, whether it was classified as the kind under test.
#[cfg(test)]
mod tests {
use super::*;
/// Line and block comments are recognised; a bare name between them is not.
#[test]
fn test_well_formated_comments() {
let comments = "// A comment
//Anoter comment
Not_a_comment
/*A continuous comment*/
/* A
* quite
* long
* comment
*/
/* A shorter comment */";
let words = get_words(comments).unwrap();
// One boolean per word: is it a comment?
let kinds: Vec<bool> = words
.iter()
.map(|w| w.kind == Some(Kind::Comment))
.collect();
assert_eq!(vec![true, true, false, true, true, true], kinds);
}
/// Integers, negatives and decimals are numbers; a bare name is not.
#[test]
fn test_well_formated_numbers() {
let numbers = "0
-12
1.234
Not_a_number
-1.234
-0
00000000000000000000000000000";
let words = get_words(numbers).unwrap();
// One boolean per word: is it a number?
let kinds: Vec<bool> = words.iter().map(|w| w.kind == Some(Kind::Number)).collect();
assert_eq!(vec![true, true, true, false, true, true, true], kinds);
}
}