/// The kinds of lexical tokens produced by `source_to_token`.
///
/// NOTE(review): `Seperator` is a misspelling of "Separator"; it is kept
/// as-is because renaming would break every call site in the crate.
#[derive(Clone)]
pub(crate) enum TokenType {
/// Bare identifier: an ASCII letter followed by alphanumerics, `_`, or `'`.
Identity(String),
/// Atom tag: `#` followed by a letter-initial identifier (the `#` is stripped).
Atom(String),
/// Double-quoted string literal (quotes stripped; may span multiple lines).
String(String),
/// Character tag: `#` followed by a decimal ASCII code in `0..=127`.
Char(u8),
/// Numeric literal, parsed as `f64`.
Number(f64),
/// `,`
Seperator,
/// `;`
SentenceSeperator,
/// `[`
FuncListOpen,
/// `]`
FuncListClose,
/// `{`
ListOpen,
/// `}`
ListClose,
}
impl std::fmt::Debug for TokenType {
    /// Compact debug form: payload variants render as `Tag(value)`,
    /// unit variants as short fixed labels (e.g. `S`, `FnO`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TokenType::Identity(name) => write!(f, "Id({})", name),
            TokenType::Atom(tag) => write!(f, "Atom({})", tag),
            TokenType::String(text) => write!(f, "Str({})", text),
            TokenType::Char(code) => write!(f, "Ch({})", code),
            TokenType::Number(value) => write!(f, "Num({})", value),
            unit => f.write_str(match unit {
                TokenType::Seperator => "S",
                TokenType::SentenceSeperator => "SS",
                TokenType::FuncListOpen => "FnO",
                TokenType::FuncListClose => "FnC",
                TokenType::ListOpen => "LstO",
                TokenType::ListClose => "LstC",
                // Payload variants were matched above.
                _ => unreachable!(),
            }),
        }
    }
}
/// A single lexed token together with its source span.
#[derive(Clone)]
pub(crate) struct Token {
// The token's kind and payload.
pub value: TokenType,
// ((start_line, start_col), (end_line, end_col)). Lines are 1-based;
// columns are 1-based character counts pointing at the token's first and
// last character (single-character tokens have start == end).
pub position: ((usize, usize), (usize, usize)),
}
impl std::fmt::Debug for Token {
    /// Renders as `Token<VALUE, [(start_line, start_col), (end_line, end_col)]>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let ((start_line, start_col), (end_line, end_col)) = self.position;
        write!(
            f,
            "Token<{:?}, [({}, {}), ({}, {})]>",
            self.value, start_line, start_col, end_line, end_col
        )
    }
}
/// Converts raw source text into a flat token stream.
///
/// Returns `None` (after printing a diagnostic to stderr) on any lexical
/// error: an unterminated string, a malformed number, a bad `#` tag, an
/// unexpected character, or an unclosed `(* ... *)` comment.
///
/// Position bookkeeping: `current_position` is `(line, column)` with
/// 1-based lines; the column is incremented *before* a character is
/// consumed, so it names the 1-based column of that character.
pub(crate) fn source_to_token(source: &str) -> Option<Vec<Token>> {
    // Work on a char vector so every character (including multi-byte ones)
    // counts as exactly one column.
    let source: Vec<char> = source.chars().collect::<Vec<char>>();
    let mut tokens = Vec::new();
    // Depth of nested `(* ... *)` comments; > 0 disables normal tokenizing.
    let mut comment_depth: usize = 0;
    let mut source_position: usize = 0;
    let mut current_position: (usize, usize) = (1, 0);
    while source_position < source.len() {
        if source[source_position] == '"' && comment_depth == 0 {
            // ---- String literal: everything up to the closing `"` ----
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            let mut is_closed = false;
            while let Some(&ch) = source.get(source_position) {
                if ch == '"' {
                    is_closed = true;
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                    break;
                } else if ch == '\n' {
                    // Strings may span lines; track the line break.
                    source_position += 1;
                    current_position = (current_position.0 + 1, 0);
                } else {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                }
            }
            if is_closed {
                // Token text excludes the surrounding quotes.
                tokens.push(Token {
                    value: TokenType::String(String::from_iter(
                        source[(init_source_position + 1)..(source_position - 1)].iter(),
                    )),
                    position: (init_code_position, current_position),
                });
            } else {
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Unterminated string at ({}, {})."
                    ),
                    current_position.0, current_position.1
                );
                return None;
            }
        } else if (source[source_position].is_ascii_digit()
            || source[source_position] == '-'
            || source[source_position] == '+')
            && comment_depth == 0
        {
            // ---- Number literal: [+-]digits[.digits][e[+-]digits] ----
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            let mut is_after_dot = false;
            let mut is_after_e = false;
            while let Some(&ch) = source.get(source_position) {
                if ch.is_ascii_digit() {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                } else if ch == '.' && !is_after_e && !is_after_dot {
                    // At most one dot, and never inside the exponent.
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                    is_after_dot = true;
                } else if ch == 'e' && !is_after_e {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                    is_after_e = true;
                    // FIX: an exponent may carry a sign (`1e-5`, `2e+3`).
                    // Previously the sign terminated the literal and the
                    // leftover `1e` then failed to parse as f64.
                    if let Some(&sign) = source.get(source_position) {
                        if sign == '+' || sign == '-' {
                            source_position += 1;
                            current_position = (current_position.0, current_position.1 + 1);
                        }
                    }
                } else {
                    break;
                }
            }
            // Defer final validation to the f64 parser (rejects `-`, `1e`, ...).
            let num_str = String::from_iter(source[init_source_position..source_position].iter());
            match num_str.parse::<f64>() {
                Ok(n) => tokens.push(Token {
                    value: TokenType::Number(n),
                    position: (init_code_position, current_position),
                }),
                Err(_) => {
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid number `{}` at ({}, {})."
                        ),
                        num_str, init_code_position.0, init_code_position.1
                    );
                    return None;
                }
            }
        } else if source[source_position] == '#' && comment_depth == 0 {
            // ---- Tag: `#name` -> Atom, `#digits` -> Char (ASCII only) ----
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            if let Some(ch) = source.get(source_position) {
                if ch.is_ascii_alphabetic() {
                    while let Some(&ch) = source.get(source_position) {
                        if ch.is_ascii_alphanumeric() || ch == '_' || ch == '\'' {
                            source_position += 1;
                            current_position = (current_position.0, current_position.1 + 1);
                        } else {
                            break;
                        }
                    }
                    // Skip the leading `#` when extracting the tag text.
                    let atom_tag = String::from_iter(
                        source[(init_source_position + 1)..source_position].iter(),
                    );
                    tokens.push(Token {
                        value: TokenType::Atom(atom_tag),
                        position: (init_code_position, current_position),
                    });
                } else if ch.is_ascii_digit() {
                    while let Some(&ch) = source.get(source_position) {
                        if ch.is_ascii_digit() {
                            source_position += 1;
                            current_position = (current_position.0, current_position.1 + 1);
                        } else {
                            break;
                        }
                    }
                    let char_tag = String::from_iter(
                        source[(init_source_position + 1)..source_position].iter(),
                    );
                    match char_tag.parse::<u8>() {
                        // Only 7-bit ASCII codes are valid char tags.
                        Ok(c) if c < 128 => tokens.push(Token {
                            value: TokenType::Char(c),
                            position: (init_code_position, current_position),
                        }),
                        Ok(u) => {
                            eprintln!(
                                concat!(
                                    "Error[ksl::token::source_to_token]: ",
                                    "Out of char index boundary `{}` at ({}, {})."
                                ),
                                u, init_code_position.0, init_code_position.1
                            );
                            return None;
                        }
                        Err(_) => {
                            eprintln!(
                                concat!(
                                    "Error[ksl::token::source_to_token]: ",
                                    "Invalid char index `{}` at ({}, {})."
                                ),
                                char_tag, init_code_position.0, init_code_position.1
                            );
                            return None;
                        }
                    }
                } else {
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid token `{}` for tag at ({}, {})."
                        ),
                        ch,
                        current_position.0,
                        current_position.1 + 1
                    );
                    return None;
                }
            } else {
                // `#` at end of input.
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Empty tag at ({}, {})."
                    ),
                    current_position.0, current_position.1
                );
                return None;
            }
        } else if source[source_position].is_ascii_alphabetic() && comment_depth == 0 {
            // ---- Identifier: letter, then alphanumerics / `_` / `'` ----
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            while let Some(&ch) = source.get(source_position) {
                if ch.is_ascii_alphanumeric() || ch == '_' || ch == '\'' {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                } else {
                    break;
                }
            }
            tokens.push(Token {
                value: TokenType::Identity(String::from_iter(
                    source[init_source_position..source_position].iter(),
                )),
                position: (init_code_position, current_position),
            });
        } else if comment_depth == 0
            && matches!(source[source_position], ',' | ';' | '[' | ']' | '{' | '}')
        {
            // ---- Single-character punctuation tokens ----
            let ch = source[source_position];
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            let value = match ch {
                ',' => TokenType::Seperator,
                ';' => TokenType::SentenceSeperator,
                '[' => TokenType::FuncListOpen,
                ']' => TokenType::FuncListClose,
                '{' => TokenType::ListOpen,
                '}' => TokenType::ListClose,
                // Excluded by the `matches!` guard above.
                _ => unreachable!(),
            };
            tokens.push(Token {
                value,
                position: (current_position, current_position),
            });
        } else if source[source_position] == '(' {
            // ---- Possible comment opener `(*` (comments nest) ----
            if let Some(&ch) = source.get(source_position + 1) {
                if ch == '*' {
                    current_position = (current_position.0, current_position.1 + 2);
                    source_position += 2;
                    comment_depth += 1;
                } else if comment_depth > 0 {
                    // A lone '(' inside a comment is plain comment text.
                    current_position = (current_position.0, current_position.1 + 1);
                    source_position += 1;
                } else {
                    current_position = (current_position.0, current_position.1 + 1);
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid token `{}` at ({}, {})."
                        ),
                        source[source_position], current_position.0, current_position.1
                    );
                    return None;
                }
            } else if comment_depth > 0 {
                // FIX: '(' as the very last character while inside a comment
                // is comment text; let the unclosed-comment check after the
                // loop report the real problem instead of "Invalid token".
                current_position = (current_position.0, current_position.1 + 1);
                source_position += 1;
            } else {
                current_position = (current_position.0, current_position.1 + 1);
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Invalid token `{}` at ({}, {})."
                    ),
                    source[source_position], current_position.0, current_position.1
                );
                return None;
            }
        } else if source[source_position] == '*' {
            // ---- Possible comment closer `*)` ----
            if let Some(&ch) = source.get(source_position + 1) {
                if ch == ')' && comment_depth > 0 {
                    current_position = (current_position.0, current_position.1 + 2);
                    source_position += 2;
                    comment_depth -= 1;
                } else if comment_depth > 0 {
                    // A lone '*' inside a comment is plain comment text.
                    current_position = (current_position.0, current_position.1 + 1);
                    source_position += 1;
                } else {
                    current_position = (current_position.0, current_position.1 + 1);
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid token `{}` at ({}, {})."
                        ),
                        source[source_position], current_position.0, current_position.1
                    );
                    return None;
                }
            } else if comment_depth > 0 {
                // FIX: same end-of-input-inside-comment case as '(' above.
                current_position = (current_position.0, current_position.1 + 1);
                source_position += 1;
            } else {
                current_position = (current_position.0, current_position.1 + 1);
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Invalid token `{}` at ({}, {})."
                    ),
                    source[source_position], current_position.0, current_position.1
                );
                return None;
            }
        } else if source[source_position] == '\n' {
            // Line break: next line, reset column.
            current_position = (current_position.0 + 1, 0);
            source_position += 1;
        } else if source[source_position].is_whitespace() || comment_depth > 0 {
            // Whitespace, or any character swallowed by an open comment.
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
        } else {
            current_position = (current_position.0, current_position.1 + 1);
            eprintln!(
                concat!(
                    "Error[ksl::token::source_to_token]: ",
                    "Invalid token `{}` at ({}, {})."
                ),
                source[source_position], current_position.0, current_position.1
            );
            return None;
        }
    }
    if comment_depth == 0 {
        Some(tokens)
    } else {
        eprintln!(concat!(
            "Error[ksl::token::source_to_token]: ",
            "Unclosed comment."
        ));
        None
    }
}