//! ksl 0.1.7
//!
//! KSL core library and interpreter — documentation.
//! # ksl::token
//!
//! Defines token-related types and functions in KSL.

/// All token types produced by the KSL lexer.
///
/// Each variant's doc comment gives the regular expression describing
/// the surface syntax that `source_to_token` matches for that token.
///
/// NOTE(review): `Seperator` / `SentenceSeperator` are misspellings of
/// "Separator"; renaming them would break existing call sites, so they
/// are kept and documented here instead.
#[derive(Clone)]
pub(crate) enum TokenType {
    /// Identifier. Documented as `^[a-zA-Z_][0-9a-zA-Z_']*$`, but the
    /// lexer only starts an identifier on an ASCII letter — a leading
    /// `_` is rejected. TODO confirm which is intended.
    Identity(String),
    /// Atom tag: ^#[a-zA-Z][0-9a-zA-Z_']*$
    Atom(String),
    /// String literal (quotes stripped, no escape sequences): ^\u{22}(.|\n)*\u{22}$
    String(String),
    /// Character literal by ASCII code (0-127): ^#[0-9]{1,3}$
    Char(u8),
    /// Numeric literal, stored as f64: ^[+-]?[0-9]+(\.[0-9]*)?(e[0-9]+)?$
    Number(f64),
    /// Item separator: ^,$
    Seperator,
    /// Sentence separator: ^;$
    SentenceSeperator,
    /// Function-list opener: ^\[$
    FuncListOpen,
    /// Function-list closer: ^\]$
    FuncListClose,
    /// List opener: ^\{$
    ListOpen,
    /// List closer: ^\}$
    ListClose,
    // Comments `(* ... *)` are consumed by the lexer and produce no token.
}

impl std::fmt::Debug for TokenType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TokenType::Identity(id) => write!(f, "Id({})", id),
            TokenType::Atom(a) => write!(f, "Atom({})", a),
            TokenType::String(s) => write!(f, "Str({})", s),
            TokenType::Char(c) => write!(f, "Ch({})", c),
            TokenType::Number(n) => write!(f, "Num({})", n),
            TokenType::Seperator => write!(f, "S"),
            TokenType::SentenceSeperator => write!(f, "SS"),
            TokenType::FuncListOpen => write!(f, "FnO"),
            TokenType::FuncListClose => write!(f, "FnC"),
            TokenType::ListOpen => write!(f, "LstO"),
            TokenType::ListClose => write!(f, "LstC"),
        }
    }
}

/// Tokens in KSL.
///
/// Pairs a [`TokenType`] with the source span it was lexed from.
#[derive(Clone)]
pub(crate) struct Token {
    /// The token's kind and payload.
    pub value: TokenType,
    /// Source span as `((start_line, start_col), (end_line, end_col))`.
    /// Lines start at 1; columns are counted by the lexer, which
    /// increments before consuming a character — presumably 1-based.
    /// TODO confirm column convention against `source_to_token`.
    pub position: ((usize, usize), (usize, usize)),
}

impl std::fmt::Debug for Token {
    /// Renders as `Token<VALUE, [(l1, c1), (l2, c2)]>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure the nested span tuple into named parts for clarity.
        let ((start_line, start_col), (end_line, end_col)) = self.position;
        write!(
            f,
            "Token<{:?}, [({}, {}), ({}, {})]>",
            self.value, start_line, start_col, end_line, end_col,
        )
    }
}

/// Convert source code to tokens.
///
/// Returns `Some(tokens)` on success. On any lexing error — invalid
/// token, unterminated string, malformed number/char literal, or an
/// unclosed `(* ... *)` comment — a diagnostic is printed to stderr and
/// `None` is returned.
///
/// Position tracking: `current_position` is `(line, column)`; lines
/// start at 1, and the column is incremented *before* a character is
/// consumed (a newline resets it to 0), so reported columns are 1-based.
///
/// Fixes relative to the previous revision:
/// - A `(` or `*` appearing as the very last input character while
///   inside a comment is now consumed, so the accurate "Unclosed
///   comment" diagnostic is reported instead of "Invalid token".
pub(crate) fn source_to_token(source: &str) -> Option<Vec<Token>> {
    let source: Vec<char> = source.chars().collect::<Vec<char>>();
    let mut tokens = Vec::new();
    // Nesting depth of (* ... *) comments; tokenization is suspended while > 0.
    let mut comment_depth: usize = 0;
    let mut source_position: usize = 0;
    let mut current_position: (usize, usize) = (1, 0);
    while source_position < source.len() {
        if source[source_position] == '"' && comment_depth == 0 {
            // Get strings.
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            let mut is_closed = false;
            while let Some(&ch) = source.get(source_position) {
                if ch == '"' {
                    // Close quote symbol. Note: there is no escape syntax,
                    // so a '"' always terminates the string.
                    is_closed = true;
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                    break;
                } else if ch == '\n' {
                    // Strings may span lines; keep the line counter accurate.
                    source_position += 1;
                    current_position = (current_position.0 + 1, 0);
                } else {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                }
            }
            if is_closed {
                tokens.push(Token {
                    value: TokenType::String(String::from_iter(
                        // Strip the surrounding double quotes (the slice
                        // excludes both the opening and closing '"').
                        source[(init_source_position + 1)..(source_position - 1)].iter(),
                    )),
                    position: (init_code_position, current_position),
                });
            } else {
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Unterminated string at ({}, {})."
                    ),
                    current_position.0, current_position.1
                );
                return None;
            }
        } else if (source[source_position].is_ascii_digit() || source[source_position] == '-' || source[source_position] == '+')
            && comment_depth == 0
        {
            // Get numbers. A bare '+'/'-' not followed by digits falls
            // through to the parse below and is reported as invalid.
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            // At most one '.' and one 'e', and no '.' after the 'e'.
            let mut is_after_dot = false;
            let mut is_after_e = false;
            while let Some(&ch) = source.get(source_position) {
                if ch.is_ascii_digit() {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                } else if ch == '.' && !is_after_e && !is_after_dot {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                    is_after_dot = true;
                } else if ch == 'e' && !is_after_e {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                    is_after_e = true;
                } else {
                    break;
                }
            }
            // Delegate validation to f64's parser; it rejects forms the
            // scanner let through (e.g. "1e" with no exponent digits).
            let num_str = String::from_iter(source[init_source_position..source_position].iter());
            match num_str.parse::<f64>() {
                Ok(n) => tokens.push(Token {
                    value: TokenType::Number(n),
                    position: (init_code_position, current_position),
                }),
                Err(_) => {
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid number `{}` at ({}, {})."
                        ),
                        num_str, init_code_position.0, init_code_position.1
                    );
                    return None;
                }
            }
        } else if source[source_position] == '#' && comment_depth == 0 {
            // Get atoms (`#name`) or char literals (`#NNN`), disambiguated
            // by the first character after '#'.
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            if let Some(ch) = source.get(source_position) {
                // First character should be an ascii alphabetic.
                if ch.is_ascii_alphabetic() {
                    // Next can be number, alphabetic, underscore or single quote.
                    while let Some(&ch) = source.get(source_position) {
                        if ch.is_ascii_alphanumeric() || ch == '_' || ch == '\'' {
                            source_position += 1;
                            current_position = (current_position.0, current_position.1 + 1);
                        } else {
                            break;
                        }
                    }
                    // Slice excludes the leading '#'.
                    let atom_tag = String::from_iter(source[(init_source_position + 1)..source_position].iter());
                    tokens.push(Token {
                        value: TokenType::Atom(atom_tag),
                        position: (init_code_position, current_position),
                    });
                } else if ch.is_ascii_digit() {
                    // Need to be digits: a char literal by ASCII code.
                    while let Some(&ch) = source.get(source_position) {
                        if ch.is_ascii_digit() {
                            source_position += 1;
                            current_position = (current_position.0, current_position.1 + 1);
                        } else {
                            break;
                        }
                    }
                    let char_tag = String::from_iter(source[(init_source_position + 1)..source_position].iter());
                    match char_tag.parse::<u8>() {
                        // Only 7-bit ASCII codes are valid chars.
                        Ok(c) if c < 128 => tokens.push(Token {
                            value: TokenType::Char(c),
                            position: (init_code_position, current_position),
                        }),
                        Ok(u) => {
                            eprintln!(
                                concat!(
                                    "Error[ksl::token::source_to_token]: ",
                                    "Out of char index boundary `{}` at ({}, {})."
                                ),
                                u, init_code_position.0, init_code_position.1
                            );
                            return None;
                        }
                        Err(_) => {
                            // parse::<u8> also fails for values > 255.
                            eprintln!(
                                concat!(
                                    "Error[ksl::token::source_to_token]: ",
                                    "Invalid char index `{}` at ({}, {})."
                                ),
                                char_tag, init_code_position.0, init_code_position.1
                            );
                            return None;
                        }
                    }
                } else {
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid token `{}` for tag at ({}, {})."
                        ),
                        ch,
                        current_position.0,
                        current_position.1 + 1
                    );
                    return None;
                }
            } else {
                // '#' as the last character of the input.
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Empty tag at ({}, {})."
                    ),
                    current_position.0, current_position.1
                );
                return None;
            }
        } else if source[source_position].is_ascii_alphabetic() && comment_depth == 0 {
            // Get identities.
            current_position = (current_position.0, current_position.1 + 1);
            let init_code_position = current_position;
            let init_source_position = source_position;
            source_position += 1;
            while let Some(&ch) = source.get(source_position) {
                if ch.is_ascii_alphanumeric() || ch == '_' || ch == '\'' {
                    source_position += 1;
                    current_position = (current_position.0, current_position.1 + 1);
                } else {
                    break;
                }
            }
            tokens.push(Token {
                value: TokenType::Identity(String::from_iter(
                    source[init_source_position..source_position].iter(),
                )),
                position: (init_code_position, current_position),
            });
        } else if source[source_position] == ',' && comment_depth == 0 {
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            tokens.push(Token {
                value: TokenType::Seperator,
                position: (current_position, current_position),
            });
        } else if source[source_position] == ';' && comment_depth == 0 {
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            tokens.push(Token {
                value: TokenType::SentenceSeperator,
                position: (current_position, current_position),
            });
        } else if source[source_position] == '[' && comment_depth == 0 {
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            tokens.push(Token {
                value: TokenType::FuncListOpen,
                position: (current_position, current_position),
            });
        } else if source[source_position] == ']' && comment_depth == 0 {
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            tokens.push(Token {
                value: TokenType::FuncListClose,
                position: (current_position, current_position),
            });
        } else if source[source_position] == '{' && comment_depth == 0 {
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            tokens.push(Token {
                value: TokenType::ListOpen,
                position: (current_position, current_position),
            });
        } else if source[source_position] == '}' && comment_depth == 0 {
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
            tokens.push(Token {
                value: TokenType::ListClose,
                position: (current_position, current_position),
            });
        } else if source[source_position] == '(' {
            if let Some(&ch) = source.get(source_position + 1) {
                if ch == '*' {
                    // Comment opener `(*`; comments nest.
                    current_position = (current_position.0, current_position.1 + 2);
                    source_position += 2;
                    comment_depth += 1;
                } else if comment_depth > 0 {
                    // Plain '(' inside a comment: skip it.
                    current_position = (current_position.0, current_position.1 + 1);
                    source_position += 1;
                } else {
                    current_position = (current_position.0, current_position.1 + 1);
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid token `{}` at ({}, {})."
                        ),
                        source[source_position], current_position.0, current_position.1
                    );
                    return None;
                }
            } else if comment_depth > 0 {
                // '(' as the very last character while inside a comment:
                // consume it; the unclosed comment is reported after the loop.
                current_position = (current_position.0, current_position.1 + 1);
                source_position += 1;
            } else {
                current_position = (current_position.0, current_position.1 + 1);
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Invalid token `{}` at ({}, {})."
                    ),
                    source[source_position], current_position.0, current_position.1
                );
                return None;
            }
        } else if source[source_position] == '*' {
            if let Some(&ch) = source.get(source_position + 1) {
                if ch == ')' && comment_depth > 0 {
                    // Comment closer `*)`.
                    current_position = (current_position.0, current_position.1 + 2);
                    source_position += 2;
                    comment_depth -= 1;
                } else if comment_depth > 0 {
                    // Plain '*' inside a comment: skip it.
                    current_position = (current_position.0, current_position.1 + 1);
                    source_position += 1;
                } else {
                    current_position = (current_position.0, current_position.1 + 1);
                    eprintln!(
                        concat!(
                            "Error[ksl::token::source_to_token]: ",
                            "Invalid token `{}` at ({}, {})."
                        ),
                        source[source_position], current_position.0, current_position.1
                    );
                    return None;
                }
            } else if comment_depth > 0 {
                // '*' as the very last character while inside a comment:
                // consume it; the unclosed comment is reported after the loop.
                current_position = (current_position.0, current_position.1 + 1);
                source_position += 1;
            } else {
                current_position = (current_position.0, current_position.1 + 1);
                eprintln!(
                    concat!(
                        "Error[ksl::token::source_to_token]: ",
                        "Invalid token `{}` at ({}, {})."
                    ),
                    source[source_position], current_position.0, current_position.1
                );
                return None;
            }
        } else if source[source_position] == '\n' {
            // Newline: next line, column reset (checked before the generic
            // whitespace/comment branch so line counting works in comments).
            current_position = (current_position.0 + 1, 0);
            source_position += 1;
        } else if source[source_position].is_whitespace() || comment_depth > 0 {
            // Skip whitespace, and skip *any* character inside a comment.
            current_position = (current_position.0, current_position.1 + 1);
            source_position += 1;
        } else {
            current_position = (current_position.0, current_position.1 + 1);
            eprintln!(
                concat!(
                    "Error[ksl::token::source_to_token]: ",
                    "Invalid token `{}` at ({}, {})."
                ),
                source[source_position], current_position.0, current_position.1
            );
            return None;
        }
    }
    if comment_depth == 0 {
        Some(tokens)
    } else {
        eprintln!(concat!(
            "Error[ksl::token::source_to_token]: ",
            "Unclosed comment."
        ));
        None
    }
}