lexical_scanner 0.1.18

A simple lexer that produces more than 115 distinct tokens based on the Rust programming language. This complete lexer (lexical scanner) produces tokens from a string or from a file path.
Documentation
//! The lexical_scanner processes the user's input and converts it to a vector of 115+ various tokens.
//! Lexical_scanner works using Rust's `Iterator` trait. The `fn next()` allows the library to safely
//! view and identify character patterns without using regex.

#[allow(
    unused_variables,
    dead_code,
    non_camel_case_types,
    unused_imports,
    clippy::module_inception
)]

pub mod lexer {
    use super::*;
    use crate::enums::Token;
    use std::any::type_name;
    use std::collections::HashMap;
    use std::fs::OpenOptions;
    use std::iter::Peekable;
    use std::str::Chars;
    use std::vec::IntoIter;

    /// Character-level tokenizer over a borrowed input string.
    ///
    /// The `Iterator` implementation for this type peeks at `expr` one
    /// character at a time and turns the characters it consumes into `Token`s.
    #[derive(Debug, Clone)]
    pub struct Tokenizer<'a> {
        /// Peekable character stream being scanned; each `next()` call
        /// consumes the characters that make up the token it returns.
        pub expr: Peekable<Chars<'a>>,
        /// Lookup table consulted after a word has been scanned: when the
        /// word is a key here, the mapped `Token` is used to build a keyword
        /// token instead of a plain word token.
        pub keywords: HashMap<&'a str, Token>,
    }

    impl<'a> Iterator for Tokenizer<'a> {
        type Item = Token;

        fn next(&mut self) -> Option<Token> {
            let next_char = self.expr.peek();
            match next_char {
                // (1) String Value
                Some(c) if Self::starts_with_double_quote(*c) => {
                    let mut value = c.to_string();
                    self.expr.next();
                    while let Some(peeking) = self.expr.peek() {
                        match Some(peeking) {
                            Some(c) if Self::starts_with_double_quote(*c) => {
                                value.push(*c);
                                self.expr.next();
                                break;
                            }
                            Some(c) => {
                                value.push(*c);
                                self.expr.next();
                            }
                            None => break,
                        }
                    }

                    Some(Token::String(value))
                }
                // (1) WhiteSpace
                Some(c) if Self::is_whitespace(*c) => {
                    self.expr.next();
                    Some(Token::WhiteSpace)
                }
                // (9) \x41 \x7F \n \r \t \\ \0 \' \"
                Some(c) if Self::is_escaped(*c) => match Some(c) {
                    //Some('\x41') => return Some(Token::BitCharacterCode7(c.to_string())),
                    //Some('\x7F') => return Some(Token::BitCharacterCode8(c.to_string())),
                    Some('\n') => {
                        self.expr.next();
                        Some(Token::Newline)
                    }
                    Some('\r') => {
                        self.expr.next();
                        Some(Token::CarriageReturn)
                    }
                    Some('\t') => {
                        self.expr.next();
                        Some(Token::Tab)
                    }
                    Some('\\') => {
                        self.expr.next();
                        Some(Token::Backslash)
                    }
                    Some('\0') => {
                        self.expr.next();
                        Some(Token::Null)
                    }
                    Some('\'') => {
                        self.expr.next();
                        Some(Token::SingleQuote)
                    }
                    Some('\"') => {
                        self.expr.next();
                        Some(Token::DoubleQuote)
                    }
                    Some(c) => {
                        println!("Token::Undefined(1)::'{}'", c);
                        self.expr.next();
                        Some(Token::Undefined)
                    }
                    None => {
                        println!("Token::Undefined(2)::'None'");
                        self.expr.next();
                        Some(Token::Undefined)
                    }
                },
                // (7) @ _ , ; # $ ?
                Some(c) if Self::is_lesser_punctutation(*c) => match Some(c) {
                    Some('@') => {
                        self.expr.next();
                        Some(Token::At)
                    }
                    Some('_') => {
                        self.expr.next();
                        Some(Token::Underscore)
                    }
                    Some(',') => {
                        self.expr.next();
                        Some(Token::Comma)
                    }
                    Some(';') => {
                        self.expr.next();
                        Some(Token::Semi)
                    }
                    Some('#') => {
                        self.expr.next();
                        Some(Token::Pound)
                    }
                    Some('$') => {
                        self.expr.next();
                        Some(Token::Dollar)
                    }
                    Some('?') => {
                        self.expr.next();
                        Some(Token::Question)
                    }
                    Some(c) => {
                        println!("Token::Undefined(3)::'{}'", c);
                        Some(Token::Undefined)
                    }
                    None => {
                        //println!("Token::Undefined(4)::'None'");
                        Some(Token::Undefined)
                    }
                },
                // (5) Numeric, . .. ... ..=
                Some(c) if Self::is_numeric_with_dot(*c) => {
                    let mut value = c.to_string();
                    self.expr.next();
                    while let Some(peeking) = self.expr.peek() {
                        match Some(peeking) {
                            Some(cc) if Self::is_numeric_with_dot_eq_underscore(*cc) => {
                                value.push(*cc);
                                self.expr.next();
                            }
                            Some(_) => {
                                break;
                            }
                            None => break,
                        }
                    }

                    match Some(value.as_str()) {
                        Some(".") => return Some(Token::Dot),
                        Some("..") => return Some(Token::DotDot),
                        Some("...") => return Some(Token::DotDotDot),
                        Some("..=") => return Some(Token::DotDotEq),
                        Some(_) => {}
                        None => {}
                    }
                    if value.contains('.') || value.contains('_') {
                        return Some(Token::Floating(value));
                    }

                    Some(Token::Numeric(value))
                }
                // (41) = : :: > >= >> < <= << => += -= *= /= &= ^= &= |= == != + - * / % ^ & && | || ! // /* */ >>= <<= -> /// //! /*! /**
                Some(c) if Self::is_punctuation(*c) => {
                    let (token, next_this_times) =
                        Self::next_punctuation(c.to_string(), self.expr.clone());

                    //Advance 'next()' x times position since self.expr was cloned()
                    for i in 0..next_this_times {
                        self.expr.next();
                    }
                    token
                }
                // (6) {}[]()
                Some(c) if Self::bracket_delimiters(*c) => match Some(c) {
                    Some('{') => {
                        self.expr.next();
                        Some(Token::CurlyBraceLeft)
                    }
                    Some('}') => {
                        self.expr.next();
                        Some(Token::CurlyBraceRight)
                    }
                    Some('[') => {
                        self.expr.next();
                        Some(Token::BracketLeft)
                    }
                    Some(']') => {
                        self.expr.next();
                        Some(Token::BracketRight)
                    }
                    Some('(') => {
                        self.expr.next();
                        Some(Token::ParenLeft)
                    }
                    Some(')') => {
                        self.expr.next();
                        Some(Token::ParenRight)
                    }
                    Some(c) => {
                        //self.expr.next();
                        println!("Token::Undefined(5)::'{}'", c);
                        Some(Token::Undefined)
                    }
                    None => {
                        //self.expr.next();
                        //println!("Token::Undefined(6)::'None'");
                        Some(Token::Undefined)
                    }
                },
                // Word()
                Some(c) if Self::is_word(*c) => {
                    let mut value = c.to_string();
                    self.expr.next();
                    while let Some(peeking) = self.expr.peek() {
                        match Some(peeking) {
                            Some(cc) if Self::is_word(*peeking) => {
                                value.push(*cc);
                                self.expr.next();
                            }
                            Some(_) => {
                                break;
                            }
                            None => break,
                        }
                    }

                    //Check if word is KeyWord
                    let flag = self.keywords.get(&*value);
                    match flag {
                        Some(_) => {
                            let token = Self::translate_token_to_keyword_token(
                                flag.unwrap(),
                                value.to_string(),
                            );
                            Some(token.unwrap())
                        }
                        None => Some(Token::Word(value)),
                    }
                }
                // Catch All
                Some(c) => {
                    let value = c.to_string();
                    self.expr.next();
                    Some(Token::Character(value))
                }
                None => {
                    //println!("Token::Undefined(7)::'None'");
                    Some(Token::Undefined)
                }
            }
        }
    }


    pub mod numeric {
        use crate::enums::Token;
        use crate::lexer::lexer::lexer::Tokenizer;
        use std::{iter::Peekable, str::Chars};

        impl<'a> Tokenizer<'a> {
            /// Returns `true` when `c` is an ASCII digit, a dot (`.`) or an
            /// underscore (`_`).
            pub fn is_numeric_with_dot(c: char) -> bool {
                matches!(c, '0'..='9' | '.' | '_')
            }

            /// Returns `true` when `c` is an ASCII digit, a dot (`.`), an
            /// underscore (`_`) or an equals sign (`=`).
            /// Used as the continuation guard after the first numeric/dot
            /// character has been accepted.
            pub fn is_numeric_with_dot_eq_underscore(c: char) -> bool {
                matches!(c, '0'..='9' | '.' | '_' | '=')
            }
        }
    }

    pub mod escapes {
        use crate::enums::Token;
        use crate::lexer::lexer::lexer::Tokenizer;

        /// Check if character is escaped
        //TODO add support for "c == '\x41'""
        impl<'a> Tokenizer<'a> {
            /// Returns `true` when `c` is one of: newline, carriage return,
            /// tab, backslash, NUL, DEL (`'\x7F'`), single quote or double
            /// quote.
            pub fn is_escaped(c: char) -> bool {
                matches!(
                    c,
                    '\n' | '\r' | '\t' | '\\' | '\0' | '\x7F' | '\'' | '\"'
                )
            }

            /// Returns `true` when `c` is an opening or closing curly brace,
            /// square bracket or parenthesis.
            pub fn bracket_delimiters(c: char) -> bool {
                "{}[]()".contains(c)
            }
        }
    }

    pub mod generic {
        use std::{iter::Peekable, str::Chars};

        use crate::enums::Token;
        use crate::lexer::lexer::lexer::Tokenizer;

        impl<'a> Tokenizer<'a> {
            /// Returns `true` when `c` is the double-quote character.
            pub fn starts_with_double_quote(c: char) -> bool {
                matches!(c, '"')
            }

            /// Returns `true` only for the plain space character (`' '`);
            /// tabs and line breaks are not matched here.
            pub fn is_whitespace(c: char) -> bool {
                matches!(c, ' ')
            }

            /// Returns `true` when `c` may appear in a word: an underscore or
            /// any character for which `char::is_alphanumeric` holds.
            pub fn is_word(c: char) -> bool {
                c == '_' || c.is_alphanumeric()
            }

            /// Returns `true` when `c` is one of the major punctuation
            /// characters: `. + - * / % ^ & | ! : > = <`.
            pub fn is_punctuation(c: char) -> bool {
                matches!(
                    c,
                    '.' | '+'
                        | '-'
                        | '*'
                        | '/'
                        | '%'
                        | '^'
                        | '&'
                        | '|'
                        | '!'
                        | ':'
                        | '>'
                        | '='
                        | '<'
                )
            }

            /// Returns `true` when `c` is one of the lesser punctuation
            /// characters: `@ _ , ; # $ ?`.
            /// These are kept apart from the major punctuation set so the two
            /// groups can be handled separately.
            pub fn is_lesser_punctutation(c: char) -> bool {
                ['@', '_', ',', ';', '#', '$', '?'].contains(&c)
            }
        }
    }

}