//! mamba 0.3.6: a transpiler which converts Mamba files to Python 3 files.

use std::iter::Peekable;
use std::str::Chars;

use crate::common::position::CaretPos;
use crate::parse::lex::result::{LexErr, LexResult};
use crate::parse::lex::state::State;
use crate::parse::lex::token::{Lex, Token};
use crate::parse::lex::tokenize_direct;

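/// Convert a character into zero or more lexemes, consuming from `it` any
/// further characters that belong to the same token (e.g. `..=` or `<<`).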
#[allow(clippy::cognitive_complexity)]
pub fn into_tokens(c: char, it: &mut Peekable<Chars>, state: &mut State) -> LexResult {
    match c {
        ',' => create(state, Token::Comma),
        ':' => match it.peek() {
            Some(':') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::SliceIncl),
                _ => create(state, Token::Slice),
            },
            Some('=') => next_and_create(it, state, Token::Assign),
            _ => create(state, Token::DoublePoint),
        },
        '(' => create(state, Token::LRBrack),
        ')' => create(state, Token::RRBrack),
        '[' => create(state, Token::LSBrack),
        ']' => create(state, Token::RSBrack),
        '{' => create(state, Token::LCBrack),
        '}' => create(state, Token::RCBrack),
        '|' => create(state, Token::Ver),
        '\n' => create(state, Token::NL),
        '\r' => match it.next() {
            Some('\n') => create(state, Token::NL),
            _ => Err(LexErr::new(state.pos, None, "carriage return not followed by newline")),
        },
        '.' => match it.peek() {
            Some('.') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::RangeIncl),
                _ => create(state, Token::Range),
            },
            _ => create(state, Token::Point),
        },
        '<' => match it.peek() {
            Some('<') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::BLShiftAssign),
                _ => create(state, Token::BLShift),
            },
            Some('=') => next_and_create(it, state, Token::Leq),
            _ => create(state, Token::Le),
        },
        '>' => match it.peek() {
            Some('>') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::BRShiftAssign),
                _ => create(state, Token::BRShift),
            },
            Some('=') => next_and_create(it, state, Token::Geq),
            _ => create(state, Token::Ge),
        },
        '+' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::AddAssign),
            _ => create(state, Token::Add),
        },
        '-' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::SubAssign),
            Some('>') => next_and_create(it, state, Token::To),
            _ => create(state, Token::Sub),
        },
        '*' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::MulAssign),
            _ => create(state, Token::Mul),
        },
        '/' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::DivAssign),
            Some('/') => next_and_create(it, state, Token::FDiv),
            _ => create(state, Token::Div),
        },
        '\\' => create(state, Token::BSlash),
        '^' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::PowAssign),
            _ => create(state, Token::Pow),
        },
        '=' => match it.peek() {
            Some('>') => next_and_create(it, state, Token::BTo),
            _ => create(state, Token::Eq),
        },
        '#' => {
            // A line comment runs until the end of the line (or a carriage return).
            let mut comment = String::new();
            while let Some(&c) = it.peek() {
                if c == '\n' || c == '\r' {
                    break;
                }
                comment.push(c);
                it.next();
            }
            create(state, Token::Comment(comment))
        }
        '!' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::Neq),
            _ => Err(LexErr::new(state.pos, None, "'!' is not a valid character on its own")),
        },
        '?' => create(state, Token::Question),
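        // Number literals: integers, reals (with a '.'), and E-notation such
        // as `3E4`. A `..` after the integer part is left for the range tokens.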
        '0'..='9' => {
            let mut number = c.to_string();
            let mut exp = String::new();
            let mut float = false;
            let mut e_num = false;

            while let Some(&c) = it.peek() {
                match c {
                    '0'..='9' if !e_num => {
                        number.push(c);
                        it.next();
                    }
                    '0'..='9' if e_num => {
                        exp.push(c);
                        it.next();
                    }
                    'E' if e_num => break,
                    'E' => {
                        e_num = true;
                        it.next();
                    }
                    '.' if float || e_num => break,
                    '.' => {
                        {
                            // Peek one extra character: a second '.' means this
                            // is a range (`..`), not a decimal point
                            let mut it = it.clone();
                            it.next();
                            if let Some('.') = it.peek() {
                                break;
                            }
                        }

                        number.push(c);
                        float = true;
                        it.next();
                    }
                    _ => break,
                }
            }
            create(
                state,
                if e_num {
                    Token::ENum(number, exp)
                } else if float {
                    Token::Real(number)
                } else {
                    Token::Int(number)
                },
            )
        }
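        // Identifiers, keywords, and word operators such as `and` and `mod`.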
        'a'..='z' | 'A'..='Z' | '_' => {
            let mut id_or_operation = c.to_string();
            while let Some(c) = it.peek() {
                match c {
                    'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
                        id_or_operation.push(*c);
                        it.next();
                    }
                    _ => break,
                }
            }
            create(state, as_op_or_id(id_or_operation))
        }
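        // String literals, including `"""` docstrings and `{}`-interpolated
        // expressions, which are re-tokenized with `tokenize_direct`.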
        '"' => {
            let mut string = String::new();
            let mut back_slash = false;

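            // Interpolated expressions found so far, with their offsets into
            // the source string.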
            let mut exprs: Vec<(CaretPos, String)> = vec![];
            let mut build_cur_expr = 0;
            let mut cur_offset = CaretPos::start();
            let mut cur_expr = String::new();

            for c in it {
                if !back_slash && build_cur_expr == 0 && c == '"' {
                    break;
                }
                string.push(c);

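                // Outside an escape, track brace depth so that nested braces
                // inside an interpolated expression do not end it early.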
                if !back_slash {
                    if build_cur_expr > 0 {
                        cur_expr.push(c);
                    }

                    if c == '{' {
                        if build_cur_expr == 0 {
                            cur_offset = state.pos.offset_pos(string.len() + 1);
                        }
                        build_cur_expr += 1;
                    } else if c == '}' {
                        build_cur_expr -= 1;
                    }

                    if build_cur_expr == 0 && !cur_expr.is_empty() {
                        // The last char is always `}` since the depth counter just hit zero
                        cur_expr = cur_expr[0..cur_expr.len() - 1].to_owned();
                        if !cur_expr.is_empty() {
                            exprs.push((cur_offset, cur_expr.clone()));
                        }
                        cur_expr.clear()
                    }
                }

                back_slash = c == '\\';
            }

            if string.starts_with("\"\"") && string.ends_with("\"\"") {
                let string = string.trim_start_matches("\"\"").trim_end_matches("\"\"");
                create(state, Token::DocStr(String::from(string)))
            } else {
                let tokens = exprs
                    .iter()
                    .map(|(offset, string)| match tokenize_direct(string) {
                        Ok(tokens) => Ok(tokens
                            .iter()
                            .map(|lex| Lex::new(lex.pos.offset(offset).start, lex.token.clone()))
                            .collect()),
                        Err(err) => Err(err),
                    })
                    .collect::<Result<_, _>>()?;

                create(state, Token::Str(string, tokens))
            }
        }
        ' ' => {
            state.space();
            Ok(vec![])
        }
        c => Err(LexErr::new(state.pos, None, &format!("unrecognized character: {c}"))),
    }
}

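/// Consume the character that was peeked to recognise `token`, then emit it.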
fn next_and_create(
    it: &mut Peekable<Chars>,
    state: &mut State,
    token: Token,
) -> LexResult<Vec<Lex>> {
    it.next();
    create(state, token)
}

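/// Emit `token` as lexemes at the current position of `state`.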
fn create(state: &mut State, token: Token) -> LexResult<Vec<Lex>> {
    Ok(state.token(token))
}

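/// Map a word to its keyword or word-operator token; any word that is not
/// reserved becomes a `Token::Id`.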
fn as_op_or_id(string: String) -> Token {
    match string.as_ref() {
        "_" => Token::Underscore,

        "from" => Token::From,
        "type" => Token::Type,
        "class" => Token::Class,
        "pure" => Token::Pure,
        "as" => Token::As,

        "import" => Token::Import,
        "forward" => Token::Forward,
        "self" => Token::_Self,
        "vararg" => Token::Vararg,
        "init" => Token::Init,

        "def" => Token::Def,
        "fin" => Token::Fin,
        "and" => Token::And,
        "or" => Token::Or,
        "not" => Token::Not,
        "is" => Token::Is,
        "isa" => Token::IsA,
        "isnt" => Token::IsN,
        "isnta" => Token::IsNA,
        "mod" => Token::Mod,
        "sqrt" => Token::Sqrt,
        "while" => Token::While,
        "for" => Token::For,

        "_and_" => Token::BAnd,
        "_or_" => Token::BOr,
        "_xor_" => Token::BXOr,
        "_not_" => Token::BOneCmpl,

        "if" => Token::If,
        "else" => Token::Else,
        "match" => Token::Match,
        "continue" => Token::Continue,
        "break" => Token::Break,
        "return" => Token::Ret,
        "then" => Token::Then,
        "do" => Token::Do,
        "with" => Token::With,

        "in" => Token::In,

        "raise" => Token::Raise,
        "handle" => Token::Handle,
        "when" => Token::When,

        "True" => Token::Bool(true),
        "False" => Token::Bool(false),

        "None" => Token::Undefined,
        "pass" => Token::Pass,

        _ => Token::Id(string),
    }
}

#[cfg(test)]
mod test {
    use crate::parse::lex::result::LexErr;
    use crate::parse::lex::token::Token;
    use crate::parse::lex::tokenize;

    #[test]
    fn function_with_ret() -> Result<(), LexErr> {
        let source = "def f(x: Int) -> Int =>\n    return";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Def);
        assert_eq!(tokens[1].token, Token::Id(String::from("f")));
        assert_eq!(tokens[2].token, Token::LRBrack);
        assert_eq!(tokens[3].token, Token::Id(String::from("x")));
        assert_eq!(tokens[4].token, Token::DoublePoint);
        assert_eq!(tokens[5].token, Token::Id(String::from("Int")));
        assert_eq!(tokens[6].token, Token::RRBrack);
        assert_eq!(tokens[7].token, Token::To);
        assert_eq!(tokens[8].token, Token::Id(String::from("Int")));
        assert_eq!(tokens[9].token, Token::BTo);
        assert_eq!(tokens[10].token, Token::NL);
        assert_eq!(tokens[11].token, Token::Indent);
        assert_eq!(tokens[12].token, Token::Ret);
        assert_eq!(tokens[13].token, Token::Dedent);

        Ok(())
    }

    #[test]
    fn class_with_body_class_right_after() -> Result<(), LexErr> {
        let source = "class MyClass\n    def var := 10\nclass MyClass1\n";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Class);
        assert_eq!(tokens[1].token, Token::Id(String::from("MyClass")));
        assert_eq!(tokens[2].token, Token::NL);
        assert_eq!(tokens[3].token, Token::Indent);
        assert_eq!(tokens[4].token, Token::Def);
        assert_eq!(tokens[5].token, Token::Id(String::from("var")));
        assert_eq!(tokens[6].token, Token::Assign);
        assert_eq!(tokens[7].token, Token::Int(String::from("10")));
        assert_eq!(tokens[8].token, Token::NL);
        assert_eq!(tokens[9].token, Token::Dedent);
        assert_eq!(tokens[10].token, Token::NL);
        assert_eq!(tokens[11].token, Token::Class);
        assert_eq!(tokens[12].token, Token::Id(String::from("MyClass1")));

        Ok(())
    }

    #[test]
    fn if_statement() -> Result<(), LexErr> {
        let source = "if a then\n    b\nelse\n    c";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::If);
        assert_eq!(tokens[1].token, Token::Id(String::from("a")));
        assert_eq!(tokens[2].token, Token::Then);
        assert_eq!(tokens[3].token, Token::NL);
        assert_eq!(tokens[4].token, Token::Indent);
        assert_eq!(tokens[5].token, Token::Id(String::from("b")));
        assert_eq!(tokens[6].token, Token::NL);
        assert_eq!(tokens[7].token, Token::Dedent);
        assert_eq!(tokens[8].token, Token::NL);
        assert_eq!(tokens[9].token, Token::Else);
        assert_eq!(tokens[10].token, Token::NL);
        assert_eq!(tokens[11].token, Token::Indent);
        assert_eq!(tokens[12].token, Token::Id(String::from("c")));

        Ok(())
    }

    #[test]
    fn e_number() -> Result<(), LexErr> {
        let source = "3E4";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::ENum(String::from("3"), String::from("4")));
        Ok(())
    }

    #[test]
    fn int() -> Result<(), LexErr> {
        let source = "0";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Int(String::from("0")));
        Ok(())
    }

    #[test]
    fn real() -> Result<(), LexErr> {
        let source = "0.";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Real(String::from("0.")));
        Ok(())
    }

    #[test]
    fn real2() -> Result<(), LexErr> {
        let source = "0.0";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Real(String::from("0.0")));
        Ok(())
    }

    #[test]
    fn real3() -> Result<(), LexErr> {
        let source = "0.0.";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Real(String::from("0.0")));
        assert_eq!(tokens[1].token, Token::Point);
        Ok(())
    }

    #[test]
    fn range_incl() -> Result<(), LexErr> {
        let sources = vec!["0 ..= 2", "0..= 2", "0 ..=2", "0..=2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::RangeIncl, "(..=): {}", source);
            assert_eq!(tokens[2].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }

    #[test]
    fn range() -> Result<(), LexErr> {
        let sources = vec!["0 .. 2", "0.. 2", "0 ..2", "0..2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::Range, "(..): {}", source);
            assert_eq!(tokens[2].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }

    #[test]
    fn range_tripped_up() -> Result<(), LexErr> {
        let sources = vec!["0 ... 2", "0... 2", "0 ...2", "0...2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::Range, "(..): {}", source);
            assert_eq!(tokens[2].token, Token::Point, "(.): {}", source);
            assert_eq!(tokens[3].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }
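
    #[test]
    fn shift_operators() -> Result<(), LexErr> {
        // A minimal sketch exercising the bit-shift operators; it assumes the
        // same spacing-insensitive behaviour as the operator tests above.
        let source = "a << b >>= c";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Id(String::from("a")));
        assert_eq!(tokens[1].token, Token::BLShift);
        assert_eq!(tokens[2].token, Token::Id(String::from("b")));
        assert_eq!(tokens[3].token, Token::BRShiftAssign);
        assert_eq!(tokens[4].token, Token::Id(String::from("c")));

        Ok(())
    }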
}