sharp 0.1.0

A modern, statically-typed programming language with Python-like syntax, compiled to native code via LLVM. Game engine ready!
use logos::{Logos, SpannedIter};
use std::collections::VecDeque;
use std::ops::Range;

mod token;
pub use token::Token;

/// Indentation-aware lexer that wraps the raw `logos` token stream and
/// injects `Token::Indent` / `Token::Dedent` tokens based on the whitespace
/// carried by each `Token::Newline`.
pub struct Lexer<'a> {
    /// Underlying `logos` iterator yielding raw tokens together with their byte spans.
    inner: SpannedIter<'a, Token>,
    /// The complete input text; sliced by span to inspect newline tokens and
    /// used to position the dedents emitted at end of input.
    source: &'a str,
    /// Stack of currently open indentation widths; starts as `[0]`.
    indent_stack: Vec<usize>,
    /// Synthesized tokens waiting to be returned before the inner iterator
    /// is polled again.
    pending_tokens: VecDeque<(Token, Range<usize>)>,
    /// Set once the end-of-input dedents have been queued, so that this
    /// happens only one time.
    eof_processed: bool,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer over `source`.
    ///
    /// The indentation stack starts with a single level of width 0 and no
    /// tokens are queued.
    pub fn new(source: &'a str) -> Self {
        Self {
            inner: Token::lexer(source).spanned(),
            source,
            indent_stack: vec![0],
            pending_tokens: VecDeque::new(),
            eof_processed: false,
        }
    }

    /// Returns the indentation width encoded in the text of a newline token.
    ///
    /// Only the text after the last `\n` in `slice` is measured, so a token
    /// that covers several lines reports the indentation of the line that
    /// follows it rather than the sum over every line it spans. A space
    /// counts as one column and a tab as four; any other character
    /// contributes nothing.
    ///
    /// `self` is not read; the receiver is kept so existing call sites of the
    /// form `self.calculate_indent(..)` continue to work.
    fn calculate_indent(&self, slice: &str) -> usize {
        // For now, assume 1 space = 1 column and 1 tab = 4 columns.
        const SPACE_WIDTH: usize = 1;
        const TAB_WIDTH: usize = 4;

        let last_line = match slice.rfind('\n') {
            // `\n` is a single byte, so `pos + 1` is always a char boundary.
            Some(pos) => &slice[pos + 1..],
            // No line feed at all: skip the first character, which is where
            // the newline is expected to be.
            None => {
                let mut rest = slice.chars();
                rest.next();
                rest.as_str()
            }
        };

        last_line
            .chars()
            .map(|c| match c {
                ' ' => SPACE_WIDTH,
                '\t' => TAB_WIDTH,
                _ => 0,
            })
            .sum()
    }
}

impl<'a> Iterator for Lexer<'a> {
    type Item = (Token, Range<usize>);

    fn next(&mut self) -> Option<Self::Item> {
        if let Some(token) = self.pending_tokens.pop_front() {
            return Some(token);
        }

        match self.inner.next() {
            Some((Ok(token), span)) => {
                match token {
                    Token::Newline => {
                        let slice = &self.source[span.clone()];
                        let indent = self.calculate_indent(slice);
                        let current_indent = *self.indent_stack.last().unwrap_or(&0);

                        if indent > current_indent {
                            self.indent_stack.push(indent);
                            self.pending_tokens.push_back((Token::Indent, span.clone()));
                            Some((Token::Newline, span))
                        } else if indent < current_indent {
                            while let Some(&top) = self.indent_stack.last() {
                                if top > indent {
                                    self.indent_stack.pop();
                                    self.pending_tokens.push_back((Token::Dedent, span.clone()));
                                } else {
                                    break;
                                }
                            }
                            // Verify we landed on a valid indent level
                            if let Some(&top) = self.indent_stack.last() {
                                if top != indent {
                                    // Indentation error: mismatch
                                    return Some((Token::Error, span));
                                }
                            }
                            // After dedents, we still emit the newline that caused it?
                            // Usually: Statement -> Newline -> Dedent -> Next Statement
                            // So: Newline, Dedent, Dedent...
                            // But we are currently AT the newline.
                            // So we return Newline first, then the Dedents.
                            // But wait, if I return Newline now, next call pops Dedent.
                            // Yes.
                            Some((Token::Newline, span))
                        } else {
                            // Same indentation, just a newline separator
                            Some((Token::Newline, span))
                        }
                    }
                    _ => Some((token, span)),
                }
            }
            Some((Err(_), span)) => Some((Token::Error, span)),
            None => {
                if !self.eof_processed {
                    self.eof_processed = true;
                    // Emit remaining Dedents
                    while self.indent_stack.len() > 1 {
                        self.indent_stack.pop();
                        // Use the end of source as span for implicit dedents
                        let len = self.source.len();
                        self.pending_tokens.push_back((Token::Dedent, len..len));
                    }
                    if let Some(token) = self.pending_tokens.pop_front() {
                        return Some(token);
                    }
                }
                None
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and returns only the tokens, discarding their spans.
    fn lex(source: &str) -> Vec<Token> {
        Lexer::new(source).map(|(token, _)| token).collect()
    }

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token {
        Token::Ident(name.to_string())
    }

    #[test]
    fn test_basic_tokens() {
        let tokens = lex("def foo(x): return x + 1");
        assert_eq!(
            tokens,
            vec![
                Token::Def,
                ident("foo"),
                Token::LParen,
                ident("x"),
                Token::RParen,
                Token::Colon,
                Token::Return,
                ident("x"),
                Token::Plus,
                Token::IntLit(1),
            ]
        );
    }

    #[test]
    fn test_indentation() {
        let tokens = lex("def foo():\n    return 1\nx = 2\n");

        // Indent/Dedent tokens follow the Newline that changed the level.
        let expected = vec![
            Token::Def,
            ident("foo"),
            Token::LParen,
            Token::RParen,
            Token::Colon,
            Token::Newline,
            Token::Indent,
            Token::Return,
            Token::IntLit(1),
            Token::Newline,
            Token::Dedent,
            ident("x"),
            Token::Eq,
            Token::IntLit(2),
            Token::Newline,
        ];

        assert_eq!(tokens, expected);
    }

    #[test]
    fn test_operators() {
        let tokens = lex("+= -= *= /= == != :: ->");
        assert_eq!(
            tokens,
            vec![
                Token::PlusEq,
                Token::MinusEq,
                Token::StarEq,
                Token::SlashEq,
                Token::EqEq,
                Token::NotEq,
                Token::ColonColon,
                Token::Arrow,
            ]
        );
    }

    #[test]
    fn test_keywords() {
        let tokens = lex("async await unsafe safe asm comptime");
        assert_eq!(
            tokens,
            vec![
                Token::Async,
                Token::Await,
                Token::Unsafe,
                Token::Safe,
                Token::Asm,
                Token::Comptime,
            ]
        );
    }

    #[test]
    fn test_numbers_and_strings() {
        // 2.5 is exactly representable in binary floating point and, unlike
        // 3.14, does not trigger Clippy's `approx_constant` lint.
        let tokens = lex(r#"42 2.5 "hello world""#);

        // `first`/`get` keep a short token stream from panicking on an
        // out-of-bounds index, so the failure shows what was actually lexed.
        assert_eq!(tokens.first(), Some(&Token::IntLit(42)));
        assert_eq!(tokens.get(1), Some(&Token::FloatLit(2.5)));
        assert!(matches!(tokens.get(2), Some(Token::StringLit(_))));
    }
}