ruchy 4.2.0

A systems scripting language that transpiles to idiomatic Rust with extreme quality engineering
Documentation
// Basic Ruchy lexer implemented in Ruchy
// This is a proof-of-concept for self-hosting capability

// Token kinds produced by the lexer.
// Variants with a payload carry the decoded value of the lexeme; all other
// variants are plain markers.
// NOTE(review): Lexer::tokenize compares tokens with `==` / `!=`; confirm the
// toolchain provides equality for this enum.
enum Token {
    // Literals
    Integer(i64),       // digits with no '.', e.g. 42
    Float(f64),         // digits containing a '.', e.g. 3.14
    String(String),     // text between double quotes, escapes already decoded
    Bool(bool),         // the words `true` / `false`
    
    // Keywords (matched against the exact lowercase spelling)
    Let,
    Fun,
    If,
    Else,
    Match,
    For,
    While,
    Struct,
    Enum,
    Trait,
    Import,
    Export,
    
    // Operators
    Plus,               // +
    Minus,              // - (when not followed by '>')
    Star,               // *
    Slash,              // / (when not starting a `//` comment)
    Equal,              // currently emitted for both `=` and `==`
    NotEqual,           // currently emitted for both `!` and `!=`
    Less,               // <
    Greater,            // >
    
    // Delimiters
    LeftParen,          // (
    RightParen,         // )
    LeftBrace,          // {
    RightBrace,         // }
    LeftBracket,        // [
    RightBracket,       // ]
    
    // Special
    Arrow,              // ->
    Comma,              // ,
    Semicolon,          // ;
    Colon,              // :
    Dot,                // .
    
    // Identifiers: any word that is not a keyword or boolean literal
    Identifier(String),
    
    // Control
    Newline,            // '\n'; Lexer::tokenize drops these from its output
    Eof,                // end of input; always the last token from tokenize
}

// Span information for error reporting: a start/end pair of offsets into the
// source text.
// NOTE(review): none of the lexer code visible in this file constructs or
// reads a Span yet. The unit (byte vs. character offset) and whether `end`
// is inclusive are not established here — confirm before relying on them.
struct Span {
    start: usize,   // offset where the spanned text begins
    end: usize,     // offset where the spanned text ends
}

// Lexer state: the source text plus a one-character cursor over it.
struct Lexer {
    // Full source text being tokenized.
    input: String,
    // Character index handed to `input.chars().nth(..)` on the next advance;
    // i.e. the index of the character that follows `current_char`.
    position: usize,
    // Character currently under examination; None once the input is
    // exhausted (and before the first advance).
    current_char: Option<char>,
}

impl Lexer {
    // Create a lexer over `input` and load its first character into
    // `current_char` (None when `input` is empty).
    fun new(input: String) -> Lexer {
        let mut lexer = Lexer {
            input: input,
            position: 0,
            current_char: None,
        };
        lexer.advance();
        lexer
    }

    // Move the cursor one character forward.
    // Sets `current_char` to the character at index `position` and bumps
    // `position`; at end of input sets `current_char` to None and leaves
    // `position` untouched.
    //
    // `position` is a *character* index, so the end-of-input check must come
    // from `chars().nth(..)` itself. Comparing against `input.len()` (a byte
    // count) is wrong for multi-byte text and used to panic on the unwrap.
    fun advance(self) {
        match self.input.chars().nth(self.position) {
            Some(ch) => {
                self.current_char = Some(ch);
                self.position = self.position + 1;
            },
            None => {
                self.current_char = None;
            }
        }
    }

    // Skip horizontal whitespace. '\n' is NOT skipped: it is a token.
    // '\r' is skipped so CRLF input does not go through the
    // unknown-character path of next_token (the token output is the same).
    fun skip_whitespace(self) {
        while self.current_char == Some(' ')
            || self.current_char == Some('\t')
            || self.current_char == Some('\r')
        {
            self.advance();
        }
    }

    // Read an integer or float literal starting at `current_char`.
    // Expects `current_char` to be an ASCII digit (next_token guarantees it).
    // Returns Token::Float when the literal contains a fractional part,
    // Token::Integer otherwise.
    //
    // A '.' is only taken as part of the number when a digit follows it, so
    // `1.foo`, `1..5` and a trailing `3.` lex as Integer followed by Dot
    // instead of swallowing the dot into a float.
    //
    // Panics (via unwrap) if the digits do not fit in an i64, or if called
    // when `current_char` is not a digit; Token has no error variant to
    // report this through.
    fun read_number(self) -> Token {
        let mut num_str = String::new();
        let mut is_float = false;

        while self.current_char.is_some() {
            let ch = self.current_char.unwrap();
            match ch {
                '0'..='9' => {
                    num_str.push(ch);
                    self.advance();
                },
                '.' => {
                    // `position` already indexes the character after the '.'.
                    let next = self.input.chars().nth(self.position);
                    let digit_follows = next.is_some() && next.unwrap().is_ascii_digit();
                    if is_float || !digit_follows {
                        break;
                    }
                    is_float = true;
                    num_str.push('.');
                    self.advance();
                },
                _ => break,
            }
        }

        if is_float {
            Token::Float(num_str.parse().unwrap())
        } else {
            Token::Integer(num_str.parse().unwrap())
        }
    }

    // Read a word made of alphanumeric characters and '_' starting at
    // `current_char`, then classify it: keyword, boolean literal, or
    // Token::Identifier carrying the word.
    fun read_identifier(self) -> Token {
        let mut ident = String::new();

        while self.current_char.is_some() {
            let ch = self.current_char.unwrap();
            if ch.is_alphanumeric() || ch == '_' {
                ident.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        // Check for keywords
        match ident.as_str() {
            "let" => Token::Let,
            "fun" => Token::Fun,
            "if" => Token::If,
            "else" => Token::Else,
            "match" => Token::Match,
            "for" => Token::For,
            "while" => Token::While,
            "struct" => Token::Struct,
            "enum" => Token::Enum,
            "trait" => Token::Trait,
            "import" => Token::Import,
            "export" => Token::Export,
            "true" => Token::Bool(true),
            "false" => Token::Bool(false),
            _ => Token::Identifier(ident),
        }
    }

    // Read a double-quoted string literal; `current_char` must be the
    // opening quote. Returns Token::String with escapes decoded:
    // \n, \t, \\ and \" are translated, any other escape is kept verbatim
    // (backslash plus character).
    // An unterminated string is not an error: everything up to end of input
    // becomes the string contents. A lone backslash at end of input is
    // dropped.
    fun read_string(self) -> Token {
        self.advance(); // Skip opening quote
        let mut string = String::new();

        while self.current_char.is_some() && self.current_char != Some('"') {
            if self.current_char == Some('\\') {
                self.advance();
                if self.current_char.is_some() {
                    let escaped = self.current_char.unwrap();
                    match escaped {
                        'n' => string.push('\n'),
                        't' => string.push('\t'),
                        '\\' => string.push('\\'),
                        '"' => string.push('"'),
                        _ => {
                            // Unknown escape: keep it exactly as written
                            string.push('\\');
                            string.push(escaped);
                        }
                    }
                    self.advance();
                }
            } else {
                string.push(self.current_char.unwrap());
                self.advance();
            }
        }

        if self.current_char == Some('"') {
            self.advance(); // Skip closing quote
        }

        Token::String(string)
    }

    // Produce the next token, consuming its characters.
    // Leading spaces/tabs are skipped; '\n' yields Token::Newline; end of
    // input yields Token::Eof (repeatedly, if called again).
    // `//` comments are skipped up to (not including) the line break, and
    // characters that start no token are skipped silently; both cases
    // recurse to fetch the token that follows.
    fun next_token(self) -> Token {
        self.skip_whitespace();

        if self.current_char.is_none() {
            return Token::Eof;
        }

        let ch = self.current_char.unwrap();

        match ch {
            '\n' => {
                self.advance();
                Token::Newline
            },
            '(' => {
                self.advance();
                Token::LeftParen
            },
            ')' => {
                self.advance();
                Token::RightParen
            },
            '{' => {
                self.advance();
                Token::LeftBrace
            },
            '}' => {
                self.advance();
                Token::RightBrace
            },
            '[' => {
                self.advance();
                Token::LeftBracket
            },
            ']' => {
                self.advance();
                Token::RightBracket
            },
            '+' => {
                self.advance();
                Token::Plus
            },
            '-' => {
                // '-' alone is Minus, '->' is Arrow
                self.advance();
                if self.current_char == Some('>') {
                    self.advance();
                    Token::Arrow
                } else {
                    Token::Minus
                }
            },
            '*' => {
                self.advance();
                Token::Star
            },
            '/' => {
                self.advance();
                if self.current_char == Some('/') {
                    // Skip comment line; the '\n' itself is left in place so
                    // it is still reported as a Newline token.
                    while self.current_char.is_some() && self.current_char != Some('\n') {
                        self.advance();
                    }
                    self.next_token() // Recursively get next token
                } else {
                    Token::Slash
                }
            },
            '=' => {
                // For now `=` and `==` are deliberately treated the same:
                // Token has no separate assignment variant.
                self.advance();
                if self.current_char == Some('=') {
                    self.advance();
                }
                Token::Equal
            },
            '!' => {
                // Simplified: a bare `!` is also reported as NotEqual.
                self.advance();
                if self.current_char == Some('=') {
                    self.advance();
                }
                Token::NotEqual
            },
            '<' => {
                self.advance();
                Token::Less
            },
            '>' => {
                self.advance();
                Token::Greater
            },
            ',' => {
                self.advance();
                Token::Comma
            },
            ';' => {
                self.advance();
                Token::Semicolon
            },
            ':' => {
                self.advance();
                Token::Colon
            },
            '.' => {
                self.advance();
                Token::Dot
            },
            '"' => self.read_string(),
            '0'..='9' => self.read_number(),
            'a'..='z' | 'A'..='Z' | '_' => self.read_identifier(),
            _ => {
                self.advance();
                self.next_token() // Skip unknown characters
            }
        }
    }

    // Lex the remaining input into a vector of tokens.
    // Newline tokens are dropped; the returned vector always ends with
    // exactly one Token::Eof.
    fun tokenize(self) -> Vec<Token> {
        let mut tokens = Vec::new();

        loop {
            let token = self.next_token();
            if token == Token::Eof {
                tokens.push(token);
                break;
            }
            if token != Token::Newline { // Skip newlines for simplicity
                tokens.push(token);
            }
        }

        tokens
    }
}

// Demo entry point: tokenize a fixed sample line and print one line per
// token. Only the token kinds that occur in the sample get a specific
// label; everything else is reported as "Other token".
fun main() {
    let source = "let x = 42 + 3.14";
    let mut lx = Lexer::new(source.to_string());
    let toks = lx.tokenize();

    println("Tokens for: {}", source);
    for tok in toks {
        match tok {
            // Markers without payload
            Token::Let => println("  Keyword: let"),
            Token::Equal => println("  Operator: ="),
            Token::Plus => println("  Operator: +"),
            Token::Eof => println("  EOF"),
            // Tokens that carry a value
            Token::Identifier(name) => println("  Identifier: {}", name),
            Token::Integer(value) => println("  Integer: {}", value),
            Token::Float(value) => println("  Float: {}", value),
            Token::String(text) => println("  String: {}", text),
            _ => println("  Other token"),
        }
    }
}