// ruchy 4.2.0
//
// A systems scripting language that transpiles to idiomatic Rust with extreme quality engineering.
// Documentation
// Self-hosted Ruchy Lexer - Simplified Proof of Concept  
// RUCHY-0722: Port lexer to Ruchy (Phase 1 self-hosting)

// Simple character-based lexer implementation
// True when ch is an ASCII decimal digit ('0'..'9').
fun is_digit(ch: char) -> bool {
    '0' <= ch && ch <= '9'
}

// True when ch can start an identifier: ASCII letter or underscore.
fun is_letter(ch: char) -> bool {
    ch == '_' || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')
}

// True when ch may appear inside an identifier: digit, letter, or '_'.
fun is_alphanumeric(ch: char) -> bool {
    is_digit(ch) || is_letter(ch)
}

// Return the character at byte-order position `pos` of `s`.
// Out-of-range positions (negative or past the end) yield ' ' — the
// same sentinel the original implementation used for out of bounds.
//
// Fixes: the previous version reassigned a `let` binding declared
// without `mut`, and materialized the whole string as a list (O(n^2)
// with repeated list concatenation) just to read one element. This
// walks the characters once and stops at `pos`.
fun char_at(s: String, pos: i32) -> char {
    if pos < 0 {
        return ' '
    }
    let mut index = 0
    for ch in s.chars() {
        if index == pos {
            return ch
        }
        index = index + 1
    }
    ' '  // pos was past the end of the string
}

// Simple token types for proof of concept
// Simple token types for proof of concept.
// Covers just enough of the language for the demos below:
// integer literals, identifiers, five punctuation marks, two
// keywords, a catch-all for anything unrecognized, and end-of-input.
enum SimpleToken {
    Number(i32),    // integer literal, e.g. 42
    Word(String),   // identifier (non-keyword)
    Plus,           // '+'
    Minus,          // '-'
    Equal,          // '='
    LeftParen,      // '('
    RightParen,     // ')'
    LeftBrace,      // '{'
    RightBrace,     // '}'
    Semicolon,      // ';'
    Let,            // keyword "let"
    Fun,            // keyword "fun"
    Unknown,        // any character the lexer does not recognize
    Eof,            // end of input
}

// Basic lexer state
// Basic lexer state. Treated as an immutable value: advancing the
// cursor produces a new SimpleLexer rather than mutating this one.
struct SimpleLexer {
    input: String,   // full source text being tokenized
    position: i32,   // index of the next character to read
    length: i32,     // cached input.len(), for bounds checks
}

// Build a fresh lexer positioned at the start of `input`.
//
// Fix: capture the length BEFORE constructing the struct — the
// original read `input.len()` after `input` had already been moved
// into the struct literal, which is a use-after-move once this
// transpiles to Rust.
fun create_lexer(input: String) -> SimpleLexer {
    let input_length = input.len() as i32
    SimpleLexer {
        input: input,
        position: 0,
        length: input_length,
    }
}

// Character under the cursor, or '\0' once the input is exhausted.
fun current_char(lexer: SimpleLexer) -> char {
    if lexer.position < lexer.length {
        char_at(lexer.input, lexer.position)
    } else {
        '\0'
    }
}

// One character of lookahead past the cursor; '\0' when none remains.
fun peek_char(lexer: SimpleLexer) -> char {
    let next_pos = lexer.position + 1
    if next_pos < lexer.length {
        char_at(lexer.input, next_pos)
    } else {
        '\0'
    }
}

// Step the cursor forward by one character. Lexer state is immutable,
// so this returns a fresh value rather than mutating in place.
fun advance_lexer(lexer: SimpleLexer) -> SimpleLexer {
    let next_position = lexer.position + 1
    SimpleLexer {
        input: lexer.input,
        position: next_position,
        length: lexer.length,
    }
}

// Consume spaces, tabs, newlines, and carriage returns, returning the
// lexer positioned at the first non-whitespace character (or the end).
fun skip_whitespace(lexer: SimpleLexer) -> SimpleLexer {
    let mut state = lexer
    loop {
        let look = current_char(state)
        let blank = look == ' ' || look == '\t' || look == '\n' || look == '\r'
        if !blank {
            break
        }
        state = advance_lexer(state)
        // Guard against running past the end of the input.
        if state.position >= state.length {
            break
        }
    }
    state
}

// Consume a run of digits and return it as a Number token, together
// with the lexer positioned just past the last digit. An empty run
// (or an unparseable value) yields Number(0).
fun read_number(lexer: SimpleLexer) -> (SimpleToken, SimpleLexer) {
    let mut state = lexer
    let mut digits = ""
    loop {
        let look = current_char(state)
        if !is_digit(look) {
            break
        }
        digits = digits + look
        state = advance_lexer(state)
        if state.position >= state.length {
            break
        }
    }
    let value = if digits == "" { 0 } else { digits.parse::<i32>().unwrap_or(0) }
    (SimpleToken::Number(value), state)
}

// Consume an identifier-shaped run of characters. Keywords "let" and
// "fun" map to their dedicated tokens; anything else becomes Word.
fun read_word(lexer: SimpleLexer) -> (SimpleToken, SimpleLexer) {
    let mut state = lexer
    let mut text = ""
    loop {
        let look = current_char(state)
        if !is_alphanumeric(look) {
            break
        }
        text = text + look
        state = advance_lexer(state)
        if state.position >= state.length {
            break
        }
    }
    // Keyword lookup before falling back to a plain identifier.
    let token = if text == "let" {
        SimpleToken::Let
    } else if text == "fun" {
        SimpleToken::Fun
    } else {
        SimpleToken::Word(text)
    }
    (token, state)
}

// Produce the next token and the lexer state that follows it.
// Order matters: whitespace is skipped first, then end-of-input,
// then multi-character tokens (numbers, words), then single
// characters — each of which consumes exactly one position.
fun next_token(lexer: SimpleLexer) -> (SimpleToken, SimpleLexer) {
    let state = skip_whitespace(lexer)

    if state.position >= state.length {
        return (SimpleToken::Eof, state)
    }

    let look = current_char(state)

    if is_digit(look) {
        return read_number(state)
    }
    if is_letter(look) {
        return read_word(state)
    }

    // Every remaining case is a one-character token; map the
    // character first, then advance once for all of them.
    let token = if look == '+' {
        SimpleToken::Plus
    } else if look == '-' {
        SimpleToken::Minus
    } else if look == '=' {
        SimpleToken::Equal
    } else if look == '(' {
        SimpleToken::LeftParen
    } else if look == ')' {
        SimpleToken::RightParen
    } else if look == '{' {
        SimpleToken::LeftBrace
    } else if look == '}' {
        SimpleToken::RightBrace
    } else if look == ';' {
        SimpleToken::Semicolon
    } else {
        SimpleToken::Unknown
    }
    (token, advance_lexer(state))
}

// Human-readable rendering of a token, used by the test harness.
// Payload-carrying variants include their value, e.g. "Number(42)".
fun token_to_string(token: SimpleToken) -> String {
    match token {
        // NOTE(review): relies on String + i32 concatenation — Ruchy
        // appears to support this; confirm against the language spec.
        SimpleToken::Number(n) => "Number(" + n + ")",
        SimpleToken::Word(w) => "Word(" + w + ")",
        SimpleToken::Plus => "Plus",
        SimpleToken::Minus => "Minus", 
        SimpleToken::Equal => "Equal",
        SimpleToken::LeftParen => "LeftParen",
        SimpleToken::RightParen => "RightParen",
        SimpleToken::LeftBrace => "LeftBrace",
        SimpleToken::RightBrace => "RightBrace",
        SimpleToken::Semicolon => "Semicolon",
        SimpleToken::Let => "Let",
        SimpleToken::Fun => "Fun",
        SimpleToken::Unknown => "Unknown",
        SimpleToken::Eof => "Eof",
    }
}

// Tokenize the whole input string. The returned list always ends
// with SimpleToken::Eof (it is appended before the loop terminates).
fun tokenize_string(input: String) -> [SimpleToken] {
    let mut state = create_lexer(input)
    let mut out = []
    let mut done = false

    while !done {
        let (tok, next_state) = next_token(state)
        state = next_state

        // Eof is still appended; it just marks the final iteration.
        done = match tok {
            SimpleToken::Eof => true,
            _ => false,
        }

        out = out + [tok]
    }

    out
}

// Smoke test: tokenize three tiny programs and print each token.
// Verified by eye — output strings are fixed and ordered.
fun test_simple_lexer() {
    println("🔧 Testing Self-Hosted Simple Lexer...")

    // Case 1: integer literals around an operator
    let source_a = "42 + 10"
    println("Test 1: " + source_a)
    for tok in tokenize_string(source_a) {
        println("  " + token_to_string(tok))
    }

    // Case 2: keyword, identifier, '=', literal
    let source_b = "let x = 5"
    println("Test 2: " + source_b)
    for tok in tokenize_string(source_b) {
        println("  " + token_to_string(tok))
    }

    // Case 3: start of a function definition
    let source_c = "fun add"
    println("Test 3: " + source_c)
    for tok in tokenize_string(source_c) {
        println("  " + token_to_string(tok))
    }

    println("✅ Self-hosted simple lexer tests completed!")
}

// Script entry point: run the smoke test defined above.
test_simple_lexer()