klex 0.1.2

A simple lexer (tokenizer) generator for Rust
Documentation
use std::collections::HashMap;

%%
// Rule section. Each rule has the form `pattern -> Name` or
// `pattern -> { action }`. The names on the right are what the tests at the
// bottom of this file compare against, e.g. `TokenKind::CharQ`.
//
// Character literals, written in single quotes
'q' -> CharQ
'w' -> CharW

// String literals, written in double quotes
"hello" -> HelloString
"world" -> WorldString

// Character ranges in brackets, repeated with `+`
[0-9]+ -> DigitRange
[A-Z]+ -> UpperCase
[a-z]+ -> LowerCase

// Character sets (explicit lists of characters) in brackets, repeated with `+`
[abc]+ -> AbcSet
[xyz]+ -> XyzSet

// Regular expression written between slashes: a letter or underscore
// followed by any number of letters, digits or underscores
/[a-zA-Z_][a-zA-Z0-9_]*/ -> Identifier

// Runs of spaces and tabs; the test inputs use spaces to separate tokens.
// NOTE(review): this comment used to say the rule "must come before other
// patterns", yet it is listed after most of them — confirm how klex
// prioritises rules and move the rule or drop this note accordingly.
/[ \t]+/ -> Whitespace

// Backslash-escaped character (complex tests live in a separate file).
// Presumably matches a literal `+` — confirm against klex's escape rules.
\+ -> PlusEscaped

// Single `-` literal, and a parenthesised choice between two string literals
'-' -> MinusSign
("true"|"false") -> Boolean

// Negated character class (complex tests live in a separate file).
// Assuming standard regex class semantics, this matches one character that
// is not a letter, digit, space, tab, newline, `+` or `-`; the `x` in the
// class is already covered by `a-z`.
/[^a-zA-Z0-9 \t\n+x-]/ -> SpecialChar

// Action rule: instead of naming a token kind, the braces hold Rust code.
// It prints a message and evaluates to `Some(Token::new(...))` with kind
// `TokenKind::Unknown`, copying the remaining fields from `test_t`.
// NOTE(review): `test_t` is not defined in this file — presumably the matched
// token supplied by the generated lexer; confirm.
'x' -> { println!("Found x character"); Some(Token::new(TokenKind::Unknown, test_t.text.clone(), test_t.index, test_t.row, test_t.col, test_t.length, test_t.indent)) }
%%

#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes a sentence that exercises the literal, range and set rules and
    /// checks the kind assigned to the words `q`, `hello`, `123`, `ABC` and
    /// `abc`.
    #[test]
    fn test_new_patterns() {
        let source = "q hello 123 ABC abc xyz x";
        let mut lexer = Lexer::from_str(source);

        // Pull tokens until the lexer reports that none are left.
        let tokens: Vec<_> = std::iter::from_fn(|| lexer.next_token()).collect();

        // The input must yield at least one token.
        assert!(!tokens.is_empty());
        println!("Generated {} tokens", tokens.len());
        tokens
            .iter()
            .for_each(|tok| println!("Token: kind={:?}, value='{}'", tok.kind, tok.text));

        // True when some token carries exactly this text and this kind.
        let found = |text: &str, kind: TokenKind| {
            tokens
                .iter()
                .any(|tok| tok.text == text && tok.kind == kind)
        };

        assert!(found("q", TokenKind::CharQ));
        assert!(found("hello", TokenKind::HelloString));
        assert!(found("123", TokenKind::DigitRange));
        assert!(found("ABC", TokenKind::UpperCase));
        // `abc` is expected as `LowerCase`, not `AbcSet`: per the original
        // author's note, the `[a-z]+` rule wins because it is listed before
        // `[abc]+` in the rule section.
        assert!(found("abc", TokenKind::LowerCase));
    }

    /// Lexes digits, lower-case, upper-case and set characters and checks
    /// that the range-based token kinds all show up.
    #[test]
    fn test_character_ranges_and_sets() {
        let source = "123 def XYZ xyz";
        let mut lexer = Lexer::from_str(source);

        // Pull tokens until the lexer reports that none are left.
        let tokens: Vec<_> = std::iter::from_fn(|| lexer.next_token()).collect();

        println!("Character ranges test tokens:");
        tokens
            .iter()
            .for_each(|tok| println!("Token: kind={:?}, value='{}'", tok.kind, tok.text));

        // True when at least one token has the given kind.
        let has_kind = |kind: TokenKind| tokens.iter().any(|tok| tok.kind == kind);

        // Each of the three range rules must have produced a token.
        assert!(has_kind(TokenKind::DigitRange));
        assert!(has_kind(TokenKind::LowerCase));
        assert!(has_kind(TokenKind::UpperCase));
        // The original note says the leading `x` of `xyz` is taken by the
        // `'x'` action rule, so the lower-case text may be either `def` or
        // the leftover `yz`; either one satisfies the check.
        assert!(tokens
            .iter()
            .filter(|tok| tok.kind == TokenKind::LowerCase)
            .any(|tok| tok.text == "def" || tok.text == "yz"));
    }
}