Crate uwl

A stream of chars for building things such as a lexer. It makes the step of "iterating between characters" considerably easier and provides certain utilities for making the code simpler. Respects both ASCII and Unicode.

Example: lexing identifiers, numbers and some punctuation marks:

use uwl::Stream;

/// The category of a token produced by `lex`.
///
/// The enum carries no data, so it is cheap to copy and supports full
/// equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    /// A run of alphabetic characters.
    Ident,
    /// A run of numeric characters.
    Number,
    /// The `?` character.
    Question,
    /// The `!` character.
    Exclamation,
    /// The `,` character.
    Comma,
    /// The `.` character.
    Point,

    /// An invalid token: any character not covered by the kinds above.
    Illegal,
}

/// The literal text a token was lexed from.
///
/// Both payloads (`char` and `&str`) are `Copy` and `Eq`, so the enum
/// derives those traits as well.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Lit<'a> {
    /// A single character.
    Short(char),
    /// A string slice borrowed for `'a`.
    Long(&'a str),
}

/// A single lexed token: its category paired with the text it came from.
#[derive(Debug, PartialEq)]
struct Token<'a> {
    /// Which category of token this is.
    kind: TokenKind,
    /// The character or string slice that makes up the token.
    lit: Lit<'a>,
}

impl<'a> Token<'a> {
    fn new(kind: TokenKind, lit: Lit<'a>) -> Self {
        Token {
            kind,
            lit,
        }
    }
}

/// Lexes the next token from `stream`, skipping any leading whitespace.
///
/// Returns `None` when the stream has no current character, or when
/// `stream.next()` yields `None` while consuming a single-character token.
fn lex<'a>(stream: &mut Stream<'a>) -> Option<Token<'a>> {
    let ch = stream.current()?;

    // Whitespace carries no token: consume the run and lex what follows.
    if ch.is_whitespace() {
        stream.take_while(|c| c.is_whitespace());
        return lex(stream);
    }

    if ch.is_alphabetic() {
        let word = stream.take_while(|s| s.is_alphabetic());
        return Some(Token::new(TokenKind::Ident, Lit::Long(word)));
    }

    if ch.is_numeric() {
        let digits = stream.take_while(|s| s.is_numeric());
        return Some(Token::new(TokenKind::Number, Lit::Long(digits)));
    }

    // Everything else is a single-character token; choose its kind first,
    // then consume exactly one character from the stream.
    let kind = match ch {
        '?' => TokenKind::Question,
        '!' => TokenKind::Exclamation,
        ',' => TokenKind::Comma,
        '.' => TokenKind::Point,
        _ => TokenKind::Illegal,
    };
    let consumed = stream.next()?;

    Some(Token::new(kind, Lit::Short(consumed)))
}

fn main() {
    let mut stream = Stream::new("Hello, world! ...world? Hello?");

    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("Hello"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Comma, Lit::Short(','))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("world"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Exclamation, Lit::Short('!'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, Lit::Short('.'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, Lit::Short('.'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, Lit::Short('.'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("world"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, Lit::Short('?'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("Hello"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, Lit::Short('?'))));

    // Reached the end
    assert_eq!(lex(&mut stream), None);
}

Structs

Stream

A stream of characters. Handles ASCII and/or Unicode.

Traits

CharExt

Adds additional `is_*` methods to `char`.