Crate uwl

Source
Expand description

A stream designed for the individual manipulation of bytes and Unicode codepoint characters.

§Example

Using the stream to build a lexer that tokenizes English text.

use uwl::Stream;

/// The category of a lexed token.
///
/// The enum carries no data, so it is cheap to copy and supports full
/// equality; `Clone`, `Copy` and `Eq` are derived alongside `Debug` and
/// `PartialEq` so a kind can be reused after being stored in a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
    /// A word.
    Ident,
    /// A number.
    Number,
    /// A question mark.
    Question,
    /// An exclamation mark.
    Exclamation,
    /// A comma.
    Comma,
    /// A full stop.
    Point,

    /// An invalid token.
    Illegal,
}

/// A single token: its category together with the text it stands for.
///
/// The text is held as a borrowed `&'a str`, so a token does not own or copy
/// its literal.
#[derive(Debug, PartialEq)]
struct Token<'a> {
    /// The category of this token.
    kind: TokenKind,
    /// The literal text of this token.
    lit: &'a str,
}

impl<'a> Token<'a> {
    fn new(kind: TokenKind, lit: &'a str) -> Self {
        Self { kind, lit }
    }
}

/// Reads the next token from `stream`.
///
/// Leading ASCII whitespace is skipped. A run of ASCII digits becomes a
/// `Number`, a run of ASCII letters becomes an `Ident`, and `?`, `!`, `,`
/// and `.` become their punctuation kinds. Any other character becomes a
/// single `Illegal` token.
///
/// Returns `None` once the stream has no bytes left (including when only
/// whitespace remained).
fn lex<'a>(stream: &mut Stream<'a>) -> Option<Token<'a>> {
    let mut b: u8 = stream.current()?;

    if b.is_ascii_whitespace() {
        // Ignore whitespace. The whole run is consumed here, so the byte read
        // afterwards (if any) is guaranteed not to be whitespace.
        stream.take_while(|b| b.is_ascii_whitespace());
        b = stream.current()?;
    }

    if b.is_ascii_digit() {
        let lit = stream.take_while(|b| b.is_ascii_digit());
        return Some(Token::new(TokenKind::Number, lit));
    }

    if b.is_ascii_alphabetic() {
        let lit = stream.take_while(|b| b.is_ascii_alphabetic());
        return Some(Token::new(TokenKind::Ident, lit));
    }

    let kind = match b {
        b'?' => TokenKind::Question,
        b'!' => TokenKind::Exclamation,
        b',' => TokenKind::Comma,
        b'.' => TokenKind::Point,
        _ => TokenKind::Illegal,
    };

    // Slice out the whole character rather than a single byte: cutting a
    // `&str` at byte 1 panics when the current byte starts a multi-byte
    // UTF-8 sequence, which is exactly what non-ASCII input produces.
    // NOTE(review): assumes `rest()` begins at the current byte, as the
    // previous `&stream.rest()[..1]` did.
    let rest = stream.rest();
    let width = rest.chars().next().map_or(1, char::len_utf8);
    let lit = &rest[..width];

    // Advance one byte at a time so the stream ends up just past the
    // character, keeping the next call aligned on a character boundary.
    for _ in 0..width {
        stream.next();
    }

    Some(Token::new(kind, lit))
}

fn main() {
    let mut stream = Stream::new("Hello, world! ...world? Hello?");

    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "Hello")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Comma, ",")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "world")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Exclamation, "!")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, ".")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, ".")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, ".")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "world")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, "?")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "Hello")));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, "?")));

    // Reached the end
    assert_eq!(lex(&mut stream), None);
}

Structs§

  • Stream — A stream of bytes and characters.