Expand description
A stream designed for the individual manipulation of bytes and Unicode codepoint characters.
§Example
Using the stream to build a lexer that tokenizes English text.
use uwl::Stream;
/// The category of a lexed [`Token`].
#[derive(Debug, PartialEq)]
enum TokenKind {
    /// A run of ASCII alphabetic bytes.
    Ident,
    /// A run of ASCII digits.
    Number,
    /// A single `?`.
    Question,
    /// A single `!`.
    Exclamation,
    /// A single `,`.
    Comma,
    /// A single `.`.
    Point,

    /// An invalid token: anything the lexer does not recognise.
    Illegal,
}
/// A single lexed token: its category plus the text it was read from.
#[derive(Debug, PartialEq)]
struct Token<'a> {
    /// What kind of token this is.
    kind: TokenKind,
    /// The literal text of the token, borrowed for the lifetime `'a`.
    lit: &'a str,
}
impl<'a> Token<'a> {
fn new(kind: TokenKind, lit: &'a str) -> Self {
Self { kind, lit }
}
}
/// Reads the next token from `stream`.
///
/// Leading ASCII whitespace is skipped. A run of ASCII digits becomes a
/// `Number`, a run of ASCII letters becomes an `Ident`, and `?`, `!`, `,`
/// and `.` become their punctuation kinds. Any other character is returned
/// as a single `Illegal` token.
///
/// Returns `None` when the stream has no more input (including when only
/// whitespace was left).
fn lex<'a>(stream: &mut Stream<'a>) -> Option<Token<'a>> {
    let mut b: u8 = stream.current()?;

    if b.is_ascii_whitespace() {
        // Ignore whitespace, then continue with the first byte after it.
        stream.take_while(|b| b.is_ascii_whitespace());
        b = stream.current()?;
    }

    if b.is_ascii_digit() {
        let lit = stream.take_while(|b| b.is_ascii_digit());
        return Some(Token::new(TokenKind::Number, lit));
    }

    if b.is_ascii_alphabetic() {
        let lit = stream.take_while(|b| b.is_ascii_alphabetic());
        return Some(Token::new(TokenKind::Ident, lit));
    }

    let kind = match b {
        b'?' => TokenKind::Question,
        b'!' => TokenKind::Exclamation,
        b',' => TokenKind::Comma,
        b'.' => TokenKind::Point,
        _ => TokenKind::Illegal,
    };

    // Take the whole character, not just its first byte: slicing a `&str`
    // at `[..1]` panics when the character is multi-byte UTF-8 (e.g. "é"),
    // and advancing a single byte would leave the stream mid-character.
    let rest = stream.rest();
    let width = rest.chars().next()?.len_utf8();
    let lit = &rest[..width];

    // Step over every byte of the character that was just sliced out.
    for _ in 0..width {
        stream.next();
    }

    Some(Token::new(kind, lit))
}
fn main() {
let mut stream = Stream::new("Hello, world! ...world? Hello?");
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "Hello")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Comma, ",")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "world")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Exclamation, "!")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, ".")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, ".")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, ".")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "world")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, "?")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, "Hello")));
assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, "?")));
// Reached the end
assert_eq!(lex(&mut stream), None);
}
Structs§
- `Stream`: A stream of bytes and characters.