Crate tokenizer_lib
source ·Expand description
Tokenizer-lib
Tokenization utilities for building parsers in Rust
Examples
Buffered token channel:
use tokenizer_lib::{BufferedTokenQueue, Token, TokenReader, TokenSender, TokenTrait};
/// Extra data carried alongside each token in this example.
#[derive(PartialEq, Debug)]
struct Span(pub u32, pub u32);

/// The token type used in this example: a wrapped number.
#[derive(PartialEq, Debug)]
struct N(pub u32);

impl TokenTrait for N {}

// Buffer every token up front; the same queue is then read back from.
let mut queue = BufferedTokenQueue::new();
for token in [
    Token(N(12), Span(0, 2)),
    Token(N(32), Span(2, 4)),
    Token(N(52), Span(4, 8)),
] {
    queue.push(token);
}

// Tokens are handed back in the order they were pushed.
for expected in [12, 32, 52] {
    assert_eq!(queue.next().unwrap().0, N(expected));
}
// After the three tokens have been read there is nothing left.
assert!(queue.next().is_none());
(Multi-thread safe) Parallel token queue:
use tokenizer_lib::{ParallelTokenQueue, Token, TokenReader, TokenSender, TokenTrait};
/// Extra data carried alongside each token in this example.
#[derive(PartialEq, Debug)]
struct Span(pub u32, pub u32);

/// The token type used in this example: a wrapped number.
#[derive(PartialEq, Debug)]
struct N(pub u32);

impl TokenTrait for N {}

let (mut sender, mut reader) = ParallelTokenQueue::new();

// Producer side: the spawned thread takes ownership of `sender` and pushes three tokens.
std::thread::spawn(move || {
    for token in [
        Token(N(12), Span(0, 2)),
        Token(N(32), Span(2, 4)),
        Token(N(52), Span(4, 8)),
    ] {
        sender.push(token);
    }
});

// Consumer side: the tokens are read on this thread in the order they were pushed.
for expected in [12, 32, 52] {
    assert_eq!(reader.next().unwrap().0, N(expected));
}
assert!(reader.next().is_none());
Generator token queue:
use tokenizer_lib::{GeneratorTokenQueue, GeneratorTokenQueueBuffer, Token, TokenReader, TokenSender, TokenTrait};
/// The token type used in this example: a wrapped number.
#[derive(PartialEq, Debug)]
struct N(pub u32);

impl TokenTrait for N {}

/// Generator/lexer step.
///
/// Increments the counter in `state`; while the incremented counter lies in `1..=3`,
/// pushes one token holding twice the counter. Any other counter value pushes nothing.
fn lexer(state: &mut u32, sender: &mut GeneratorTokenQueueBuffer<N, ()>) {
    *state += 1;
    if (1..=3).contains(&*state) {
        sender.push(Token(N(*state * 2), ()));
    }
}

// The queue is built from the lexer function and an initial state of 0.
let mut reader = GeneratorTokenQueue::new(lexer, 0);
for expected in [2, 4, 6] {
    assert_eq!(reader.next().unwrap().0, N(expected));
}
// The lexer pushes nothing once the counter passes 3, and the reader yields `None`.
assert!(reader.next().is_none());
Provides utilities such as `peek`, `peek_n` and `scan` for lookahead. Also provides `expect_next` for expecting a token value and `conditional_next` for advancing on a predicate.
Structs
A queue which can be used as a sender and reader. Use this for buffering all the tokens before reading
A token queue which has a backing generator/lexer which is called when needed by parsing logic
A wrapping struct for the cache around GeneratorTokenQueue. Use as the second parameter in the generator/lexer function
A token queue used for doing lexing and parsing on different threads. Will send tokens between threads
A structure with a piece of data and some additional data such as a position
Traits
A reader over a sequence of tokens
Trait for a sender that can append a token to a sequence
PartialEq is required for comparing tokens with TokenReader::expect_next