//! This example shows how to create an iterator tokenizer. Iterator tokenizers generate their tokens
//! as they're needed, rather than all at once.
//!
//! This has the benefit of reducing the memory footprint used by the tokenizer.
//! Especially if the tokenizer has many small tokens, whose size may be bloated
//! due to span information being fairly large.
#![allow(dead_code)]
use alkale::{token::Token, TokenizerContext};
/// A token produced by the example tokenizer.
///
/// Derives the full set of cheap traits (`Clone`/`Copy`/`PartialEq`/`Eq`) in
/// addition to `Debug`, since both variants hold only `Copy` `char` data —
/// this lets callers duplicate and compare tokens freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MyToken {
    /// A lone trailing character with no partner.
    Single(char),
    /// Two consecutive characters collected together.
    Pair(char, char),
}
fn main() {
    let source = "this is my source code :3";
    let mut ctx = TokenizerContext::new(source.chars());

    // Lazily group the input into Pair tokens (two consecutive characters),
    // falling back to a Single token for an odd character left at the end.
    let iter = ctx.create_iterator(|context| {
        // Guard: nothing left to consume means nothing to emit.
        let Some((first, first_span)) = context.next_span() else {
            return;
        };

        // Decide between Pair and Single based on whether a second
        // character is available immediately after the first.
        match context.next_span() {
            Some((second, second_span)) => context.push_token(Token::new(
                MyToken::Pair(first, second),
                first_span.between(&second_span),
            )),
            None => context.push_token(Token::new(MyToken::Single(first), first_span)),
        }
    });

    // Stream tokens out one at a time — only the current token is in memory.
    for token in iter {
        println!("{:?}", token)
    }
}