//! This example shows how to create an iterator tokenizer. Iterator tokenizers generate their tokens
//! as they're needed, rather than all at once. 
//! 
//! This has the benefit of reducing the tokenizer's memory footprint —
//! especially when the tokenizer produces many small tokens, whose size may be
//! bloated by relatively large span information.

#![allow(dead_code)]

use alkale::{token::Token, TokenizerContext};

/// A token produced by the pairing tokenizer in this example.
///
/// Both variants hold only `char` payloads, so the type is cheap to copy;
/// deriving `Clone`/`Copy` and `PartialEq`/`Eq` is free and makes tokens
/// easy to duplicate and compare (e.g. in tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MyToken {
    /// A lone trailing character that had nothing to pair with.
    Single(char),
    /// Two consecutive characters from the source.
    Pair(char, char),
}

fn main() {
    let source = "this is my source code :3";
    let mut ctx = TokenizerContext::new(source.chars());

    // Group consecutive characters two at a time into Pair tokens; a lone
    // trailing character (odd-length source) becomes a Single token.
    let iter = ctx.create_iterator(|tokenizer| {
        // Nothing left to read means nothing to emit this round.
        if let Some((first, first_span)) = tokenizer.next_span() {
            match tokenizer.next_span() {
                // A second character is available: emit both as one Pair
                // token whose span stretches across the two characters.
                Some((second, second_span)) => tokenizer.push_token(Token::new(
                    MyToken::Pair(first, second),
                    first_span.between(&second_span),
                )),
                // The source ended on an odd character: emit it by itself.
                None => tokenizer.push_token(Token::new(MyToken::Single(first), first_span)),
            }
        }
    });

    // Consume the iterator lazily: each token is produced on demand, so only
    // one token needs to be in memory at a time.
    for token in iter {
        println!("{:?}", token);
    }
}