tinytoken

Struct Tokenizer

Source
pub struct Tokenizer {
    pub lines: Vec<Vec<char>>,
    /* private fields */
}
Expand description

Primary struct for tokenizing an input string, with methods for parsing and generating tokens

Fields§

§lines: Vec<Vec<char>>

Implementations§

Source§

impl Tokenizer

Source

pub fn builder() -> TokenizerBuilder

Creates a TokenizerBuilder instance for configuring and initializing the tokenizer

Examples found in repository?
examples/test.rs (line 8)
(showing lines 7–28 of examples/test.rs)
fn main() {
    let tokenizer = Tokenizer::builder()
        .parse_char_as_string(true)
        .allow_digit_separator(tinytoken::Choice::Yes('_'))
        .add_symbols(&['{', '}', '(', ')', ';', '#', ',', '[', ']'])
        .add_operators(&['+', '-', '*', '%', '/', '&'])
        .build(TO_PARSE);
    let start_time = SystemTime::now();
    match tokenizer.tokenize() {
        Ok(tokens) => {
            eprintln!(
                "-> elapsed: {}µs",
                start_time.elapsed().unwrap().as_micros()
            );
            println!("---------\nparsed {} token(s)\n---------", tokens.len());
            println!("{tokens:?}");
        }
        Err(e) => {
            println!("{e}")
        }
    }
}
Source

pub fn new<T>(input: T, config: TokenizerConfig) -> Self
where T: ToString,

Initializes the tokenizer with input text and a configuration

Source

pub fn tokenize(self) -> Result<Vec<Token>, TokenizationError>

Tokenizes the input and returns a list of Tokens or a TokenizationError if parsing fails

Examples found in repository?
examples/test.rs (line 15)
(showing lines 7–28 of examples/test.rs)
fn main() {
    let tokenizer = Tokenizer::builder()
        .parse_char_as_string(true)
        .allow_digit_separator(tinytoken::Choice::Yes('_'))
        .add_symbols(&['{', '}', '(', ')', ';', '#', ',', '[', ']'])
        .add_operators(&['+', '-', '*', '%', '/', '&'])
        .build(TO_PARSE);
    let start_time = SystemTime::now();
    match tokenizer.tokenize() {
        Ok(tokens) => {
            eprintln!(
                "-> elapsed: {}µs",
                start_time.elapsed().unwrap().as_micros()
            );
            println!("---------\nparsed {} token(s)\n---------", tokens.len());
            println!("{tokens:?}");
        }
        Err(e) => {
            println!("{e}")
        }
    }
}

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.