retoken 0.1.1

Build &str tokenizers using regex
Documentation

Retoken

Build &str tokenizers using regex

Tokenizer Macro

You can create tokenizers using a simple macro

Example: Foo Lang

Foo lang is a simple language: it only allows you to assign variables to literal strings, and it permits whitespace between tokens.

 my_var = "my string"
 my_other_var = "my other string"
use retoken::{tokenize::Tokenize, tokenizer};

tokenizer! {
    // A token we don't care about — whitespace, newlines, tabs, etc.
    // The #[skip] attribute marks it as discarded during tokenization.
    #[skip]
    Skip = r#"\s+"#,

    // Identifiers: lower/upper case ASCII letters and underscore only.
    // Feel free to get creative in your own tokenizer.
    Ident = "[a-zA-Z_]+",

    // The equals sign used for assignment.
    Equals = r#"="#,

    // A quote token, excluded from the token set via #[no_variant];
    // it is only used internally by the QuotedString tokenizer below.
    #[no_variant]
    Quote = r#"""#,

    // QuotedString gets a hand-written Tokenize impl (see below),
    // so #[no_impl] makes the macro emit only the type.
    #[no_impl]
    QuotedString,

}

impl<'a> Tokenize<'a> for QuotedString<'a> {
    /// Human-readable token name used in diagnostics.
    fn token_name() -> &'static str {
        "QuotedString"
    }

    /// Tokenizes a double-quoted string: consumes the opening quote,
    /// scans forward one character at a time until a closing quote
    /// matches, and captures the text between the two quotes.
    fn tokenize(str_src: &'a retoken::str_src::StrSrc<'a>) -> Result<Self, retoken::error::Error> {
        // Consume the opening quote; its index marks where the token starts.
        let opening = Quote::tokenize(str_src)?;
        let start = opening.idx;

        // Advance past one char per iteration until the closing quote matches.
        let end;
        loop {
            // Width of the next char in bytes; 0 when no char remains.
            // NOTE(review): if the source ends without a closing quote and
            // slice() keeps returning Ok(""), the step stays 0 — confirm
            // slice()? errors at end-of-input, otherwise this could spin.
            let step = match str_src.slice()?.chars().next() {
                Some(ch) => ch.len_utf8(),
                None => 0,
            };
            str_src.incr(step);

            if let Ok(closing) = Quote::tokenize(str_src) {
                end = closing.idx;
                break;
            }
        }

        // The value is everything strictly between the two quote characters.
        let value = str_src.slice_with_range((start + 1)..end)?;

        Ok(Self { idx: start, value })
    }
}