rslexer 1.0.2

Simple lexer written in Rust!
Documentation

//! # RSLexer #
//! 
//! Simple lexer written in Rust!

#![deny(missing_docs,
    missing_debug_implementations, missing_copy_implementations,
    trivial_casts, trivial_numeric_casts,
    unsafe_code, unstable_features,
    unused_import_braces, unused_qualifications)]

use regex::Regex;

/// Use the `rules!` macro to create `Rules` for the lexer!
/// 
/// Simple type alias around a `Vec<Rule<T>>`; this is what `lex` consumes.
pub type Rules<T> = Vec<Rule<T>>;

/// A single lexer rule: a pattern plus the action run on each match.
///
/// Don't create a `Rule` yourself, use the 
/// `rules!` macro to create `Rules` for the lexer!
/// 
/// Internal struct that contains a `Regex`
/// and a `Fn(&str, usize, usize) -> Option<T>` callback
/// (arguments: matched text, line, column).
pub struct Rule<T> {
    /// The pattern; `lex` only accepts it when it matches at the very
    /// start of the remaining input.
    pub r: Regex,
    /// Callback invoked as `(matched_text, line, column)`. Returning
    /// `None` consumes the match without emitting a token (useful for
    /// whitespace); `Some(T)` emits a token.
    pub f: Box<dyn Fn(&str, usize, usize) -> Option<T>>,
}

use std::fmt;

/// Manual `Debug` impl: the boxed callback has no printable form, so
/// only the rule's regex is shown, e.g. `Rule(<pattern>)`.
impl<T> fmt::Debug for Rule<T> {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(formatter, "Rule({:?})", &self.r)
    }
}

/// `rules!` macro to create Rules for the lexer!
/// 
/// `rules!` expects a Type (Token) and a list of rules:
/// &str (regex) => |string, line, character| Option<Token>
///
/// A trailing comma after the last rule is optional. Each pattern is
/// compiled eagerly and an invalid regex panics (via `unwrap`), so
/// malformed rules fail fast at construction time.
/// 
/// Example:
/// ```rs
/// enum Token {
///     Word(String, usize, usize),
/// }
/// 
/// rules!(Token;
///     r"\s+" => |_,_,_| None,
///     r"[^\s]+" => |s, l, c| Some(Token::Word(s.to_string(), l, c)),
/// )
/// 
/// ```
#[macro_export]
macro_rules! rules {
    // `$(,)?` accepts an optional trailing comma, replacing the old
    // forwarding arm that stripped it.
    ($t:ty; $($e:expr => $f:expr),* $(,)?) => {{
        extern crate regex;
        use regex::Regex;
        use $crate::Rule;
        let rules: Vec<Rule<$t>> = vec![
            $(
                Rule {
                    // Panics on an invalid pattern: rules are programmer
                    // input, so failing fast here is intentional.
                    r: Regex::new($e).unwrap(),
                    f: Box::new($f),
                },
            )*
        ];
        rules
    }};
}

/// The main function of the lexer
/// 
/// `lex` expects the content you want to lex as a &str
/// and a Vec of Rules (use the `rules!` macro for that)!
///
/// Rules are tried in order at the current position; the first rule
/// whose regex matches at offset 0 of the remaining input wins. A rule
/// callback returning `None` consumes the match without emitting a
/// token. Returns `Err` when no rule matches the remaining input.
/// 
/// Example:
/// ```rs
/// 
/// enum Token {
///     Word(String, usize, usize),
/// }
/// 
/// lex("test string \n new line!",
///     rules!(Token;
///         r"\s+" => |_,_,_| None,
///         r"[^\s]+" => |s, l, c| Some(Token::Word(s.to_string(), l, c)),
///     )
/// )
/// 
/// ```
pub fn lex<T>(content: &str, rules: Rules<T>) -> Result<Vec<T>, String> {

    // Byte offset into `content` of the start of the unconsumed input.
    let mut pos: usize = 0;
    // 1-based line/column of `pos`. NOTE(review): columns are counted in
    // bytes, not chars, so multi-byte UTF-8 text skews them — confirm
    // whether byte columns are intended.
    let mut line: usize = 1;
    let mut character: usize = 1;

    let mut ts: Vec<T> = Vec::new();

    while let Some(rest) = content.get(pos..) {
        if rest.is_empty() { break; }

        let mut matched = false;
        for rule in &rules {

            if let Some(m) = rule.r.find(rest) {
                // Only accept matches anchored at the current position.
                if m.start() != 0 { continue; }
                // BUGFIX: a zero-length match (e.g. a rule like `a*`)
                // never advances `pos` and previously looped forever;
                // treat it as no match and try the next rule.
                if m.end() == 0 { continue; }

                let mut s = m.as_str();
                if let Some(t) = (rule.f)(s, line, character) {
                    ts.push(t);
                }

                // Advance the column first; if the match contained
                // newlines it is recomputed below.
                character += s.len();

                // Count newlines inside the match; after the loop,
                // `character` is the length of the text following the
                // last newline, plus one.
                while let Some(i) = s.find('\n') {
                    line += 1;
                    // `i + 1` is a valid boundary: '\n' is one byte.
                    s = s.get(i + 1..).unwrap();
                    character = s.len() + 1;
                }

                pos += m.end();
                matched = true;
                break;
            }
        }
        if !matched {
            return Err(format!("No match for content: {:?}", rest));
        }
    }

    Ok(ts)
}