char_lex/
lib.rs

#![deny(
    missing_docs,
    missing_debug_implementations,
    missing_copy_implementations,
    trivial_casts,
    trivial_numeric_casts,
    unsafe_code,
    unstable_features,
    unused_import_braces,
    unused_qualifications
)]

//! # CHAR-LEX
//!
//! `Char-Lex` is a crate for easily creating a `char`-based lexer from custom enums!
//!
//! ## Example
//!
//! ```rust
//! use char_lex::prelude::*;
//!
//! #[token]
//! #[derive(Debug, PartialEq)]
//! enum Digit {
//!     Zero = '0',
//!     One = '1',
//!     Two = '2',
//!     Three = '3',
//!     Four = '4',
//!     Five = '5',
//!     Six = '6',
//!     Seven = '7',
//!     Eight = '8',
//!     Nine = '9',
//! }
//!
//! fn main() {
//!     let lexer: Lexer<Digit, Digit> = Lexer::new("189");
//!     let tokens: Vec<Digit> = lexer.collect();
//!
//!     assert_eq!(vec![Digit::One, Digit::Eight, Digit::Nine], tokens);
//! }
//! ```
//!
//! `Tokens` can also be wrapped in anything that implements the `TokenWrapper<T>` trait!
//!
//! ## Example
//!
//! ```rust
//! use char_lex::prelude::*;
//!
//! #[token]
//! #[derive(Debug, PartialEq)]
//! enum Token {
//!     One = '1',
//! }
//!
//! #[derive(Debug, PartialEq)]
//! struct Wrapper {
//!     token: Token,
//!     character: char,
//! }
//!
//! impl TokenWrapper<Token> for Wrapper {
//!     fn wrap(token: Token, context: Context) -> Self {
//!         Self { token, character: context.character }
//!     }
//! }
//!
//! fn main() {
//!     let lexer: Lexer<Token, Wrapper> = Lexer::new("1");
//!     let tokens: Vec<Wrapper> = lexer.collect();
//!
//!     assert_eq!(vec![Wrapper { token: Token::One, character: '1' }], tokens);
//! }
//! ```

/// Prelude module.
/// It re-exports `Error` under the name `LexErr`!
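///
/// A minimal sketch of pulling everything in through the prelude and
/// handling the renamed error type (the `report` helper is hypothetical):
///
/// ```rust,ignore
/// use char_lex::prelude::*;
///
/// // Hypothetical helper: print whichever lexing error occurred.
/// fn report(err: &LexErr) {
///     eprintln!("lex error: {:?}", err);
/// }
/// ```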
pub mod prelude {
    pub use crate::{error::Error as LexErr, utils::*, *};
}

/// Contains the `Error` type.
pub mod error;

/// Contains utility types!
pub mod utils;

pub use char_lex_macro::token;
pub use traits::{TokenMatch, TokenTrait, TokenWrapper};

mod traits;

use error::Error;
use std::marker::PhantomData;
use utils::Context;

/// The main lexer type.
///
/// # Generics
/// `T`: `TokenTrait` is the trait implemented by the `token` attribute macro.
/// `W`: `TokenWrapper<T>` is the trait that can wrap any token to carry more information;
/// all `TokenTrait` types automatically implement `TokenWrapper<Self>`, so you don't necessarily need a wrapper!
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Lexer<'l, T, W>
where
    T: TokenTrait,
    W: TokenWrapper<T>,
{
    cursor: usize,
    content: &'l str,
    pos: (usize, usize),
    error: Option<Error>,
    pd: PhantomData<(T, W)>,
}

impl<'l, T, W> Lexer<'l, T, W>
where
    T: TokenTrait,
    W: TokenWrapper<T>,
{
    /// Creates a new `Lexer<'l, T, W>` with the `content: &str` that is to be tokenized.
    pub fn new(content: &'l str) -> Self {
        Self {
            content,
            cursor: 0,
            pos: (1, 0),
            error: None,
            pd: PhantomData,
        }
    }

    /// Like the `next` method, but with the possibility to ignore certain `Tokens`
    /// by passing a `TokenMatch<T>`, such as a single `Token` or multiple tokens via `vec![...]`.
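    ///
    /// A sketch (assumes a `Digit` token enum like the one in the crate-level
    /// docs, extended with a hypothetical `Space = ' '` variant):
    ///
    /// ```rust,ignore
    /// let mut lexer: Lexer<Digit, Digit> = Lexer::new("1 2");
    /// // Skips any `Space` tokens and yields the next non-ignored token.
    /// assert_eq!(Some(Digit::One), lexer.next_ignored(Digit::Space));
    /// assert_eq!(Some(Digit::Two), lexer.next_ignored(Digit::Space));
    /// ```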
    pub fn next_ignored<M>(&mut self, m: M) -> Option<W>
    where
        M: TokenMatch<T>,
    {
        loop {
            let (t, c) = self.next_token()?;
            if !m.matches_token(&t) {
                break Some(<W as TokenWrapper<T>>::wrap(t, Context::new(c, self.pos)));
            }
        }
    }

    /// Returns the `Error` that caused the lexer to return `None` from any `next` method!
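    ///
    /// A sketch of distinguishing a lexing failure from a clean end of input
    /// (assumes the `Digit` token enum from the crate-level docs):
    ///
    /// ```rust,ignore
    /// let mut lexer: Lexer<Digit, Digit> = Lexer::new("1a");
    /// // Drain by mutable reference so the original lexer keeps its state.
    /// let _tokens: Vec<Digit> = lexer.by_ref().collect();
    /// // `'a'` matches no `Digit` variant, so an error was recorded.
    /// assert!(lexer.get_error().is_some());
    /// ```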
    pub fn get_error(&self) -> Option<&Error> {
        self.error.as_ref()
    }

    /// Returns the current cursor position.
    pub fn get_cursor(&self) -> usize {
        self.cursor
    }

    /// Sets the new cursor position.
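    ///
    /// Note that only the cursor is rewound; the internal line/column
    /// position is not reset. A sketch of checkpointing (assumes the
    /// `Digit` token enum from the crate-level docs):
    ///
    /// ```rust,ignore
    /// let mut lexer: Lexer<Digit, Digit> = Lexer::new("12");
    /// let checkpoint = lexer.get_cursor();
    /// lexer.next(); // consumes `Digit::One`
    /// lexer.set_cursor(checkpoint);
    /// lexer.next(); // re-reads `Digit::One`
    /// ```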
    pub fn set_cursor(&mut self, cursor: usize) {
        self.cursor = cursor
    }

    fn next_token(&mut self) -> Option<(T, char)> {
        // A previous error poisons the lexer: never yield another token.
        if self.error.is_some() {
            return None;
        }
        self.cursor += 1;
        if let Some(c) = next_char(self.content, self.cursor) {
            // Track the (line, column) position; a newline starts a new
            // line at column 0.
            self.pos.1 += 1;
            if c == '\n' {
                self.pos.0 += 1;
                self.pos.1 = 0;
            }
            if let Some(t) = <T as TokenTrait>::match_char(c) {
                Some((t, c))
            } else {
                self.error = Some(Error::Unexpected(Context::new(c, self.pos)));
                None
            }
        } else {
            self.error = Some(Error::EndOfFile);
            None
        }
    }
}

impl<'l, T, W> Iterator for Lexer<'l, T, W>
where
    T: TokenTrait,
    W: TokenWrapper<T>,
{
    type Item = W;

    fn next(&mut self) -> Option<Self::Item> {
        let (t, c) = self.next_token()?;
        Some(<W as TokenWrapper<T>>::wrap(t, Context::new(c, self.pos)))
    }
}

/// Returns the character at the 1-based `cursor` position, if any.
fn next_char(content: &str, cursor: usize) -> Option<char> {
    // Index by character rather than by byte so that multi-byte UTF-8
    // input cannot panic on a non-boundary `split_at`, and so a cursor
    // of 0 yields `None` instead of panicking.
    content.chars().nth(cursor.checked_sub(1)?)
}