char_lex/lib.rs
#![deny(
    missing_docs,
    missing_debug_implementations,
    missing_copy_implementations,
    trivial_casts,
    trivial_numeric_casts,
    unsafe_code,
    unstable_features,
    unused_import_braces,
    unused_qualifications
)]

//! # CHAR-LEX
//!
//! `Char-Lex` is a crate for easily creating a `char`-based lexer from multiple custom enums!
//!
//! ## Example
//!
//! ```rust
//! use char_lex::prelude::*;
//!
//! #[token]
//! #[derive(Debug, PartialEq)]
//! enum Digit {
//!     Zero = '0',
//!     One = '1',
//!     Two = '2',
//!     Three = '3',
//!     Four = '4',
//!     Five = '5',
//!     Six = '6',
//!     Seven = '7',
//!     Eight = '8',
//!     Nine = '9',
//! }
//!
//! fn main() {
//!     let lexer: Lexer<Digit, Digit> = Lexer::new("189");
//!     let tokens: Vec<Digit> = lexer.collect();
//!
//!     assert_eq!(vec![Digit::One, Digit::Eight, Digit::Nine], tokens);
//! }
//! ```
//!
//! `Tokens` can also be wrapped in anything that implements the `TokenWrapper<T>` trait!
//!
//! ## Example
//!
//! ```rust
//! use char_lex::prelude::*;
//!
//! #[token]
//! #[derive(Debug, PartialEq)]
//! enum Token {
//!     One = '1',
//! }
//!
//! #[derive(Debug, PartialEq)]
//! struct Wrapper {
//!     token: Token,
//!     character: char,
//! }
//!
//! impl TokenWrapper<Token> for Wrapper {
//!     fn wrap(token: Token, context: Context) -> Self {
//!         Self { token, character: context.character }
//!     }
//! }
//!
//! fn main() {
//!     let lexer: Lexer<Token, Wrapper> = Lexer::new("1");
//!     let tokens: Vec<Wrapper> = lexer.collect();
//!
//!     assert_eq!(vec![Wrapper { token: Token::One, character: '1' }], tokens);
//! }
//! ```

/// Prelude module.
/// It renames `Error` to `LexErr`!
pub mod prelude {
    pub use crate::{error::Error as LexErr, utils::*, *};
}

/// Contains the `Error` type.
pub mod error;

/// Contains utility types!
pub mod utils;

pub use char_lex_macro::token;
pub use traits::{TokenMatch, TokenTrait, TokenWrapper};

mod traits;

use error::Error;
use std::marker::PhantomData;
use utils::Context;

/// The main lexer type.
///
/// # Generics
/// `T`: `TokenTrait` is the trait implemented by the `token` attribute macro.
/// `W`: `TokenWrapper<T>` is the trait that wraps a token to carry extra information;
/// every `TokenTrait` type automatically implements `TokenWrapper<Self>`, so you don't necessarily need a wrapper!
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Lexer<'l, T, W>
where
    T: TokenTrait,
    W: TokenWrapper<T>,
{
    cursor: usize,
    content: &'l str,
    pos: (usize, usize),
    error: Option<Error>,
    pd: PhantomData<(T, W)>,
}

impl<'l, T, W> Lexer<'l, T, W>
where
    T: TokenTrait,
    W: TokenWrapper<T>,
{
    /// Creates a new `Lexer<'l, T, W>` with the `content: &str` that is to be tokenized.
    pub fn new(content: &'l str) -> Self {
        Self {
            content,
            cursor: 0,
            pos: (1, 0),
            error: None,
            pd: PhantomData,
        }
    }

    /// Like the `next` method, but skips certain `Tokens`:
    /// `m` can be anything implementing `TokenMatch<T>`, such as a single `Token` or multiple tokens in a `Vec`.
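    ///
    /// # Example
    ///
    /// A minimal sketch that skips leading zeros, reusing the `Digit` token enum
    /// from the crate-level docs (shortened to two variants) and assuming, as
    /// described above, that a single token implements `TokenMatch<T>`:
    ///
    /// ```rust
    /// use char_lex::prelude::*;
    ///
    /// #[token]
    /// #[derive(Debug, PartialEq)]
    /// enum Digit {
    ///     Zero = '0',
    ///     One = '1',
    /// }
    ///
    /// let mut lexer: Lexer<Digit, Digit> = Lexer::new("001");
    /// assert_eq!(Some(Digit::One), lexer.next_ignored(Digit::Zero));
    /// ```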
    pub fn next_ignored<M>(&mut self, m: M) -> Option<W>
    where
        M: TokenMatch<T>,
    {
        loop {
            let (t, c) = self.next_token()?;
            if !m.matches_token(&t) {
                break Some(W::wrap(t, Context::new(c, self.pos)));
            }
        }
    }

    /// Returns the `Error` that caused the lexer to return `None` from any `next` method!
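    ///
    /// # Example
    ///
    /// A minimal sketch of inspecting the error after an unexpected character:
    ///
    /// ```rust
    /// use char_lex::prelude::*;
    ///
    /// #[token]
    /// #[derive(Debug, PartialEq)]
    /// enum Digit {
    ///     One = '1',
    /// }
    ///
    /// let mut lexer: Lexer<Digit, Digit> = Lexer::new("x");
    /// assert_eq!(None, lexer.next());
    /// assert!(matches!(lexer.get_error(), Some(LexErr::Unexpected(_))));
    /// ```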
    pub fn get_error(&self) -> Option<&Error> {
        self.error.as_ref()
    }

    /// Returns the current cursor position.
    pub fn get_cursor(&self) -> usize {
        self.cursor
    }

    /// Sets the cursor position; this can be used to rewind the lexer.
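    ///
    /// # Example
    ///
    /// A minimal backtracking sketch: save the cursor, consume a token, then rewind
    /// and read the same token again (note that the line/column position is not rewound):
    ///
    /// ```rust
    /// use char_lex::prelude::*;
    ///
    /// #[token]
    /// #[derive(Debug, PartialEq)]
    /// enum Digit {
    ///     One = '1',
    ///     Two = '2',
    /// }
    ///
    /// let mut lexer: Lexer<Digit, Digit> = Lexer::new("12");
    /// let saved = lexer.get_cursor();
    /// assert_eq!(Some(Digit::One), lexer.next());
    /// lexer.set_cursor(saved);
    /// assert_eq!(Some(Digit::One), lexer.next());
    /// ```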
    pub fn set_cursor(&mut self, cursor: usize) {
        self.cursor = cursor;
    }

    fn next_token(&mut self) -> Option<(T, char)> {
        // Once an error has been recorded, the lexer stays exhausted.
        if self.error.is_some() {
            return None;
        }
        self.cursor += 1;
        if let Some(c) = next_char(self.content, self.cursor) {
            // Track the (line, column) position; a newline starts a new line at column 0.
            self.pos.1 += 1;
            if c == '\n' {
                self.pos.0 += 1;
                self.pos.1 = 0;
            }
            if let Some(t) = T::match_char(c) {
                Some((t, c))
            } else {
                self.error = Some(Error::Unexpected(Context::new(c, self.pos)));
                None
            }
        } else {
            self.error = Some(Error::EndOfFile);
            None
        }
    }
}

impl<'l, T, W> Iterator for Lexer<'l, T, W>
where
    T: TokenTrait,
    W: TokenWrapper<T>,
{
    type Item = W;

    fn next(&mut self) -> Option<Self::Item> {
        let (t, c) = self.next_token()?;
        Some(W::wrap(t, Context::new(c, self.pos)))
    }
}

/// Returns the character at the 1-based `cursor` position, or `None` once the
/// end of `content` is reached.
fn next_char(content: &str, cursor: usize) -> Option<char> {
    // Index by characters rather than bytes, so multi-byte UTF-8 input cannot
    // cause a panic at a non-character boundary.
    content.chars().nth(cursor.checked_sub(1)?)
}