logos_nom_bridge/lib.rs
1//! # logos-nom-bridge
2//!
//! A [`logos::Lexer`] wrapper that can be used as an input for
4//! [nom](https://docs.rs/nom/7.0.0/nom/index.html).
5//!
6//! ### Simple example
7//!
8//! ```
9//! // First, create a `logos` lexer:
10//!
11//! #[derive(Clone, Debug, PartialEq, Eq, logos::Logos)]
12//! enum Token {
13//! #[token("+")]
14//! Plus,
15//!
16//! #[token("-")]
17//! Minus,
18//!
19//! #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
20//! Number(i64),
21//!
22//! #[error]
23//! #[regex(r"[ \t\n\f]+", logos::skip)]
24//! Error,
25//! }
26//!
27//! // Then, write a nom parser that accepts a `Tokens<'_, Token>` as input:
28//!
29//! use logos_nom_bridge::Tokens;
30//!
31//! type Input<'source> = Tokens<'source, Token>;
32//!
33//! #[derive(Debug, PartialEq, Eq)]
34//! enum Op {
35//! Number(i64),
36//! Addition(Box<(Op, Op)>),
37//! Subtraction(Box<(Op, Op)>),
38//! }
39//!
40//! fn parse_expression(input: Input<'_>) -> nom::IResult<Input<'_>, Op> {
41//! # use nom::{branch::alt, combinator::map, sequence::tuple};
42//! #
43//! # fn parse_number(input: Input<'_>) -> nom::IResult<Input<'_>, Op> {
44//! # match input.peek() {
45//! # Some((Token::Number(n), _)) => Ok((input.advance(), Op::Number(n))),
46//! # _ => Err(nom::Err::Error(nom::error::Error::new(
47//! # input,
48//! # nom::error::ErrorKind::IsA,
49//! # ))),
50//! # }
51//! # }
52//! # logos_nom_bridge::token_parser!(token: Token);
53//! #
54//! # alt((
55//! # map(
56//! # tuple((parse_number, alt((Token::Plus, Token::Minus)), parse_expression)),
57//! # |(a, op, b)| {
58//! # if op == "+" {
59//! # Op::Addition(Box::new((a, b)))
60//! # } else {
61//! # Op::Subtraction(Box::new((a, b)))
62//! # }
63//! # },
64//! # ),
65//! # parse_number,
66//! # ))(input)
67//! // zip
68//! }
69//!
70//! // Finally, you can use it to parse a string:
71//!
72//! let input = "10 + 3 - 4";
73//! let tokens = Tokens::new(input);
74//!
75//! let (rest, parsed) = parse_expression(tokens).unwrap();
76//!
77//! assert!(rest.is_empty());
78//! assert_eq!(
79//! parsed,
80//! Op::Addition(Box::new((
81//! Op::Number(10),
82//! Op::Subtraction(Box::new((
83//! Op::Number(3),
84//! Op::Number(4),
85//! ))),
86//! ))),
87//! )
88//! ```
89//!
90//! ## Macros
91//!
92//! You can implement [`nom::Parser`] for your token type with the [`token_parser`] macro:
93//!
94//! ```
95//! # #[derive(Clone, Debug, PartialEq, Eq, logos::Logos)]
96//! # enum Token {
97//! # #[error]
98//! # Error,
99//! # }
100//! #
101//! logos_nom_bridge::token_parser!(token: Token);
102//! ```
103//!
104//! If some enum variants of your token type contain data, you can implement a [`nom::Parser`]
105//! for them using the [`data_variant_parser`] macro:
106//!
107//! ```
108//! # enum Op { Number(i64) }
109//! #
110//! #[derive(Clone, Debug, PartialEq, Eq, logos::Logos)]
111//! enum Token {
112//! #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
113//! Number(i64),
114//!
115//! // etc.
116//! # #[error]
117//! # Error,
118//! }
119//!
120//! logos_nom_bridge::data_variant_parser! {
121//! fn parse_number(input) -> Result<Op>;
122//! pattern = Token::Number(n) => Op::Number(n);
123//! }
124//! ```
125
126mod macros;
127
128use core::fmt;
129
130use logos::{Lexer, Logos, Span, SpannedIter};
131use nom::{InputIter, InputLength, InputTake};
132
/// A [`logos::Lexer`] wrapper that can be used as an input for
/// [nom](https://docs.rs/nom/7.0.0/nom/index.html).
///
/// You can find an example in the [module-level docs](..).
pub struct Tokens<'i, T>
where
    T: Logos<'i>,
{
    // Wrapped lexer; `lexer.span().end` tracks how many source bytes
    // have been consumed so far (see `Tokens::len`).
    lexer: Lexer<'i, T>,
}
143
144impl<'i, T> Clone for Tokens<'i, T>
145where
146 T: Logos<'i> + Clone,
147 T::Extras: Clone,
148{
149 fn clone(&self) -> Self {
150 Self {
151 lexer: self.lexer.clone(),
152 }
153 }
154}
155
156impl<'i, T> Tokens<'i, T>
157where
158 T: Logos<'i, Source = str> + Clone,
159 T::Extras: Default + Clone,
160{
161 pub fn new(input: &'i str) -> Self {
162 Tokens {
163 lexer: Lexer::new(input),
164 }
165 }
166
167 pub fn len(&self) -> usize {
168 self.lexer.source().len() - self.lexer.span().end
169 }
170
171 pub fn is_empty(&self) -> bool {
172 self.len() == 0
173 }
174
175 pub fn peek(&self) -> Option<(T, &'i str)> {
176 let mut iter = self.lexer.clone().spanned();
177 iter.next().map(|(t, span)| (t, &self.lexer.source()[span]))
178 }
179
180 pub fn advance(mut self) -> Self {
181 self.lexer.next();
182 self
183 }
184}
185
186impl<'i, T> PartialEq for Tokens<'i, T>
187where
188 T: PartialEq + Logos<'i> + Clone,
189 T::Extras: Clone,
190{
191 fn eq(&self, other: &Self) -> bool {
192 Iterator::eq(self.lexer.clone(), other.lexer.clone())
193 }
194}
195
// Marker impl: equality of token streams is a total equivalence
// whenever the token type itself is `Eq`.
impl<'i, T> Eq for Tokens<'i, T>
where
    T: Eq + Logos<'i> + Clone,
    T::Extras: Clone,
{
}
202
203impl<'i, T> fmt::Debug for Tokens<'i, T>
204where
205 T: fmt::Debug + Logos<'i, Source = str>,
206{
207 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208 let source = self.lexer.source();
209 let start = self.lexer.span().start;
210 f.debug_tuple("Tokens").field(&&source[start..]).finish()
211 }
212}
213
214impl<'i, T> Default for Tokens<'i, T>
215where
216 T: Logos<'i, Source = str>,
217 T::Extras: Default,
218{
219 fn default() -> Self {
220 Tokens {
221 lexer: Lexer::new(""),
222 }
223 }
224}
225
/// An iterator that (similarly to [`std::iter::Enumerate`]) produces byte offsets of the tokens.
pub struct IndexIterator<'i, T>
where
    T: Logos<'i>,
{
    // Underlying lexer; after each `next()`, `logos.span()` is the byte
    // range of the token just produced.
    logos: Lexer<'i, T>,
}
233
234impl<'i, T> Iterator for IndexIterator<'i, T>
235where
236 T: Logos<'i>,
237{
238 type Item = (usize, (T, Span));
239
240 fn next(&mut self) -> Option<Self::Item> {
241 self.logos.next().map(|t| {
242 let span = self.logos.span();
243 (span.start, (t, span))
244 })
245 }
246}
247
248impl<'i, T> InputIter for Tokens<'i, T>
249where
250 T: Logos<'i, Source = str> + Clone,
251 T::Extras: Default + Clone,
252{
253 type Item = (T, Span);
254
255 type Iter = IndexIterator<'i, T>;
256
257 type IterElem = SpannedIter<'i, T>;
258
259 fn iter_indices(&self) -> Self::Iter {
260 IndexIterator {
261 logos: self.lexer.clone(),
262 }
263 }
264
265 fn iter_elements(&self) -> Self::IterElem {
266 self.lexer.clone().spanned()
267 }
268
269 fn position<P>(&self, predicate: P) -> Option<usize>
270 where
271 P: Fn(Self::Item) -> bool,
272 {
273 let mut iter = self.lexer.clone().spanned();
274 iter.find(|t| predicate(t.clone()))
275 .map(|(_, span)| span.start)
276 }
277
278 fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
279 let mut cnt = 0;
280 for (_, span) in self.lexer.clone().spanned() {
281 if cnt == count {
282 return Ok(span.start);
283 }
284 cnt += 1;
285 }
286 if cnt == count {
287 return Ok(self.len());
288 }
289 Err(nom::Needed::Unknown)
290 }
291}
292
293impl<'i, T> InputLength for Tokens<'i, T>
294where
295 T: Logos<'i, Source = str> + Clone,
296 T::Extras: Default + Clone,
297{
298 fn input_len(&self) -> usize {
299 self.len()
300 }
301}
302
303impl<'i, T> InputTake for Tokens<'i, T>
304where
305 T: Logos<'i, Source = str>,
306 T::Extras: Default,
307{
308 fn take(&self, count: usize) -> Self {
309 Tokens {
310 lexer: Lexer::new(&self.lexer.source()[..count]),
311 }
312 }
313
314 fn take_split(&self, count: usize) -> (Self, Self) {
315 let (a, b) = self.lexer.source().split_at(count);
316 (
317 Tokens {
318 lexer: Lexer::new(a),
319 },
320 Tokens {
321 lexer: Lexer::new(b),
322 },
323 )
324 }
325}