// logos_nom_bridge/lib.rs

1//! # logos-nom-bridge
2//!
//! A [`logos::Lexer`] wrapper that can be used as an input for
4//! [nom](https://docs.rs/nom/7.0.0/nom/index.html).
5//!
6//! ### Simple example
7//!
8//! ```
9//! // First, create a `logos` lexer:
10//!
11//! #[derive(Clone, Debug, PartialEq, Eq, logos::Logos)]
12//! enum Token {
13//!     #[token("+")]
14//!     Plus,
15//!
16//!     #[token("-")]
17//!     Minus,
18//!
19//!     #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
20//!     Number(i64),
21//!
22//!     #[error]
23//!     #[regex(r"[ \t\n\f]+", logos::skip)]
24//!     Error,
25//! }
26//!
27//! // Then, write a nom parser that accepts a `Tokens<'_, Token>` as input:
28//!
29//! use logos_nom_bridge::Tokens;
30//!
31//! type Input<'source> = Tokens<'source, Token>;
32//!
33//! #[derive(Debug, PartialEq, Eq)]
34//! enum Op {
35//!     Number(i64),
36//!     Addition(Box<(Op, Op)>),
37//!     Subtraction(Box<(Op, Op)>),
38//! }
39//!
40//! fn parse_expression(input: Input<'_>) -> nom::IResult<Input<'_>, Op> {
41//! #   use nom::{branch::alt, combinator::map, sequence::tuple};
42//! #
43//! #   fn parse_number(input: Input<'_>) -> nom::IResult<Input<'_>, Op> {
44//! #       match input.peek() {
45//! #           Some((Token::Number(n), _)) => Ok((input.advance(), Op::Number(n))),
46//! #           _ => Err(nom::Err::Error(nom::error::Error::new(
47//! #               input,
48//! #               nom::error::ErrorKind::IsA,
49//! #           ))),
50//! #       }
51//! #   }
52//! #   logos_nom_bridge::token_parser!(token: Token);
53//! #
54//! #   alt((
55//! #       map(
56//! #           tuple((parse_number, alt((Token::Plus, Token::Minus)), parse_expression)),
57//! #           |(a, op, b)| {
58//! #               if op == "+" {
59//! #                   Op::Addition(Box::new((a, b)))
60//! #               } else {
61//! #                   Op::Subtraction(Box::new((a, b)))
62//! #               }
63//! #           },
64//! #       ),
65//! #       parse_number,
66//! #   ))(input)
//!     // ...
68//! }
69//!
70//! // Finally, you can use it to parse a string:
71//!
72//! let input = "10 + 3 - 4";
73//! let tokens = Tokens::new(input);
74//!
75//! let (rest, parsed) = parse_expression(tokens).unwrap();
76//!
77//! assert!(rest.is_empty());
78//! assert_eq!(
79//!     parsed,
80//!     Op::Addition(Box::new((
81//!         Op::Number(10),
82//!         Op::Subtraction(Box::new((
83//!             Op::Number(3),
84//!             Op::Number(4),
85//!         ))),
86//!     ))),
87//! )
88//! ```
89//!
90//! ## Macros
91//!
92//! You can implement [`nom::Parser`] for your token type with the [`token_parser`] macro:
93//!
94//! ```
95//! # #[derive(Clone, Debug, PartialEq, Eq, logos::Logos)]
96//! # enum Token {
97//! #     #[error]
98//! #     Error,
99//! # }
100//! #
101//! logos_nom_bridge::token_parser!(token: Token);
102//! ```
103//!
104//! If some enum variants of your token type contain data, you can implement a [`nom::Parser`]
105//! for them using the [`data_variant_parser`] macro:
106//!
107//! ```
108//! # enum Op { Number(i64) }
109//! #
110//! #[derive(Clone, Debug, PartialEq, Eq, logos::Logos)]
111//! enum Token {
112//!     #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
113//!     Number(i64),
114//!
115//!     // etc.
116//! #   #[error]
117//! #   Error,
118//! }
119//!
120//! logos_nom_bridge::data_variant_parser! {
121//!     fn parse_number(input) -> Result<Op>;
122//!     pattern = Token::Number(n) => Op::Number(n);
123//! }
124//! ```
125
126mod macros;
127
128use core::fmt;
129
130use logos::{Lexer, Logos, Span, SpannedIter};
131use nom::{InputIter, InputLength, InputTake};
132
/// A [`logos::Lexer`] wrapper that can be used as an input for
/// [nom](https://docs.rs/nom/7.0.0/nom/index.html).
///
/// You can find an example in the [module-level docs](..).
pub struct Tokens<'i, T>
where
    T: Logos<'i>,
{
    // The wrapped lexer; its current position marks how much input has been consumed.
    lexer: Lexer<'i, T>,
}
143
144impl<'i, T> Clone for Tokens<'i, T>
145where
146    T: Logos<'i> + Clone,
147    T::Extras: Clone,
148{
149    fn clone(&self) -> Self {
150        Self {
151            lexer: self.lexer.clone(),
152        }
153    }
154}
155
156impl<'i, T> Tokens<'i, T>
157where
158    T: Logos<'i, Source = str> + Clone,
159    T::Extras: Default + Clone,
160{
161    pub fn new(input: &'i str) -> Self {
162        Tokens {
163            lexer: Lexer::new(input),
164        }
165    }
166
167    pub fn len(&self) -> usize {
168        self.lexer.source().len() - self.lexer.span().end
169    }
170
171    pub fn is_empty(&self) -> bool {
172        self.len() == 0
173    }
174
175    pub fn peek(&self) -> Option<(T, &'i str)> {
176        let mut iter = self.lexer.clone().spanned();
177        iter.next().map(|(t, span)| (t, &self.lexer.source()[span]))
178    }
179
180    pub fn advance(mut self) -> Self {
181        self.lexer.next();
182        self
183    }
184}
185
186impl<'i, T> PartialEq for Tokens<'i, T>
187where
188    T: PartialEq + Logos<'i> + Clone,
189    T::Extras: Clone,
190{
191    fn eq(&self, other: &Self) -> bool {
192        Iterator::eq(self.lexer.clone(), other.lexer.clone())
193    }
194}
195
// Marker impl: token-sequence equality (see the `PartialEq` impl) is a full
// equivalence relation whenever the token type itself is `Eq`.
impl<'i, T> Eq for Tokens<'i, T>
where
    T: Eq + Logos<'i> + Clone,
    T::Extras: Clone,
{
}
202
impl<'i, T> fmt::Debug for Tokens<'i, T>
where
    T: fmt::Debug + Logos<'i, Source = str>,
{
    /// Formats the stream as `Tokens("<source from the current span onward>")`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let source = self.lexer.source();
        // NOTE(review): this starts at `span().start` (the beginning of the
        // most recently yielded token), while `len()` measures from
        // `span().end`; after an `advance()` the debug output therefore still
        // includes the token that was just consumed — confirm whether that is
        // intentional.
        let start = self.lexer.span().start;
        f.debug_tuple("Tokens").field(&&source[start..]).finish()
    }
}
213
214impl<'i, T> Default for Tokens<'i, T>
215where
216    T: Logos<'i, Source = str>,
217    T::Extras: Default,
218{
219    fn default() -> Self {
220        Tokens {
221            lexer: Lexer::new(""),
222        }
223    }
224}
225
/// An iterator that (similarly to [`std::iter::Enumerate`]) produces byte offsets of the tokens.
pub struct IndexIterator<'i, T>
where
    T: Logos<'i>,
{
    // Underlying lexer; after each `next()` it also exposes the span it just matched.
    logos: Lexer<'i, T>,
}
233
234impl<'i, T> Iterator for IndexIterator<'i, T>
235where
236    T: Logos<'i>,
237{
238    type Item = (usize, (T, Span));
239
240    fn next(&mut self) -> Option<Self::Item> {
241        self.logos.next().map(|t| {
242            let span = self.logos.span();
243            (span.start, (t, span))
244        })
245    }
246}
247
248impl<'i, T> InputIter for Tokens<'i, T>
249where
250    T: Logos<'i, Source = str> + Clone,
251    T::Extras: Default + Clone,
252{
253    type Item = (T, Span);
254
255    type Iter = IndexIterator<'i, T>;
256
257    type IterElem = SpannedIter<'i, T>;
258
259    fn iter_indices(&self) -> Self::Iter {
260        IndexIterator {
261            logos: self.lexer.clone(),
262        }
263    }
264
265    fn iter_elements(&self) -> Self::IterElem {
266        self.lexer.clone().spanned()
267    }
268
269    fn position<P>(&self, predicate: P) -> Option<usize>
270    where
271        P: Fn(Self::Item) -> bool,
272    {
273        let mut iter = self.lexer.clone().spanned();
274        iter.find(|t| predicate(t.clone()))
275            .map(|(_, span)| span.start)
276    }
277
278    fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
279        let mut cnt = 0;
280        for (_, span) in self.lexer.clone().spanned() {
281            if cnt == count {
282                return Ok(span.start);
283            }
284            cnt += 1;
285        }
286        if cnt == count {
287            return Ok(self.len());
288        }
289        Err(nom::Needed::Unknown)
290    }
291}
292
impl<'i, T> InputLength for Tokens<'i, T>
where
    T: Logos<'i, Source = str> + Clone,
    T::Extras: Default + Clone,
{
    /// nom's notion of input length: the number of unconsumed source bytes
    /// (not the number of remaining tokens).
    fn input_len(&self) -> usize {
        self.len()
    }
}
302
303impl<'i, T> InputTake for Tokens<'i, T>
304where
305    T: Logos<'i, Source = str>,
306    T::Extras: Default,
307{
308    fn take(&self, count: usize) -> Self {
309        Tokens {
310            lexer: Lexer::new(&self.lexer.source()[..count]),
311        }
312    }
313
314    fn take_split(&self, count: usize) -> (Self, Self) {
315        let (a, b) = self.lexer.source().split_at(count);
316        (
317            Tokens {
318                lexer: Lexer::new(a),
319            },
320            Tokens {
321                lexer: Lexer::new(b),
322            },
323        )
324    }
325}