// yap/types.rs

//! This module contains types which implement the [`Tokens`] interface. You
//! won't often need to import this module unless you wish to explicitly name
//! the types in question.
//!
//! You should be able to remain generic by using `t: &mut impl Tokens<Item=char>` as a
//! function argument instead of naming concrete types like the ones here.
7use super::{IntoTokens, TokenLocation, Tokens};
8
/// A [`Tokens`] implementation over a borrowed slice.
///
/// This is what we are given back if we call `into_tokens()` on a `&[T]`.
/// It pairs the slice with a cursor recording how far we have got.
#[derive(Clone, Debug)]
pub struct SliceTokens<'a, Item> {
    /// The complete slice being tokenized.
    slice: &'a [Item],
    /// Index into `slice` of the next item to hand out.
    cursor: usize,
}
15
/// A cursor position within some slice, expressed as an index into it.
/// Implements [`TokenLocation`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SliceTokensLocation(usize);
20
21impl TokenLocation for SliceTokensLocation {
22    fn offset(&self) -> usize {
23        self.0
24    }
25}
26
27impl<'a, Item> SliceTokens<'a, Item> {
28    /// Return the parsed portion of the slice.
29    pub fn consumed(&self) -> &'a [Item] {
30        &self.slice[..self.cursor]
31    }
32
33    /// Return the unparsed remainder of the slice.
34    pub fn remaining(&self) -> &'a [Item] {
35        &self.slice[self.cursor..]
36    }
37}
38
39impl<'a, Item> From<SliceTokens<'a, Item>> for &'a [Item] {
40    fn from(toks: SliceTokens<'a, Item>) -> Self {
41        toks.slice
42    }
43}
44
45impl<'a, Item> Tokens for SliceTokens<'a, Item> {
46    type Item = &'a Item;
47    type Location = SliceTokensLocation;
48
49    fn next(&mut self) -> Option<Self::Item> {
50        let res = self.slice.get(self.cursor);
51        self.cursor += 1;
52        res
53    }
54    fn location(&self) -> Self::Location {
55        SliceTokensLocation(self.cursor)
56    }
57    fn set_location(&mut self, location: Self::Location) {
58        self.cursor = location.0;
59    }
60    fn is_at_location(&self, location: &Self::Location) -> bool {
61        self.cursor == location.0
62    }
63}
64
65impl<'a, Item> IntoTokens<&'a Item> for SliceTokens<'a, Item> {
66    type Tokens = Self;
67    fn into_tokens(self) -> Self {
68        self
69    }
70}
71
72impl<'a, Item> IntoTokens<&'a Item> for &'a [Item] {
73    type Tokens = SliceTokens<'a, Item>;
74    fn into_tokens(self) -> Self::Tokens {
75        SliceTokens {
76            slice: self,
77            cursor: 0,
78        }
79    }
80}
81
/// A [`Tokens`] implementation over a borrowed string slice.
///
/// This is what we are given back if we call `into_tokens()` on a `&str`.
/// It pairs the string with a cursor recording how far we have got.
#[derive(Clone, Debug)]
pub struct StrTokens<'a> {
    /// The complete string being tokenized.
    str: &'a str,
    /// Byte index into `str` of the next char to hand out.
    cursor: usize,
}
88
/// A cursor position within some string. Implements [`TokenLocation`].
///
/// The stored value is a byte index into the string, not a count of
/// characters (a single character may occupy several bytes).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StrTokensLocation(usize);
95
96impl TokenLocation for StrTokensLocation {
97    fn offset(&self) -> usize {
98        self.0
99    }
100}
101
102impl<'a> StrTokens<'a> {
103    /// Return the parsed portion of the str.
104    pub fn consumed(&self) -> &'a str {
105        &self.str[..self.cursor]
106    }
107
108    /// Return the unparsed remainder of the str.
109    pub fn remaining(&self) -> &'a str {
110        &self.str[self.cursor..]
111    }
112}
113
114impl<'a> From<StrTokens<'a>> for &'a str {
115    fn from(toks: StrTokens<'a>) -> Self {
116        toks.str
117    }
118}
119
120impl<'a> Tokens for StrTokens<'a> {
121    type Item = char;
122    type Location = StrTokensLocation;
123
124    fn next(&mut self) -> Option<Self::Item> {
125        if self.cursor == self.str.len() {
126            return None;
127        }
128
129        // Cursor should always start at a valid char boundary.
130        // So, we just find the next char boundary and return the
131        // char between those two.
132        let mut next_char_boundary = self.cursor + 1;
133        while !self.str.is_char_boundary(next_char_boundary) {
134            next_char_boundary += 1;
135        }
136
137        // We have to go to &str and then char. Unchecked because we know
138        // that we are on a valid boundary. There's probably a quicker way..
139        // To check that bounds detection works even on exotic characters, there's a test included
140        // at the end of the file.
141        let next_char = unsafe { self.str.get_unchecked(self.cursor..next_char_boundary) }
142            .chars()
143            .next()
144            .unwrap();
145
146        self.cursor = next_char_boundary;
147        Some(next_char)
148    }
149
150    fn location(&self) -> Self::Location {
151        StrTokensLocation(self.cursor)
152    }
153
154    fn set_location(&mut self, location: Self::Location) {
155        self.cursor = location.0;
156    }
157
158    fn is_at_location(&self, location: &Self::Location) -> bool {
159        self.cursor == location.0
160    }
161
162    // We can do better than the default impl here; we have a &str that we
163    // can call parse on without needing to buffer anything,
164
165    fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as core::str::FromStr>::Err>
166    where
167        Out: core::str::FromStr,
168        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
169    {
170        let res = self.remaining().parse()?;
171        // If parse succeeds, consume all remaining tokens:
172        self.cursor = self.str.len();
173        Ok(res)
174    }
175
176    fn parse_slice<Out, Buf>(
177        &mut self,
178        from: Self::Location,
179        to: Self::Location,
180    ) -> Result<Out, <Out as core::str::FromStr>::Err>
181    where
182        Out: core::str::FromStr,
183        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
184    {
185        // Don't change the location; slices never consume the underlying Tokens.
186        self.str[from.0..to.0].parse()
187    }
188
189    fn parse_take<Out, Buf>(&mut self, n: usize) -> Result<Out, <Out as core::str::FromStr>::Err>
190    where
191        Out: core::str::FromStr,
192        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
193    {
194        // Consume the n tokens.
195        let from = self.location();
196        self.take(n).consume();
197        let to = self.location();
198
199        let res = self.str[from.0..to.0].parse();
200
201        // Reset location on error.
202        if res.is_err() {
203            self.set_location(from);
204        }
205        res
206    }
207
208    fn parse_take_while<Out, Buf, F>(
209        &mut self,
210        take_while: F,
211    ) -> Result<Out, <Out as core::str::FromStr>::Err>
212    where
213        Out: core::str::FromStr,
214        Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
215        F: FnMut(&Self::Item) -> bool,
216    {
217        // Consume all of the tokens matching the function.
218        let from = self.location();
219        self.take_while(take_while).consume();
220        let to = self.location();
221
222        let res = self.str[from.0..to.0].parse();
223
224        // Reset location on error.
225        if res.is_err() {
226            self.set_location(from);
227        }
228        res
229    }
230}
231
232impl<'a> IntoTokens<char> for StrTokens<'a> {
233    type Tokens = Self;
234    fn into_tokens(self) -> Self {
235        self
236    }
237}
238
239impl<'a> IntoTokens<char> for &'a str {
240    type Tokens = StrTokens<'a>;
241    fn into_tokens(self) -> Self::Tokens {
242        StrTokens {
243            str: self,
244            cursor: 0,
245        }
246    }
247}
248
/// A [`Tokens`] implementation backed by an `impl Iterator + Clone`.
///
/// This is what we are given back if we call [`IterTokens::new`] on such
/// an iterator. It pairs the iterator with a cursor that counts how far
/// we have advanced.
#[derive(Clone, Debug)]
pub struct IterTokens<I> {
    /// The iterator that tokens are pulled from.
    iter: I,
    /// Position counter, starting at 0 for a freshly created `IterTokens`.
    cursor: usize,
}
256
257/// This implements [`TokenLocation`] and stores the location and state of
258/// our current cursor into some iterator. The location is equivalent to `offset`
259/// in [`Iterator::nth(offset)`].
260#[derive(Clone)]
261pub struct IterTokensLocation<I>(IterTokens<I>);
262
263impl<I> core::fmt::Debug for IterTokensLocation<I> {
264    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
265        write!(f, "IterTokensLocation(cursor = {})", self.0.cursor)
266    }
267}
268
269// Locations match as long as the cursors do. This is as strong as the guarantee
270// for string or slice locations, and in all cases, locations from StrTokens/SliceTokens
271// may be equal even if the underlying tokens are different.
272impl<I> PartialEq for IterTokensLocation<I> {
273    fn eq(&self, other: &Self) -> bool {
274        self.0.cursor == other.0.cursor
275    }
276}
277
278impl<I> TokenLocation for IterTokensLocation<I> {
279    fn offset(&self) -> usize {
280        self.0.cursor
281    }
282}
283
284impl<I> IterTokens<I> {
285    /// We can't define a blanket impl for [`IntoTokens`] on all `impl Iterator + Clone` without
286    /// [specialization](https://rust-lang.github.io/rfcs/1210-impl-specialization.html).
287    ///
288    /// Instead, you must manually construct new [`IterTokens`] using this function.
289    ///
290    /// # Example
291    ///
292    /// ```rust
293    /// use yap::{ Tokens, types::IterTokens };
294    ///
295    /// // In normal usage, "hello \n\t world".into_tokens()
296    /// // would be preferred here (which would give StrTokens).
297    /// // This is just to demonstrate using IterTokens:
298    /// let chars_iter = "hello \n\t world".chars();
299    /// let mut tokens = IterTokens::new(chars_iter);
300    ///
301    /// let loc = tokens.location();
302    ///
303    /// // now we have tokens, we can do some parsing:
304    /// assert!(tokens.tokens("hello".chars()));
305    /// tokens.skip_while(|c| c.is_whitespace());
306    /// assert!(tokens.tokens("world".chars()));
307    ///
308    /// // We can reset the location too as with other Tokens impls.
309    /// // A location here is just a copy of the iterator at an
310    /// // earlier point.
311    /// tokens.set_location(loc);
312    /// assert!(tokens.tokens("hello".chars()));
313    /// ```
314    pub fn new(iter: I) -> Self {
315        IterTokens { iter, cursor: 0 }
316    }
317
318    /// Return the inner iterator, consuming self.
319    pub fn into_inner(self) -> I {
320        self.iter
321    }
322}
323
324impl<I> Tokens for IterTokens<I>
325where
326    I: Iterator + Clone,
327{
328    type Item = I::Item;
329    type Location = IterTokensLocation<I>;
330
331    fn next(&mut self) -> Option<Self::Item> {
332        self.cursor += 1;
333        self.iter.next()
334    }
335    fn location(&self) -> Self::Location {
336        IterTokensLocation(self.clone())
337    }
338    fn set_location(&mut self, location: Self::Location) {
339        *self = location.0;
340    }
341    fn is_at_location(&self, location: &Self::Location) -> bool {
342        self.cursor == location.0.cursor
343    }
344
345    // Override the default impl to avoid a clone when calling
346    // `self.location().offset()`:
347    fn offset(&self) -> usize {
348        self.cursor
349    }
350}
351
352impl<I> IntoTokens<I::Item> for IterTokens<I>
353where
354    I: Iterator + Clone,
355{
356    type Tokens = Self;
357    fn into_tokens(self) -> Self {
358        self
359    }
360}
361
/// Bundles a [`Tokens`] implementation together with some context value,
/// so that the context can be accessed at any time while parsing.
/// Use [`Tokens::with_context`] to produce this.
pub struct WithContext<T, C> {
    /// The wrapped tokens.
    tokens: T,
    /// The context carried alongside the tokens.
    context: C,
}
368
/// Bundles a mutable reference to your [`Tokens`] together with some
/// context value, so that the context can be accessed at any time while
/// parsing. Use [`Tokens::with_context`] to produce this.
pub struct WithContextMut<T, C> {
    /// The wrapped tokens.
    tokens: T,
    /// The context carried alongside the tokens.
    context: C,
}
375
376// `WithContext` and `WithContextMut` have almost identical looking impls,
377// but one only works with `Tokens`, and one with `&mut Tokens` (because
378// those impls would conflict if both on the same struct).
379macro_rules! with_context_impls {
380    ($name:ident $( $($mut:tt)+ )?) => {
381        impl <T, C> $name<T, C> {
382            /// Provide something that implements [`Tokens`] and
383            /// some arbitrary context.
384            pub(crate) fn new(tokens: T, context: C) -> Self {
385                Self { tokens, context }
386            }
387
388            /// Return the original tokens and context
389            pub fn into_parts(self) -> (T, C) {
390                (self.tokens, self.context)
391            }
392
393            /// Access the context
394            pub fn context(&self) -> &C {
395                &self.context
396            }
397
398            /// Mutably access the context
399            pub fn context_mut(&mut self) -> &mut C {
400                &mut self.context
401            }
402        }
403
404        impl <T, C> Tokens for $name<$( $($mut)+ )? T, C>
405        where T: Tokens {
406            type Item = T::Item;
407            type Location = T::Location;
408
409            fn next(&mut self) -> Option<Self::Item> {
410                self.tokens.next()
411            }
412            fn location(&self) -> Self::Location {
413                self.tokens.location()
414            }
415            fn set_location(&mut self, location: Self::Location) {
416                self.tokens.set_location(location)
417            }
418            fn is_at_location(&self, location: &Self::Location) -> bool {
419                self.tokens.is_at_location(location)
420            }
421
422            // allow any parse optimisations from the underlying Tokens
423            // impl to carry through to this too:
424            fn parse<Out, Buf>(&mut self) -> Result<Out, <Out as core::str::FromStr>::Err>
425            where
426                Out: core::str::FromStr,
427                Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
428            {
429                self.tokens.parse::<Out, Buf>()
430            }
431            fn parse_slice<Out, Buf>(
432                &mut self,
433                from: Self::Location,
434                to: Self::Location,
435            ) -> Result<Out, <Out as core::str::FromStr>::Err>
436            where
437                Out: core::str::FromStr,
438                Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
439            {
440                self.tokens.parse_slice::<Out, Buf>(from, to)
441            }
442            fn parse_take<Out, Buf>(&mut self, n: usize) -> Result<Out, <Out as core::str::FromStr>::Err>
443            where
444                Out: core::str::FromStr,
445                Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
446            {
447                self.tokens.parse_take::<Out, Buf>(n)
448            }
449            fn parse_take_while<Out, Buf, F>(
450                &mut self,
451                take_while: F,
452            ) -> Result<Out, <Out as core::str::FromStr>::Err>
453            where
454                Out: core::str::FromStr,
455                Buf: FromIterator<Self::Item> + core::ops::Deref<Target = str>,
456                F: FnMut(&Self::Item) -> bool,
457            {
458                self.tokens.parse_take_while::<Out, Buf, F>(take_while)
459            }
460        }
461    }
462}
463
464with_context_impls!(WithContext);
465with_context_impls!(WithContextMut &mut);
466
#[cfg(test)]
mod tests {
    use super::*;

    /// `StrTokens::next` must split on char boundaries, not bytes, and
    /// must report the end of input after the final multi-byte char.
    #[test]
    fn exotic_character_bounds() {
        let mut tokens = "🗻∈🌏".into_tokens();

        assert_eq!(tokens.next(), Some('🗻'));
        assert_eq!(tokens.next(), Some('∈'));
        assert_eq!(tokens.next(), Some('🌏'));

        // Nothing is left, and asking again keeps returning None.
        assert_eq!(tokens.next(), None);
        assert_eq!(tokens.next(), None);
        assert_eq!(tokens.remaining(), "");
    }

    /// Locations taken from an `IterTokens` can be restored repeatedly.
    #[test]
    fn iterator_tokens_sanity_check() {
        // In reality, one should always prefer to use StrTokens for strings:
        let chars = "hello \n\t world".chars();
        let mut tokens = IterTokens::new(chars);

        let loc = tokens.location();
        assert!(tokens.tokens("hello".chars()));

        tokens.set_location(loc.clone());
        assert!(tokens.tokens("hello".chars()));

        tokens.skip_while(|c| c.is_whitespace());

        assert!(tokens.tokens("world".chars()));

        tokens.set_location(loc);
        assert!(tokens.tokens("hello".chars()));
    }

    /// The `parse*` overrides on `StrTokens` must parse straight from the
    /// underlying `&str` and never build the intermediate buffer type.
    #[test]
    fn str_tokens_parse_optimisations_work() {
        // This buffer will panic if it's used.
        struct BadBuffer;
        impl core::iter::FromIterator<char> for BadBuffer {
            fn from_iter<T: IntoIterator<Item = char>>(_: T) -> Self {
                panic!("FromIterator impl shouldn't be used")
            }
        }
        impl core::ops::Deref for BadBuffer {
            type Target = str;
            fn deref(&self) -> &Self::Target {
                panic!("Deref impl shouldn't be used")
            }
        }

        // 1. slice(..).parse()

        let mut tokens = "123abc".into_tokens();

        // Find locations to the number:
        let from = tokens.location();
        tokens.take_while(|t| t.is_numeric()).consume();
        let to = tokens.location();

        let n = tokens
            .slice(from, to)
            .parse::<u16, BadBuffer>()
            .expect("parse worked (1)");

        assert_eq!(n, 123);
        assert_eq!(tokens.remaining(), "abc");

        // 2. take(..).parse()

        let mut tokens = "123abc".into_tokens();

        let n = tokens
            .take(3)
            .parse::<u16, BadBuffer>()
            .expect("parse worked (2)");

        assert_eq!(n, 123);
        assert_eq!(tokens.remaining(), "abc");

        // 3. take_while(..).parse()

        let mut tokens = "123abc".into_tokens();

        let n = tokens
            .take_while(|t| t.is_numeric())
            .parse::<u16, BadBuffer>()
            .expect("parse worked (3)");

        assert_eq!(n, 123);
        assert_eq!(tokens.remaining(), "abc");

        // 4. take(..).take_while(..).take(..).parse()

        let mut tokens = "123ab+=".into_tokens();

        let n = tokens
            .take(6)
            .take(5)
            .take_while(|t| t.is_alphanumeric())
            .take_while(|t| t.is_numeric())
            .take(2)
            .parse::<u16, BadBuffer>()
            .expect("parse worked (4)");

        assert_eq!(n, 12);
        assert_eq!(tokens.remaining(), "3ab+=");
    }
}