tokit/lexer/
source.rs

1use core::ops::RangeBounds;
2
3#[cfg(feature = "bytes")]
4mod bytes;
5
6#[cfg(feature = "bstr")]
7mod bstr;
8
9#[cfg(feature = "hipstr")]
10mod hipstr;
11
/// A borrowed piece of input handed out by a lexer's source.
pub trait Slice<'source>: core::fmt::Debug + PartialEq + Eq {
  /// The unit of input the lexer works with.
  ///
  /// - `char` for text-based lexers that process UTF-8 strings
  /// - `u8` for byte-based lexers that process binary data or non-UTF-8 input
  ///
  /// This type must match the character type used by the Logos lexer's source.
  type Char: Copy + PartialEq + Eq + core::hash::Hash + core::fmt::Debug;

  /// Iterator type yielding the characters of the slice.
  type Iter<'a>: Iterator<Item = Self::Char>
  where
    Self: 'a;

  /// Iterator type yielding `(offset, character)` pairs, where `offset` is
  /// measured from the start of the slice.
  type PositionedIter<'a>: Iterator<Item = (usize, Self::Char)>
  where
    Self: 'a;

  /// Walks over the characters contained in the slice.
  fn iter<'a>(&'a self) -> Self::Iter<'a>
  where
    Self: 'a;

  /// Walks over the characters contained in the slice, pairing each one with
  /// its offset from the start of the slice.
  fn positioned_iter<'a>(&'a self) -> Self::PositionedIter<'a>
  where
    Self: 'a;

  /// The length of the slice.
  fn len(&self) -> usize;

  /// Whether the slice has a length of zero.
  ///
  /// Provided in terms of [`len`](Self::len); implementors only need to
  /// override it when a cheaper check is available.
  #[cfg_attr(not(tarpaulin), inline(always))]
  fn is_empty(&self) -> bool {
    matches!(self.len(), 0)
  }
}
51
52impl<'source> Slice<'source> for &'source [u8] {
53  type Char = u8;
54
55  type Iter<'a>
56    = core::iter::Copied<core::slice::Iter<'a, u8>>
57  where
58    Self: 'a;
59
60  type PositionedIter<'a>
61    = core::iter::Enumerate<core::iter::Copied<core::slice::Iter<'a, u8>>>
62  where
63    Self: 'a;
64
65  #[cfg_attr(not(tarpaulin), inline(always))]
66  fn iter<'a>(&'a self) -> Self::Iter<'a>
67  where
68    Self: 'a,
69  {
70    <[u8]>::iter(self).copied()
71  }
72
73  #[cfg_attr(not(tarpaulin), inline(always))]
74  fn positioned_iter<'a>(&'a self) -> Self::PositionedIter<'a>
75  where
76    Self: 'a,
77  {
78    <[u8]>::iter(self).copied().enumerate()
79  }
80
81  #[cfg_attr(not(tarpaulin), inline(always))]
82  fn len(&self) -> usize {
83    <[u8]>::len(self)
84  }
85}
86
87impl<'source> Slice<'source> for &'source str {
88  type Char = char;
89
90  type Iter<'a>
91    = core::str::Chars<'a>
92  where
93    Self: 'a;
94
95  type PositionedIter<'a>
96    = core::str::CharIndices<'a>
97  where
98    Self: 'a;
99
100  #[cfg_attr(not(tarpaulin), inline(always))]
101  fn iter<'a>(&'a self) -> Self::Iter<'a>
102  where
103    Self: 'a,
104  {
105    self.chars()
106  }
107
108  #[cfg_attr(not(tarpaulin), inline(always))]
109  fn positioned_iter<'a>(&'a self) -> Self::PositionedIter<'a>
110  where
111    Self: 'a,
112  {
113    self.char_indices()
114  }
115
116  #[cfg_attr(not(tarpaulin), inline(always))]
117  fn len(&self) -> usize {
118    <str>::len(self)
119  }
120}
121
122/// The source trait for lexers
123pub trait Source<Cursor> {
124  /// A type this `Source` can be sliced into.
125  type Slice<'source>: Slice<'source>
126  where
127    Self: 'source;
128
129  /// Returns `true` if the source is empty.
130  fn is_empty(&self) -> bool;
131
132  /// Length of the source
133  fn len(&self) -> Cursor;
134
135  /// Get a slice of the source at given range. This is analogous to
136  /// `slice::get(range)`.
137  fn slice<'a, R>(&self, range: R) -> Option<Self::Slice<'_>>
138  where
139    R: RangeBounds<&'a Cursor>,
140    Cursor: 'a;
141
142  /// For `&str` sources attempts to find the closest `char` boundary at which source
143  /// can be sliced, starting from `index`.
144  ///
145  /// For binary sources (`&[u8]`) this should just return `index` back.
146  #[inline]
147  fn find_boundary(&self, index: Cursor) -> Cursor {
148    index
149  }
150
151  /// Check if `index` is valid for this `Source`, that is:
152  ///
153  /// + It's not larger than the byte length of the `Source`.
154  /// + (`str` only) It doesn't land in the middle of a UTF-8 code point.
155  fn is_boundary(&self, index: Cursor) -> bool;
156}
157
158impl Source<usize> for [u8] {
159  type Slice<'source>
160    = &'source [u8]
161  where
162    Self: 'source;
163
164  #[cfg_attr(not(tarpaulin), inline(always))]
165  fn is_empty(&self) -> bool {
166    <[u8]>::is_empty(self)
167  }
168
169  #[cfg_attr(not(tarpaulin), inline(always))]
170  fn len(&self) -> usize {
171    self.len()
172  }
173
174  #[cfg_attr(not(tarpaulin), inline(always))]
175  fn slice<'a, R>(&self, range: R) -> Option<Self::Slice<'_>>
176  where
177    R: RangeBounds<&'a usize>,
178    usize: 'a,
179  {
180    self.get((
181      range.start_bound().map(|s| **s),
182      range.end_bound().map(|s| **s),
183    ))
184  }
185
186  #[cfg_attr(not(tarpaulin), inline(always))]
187  fn is_boundary(&self, index: usize) -> bool {
188    index <= self.len()
189  }
190}
191
192impl Source<usize> for str {
193  type Slice<'source>
194    = &'source str
195  where
196    Self: 'source;
197
198  #[cfg_attr(not(tarpaulin), inline(always))]
199  fn is_empty(&self) -> bool {
200    <str>::is_empty(self)
201  }
202
203  #[cfg_attr(not(tarpaulin), inline(always))]
204  fn len(&self) -> usize {
205    <str>::len(self)
206  }
207
208  #[cfg_attr(not(tarpaulin), inline(always))]
209  fn slice<'a, R>(&self, range: R) -> Option<Self::Slice<'_>>
210  where
211    R: RangeBounds<&'a usize>,
212  {
213    self.get((
214      range.start_bound().map(|s| **s),
215      range.end_bound().map(|s| **s),
216    ))
217  }
218
219  #[cfg_attr(not(tarpaulin), inline(always))]
220  fn is_boundary(&self, index: usize) -> bool {
221    self.is_char_boundary(index)
222  }
223}