1use logos::Logos;
4
5use super::Span;
6use super::parser::ParserToken;
7use super::tree::SyntaxKind;
8
9pub mod v1;
10
/// A compact set of raw token values backed by a 128-bit bitmask.
///
/// Each bit position corresponds to one raw token value, so every
/// stored token must be less than 128.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TokenSet(u128);

impl TokenSet {
    /// A set that contains no tokens.
    pub const EMPTY: Self = Self(0);

    /// Builds a set containing every token in the given slice.
    pub const fn new(tokens: &[u8]) -> Self {
        // Iterators are unavailable in `const fn`, so index manually.
        let mut set = 0u128;
        let mut idx = 0;
        while idx < tokens.len() {
            set |= Self::mask(tokens[idx]);
            idx += 1;
        }
        Self(set)
    }

    /// Returns the set of tokens present in `self`, `other`, or both.
    pub const fn union(self, other: Self) -> Self {
        Self(self.0 | other.0)
    }

    /// Returns the set of tokens present in `self` but not in `other`.
    pub const fn without(self, other: Self) -> Self {
        Self(self.0 & !other.0)
    }

    /// Checks whether the given token value is a member of this set.
    pub const fn contains(&self, token: u8) -> bool {
        (self.0 & Self::mask(token)) != 0
    }

    /// Returns the number of tokens in this set.
    pub const fn count(&self) -> usize {
        self.0.count_ones() as usize
    }

    /// Iterates over the tokens in this set in ascending order.
    pub fn iter(&self) -> impl Iterator<Item = u8> + use<> {
        let mut remaining = self.0;
        std::iter::from_fn(move || {
            if remaining == 0 {
                return None;
            }

            // The lowest set bit is the next (smallest) token value.
            let token = u8::try_from(remaining.trailing_zeros())
                .expect("the maximum token value should be less than 128");

            // Clear the lowest set bit.
            remaining &= remaining - 1;
            Some(token)
        })
    }

    /// Returns the single-bit mask for the given token value.
    const fn mask(token: u8) -> u128 {
        1u128 << (token as usize)
    }
}
79
/// A token produced while lexing a document preamble — the text leading
/// up to and including the `version` keyword.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum PreambleToken {
    /// A run of whitespace (spaces, tabs, carriage returns, newlines).
    #[regex(r"[ \t\r\n]+")]
    Whitespace,

    /// A comment, from `#` to the end of the line.
    #[regex(r"#[^\r\n]*")]
    Comment,

    /// The `version` keyword.
    #[token("version")]
    VersionKeyword,

    /// Any single character not matched by the tokens above.
    ///
    /// This token is never converted to syntax (see `into_syntax`);
    /// presumably the parser uses it to detect unexpected preamble input.
    #[regex("[^ \t\r\n#]")]
    Any,

    /// A sentinel equal to the number of token variants; never produced
    /// by the lexer.
    MAX,
}

// Every real token value (0..MAX) must fit a bit index in the 128-bit
// `TokenSet`, i.e. the largest real token value must be at most 127.
const _: () = assert!(PreambleToken::MAX as u8 <= 128);
116
impl ParserToken<'_> for PreambleToken {
    /// Converts the token into its corresponding syntax kind.
    ///
    /// # Panics
    ///
    /// Panics if called on `Any` or `MAX`, which have no syntax
    /// representation.
    fn into_syntax(self) -> SyntaxKind {
        match self {
            Self::Whitespace => SyntaxKind::Whitespace,
            Self::Comment => SyntaxKind::Comment,
            Self::VersionKeyword => SyntaxKind::VersionKeyword,
            Self::Any | Self::MAX => unreachable!(),
        }
    }

    /// Returns the raw `u8` representation of the token.
    fn into_raw(self) -> u8 {
        self as u8
    }

    /// Reconstructs a token from its raw `u8` representation.
    ///
    /// # Panics
    ///
    /// Panics if `token` is not a valid token value (`>= MAX`).
    fn from_raw(token: u8) -> Self {
        assert!(token < Self::MAX as u8, "invalid token value");
        // SAFETY: the enum is `repr(u8)` with default discriminants
        // `0..=MAX`, and the assert above guarantees `token` is a valid
        // discriminant.
        unsafe { std::mem::transmute(token) }
    }

    /// Returns a human-readable description of the token for diagnostics.
    ///
    /// # Panics
    ///
    /// Panics if called on `Any` or `MAX`.
    fn describe(self) -> &'static str {
        match self {
            Self::Whitespace => "whitespace",
            Self::Comment => "comment",
            Self::VersionKeyword => "`version` keyword",
            Self::Any | Self::MAX => unreachable!(),
        }
    }

    /// Returns whether this token is trivia (whitespace or a comment).
    fn is_trivia(self) -> bool {
        matches!(self, Self::Whitespace | Self::Comment)
    }
}
149
/// A token produced while lexing the version statement that follows the
/// `version` keyword.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum VersionStatementToken {
    /// A run of whitespace (spaces, tabs, carriage returns, newlines).
    #[regex(r"[ \t\r\n]+")]
    Whitespace,

    /// A comment, from `#` to the end of the line.
    #[regex(r"#[^\r\n]*")]
    Comment,

    /// A version identifier: an alphanumeric character followed by any
    /// mix of alphanumerics, `.`, and `-` (e.g. `1.0`, `draft-3`).
    #[regex(r"[a-zA-Z0-9][a-zA-Z0-9.\-]*")]
    Version,

    /// A sentinel equal to the number of token variants; never produced
    /// by the lexer.
    MAX,
}

// Every real token value (0..MAX) must fit a bit index in the 128-bit
// `TokenSet`, i.e. the largest real token value must be at most 127.
const _: () = assert!(VersionStatementToken::MAX as u8 <= 128);
179
impl ParserToken<'_> for VersionStatementToken {
    /// Converts the token into its corresponding syntax kind.
    ///
    /// # Panics
    ///
    /// Panics if called on `MAX`, which has no syntax representation.
    fn into_syntax(self) -> SyntaxKind {
        match self {
            Self::Whitespace => SyntaxKind::Whitespace,
            Self::Comment => SyntaxKind::Comment,
            Self::Version => SyntaxKind::Version,
            Self::MAX => unreachable!(),
        }
    }

    /// Returns the raw `u8` representation of the token.
    fn into_raw(self) -> u8 {
        self as u8
    }

    /// Reconstructs a token from its raw `u8` representation.
    ///
    /// # Panics
    ///
    /// Panics if `token` is not a valid token value (`>= MAX`).
    fn from_raw(token: u8) -> Self {
        assert!(token < Self::MAX as u8, "invalid token value");
        // SAFETY: the enum is `repr(u8)` with default discriminants
        // `0..=MAX`, and the assert above guarantees `token` is a valid
        // discriminant.
        unsafe { std::mem::transmute(token) }
    }

    /// Returns a human-readable description of the token for diagnostics.
    ///
    /// # Panics
    ///
    /// Panics if called on `MAX`.
    fn describe(self) -> &'static str {
        match self {
            Self::Whitespace => "whitespace",
            Self::Comment => "comment",
            Self::Version => "version",
            Self::MAX => unreachable!(),
        }
    }

    /// Returns whether this token is trivia (whitespace or a comment).
    fn is_trivia(self) -> bool {
        matches!(self, Self::Whitespace | Self::Comment)
    }
}
212
/// The result of lexing a single token; `Err(())` carries no payload and
/// indicates text that matched no token rule.
pub type LexerResult<T> = Result<T, ()>;
215
/// A token buffered by a peek operation on [`Lexer`].
#[derive(Debug, Clone, Copy)]
struct Peeked<T> {
    /// The result of lexing the peeked token.
    result: LexerResult<T>,
    /// The source span of the peeked token.
    span: Span,
    /// The start offset of the token lexed *before* the peek; used to
    /// rebuild a lexer positioned just before the peeked token.
    offset: usize,
}
230
/// A peekable wrapper around a [`logos`] lexer.
#[allow(missing_debug_implementations)]
#[derive(Clone)]
pub struct Lexer<'a, T>
where
    T: Logos<'a, Extras = ()>,
{
    /// The underlying `logos` lexer.
    lexer: logos::Lexer<'a, T>,
    /// The buffered token from the most recent peek, if any.
    peeked: Option<Peeked<T>>,
}
245
impl<'a, T> Lexer<'a, T>
where
    T: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
{
    /// Creates a new lexer over the given source string.
    pub fn new(source: &'a str) -> Self
    where
        T::Extras: Default,
    {
        Self {
            lexer: T::lexer(source),
            peeked: None,
        }
    }

    /// Returns the slice of source text covered by the given span.
    pub fn source(&self, span: Span) -> &'a str {
        &self.lexer.source()[span.start()..span.end()]
    }

    /// Returns the total length of the source being lexed.
    pub fn source_len(&self) -> usize {
        self.lexer.source().len()
    }

    /// Returns the span of the most recently lexed token.
    pub fn span(&self) -> Span {
        self.lexer.span().into()
    }

    /// Peeks at the next token without consuming it.
    ///
    /// Returns `None` when the end of the input has been reached.
    pub fn peek(&mut self) -> Option<(LexerResult<T>, Span)> {
        if self.peeked.is_none() {
            // Record where the token *before* the peek started; `morph`
            // and `consume_remainder` use this offset to rebuild a lexer
            // whose position lands just before the (still unconsumed)
            // peeked token.
            let offset = self.lexer.span().start;
            self.peeked = self.lexer.next().map(|r| Peeked {
                result: r,
                span: self.lexer.span().into(),
                offset,
            });
        }

        self.peeked.map(|p| (p.result, p.span))
    }

    /// Converts this lexer into one producing a different token type,
    /// preserving the current position in the source.
    pub fn morph<T2>(self) -> Lexer<'a, T2>
    where
        T2: Logos<'a, Source = str, Error = (), Extras = ()> + Copy,
    {
        let lexer = match self.peeked {
            // A token was peeked but never consumed: `logos::Lexer::morph`
            // would skip past it, so instead build a fresh `T2` lexer and
            // advance it to just before the peeked token.
            Some(peeked) => {
                let mut lexer = T2::lexer(self.lexer.source());
                if peeked.offset > 0 {
                    // Jump to the start of the token preceding the peek,
                    // then lex it once so the new lexer ends up positioned
                    // at the start of the peeked token.
                    // NOTE(review): this assumes `T2` lexes that preceding
                    // text to the same end offset as `T` did — confirm for
                    // any new token-type pairs used with `morph`.
                    lexer.bump(peeked.offset);
                    lexer.next();
                }

                lexer
            }
            None => self.lexer.morph(),
        };

        Lexer {
            lexer,
            peeked: None,
        }
    }

    /// Consumes the rest of the input.
    ///
    /// Returns the span from the next unconsumed token (including any
    /// previously peeked token) to the end of the source, or `None` if
    /// the lexer was already at the end of the input.
    pub fn consume_remainder(&mut self) -> Option<Span> {
        // If a token was peeked, rewind by rebuilding the lexer positioned
        // just before that token so it is included in the remainder.
        if let Some(peeked) = self.peeked.take() {
            self.lexer = T::lexer(self.lexer.source());
            if peeked.offset > 0 {
                self.lexer.bump(peeked.offset);
                self.lexer.next();
            }
        }

        // Lex one more token, then extend its span to the end of the
        // source so `span()` covers everything remaining; at end of input
        // `next()` returns `None` and the span stays empty.
        self.lexer.next();
        self.lexer.bump(self.lexer.remainder().len());
        let span = self.lexer.span();
        assert!(self.next().is_none(), "lexer should be completed");
        if span.is_empty() {
            None
        } else {
            Some(span.into())
        }
    }
}
344
345impl<'a, T> Iterator for Lexer<'a, T>
346where
347 T: Logos<'a, Error = (), Extras = ()> + Copy,
348{
349 type Item = (LexerResult<T>, Span);
350
351 fn next(&mut self) -> Option<Self::Item> {
352 if let Some(peeked) = self.peeked.take() {
353 return Some((peeked.result, peeked.span));
354 }
355
356 self.lexer.next().map(|r| (r, self.lexer.span().into()))
357 }
358}
359
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Maps a lexed `(result, Span)` pair to `(result, Range)` so tests
    /// can compare spans against plain `usize` ranges.
    pub(crate) fn map<T>(
        (t, s): (LexerResult<T>, Span),
    ) -> (LexerResult<T>, std::ops::Range<usize>) {
        (t, s.start()..s.end())
    }

    #[test]
    fn test_version_1_0() {
        // Lex the preamble tokens, then morph to lex the version itself.
        let mut lexer = Lexer::<PreambleToken>::new(
            "
# Test for 1.0 documents
version 1.0",
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Whitespace), 0..1)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Comment), 1..25),
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Whitespace), 25..26),
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::VersionKeyword), 26..33),
        );

        // Morphing should continue from the position after `version`.
        let mut lexer: Lexer<'_, VersionStatementToken> = lexer.morph();
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(VersionStatementToken::Whitespace), 33..34),
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(VersionStatementToken::Version), 34..37)
        );
    }

    #[test]
    fn test_version_1_1() {
        // Same as above, but for a `1.1` version string.
        let mut lexer = Lexer::<PreambleToken>::new(
            "
# Test for 1.1 documents
version 1.1",
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Whitespace), 0..1)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Comment), 1..25)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Whitespace), 25..26)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::VersionKeyword), 26..33)
        );

        let mut lexer: Lexer<'_, VersionStatementToken> = lexer.morph();
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(VersionStatementToken::Whitespace), 33..34)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(VersionStatementToken::Version), 34..37)
        );
    }

    #[test]
    fn test_version_draft3() {
        // A version containing a hyphen (`draft-3`) must lex as a single
        // `Version` token.
        let mut lexer = Lexer::<PreambleToken>::new(
            "
# Test for draft-3 documents
version draft-3",
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Whitespace), 0..1)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Comment), 1..29)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::Whitespace), 29..30)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(PreambleToken::VersionKeyword), 30..37)
        );

        let mut lexer: Lexer<'_, VersionStatementToken> = lexer.morph();
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(VersionStatementToken::Whitespace), 37..38)
        );
        assert_eq!(
            lexer.next().map(map).unwrap(),
            (Ok(VersionStatementToken::Version), 38..45)
        );
    }
}