any_lexer/
lib.rs

1//! Collection of lexer implementations, capable of tokenizing various
2//! programming languages, markup languages, and various other formats.
3
4#![forbid(unsafe_code)]
5#![forbid(elided_lifetimes_in_paths)]
6
7mod lexers;
8
9pub use text_scanner as scanner;
10
11pub use self::lexers::*;
12
13use std::fmt;
14use std::ops::Range;
15
16use text_scanner::Scanner;
17
/// A view of a single token inside the source text it was scanned from.
///
/// A span stores a reference to the text together with the byte range of the
/// token within that text, so both the token's string and its position are
/// available from one value.
///
/// Equality is identity-based rather than content-based; see the
/// [`PartialEq`] implementation for details.
#[derive(Eq, Clone)]
pub struct TokenSpan<'text> {
    /// The text that `range` indexes into.
    text: &'text str,
    /// Byte range of the token within `text`.
    range: Range<usize>,
}

impl<'text> TokenSpan<'text> {
    /// Creates a span covering the byte offsets `range` of `text`.
    ///
    /// The range is not validated here. A range that is out of bounds or that
    /// does not fall on `char` boundaries makes [`as_str`](Self::as_str)
    /// panic later.
    #[inline]
    pub fn new(text: &'text str, range: Range<usize>) -> Self {
        Self { text, range }
    }

    /// Returns the slice of the text covered by this span.
    ///
    /// The returned slice borrows from the text (lifetime `'text`), not from
    /// the span itself.
    ///
    /// # Panics
    ///
    /// Panics if the stored range is not a valid slice index for the text.
    #[inline]
    pub fn as_str(&self) -> &'text str {
        // `Range` is not `Copy`, so it has to be cloned to be used as an index.
        &self.text[self.range.clone()]
    }

    /// Returns a copy of the span's byte range within the text.
    #[inline]
    pub fn range(&self) -> Range<usize> {
        self.range.clone()
    }

    /// Returns the byte offset at which the span starts (inclusive).
    #[inline]
    pub fn start(&self) -> usize {
        self.range.start
    }

    /// Returns the byte offset at which the span ends (exclusive).
    #[inline]
    pub fn end(&self) -> usize {
        self.range.end
    }

    /// Returns the length of the span in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.range.len()
    }

    /// Returns `true` if the span covers no bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.range.is_empty()
    }

    /// Joins two spans over the same text into the smallest span that covers
    /// both of them.
    ///
    /// Returns `None` unless both spans refer to the very same text slice,
    /// i.e. the same start address *and* the same length. The length has to
    /// match as well because the result keeps `self`'s text: if `other` viewed
    /// a longer slice starting at the same address, the joined range could
    /// reach past the end of `self`'s text and [`as_str`](Self::as_str) would
    /// panic.
    #[doc(hidden)]
    #[inline]
    pub fn join(&self, other: &Self) -> Option<Self> {
        let same_text = self.text.as_ptr() == other.text.as_ptr()
            && self.text.len() == other.text.len();
        if same_text {
            Some(self.join_unchecked(other))
        } else {
            None
        }
    }

    /// Joins two spans without checking that they refer to the same text.
    ///
    /// The result keeps `self`'s text and spans from the smaller of the two
    /// start offsets to the larger of the two end offsets. If the spans come
    /// from different texts the resulting range may be meaningless or out of
    /// bounds for `self`'s text.
    #[doc(hidden)]
    #[inline]
    pub fn join_unchecked(&self, other: &Self) -> Self {
        let start = self.range.start.min(other.range.start);
        let end = self.range.end.max(other.range.end);
        Self::new(self.text, start..end)
    }
}

/// Formats the span as `TokenSpan { start, end, string }`, showing the covered
/// slice instead of the whole text.
impl fmt::Debug for TokenSpan<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TokenSpan")
            .field("start", &self.range.start)
            .field("end", &self.range.end)
            .field("string", &self.as_str())
            .finish()
    }
}

/// Displays exactly the slice of text covered by the span, honoring any
/// formatter flags the same way `str` does.
impl fmt::Display for TokenSpan<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.as_str().fmt(f)
    }
}

/// Two spans are equal when their texts start at the same address and their
/// ranges are equal.
///
/// This is an identity comparison: spans over distinct texts with equal
/// contents compare unequal. Only the start address of the text is compared,
/// not its length.
impl PartialEq for TokenSpan<'_> {
    fn eq(&self, other: &Self) -> bool {
        (self.text.as_ptr() == other.text.as_ptr()) && (self.range == other.range)
    }
}
100
101pub trait Lexer<'text> {
102    type Token: ScanToken;
103
104    #[inline]
105    fn next_token(&mut self) -> Option<(Self::Token, TokenSpan<'text>)> {
106        Self::Token::scan_token(self.scanner_mut())
107    }
108
109    #[inline]
110    fn peek_token(&self) -> Option<(Self::Token, TokenSpan<'text>)> {
111        self.scanner().peeking(Self::Token::scan_token)
112    }
113
114    #[inline]
115    fn cursor_pos(&self) -> usize {
116        self.scanner().cursor_pos()
117    }
118
119    #[inline]
120    fn set_cursor_pos(&mut self, pos: usize) -> usize {
121        self.scanner_mut().set_cursor_pos(pos)
122    }
123
124    #[inline]
125    fn reset(&mut self) -> usize {
126        self.set_cursor_pos(0)
127    }
128
129    fn scanner(&self) -> &Scanner<'text>;
130    fn scanner_mut(&mut self) -> &mut Scanner<'text>;
131}
132
/// A token type that knows how to scan one instance of itself from a
/// [`Scanner`].
///
/// This is the hook the default [`Lexer::next_token`] and
/// [`Lexer::peek_token`] implementations call.
pub trait ScanToken: Sized {
    /// Attempts to scan a single token from `scanner`.
    ///
    /// Returns the token together with the [`TokenSpan`] it was scanned from,
    /// or `None` when no token is produced (presumably at the end of the
    /// input; the exact condition is up to each implementation).
    fn scan_token<'text>(scanner: &mut Scanner<'text>) -> Option<(Self, TokenSpan<'text>)>;
}
136
/// Implements `Lexer` for a lexer type that stores its scanner in a field,
/// and then invokes `impl_iter_for_lexer!` for the same type.
///
/// Arguments, in order:
/// - `$lifetime`: the lifetime of the text being lexed,
/// - `$lexer`: the lexer type (usually mentioning `$lifetime`),
/// - `$token`: the type to use as `Lexer::Token`,
/// - `$scanner`: the name of the field on `$lexer` holding the scanner.
///
/// All crate items are named through `$crate`, so the expansion does not
/// depend on what the invoking module has imported.
macro_rules! impl_lexer_from_scanner {
    ($lifetime:lifetime, $lexer:ty, $token:ty, $scanner:ident) => {
        impl<$lifetime> $crate::Lexer<$lifetime> for $lexer {
            type Token = $token;

            #[inline]
            fn scanner(&self) -> &$crate::scanner::Scanner<$lifetime> {
                &self.$scanner
            }

            #[inline]
            fn scanner_mut(&mut self) -> &mut $crate::scanner::Scanner<$lifetime> {
                &mut self.$scanner
            }
        }

        $crate::impl_iter_for_lexer!($lifetime, $lexer);
    };
}
156
157pub(crate) use impl_lexer_from_scanner;
158
/// Implements `Iterator` and `FusedIterator` for a type that already
/// implements `Lexer`.
///
/// Arguments, in order:
/// - `$lifetime`: the lifetime of the text being lexed,
/// - `$lexer`: the lexer type (usually mentioning `$lifetime`).
///
/// Each call to `next` forwards to `Lexer::next_token`, so the iterator yields
/// `(token, span)` pairs. Note that the `FusedIterator` impl is a promise that
/// `next_token` keeps returning `None` once it has returned `None`; the macro
/// cannot check that.
///
/// All crate items are named through `$crate`, so the expansion does not
/// depend on what the invoking module has imported.
macro_rules! impl_iter_for_lexer {
    ($lifetime:lifetime, $lexer:ty) => {
        impl<$lifetime> Iterator for $lexer {
            type Item = (
                <Self as $crate::Lexer<$lifetime>>::Token,
                $crate::TokenSpan<$lifetime>,
            );

            #[inline]
            fn next(&mut self) -> Option<Self::Item> {
                $crate::Lexer::next_token(self)
            }
        }

        impl<$lifetime> std::iter::FusedIterator for $lexer {}
    };
}
176
177pub(crate) use impl_iter_for_lexer;
178
/// Crate-internal extension trait for building [`TokenSpan`]s from a scanner.
pub(crate) trait ScannerExt<'text> {
    /// Returns a [`TokenSpan`] for the byte offsets `range`.
    fn span(&self, range: Range<usize>) -> TokenSpan<'text>;
}
182
183impl<'text> ScannerExt<'text> for Scanner<'text> {
184    #[inline]
185    fn span(&self, range: Range<usize>) -> TokenSpan<'text> {
186        TokenSpan::new(self.text(), range)
187    }
188}