1#![forbid(unsafe_code)]
2use std::ops::Range;
8use std::sync::Arc;
9
10pub use text_size::{TextRange, TextSize};
11
12#[must_use]
13pub const fn text_size(value: u32) -> TextSize {
14 TextSize::new(value)
15}
16
17#[must_use]
18pub const fn text_range(start: u32, end: u32) -> TextRange {
19 TextRange::new(text_size(start), text_size(end))
20}
21
22#[must_use]
23pub fn range_start(range: TextRange) -> u32 {
24 range.start().into()
25}
26
27#[must_use]
28pub fn range_end(range: TextRange) -> u32 {
29 range.end().into()
30}
31
32#[must_use]
33pub fn range_len(range: TextRange) -> u32 {
34 range.len().into()
35}
36
37#[must_use]
38pub fn text_slice(text: &str, range: TextRange) -> &str {
39 &text[range_start(range) as usize..range_end(range) as usize]
40}
41
/// One entry of a sparse source map: a span of source offsets paired with the span of
/// display offsets it corresponds to.
///
/// All four offsets are stored exactly as given; the constructor performs no validation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceMapEdit {
    source_start: u32,
    source_end: u32,
    display_start: u32,
    display_end: u32,
}

impl SourceMapEdit {
    /// Creates an edit from its source span and display span bounds.
    #[must_use]
    pub const fn new(
        source_start: u32,
        source_end: u32,
        display_start: u32,
        display_end: u32,
    ) -> Self {
        Self {
            source_start,
            source_end,
            display_start,
            display_end,
        }
    }

    /// Start of the span on the source side.
    #[must_use]
    pub const fn source_start(self) -> u32 {
        self.source_start
    }

    /// End of the span on the source side.
    #[must_use]
    pub const fn source_end(self) -> u32 {
        self.source_end
    }

    /// Start of the span on the display side.
    #[must_use]
    pub const fn display_start(self) -> u32 {
        self.display_start
    }

    /// End of the span on the display side.
    #[must_use]
    pub const fn display_end(self) -> u32 {
        self.display_end
    }

    /// Signed shift `display_end - source_end`.
    ///
    /// Both ends are widened to `i64` before subtracting, so the result can be negative and
    /// cannot overflow.
    #[must_use]
    pub const fn delta_after(self) -> i64 {
        let display_end = self.display_end as i64;
        let source_end = self.source_end as i64;
        display_end - source_end
    }
}
91
92#[derive(Debug, Clone, PartialEq, Eq)]
93enum SourceMapKind {
94 Identity {
95 len: usize,
96 },
97 Indexed {
98 source_to_display: Arc<[u32]>,
99 },
100 Sparse {
101 source_len: usize,
102 display_len: usize,
103 edits: Arc<[SourceMapEdit]>,
104 },
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108pub struct SourceMap {
109 kind: SourceMapKind,
110}
111
112impl SourceMap {
113 #[must_use]
114 pub fn identity(len: usize) -> Self {
115 Self {
116 kind: SourceMapKind::Identity { len },
117 }
118 }
119
120 #[must_use]
121 pub fn from_source_to_display(source_to_display: Vec<u32>) -> Self {
122 Self::from_shared_source_to_display(source_to_display.into())
123 }
124
125 #[must_use]
126 pub fn from_shared_source_to_display(source_to_display: Arc<[u32]>) -> Self {
127 if source_to_display
128 .iter()
129 .enumerate()
130 .all(|(offset, mapped)| *mapped == u32::try_from(offset).unwrap_or(u32::MAX))
131 {
132 return Self::identity(source_to_display.len().saturating_sub(1));
133 }
134 Self {
135 kind: SourceMapKind::Indexed { source_to_display },
136 }
137 }
138
139 #[must_use]
140 pub fn from_sparse_edits(
141 source_len: usize,
142 display_len: usize,
143 edits: Arc<[SourceMapEdit]>,
144 ) -> Self {
145 if source_len == display_len && edits.is_empty() {
146 return Self::identity(source_len);
147 }
148 Self {
149 kind: SourceMapKind::Sparse {
150 source_len,
151 display_len,
152 edits,
153 },
154 }
155 }
156
157 #[must_use]
158 pub fn display_offset(&self, offset: u32) -> usize {
159 match &self.kind {
160 SourceMapKind::Identity { len } => usize::try_from(offset).unwrap_or(*len).min(*len),
161 SourceMapKind::Indexed { source_to_display } => source_to_display
162 .get(offset as usize)
163 .copied()
164 .or_else(|| source_to_display.last().copied())
165 .unwrap_or(offset)
166 as usize,
167 SourceMapKind::Sparse {
168 source_len,
169 display_len,
170 edits,
171 } => sparse_source_to_display(*source_len, *display_len, edits, offset),
172 }
173 }
174
175 #[must_use]
176 pub fn display_range(&self, range: TextRange) -> Range<usize> {
177 self.display_offset(range_start(range))..self.display_offset(range_end(range))
178 }
179
180 #[must_use]
181 pub fn source_offset_for_display(&self, display_offset: usize) -> u32 {
182 match &self.kind {
183 SourceMapKind::Identity { len } => {
184 u32::try_from(display_offset.min(*len)).unwrap_or(u32::MAX)
185 }
186 SourceMapKind::Indexed { source_to_display } => {
187 match source_to_display
188 .binary_search_by(|mapped| mapped.cmp(&(display_offset as u32)))
189 {
190 Ok(mut index) => {
191 while index + 1 < source_to_display.len()
192 && source_to_display[index + 1] <= display_offset as u32
193 {
194 index += 1;
195 }
196 u32::try_from(index).unwrap_or(u32::MAX)
197 }
198 Err(0) => 0,
199 Err(index) => u32::try_from(index - 1).unwrap_or(u32::MAX),
200 }
201 }
202 SourceMapKind::Sparse {
203 source_len,
204 display_len,
205 edits,
206 } => sparse_display_to_source(*source_len, *display_len, edits, display_offset),
207 }
208 }
209
210 #[must_use]
211 pub fn source_range_from_display_range(&self, range: Range<usize>) -> TextRange {
212 text_range(
213 self.source_offset_for_display(range.start),
214 self.source_offset_for_display(range.end),
215 )
216 }
217}
218
219fn sparse_source_to_display(
220 source_len: usize,
221 display_len: usize,
222 edits: &[SourceMapEdit],
223 offset: u32,
224) -> usize {
225 let clamped = usize::try_from(offset)
226 .unwrap_or(source_len)
227 .min(source_len) as u32;
228 let Some(index) = edits
229 .partition_point(|edit| edit.source_start() <= clamped)
230 .checked_sub(1)
231 else {
232 return clamped as usize;
233 };
234 let edit = edits[index];
235 if clamped == edit.source_start() {
236 return edit.display_start() as usize;
237 }
238 if clamped <= edit.source_end() {
239 return edit.display_end() as usize;
240 }
241 let mapped = (clamped as i64 + edit.delta_after()).clamp(0, display_len as i64);
242 mapped as usize
243}
244
245fn sparse_display_to_source(
246 source_len: usize,
247 display_len: usize,
248 edits: &[SourceMapEdit],
249 offset: usize,
250) -> u32 {
251 let clamped = offset.min(display_len) as u32;
252 let Some(index) = edits
253 .partition_point(|edit| edit.display_start() <= clamped)
254 .checked_sub(1)
255 else {
256 return clamped;
257 };
258 let edit = edits[index];
259 if clamped == edit.display_start() {
260 return edit.source_start();
261 }
262 if clamped <= edit.display_end() {
263 return edit.source_end();
264 }
265 let mapped = (clamped as i64 - edit.delta_after()).clamp(0, source_len as i64);
266 mapped as u32
267}
268
269#[derive(Debug, Clone, Copy)]
270pub struct SourceView<'a> {
271 text: &'a str,
272 source_map: &'a SourceMap,
273}
274
275impl<'a> SourceView<'a> {
276 #[must_use]
277 pub fn new(text: &'a str, source_map: &'a SourceMap) -> Self {
278 Self { text, source_map }
279 }
280
281 #[must_use]
282 pub fn text(self) -> &'a str {
283 self.text
284 }
285
286 #[must_use]
287 pub fn source_map(self) -> &'a SourceMap {
288 self.source_map
289 }
290
291 #[must_use]
292 pub fn display_range(self, range: TextRange) -> Range<usize> {
293 self.source_map.display_range(range)
294 }
295
296 #[must_use]
297 pub fn display_slice(self, range: TextRange) -> &'a str {
298 &self.text[self.display_range(range)]
299 }
300
301 #[must_use]
302 pub fn slice(self, range: TextRange) -> &'a str {
303 self.display_slice(range)
304 }
305
306 #[must_use]
307 pub fn source_range_from_display_range(self, range: Range<usize>) -> TextRange {
308 self.source_map.source_range_from_display_range(range)
309 }
310}
311
/// Classification of a lexed [`Token`].
///
/// Variant order is kept stable because discriminant values follow declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // Whitespace and comments: the kinds `TokenKind::is_trivia` reports as trivia.
    Whitespace,
    LineComment,
    BlockComment,

    // Identifiers, literals and other word-like tokens.
    Ident,
    IntLiteral,
    FloatLiteral,
    StringLiteral,
    Flag,
    Dollar,
    Backquote,

    // Brackets and separators.
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    Dot,
    Pipe,
    Comma,
    Semi,

    // Assignment forms.
    Assign,
    PlusEq,
    MinusEq,
    StarEq,
    SlashEq,

    // Arithmetic and conditional operators.
    Plus,
    PlusPlus,
    Minus,
    MinusMinus,
    Star,
    Slash,
    Percent,
    Caret,
    Question,
    Colon,

    // Comparison, shift and logical operators.
    EqEq,
    NotEq,
    LtLt,
    Lt,
    Le,
    GtGt,
    Gt,
    Ge,
    AndAnd,
    OrOr,
    Bang,

    // Fallback and end-of-input kinds.
    Unknown,
    Eof,
}
363
364#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
365pub struct Token {
366 pub kind: TokenKind,
367 pub range: TextRange,
368}
369
370impl Token {
371 #[must_use]
372 pub const fn new(kind: TokenKind, range: TextRange) -> Self {
373 Self { kind, range }
374 }
375}
376
377impl TokenKind {
378 #[must_use]
379 pub const fn is_trivia(self) -> bool {
380 matches!(
381 self,
382 Self::Whitespace | Self::LineComment | Self::BlockComment
383 )
384 }
385}
386
387#[derive(Debug, Clone, PartialEq, Eq)]
388pub struct LexDiagnostic {
389 pub message: &'static str,
390 pub range: TextRange,
391}
392
393impl LexDiagnostic {
394 #[must_use]
395 pub const fn new(message: &'static str, range: TextRange) -> Self {
396 Self { message, range }
397 }
398}
399
400#[derive(Debug, Clone, PartialEq, Eq, Default)]
401pub struct Lexed {
402 pub tokens: Vec<Token>,
403 pub diagnostics: Vec<LexDiagnostic>,
404}
405
#[cfg(test)]
mod tests {
    use super::{LexDiagnostic, SourceMap, SourceMapEdit, Token, TokenKind, range_len, text_range};

    /// Checks each `(source, expected display)` pair against `SourceMap::display_offset`.
    fn assert_display_offsets(map: &SourceMap, cases: &[(u32, usize)]) {
        for &(source, display) in cases {
            assert_eq!(map.display_offset(source), display);
        }
    }

    /// Checks each `(display, expected source)` pair against
    /// `SourceMap::source_offset_for_display`.
    fn assert_source_offsets(map: &SourceMap, cases: &[(usize, u32)]) {
        for &(display, source) in cases {
            assert_eq!(map.source_offset_for_display(display), source);
        }
    }

    #[test]
    fn text_range_helpers_keep_offsets() {
        let range = text_range(10, 15);
        assert_eq!(range_len(range), 5);
        assert!(!range.is_empty());
    }

    #[test]
    fn token_constructor_keeps_fields() {
        let range = text_range(1, 2);
        let token = Token::new(TokenKind::Semi, range);
        assert_eq!(token.kind, TokenKind::Semi);
        assert_eq!(token.range, text_range(1, 2));
    }

    #[test]
    fn lex_diagnostic_constructor_keeps_fields() {
        let range = text_range(2, 4);
        let diagnostic = LexDiagnostic::new("bad token", range);
        assert_eq!(diagnostic.message, "bad token");
        assert_eq!(diagnostic.range, text_range(2, 4));
    }

    #[test]
    fn trivia_kinds_are_marked_as_trivia() {
        let trivia = [
            TokenKind::Whitespace,
            TokenKind::LineComment,
            TokenKind::BlockComment,
        ];
        for kind in trivia {
            assert!(kind.is_trivia());
        }
        assert!(!TokenKind::Ident.is_trivia());
    }

    #[test]
    fn source_map_can_map_display_offsets_back_to_source_offsets() {
        let map = SourceMap::from_source_to_display(vec![0, 3, 3, 4]);
        assert_source_offsets(&map, &[(0, 0), (3, 2), (4, 3)]);
        assert_eq!(map.source_range_from_display_range(0..3), text_range(0, 2));
    }

    #[test]
    fn identity_source_map_avoids_index_materialization() {
        let map = SourceMap::identity(8);
        assert_display_offsets(&map, &[(3, 3), (99, 8)]);
        assert_source_offsets(&map, &[(5, 5), (99, 8)]);
        assert_eq!(map.source_range_from_display_range(2..6), text_range(2, 6));
    }

    #[test]
    fn sparse_source_map_handles_positive_delta() {
        let edits = vec![SourceMapEdit::new(1, 2, 1, 3)];
        let map = SourceMap::from_sparse_edits(4, 5, edits.into());
        assert_display_offsets(&map, &[(0, 0), (1, 1), (2, 3), (4, 5)]);
        assert_source_offsets(&map, &[(0, 0), (1, 1), (2, 2), (3, 2), (5, 4)]);
    }

    #[test]
    fn sparse_source_map_handles_negative_delta() {
        let edits = vec![SourceMapEdit::new(1, 5, 1, 3)];
        let map = SourceMap::from_sparse_edits(6, 4, edits.into());
        assert_display_offsets(&map, &[(0, 0), (1, 1), (2, 3), (5, 3), (6, 4)]);
        assert_source_offsets(&map, &[(0, 0), (1, 1), (2, 5), (3, 5), (4, 6)]);
    }
}
486}