ilex/file/
mod.rs

1//! Source code file management.
2
3use std::cell::RefCell;
4use std::fmt;
5use std::fmt::Write;
6use std::iter;
7use std::ops::Bound;
8use std::ops::Index;
9use std::ops::RangeBounds;
10use std::ptr;
11use std::slice;
12use std::sync::RwLockReadGuard;
13
14use camino::Utf8Path;
15
16use crate::report::Fatal;
17use crate::report::Report;
18use crate::rt;
19use crate::spec::Spec;
20use crate::token;
21use crate::Never;
22
23mod context;
24pub use context::Context;
25
/// An input source file.
///
/// This is a small `Copy` handle: every field is either a reference that lives
/// for `'ctx` or a plain index.
#[derive(Copy, Clone)]
pub struct File<'ctx> {
  // The name of this file, as a path.
  path: &'ctx Utf8Path,
  // The file contents, followed by one extra trailing space that is reserved
  // for the EOF span. `File::text()` hides that extra byte from callers.
  text: &'ctx str,
  // The context that owns this file.
  ctx: &'ctx Context,
  // Index of this file; `Span` stores it (narrowed to `u32`) and hands it back
  // to `Context::file()` to recover the `File`.
  idx: usize,
}
34
35impl<'ctx> File<'ctx> {
36  /// Returns the name of this file, as a path.
37  pub fn path(self) -> &'ctx Utf8Path {
38    self.path
39  }
40
41  /// Returns the textual contents of this file. This function takes a range,
42  /// since immediately slicing the file text is an extremely common operation.
43  ///
44  /// To get the whole file, use `file.text(..)`.
45  pub fn text<R>(self, range: R) -> &'ctx str
46  where
47    str: Index<R, Output = str>,
48  {
49    // Text contains an extra space at the very end for the EOF
50    // span to use if necessary.
51    //
52    // XXX: Apparently rustc forgets about other <str as Index> impls if we use
53    // text[..x] here??
54    let text = &self.text.get(..self.text.len() - 1).unwrap();
55    &text[range]
56  }
57
58  /// Returns the length of this file in bytes.
59  #[allow(clippy::len_without_is_empty)]
60  pub fn len(self) -> usize {
61    self.text(..).len()
62  }
63
64  pub(crate) fn text_with_extra_space(self) -> &'ctx str {
65    self.text
66  }
67
68  /// Returns the [`Context`] that owns this file.
69  pub fn context(self) -> &'ctx Context {
70    self.ctx
71  }
72
73  /// Creates a new [`Span`] for diagnostics from this file.
74  ///
75  /// # Panics
76  ///
77  /// Panics if `start > end`, or if `end` is greater than the length of the
78  /// file.
79  pub fn span(self, range: impl RangeBounds<usize>) -> Span {
80    Span::new(self, range)
81  }
82
83  pub(crate) fn idx(self) -> usize {
84    self.idx
85  }
86
87  /// Tokenizes the this file according to `spec` and generates a token stream.
88  pub fn lex(
89    self,
90    spec: &'ctx Spec,
91    report: &Report,
92  ) -> Result<token::Stream<'ctx>, Fatal> {
93    rt::lex(self, report, spec)
94  }
95}
96
97impl PartialEq for File<'_> {
98  fn eq(&self, other: &Self) -> bool {
99    ptr::eq(self.ctx, other.ctx) && self.idx == other.idx
100  }
101}
102
/// A range within a [`File`].
///
/// Full span information (such as comments) is not necessary for diagnostics,
/// so anything that implements [`Spanned`] is suitable for placing spanned data
/// in diagnostics.
#[derive(Copy, Clone)]
pub struct Span {
  // Index of the file this span points into (see `File::idx()`).
  file: u32,
  // Inclusive start of the span, as a byte offset into the file.
  start: u32,
  // Exclusive end of the span, as a byte offset into the file.
  end: u32,
}
114
/// An interned [`Span`].
///
/// Most tokens' spans will never be inspected after lexing, so it's better to
/// make them small for memory saving reasons. This abstraction allows the
/// library to optimize internal handling of spans over time.
///
/// This type is just a numeric ID; in order to do anything with it, you'll
/// need to call one of the functions in [`Spanned`], all of which take the
/// [`Context`] used to resolve the ID.
#[derive(Copy, Clone)]
pub struct SpanId(u32);
125
126impl fmt::Debug for SpanId {
127  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
128    CTX_FOR_SPAN_DEBUG.with(|ctx| {
129      let ctx = ctx.borrow();
130      let Some(ctx) = &*ctx else {
131        return f.write_str("<elided>");
132      };
133
134      fmt::Debug::fmt(&Spanned::span(&self, ctx), f)
135    })
136  }
137}
138
139impl Span {
140  /// Constructs a span from a file and a byte range within it.
141  ///
142  /// # Panics
143  ///
144  /// Panics if `start > end`, or if `end` is greater than the length of the
145  /// file.
146  #[track_caller]
147  pub(crate) fn new<T: Copy + TryInto<u32> + fmt::Debug>(
148    file: File,
149    range: impl RangeBounds<T>,
150  ) -> Span {
151    let start = match range.start_bound() {
152      Bound::Included(&x) => cast(x),
153      Bound::Excluded(&x) => cast(x).saturating_add(1),
154      Bound::Unbounded => 0,
155    };
156
157    let end = match range.end_bound() {
158      Bound::Included(&x) => cast(x).saturating_add(1),
159      Bound::Excluded(&x) => cast(x),
160      Bound::Unbounded => file.len() as u32,
161    };
162
163    assert!(start <= end, "out of order range: {start} > {end}",);
164    assert!(
165      end as usize <= file.text.len(),
166      "got out of bounds range: {end} > {}",
167      file.text.len(),
168    );
169
170    Span { file: file.idx() as u32, start, end }
171  }
172
173  /// Gets the file for this span.
174  ///
175  /// # Panics
176  ///
177  /// May panic if this span is not owned by `ctx` (or it may produce an
178  /// unexpected result).
179  pub fn file(self, ctx: &Context) -> File {
180    ctx.file(self.file as usize).unwrap()
181  }
182
183  /// Returns the start (inclusive) byte offset of this span.
184  pub fn start(self) -> usize {
185    self.start as usize
186  }
187
188  /// Returns the end (exclusive) byte offset of this span.
189  pub fn end(self) -> usize {
190    self.end as usize
191  }
192
193  /// Returns whether this span has zero length.
194  pub fn is_empty(self) -> bool {
195    self.len() == 0
196  }
197
198  /// Returns the length of this span, in bytes.
199  pub fn len(self) -> usize {
200    (self.end - self.start) as usize
201  }
202
203  /// Gets the comment associated with this span, if any.
204  ///
205  /// # Panics
206  ///
207  /// May panic if this span is not owned by `ctx` (or it may produce an
208  /// unexpected result).
209  pub fn comments(self, ctx: &Context) -> Comments {
210    Comments {
211      slice: ctx.lookup_comments(self.file(ctx), self.start()),
212      ctx,
213    }
214  }
215
216  /// Returns a subspan of this range.
217  ///
218  /// # Panics
219  ///
220  /// Panics if `start` > `end` or `end` > `self.len()`.
221  pub fn subspan<T: Copy + TryInto<u32> + fmt::Debug>(
222    self,
223    range: impl RangeBounds<T>,
224  ) -> Span {
225    let start = match range.start_bound() {
226      Bound::Included(&x) => cast(x),
227      Bound::Excluded(&x) => cast(x).saturating_add(1),
228      Bound::Unbounded => 0,
229    };
230
231    let end = match range.end_bound() {
232      Bound::Included(&x) => cast(x).saturating_add(1),
233      Bound::Excluded(&x) => cast(x),
234      Bound::Unbounded => self.len() as u32,
235    };
236
237    assert!(start <= end, "out of order range: {start} > {end}");
238    assert!(
239      end <= (self.len() as u32),
240      "subspan ends past end of range: {end} > {}",
241      self.len()
242    );
243
244    Span {
245      file: self.file,
246      start: self.start + start,
247      end: self.start + end,
248    }
249  }
250
251  /// Splits this range in two at `at`.
252  ///
253  /// # Panics
254  ///
255  /// Panics if `at` is larger than the length of this range.
256  pub fn split_at(self, at: usize) -> (Span, Span) {
257    (self.subspan(..at), self.subspan(at..))
258  }
259
260  /// Splits off a prefix and a suffix from `range`, and returns the split
261  /// parts in order.
262  ///
263  /// # Panics
264  ///
265  /// Panics if `range` is smaller than `pre + suf`.
266  pub fn split_around(self, pre: usize, suf: usize) -> [Span; 3] {
267    let (pre, range) = self.split_at(pre);
268    let (range, suf) = range.split_at(range.len() - suf);
269    [pre, range, suf]
270  }
271
272  /// Looks up the textual content of this range.
273  ///
274  /// # Panics
275  ///
276  /// May panic if this range is not owned by `ctx` (or it may produce an
277  /// unexpected result).
278  pub fn text(self, ctx: &Context) -> &str {
279    self.file(ctx).text(self.start as usize..self.end as usize)
280  }
281
282  /// Joins together a collection of ranges.
283  ///
284  /// # Panics
285  ///
286  /// May panic if not all spans are for the same file, or if the iterator
287  /// is empty.
288  pub fn union(ranges: impl IntoIterator<Item = Span>) -> Span {
289    let mut best = None;
290
291    for range in ranges {
292      let best = best.get_or_insert(range);
293
294      assert_eq!(
295        best.file, range.file,
296        "attempted to join spans of different files"
297      );
298
299      best.start = u32::min(best.start, range.start);
300      best.end = u32::max(best.end, range.end);
301    }
302
303    best.expect("attempted to join zero spans")
304  }
305
306  /// Bakes this range into a span.
307  pub(crate) fn intern(self, ctx: &Context) -> SpanId {
308    ctx.new_span(self)
309  }
310
311  /// Bakes this range into a span.
312  pub(crate) fn intern_nonempty(self, ctx: &Context) -> Option<SpanId> {
313    if self.is_empty() {
314      return None;
315    }
316    Some(self.intern(ctx))
317  }
318
319  /// Sets the comment associated with a given span. The comment must itself
320  /// be specified as a span.
321  pub(crate) fn append_comment_span(self, ctx: &Context, comment: SpanId) {
322    ctx.add_comment(self.file(ctx), self.start(), comment)
323  }
324}
325
326/// A syntax element which contains a span.
327///
328/// You should implement this type for any type which naturally has a single
329/// span that describes it.
330pub trait Spanned {
331  /// Returns the span in this syntax element.
332  fn span(&self, ctx: &Context) -> Span;
333
334  /// Forwards to [`SpanId::file()`].
335  fn file<'ctx>(&self, ctx: &'ctx Context) -> File<'ctx> {
336    self.span(ctx).file(ctx)
337  }
338
339  /// Forwards to [`Span::start()`].
340  fn start(&self, ctx: &Context) -> usize {
341    self.span(ctx).start()
342  }
343
344  /// Forwards to [`Span::end()`].
345  fn end(&self, ctx: &Context) -> usize {
346    self.span(ctx).end()
347  }
348
349  /// Forwards to [`Span::is_empty()`].
350  fn is_empty(&self, ctx: &Context) -> bool {
351    self.span(ctx).is_empty()
352  }
353
354  /// Forwards to [`Span::len()`].
355  fn len(&self, ctx: &Context) -> usize {
356    self.span(ctx).len()
357  }
358
359  /// Forwards to [`SpanId::text()`].
360  fn text<'ctx>(&self, ctx: &'ctx Context) -> &'ctx str {
361    self.span(ctx).text(ctx)
362  }
363
364  /// Forwards to [`SpanId::comments()`].
365  fn comments<'ctx>(&self, ctx: &'ctx Context) -> Comments<'ctx> {
366    self.span(ctx).comments(ctx)
367  }
368}
369
// An interned span is resolved back into its `Span` by asking the context.
impl Spanned for SpanId {
  fn span(&self, ctx: &Context) -> Span {
    ctx.lookup_range(*self)
  }
}
375
// Spans are spanned by their own spans: the context is not consulted and the
// span is returned as-is.
impl Spanned for Span {
  fn span(&self, _ctx: &Context) -> Span {
    *self
  }
}
382
383impl<S: Spanned> Spanned for &S {
384  fn span(&self, ctx: &Context) -> Span {
385    S::span(self, ctx)
386  }
387}
388
// NOTE(review): `Never` is presumably uninhabited, which would make this impl
// exist only to satisfy trait bounds and its body unreachable -- confirm
// against the definition of `crate::Never`.
impl Spanned for Never {
  fn span(&self, _ctx: &Context) -> Span {
    self.from_nothing_anything()
  }
}
394
// Per-thread context consulted by the `Debug` impls of `Span` and `SpanId`.
//
// When this holds a context, those impls use it to print the span's text and
// file path; when it is `None` (the initial value) they fall back to printing
// `<elided>` (for `SpanId`) or the raw file index (for `Span`).
thread_local! {
  static CTX_FOR_SPAN_DEBUG: RefCell<Option<Context>> = RefCell::new(None);
}
398
399impl fmt::Debug for Span {
400  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
401    CTX_FOR_SPAN_DEBUG.with(|ctx| {
402      if let Some(ctx) = &*ctx.borrow() {
403        let text = self.text(ctx);
404        write!(f, "`")?;
405        for c in text.chars() {
406          if ('\x20'..'\x7e').contains(&c) {
407            f.write_char(c)?;
408          } else if c < '\x20' {
409            write!(f, "{}", c.escape_debug())?
410          } else {
411            write!(f, "<U+{:04X}>", c as u32)?;
412          }
413        }
414        write!(f, "` @ {}", self.file(ctx).path())?;
415      } else {
416        write!(f, "<#{}>", self.file)?;
417      }
418
419      write!(f, "[{}..{}]", Span::start(*self), Span::end(*self))
420    })
421  }
422}
423
/// An iterator over the comment spans attached to a [`SpanId`].
pub struct Comments<'ctx> {
  // A read guard on the context's state, paired with a raw pointer to the
  // slice of comment spans; produced by `Context::lookup_comments()`.
  //
  // NOTE(review): presumably the pointer refers to data protected by the
  // guard, so it is only valid while the guard is held -- confirm against
  // `Context::lookup_comments()`.
  slice: (RwLockReadGuard<'ctx, context::State>, *const [SpanId]),
  // The context used to resolve each `SpanId` into text.
  ctx: &'ctx Context,
}
429
430impl<'ctx> Comments<'ctx> {
431  /// Adapts this iterator to return just the text contents of each [`SpanId`].
432  pub fn as_strings(&self) -> impl Iterator<Item = &'_ str> {
433    unsafe { &*self.slice.1 }
434      .iter()
435      .map(|span| span.text(self.ctx))
436  }
437}
438
439impl<'a> IntoIterator for &'a Comments<'_> {
440  type Item = SpanId;
441  type IntoIter = iter::Copied<slice::Iter<'a, SpanId>>;
442
443  fn into_iter(self) -> Self::IntoIter {
444    unsafe { &*self.slice.1 }.iter().copied()
445  }
446}
447
448#[track_caller]
449fn cast<T: Copy + TryInto<u32> + fmt::Debug>(value: T) -> u32 {
450  value
451    .try_into()
452    .unwrap_or_else(|_| bug!("range bound does not fit into u32: {:?}", value))
453}