qbe_parser/ast/
span.rs

1use line_index::LineCol;
2use std::cmp::Ordering;
3use std::fmt;
4use std::fmt::{Debug, Display, Formatter};
5use std::hash::{Hash, Hasher};
6use std::ops::{Bound, Deref, Range, RangeBounds};
7use text_size::TextSize;
8
9/// An error that occurs when a [`Location`] or [`Span`] is missing.
10///
11/// See [`Location::MISSING`] and [`Span::MISSING`] for more details.
12#[derive(Copy, Clone, Debug, Eq, PartialEq, thiserror::Error)]
13#[error("Missing location information")]
14pub struct MissingLocationError;
15
16/// Either references a location in the original source file,
17/// or is [`Location::MISSING`].
18///
19/// A location has a total order based on the byte offset,
20/// with [`Location::MISSING`] coming after every valid location.
21#[derive(Copy, Clone)]
22pub struct Location {
23    /// This field is either the byte offset or [`u64::MAX`] if the span is unknown.
24    ///
25    /// While the main reason for this representation is efficiency,
26    /// it also means that [`Span::byte_len`] can never overflow.
27    ///
28    /// This field should really be of type `Option<NonMax>`,
29    /// but that is not possible to properly express without pattern types.
30    /// Until this feature becomes stable, it should remain private.
31    byte_offset: u64,
32}
33impl Location {
34    /// Indicates that the location information is missing,
35    /// and doesn't correspond to a known location in the source file.
36    ///
37    /// For the purposes of a comparison using [`Ord`],
38    /// a missing value comes after all present values.
39    pub const MISSING: Location = Location {
40        byte_offset: u64::MAX,
41    };
42
43    /// Create a [`Location`] referencing a byte offset in the source file.
44    ///
45    /// # Panics
46    /// If the byte offset is too large, this will panic.
47    #[inline]
48    pub fn from_byte<T: sealed::PrimUInt>(byte_offset: T) -> Location {
49        let byte_offset = num_traits::cast::<_, u64>(byte_offset)
50            .filter(|&x| x != u64::MAX)
51            .expect("byte offset overflow");
52        Location { byte_offset }
53    }
54
55    /// Determine if the location is [`Self::MISSING`].
56    #[inline]
57    pub const fn is_missing(self) -> bool {
58        self.byte_offset == Self::MISSING.byte_offset
59    }
60
61    /// Return the byte offset of this location,
62    /// or a [`MissingLocationError`] if missing.
63    #[inline]
64    pub const fn byte_offset(self) -> Result<u64, MissingLocationError> {
65        if !self.is_missing() {
66            Ok(self.byte_offset)
67        } else {
68            Err(MissingLocationError)
69        }
70    }
71
72    /// A span which points only to this location.
73    ///
74    /// Returns [`Span::MISSING`] if this location is missing.
75    #[inline]
76    pub fn to_span(self) -> Span {
77        Span {
78            start: self,
79            end: self,
80        }
81    }
82}
83impl PartialEq for Location {
84    #[inline]
85    fn eq(&self, other: &Self) -> bool {
86        self.byte_offset == other.byte_offset
87    }
88}
89impl Eq for Location {}
90impl PartialOrd for Location {
91    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
92        Some(self.cmp(other))
93    }
94}
95impl Ord for Location {
96    #[inline]
97    fn cmp(&self, other: &Self) -> Ordering {
98        const {
99            assert!(
100                Location::MISSING.byte_offset == u64::MAX,
101                "Integer ordering must match semantic ordering"
102            );
103        }
104        self.byte_offset.cmp(&other.byte_offset)
105    }
106}
107impl Display for Location {
108    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
109        match self.byte_offset() {
110            Ok(offset) => write!(f, "{offset}"),
111            Err(MissingLocationError) => f.write_str("?"),
112        }
113    }
114}
115impl Debug for Location {
116    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
117        if self.is_missing() {
118            f.write_str("Location::MISSING")
119        } else {
120            write!(f, "Location({self})")
121        }
122    }
123}
124impl Default for Location {
125    fn default() -> Self {
126        Location::MISSING
127    }
128}
129
130/// References a range of bytes in the original source file.
131///
132/// Can be [missing](Span::MISSING),
133/// in which case doesn't correspond to any actual portion of the source file.
134/// This is the [`Default`] value.
135///
136/// For the purposes of comparisons, all spans are considered equal to each other.
137/// This means that calling [`PartialEq::eq`] just returns `true`,
138/// calling [`Hash::hash`] does nothing
139/// and calling [`Ord::cmp`] always returns [`Ordering::Equal`].
140/// This means that adding a [`Span`] field to a type
141/// can never impact the derived implementations of these traits.
142#[derive(Copy, Clone)]
143pub struct Span {
144    // this is private in the hopes it can someday become a
145    // `pub byte_offsets: Option<std::range::Range<NonMaxU64>>`
146    // See `Location` for a similar  hope
147    start: Location,
148    end: Location,
149}
150impl Span {
151    /// Indicates that the span information is missing,
152    /// and doesn't correspond to a known portion of the source file.
153    pub const MISSING: Span = Span {
154        start: Location::MISSING,
155        end: Location::MISSING,
156    };
157
158    /// Create a span with the specified start and end locations.
159    ///
160    /// If either location is [missing], this will return [`Span::MISSING`].
161    ///
162    /// # Panics
163    /// If the start location comes after the end location, this function will panic.
164    /// If either location is [missing], this function is guaranteed to succeed.
165    ///
166    /// [missing]: Location::MISSING
167    #[inline]
168    #[track_caller]
169    pub fn new(start: Location, end: Location) -> Span {
170        // don't use assert! because missing locations will return false
171        assert!(start <= end, "start > end: {start} > {end}");
172        Span { start, end }
173    }
174
175    /// Create a span from the specified range of byte indexes.
176    ///
177    /// # Panics
178    /// This will panic if the start location comes after the end location,
179    /// just like [`Span::new`].
180    /// This will also panic if either offset overflows a [`Location`],
181    /// just like [`Location::from_byte`].
182    #[inline]
183    #[track_caller]
184    pub fn from_byte_range<T: sealed::PrimUInt>(range: Range<T>) -> Self {
185        assert!(
186            range.start <= range.end,
187            "start > end: {} > {}",
188            range.start,
189            range.end
190        );
191        Span {
192            start: Location::from_byte(range.start),
193            end: Location::from_byte(range.end),
194        }
195    }
196
197    /// Return a wrapper type that implements by-value equality
198    /// rather than having all values be equal.
199    #[inline]
200    pub fn eq(self) -> OrderedSpan {
201        OrderedSpan(self)
202    }
203
204    /// Return a wrapper type that implements by-value ordering
205    /// rather than having all values be equal.
206    #[inline]
207    pub fn ord(self) -> OrderedSpan {
208        OrderedSpan(self)
209    }
210
211    /// Check if the span is [`Self::MISSING`].
212    #[inline]
213    pub const fn is_missing(&self) -> bool {
214        self.start.is_missing() || self.end.is_missing()
215    }
216
217    /// Return the range of bytes in the original file,
218    /// or a [`MissingLocationError`] if missing.
219    #[inline]
220    pub fn byte_range(&self) -> Result<Range<u64>, MissingLocationError> {
221        Ok(self.start.byte_offset()?..self.end.byte_offset()?)
222    }
223
224    /// Returns the length of the span in bytes,
225    /// or a [`MissingLocationError`] if missing.
226    #[inline]
227    pub fn byte_len(&self) -> Result<u64, MissingLocationError> {
228        // NOTE: Overflow is impossible since end cannot be u64::MAX
229        Ok(self.end.byte_offset()? - self.start.byte_offset()?)
230    }
231
232    /// Slice the bytes indices this span, returning a subset of its indexes.
233    ///
234    /// If the original span is [missing](Span::MISSING), the result will be too.
235    ///
236    /// # Panics
237    /// Referencing an index that exceeds the byte length of this span will trigger a panic.
238    /// This will also panic If the start of the range exceeds the end of the range,
239    ///
240    /// # Examples
241    /// ```
242    /// use qbe_parser::ast::Span;
243    /// let x = Span::from_byte_range::<u32>(5..20);
244    /// assert_eq!(
245    ///     x.slice_byte_indexes(2..).byte_range(),
246    ///     Ok(7..20)
247    /// );
248    /// assert_eq!(
249    ///     x.slice_byte_indexes(3..5).byte_range(),
250    ///     Ok(8..10)
251    /// );
252    /// assert_eq!(
253    ///     x.slice_byte_indexes(3..=5).byte_range(),
254    ///     Ok(8..11)
255    /// );
256    /// ```
257    /// The following code will panic due to referencing an out-of-bounds indexes.
258    /// ```should_panic
259    /// use qbe_parser::ast::Span;
260    /// let x = Span::from_byte_range::<u32>(5..20);
261    /// assert_eq!(x.byte_len(), Ok(15));
262    /// x.slice_byte_indexes(100..); // panics
263    /// ```
264    #[track_caller]
265    pub fn slice_byte_indexes<R>(self, range: R) -> Self
266    where
267        R: RangeBounds<u64> + Debug,
268    {
269        let old_byte_range = match self.byte_range() {
270            Ok(range) => range,
271            Err(MissingLocationError) => return Self::MISSING,
272        };
273        let byte_len = self.byte_len().unwrap();
274        let start_offset = match range.start_bound() {
275            Bound::Included(&start) => Some(start),
276            Bound::Excluded(&start) => start.checked_add(1),
277            Bound::Unbounded => Some(0),
278        }
279        .unwrap_or_else(|| panic!("Start of range overflowed a Location: {range:?}"));
280        let end_offset = match range.end_bound() {
281            Bound::Included(&end) => end.checked_add(1),
282            Bound::Excluded(&end) => Some(end),
283            Bound::Unbounded => {
284                // We check `start <= end` first to avoid a `start_offset <= byte_len` check.
285                // A naive implementation returning `byte_len` when `end_bound = Bound::Unbounded`
286                // would give an inaccurate error message when `start_index` overflows the len.
287                // It would claim that issue is `start > end` when the real issue is `start > len`
288                //
289                // To avoid this we insert a `max` operation ,
290                // making the first check pass in this case but still failing the second check.
291                Some(byte_len.max(start_offset))
292            }
293        }
294        .unwrap_or_else(|| panic!("End of range overflowed a Location: {range:?}"));
295        // This catches a more serious issue than the bounds check does.
296        // A range with end >= start is always invalid,
297        // while a range with an index oob can in other contexts be valid.
298        assert!(
299            start_offset <= end_offset,
300            "Invalid range has start > end: {range:?}"
301        );
302        // Can omit `start_offset <= byte_len` check due to the above check
303        // that `start <= end` and our careful handling of `end == Unbounded`.
304        assert!(
305            end_offset <= byte_len,
306            "Range overflows {self:?} ({byte_len} bytes): {range:?}"
307        );
308        // NOTE: Overflow should not be possible here
309        let new_byte_range =
310            (old_byte_range.start + start_offset)..(old_byte_range.start + end_offset);
311        Self::from_byte_range(new_byte_range)
312    }
313
314    /// The starting location of the span (inclusive),
315    /// or [`Location::MISSING`] if missing.
316    #[inline]
317    pub fn start(&self) -> Location {
318        self.start
319    }
320
321    /// The ending location of the span (exclusive),
322    /// or [`Location::MISSING`] if missing.
323    #[inline]
324    pub fn end(&self) -> Location {
325        self.end
326    }
327}
328impl From<chumsky::span::SimpleSpan> for Span {
329    #[inline]
330    fn from(span: chumsky::span::SimpleSpan) -> Self {
331        Self::from_byte_range(span.into_range())
332    }
333}
334impl From<Location> for Span {
335    fn from(value: Location) -> Self {
336        value.to_span()
337    }
338}
339impl Display for Span {
340    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
341        match self.byte_range() {
342            Ok(Range { start, end }) => write!(f, "{start}..{end}"),
343            Err(MissingLocationError) => f.write_str("?"),
344        }
345    }
346}
347impl Debug for Span {
348    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
349        if f.alternate() {
350            // If we are doing alternate debug, omit the details from the span
351            // This way it doesn't show up in the diff of `similar_asserts::assert_eq`
352            f.debug_tuple("Span").finish_non_exhaustive()
353        } else if self.is_missing() {
354            f.write_str("Span::MISSING")
355        } else {
356            write!(f, "Span({self})")
357        }
358    }
359}
360impl Default for Span {
361    fn default() -> Self {
362        Span::MISSING
363    }
364}
365/// Unconditionally returns `true`,
366/// so that adding a Span to a struct will not affect `derive(Eq)`
367impl PartialEq for Span {
368    fn eq(&self, _other: &Span) -> bool {
369        // all spans are equal
370        true
371    }
372}
373impl Eq for Span {}
374/// Hashes the span.
375///
376/// This is guaranteed to do nothing,
377/// so adding it to a struct will not affect the result of `derive(Hash)`.
378impl Hash for Span {
379    fn hash<H: Hasher>(&self, state: &mut H) {
380        // does not do anything, per type guarantees
381        let _ = state;
382    }
383}
384impl PartialOrd for Span {
385    #[inline]
386    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
387        Some(self.cmp(other))
388    }
389}
390impl Ord for Span {
391    #[inline]
392    fn cmp(&self, _other: &Self) -> Ordering {
393        // all spans are equal
394        Ordering::Equal
395    }
396}
397impl From<OrderedSpan> for Span {
398    #[inline]
399    fn from(value: OrderedSpan) -> Self {
400        value.0
401    }
402}
403/// A wrapper around [`Span`] that properly implements [`Eq`] and [`Ord`]
404/// installed of always being [`Ordering::Equal`].
405///
406/// A [`Span::MISSING`] is considered greater than all other spans.
407#[derive(Debug, Copy, Clone)]
408pub struct OrderedSpan(pub Span);
409impl OrderedSpan {
410    /// The [`OrderedSpan`] corresponding to [`Span::MISSING`].
411    pub const MISSING: Self = OrderedSpan(Span::MISSING);
412}
413impl From<Span> for OrderedSpan {
414    #[inline]
415    fn from(span: Span) -> Self {
416        OrderedSpan(span)
417    }
418}
419impl Hash for OrderedSpan {
420    fn hash<H: Hasher>(&self, state: &mut H) {
421        match self.byte_range() {
422            Ok(o) => (o.start, o.end).hash(state),
423            Err(MissingLocationError) => state.write_u8(0),
424        }
425    }
426}
427impl PartialEq for OrderedSpan {
428    #[inline]
429    fn eq(&self, other: &Self) -> bool {
430        self.byte_range() == other.byte_range()
431    }
432}
433impl PartialEq<Span> for OrderedSpan {
434    fn eq(&self, other: &Span) -> bool {
435        self.byte_range() == other.byte_range()
436    }
437}
438impl Eq for OrderedSpan {}
439impl PartialOrd for OrderedSpan {
440    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
441        Some(self.cmp(other))
442    }
443}
444impl PartialOrd<Span> for OrderedSpan {
445    fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
446        Some(self.cmp(&other.ord()))
447    }
448}
449impl Ord for OrderedSpan {
450    fn cmp(&self, other: &Self) -> Ordering {
451        match (self.0.byte_range(), other.0.byte_range()) {
452            (Ok(this_range), Ok(other_range)) => this_range
453                .start
454                .cmp(&other_range.start)
455                .then_with(|| this_range.end.cmp(&other_range.end)),
456            (Err(_), Ok(_)) => Ordering::Greater,
457            (Ok(_), Err(_)) => Ordering::Less,
458            (Err(_), Err(_)) => Ordering::Equal,
459        }
460    }
461}
462impl Deref for OrderedSpan {
463    type Target = Span;
464    #[inline]
465    fn deref(&self) -> &Self::Target {
466        &self.0
467    }
468}
469impl chumsky::span::Span for Span {
470    type Context = ();
471    type Offset = ByteLocation;
472
473    fn new(_context: Self::Context, range: Range<Self::Offset>) -> Self {
474        Span::new(*range.start, *range.end)
475    }
476
477    #[inline]
478    fn context(&self) -> Self::Context {}
479
480    #[inline]
481    fn start(&self) -> Self::Offset {
482        ByteLocation(self.start)
483    }
484
485    #[inline]
486    fn end(&self) -> Self::Offset {
487        ByteLocation(self.end)
488    }
489}
490/// A wrapper around a [`Location`] that implements `From<usize>`.
491///
492/// The [`Location`] type itself doesn't implement `From<usize>`,
493/// because it wants to be absolutely clear that byte offsets are being used
494/// instead of than codepoint offsets.
495///
496/// Needed to implement [`chumsky::span::Span`] in a manner compatible with [`chumsky::input::MappedSpan`].
497#[derive(Copy, Clone, Debug, Eq, PartialEq)]
498pub struct ByteLocation(pub Location);
499impl Deref for ByteLocation {
500    type Target = Location;
501    #[inline]
502    fn deref(&self) -> &Self::Target {
503        &self.0
504    }
505}
506impl From<Location> for ByteLocation {
507    #[inline]
508    fn from(location: Location) -> Self {
509        ByteLocation(location)
510    }
511}
512impl From<usize> for ByteLocation {
513    #[inline]
514    fn from(value: usize) -> Self {
515        ByteLocation(Location::from_byte(value))
516    }
517}
518impl From<ByteLocation> for Location {
519    #[inline]
520    fn from(location: ByteLocation) -> Self {
521        location.0
522    }
523}
524impl From<ByteLocation> for Span {
525    fn from(location: ByteLocation) -> Self {
526        location.0.to_span()
527    }
528}
529
530/// A [`Location`] resolved into line/column information.
531#[derive(Copy, Clone)]
532pub struct ResolvedLocation {
533    original: Location,
534    resolved: LineCol,
535}
536impl ResolvedLocation {
537    pub const MISSING: ResolvedLocation = ResolvedLocation {
538        original: Location::MISSING,
539        resolved: LineCol { line: 0, col: 0 },
540    };
541
542    #[inline]
543    pub fn is_missing(&self) -> bool {
544        self.original.is_missing()
545    }
546    #[inline]
547    pub fn line(&self) -> Result<u64, MissingLocationError> {
548        if !self.is_missing() {
549            Ok(u64::from(self.resolved.line).strict_add(1))
550        } else {
551            Err(MissingLocationError)
552        }
553    }
554    #[inline]
555    pub fn column(&self) -> Result<u64, MissingLocationError> {
556        if !self.is_missing() {
557            Ok(self.resolved.col.into())
558        } else {
559            Err(MissingLocationError)
560        }
561    }
562    #[inline]
563    pub fn to_location(&self) -> Location {
564        self.original
565    }
566}
567#[allow(clippy::missing_fields_in_debug, reason = "intentional")]
568impl Debug for ResolvedLocation {
569    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
570        if self.is_missing() {
571            f.write_str("ResolvedLocation::MISSING")
572        } else {
573            f.debug_struct("ResolvedLocation")
574                .field("byte_offset", &self.original.byte_offset().unwrap())
575                .field("line", &self.line().unwrap())
576                .field("column", &self.column().unwrap())
577                .finish()
578        }
579    }
580}
581impl Display for ResolvedLocation {
582    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
583        if self.is_missing() {
584            f.write_str("ResolvedLocation::MISSING")
585        } else {
586            write!(f, "{}:{}", self.line().unwrap(), self.column().unwrap())
587        }
588    }
589}
590#[derive(Copy, Clone, Debug)]
591pub struct ResolvedSpan {
592    start: ResolvedLocation,
593    end: ResolvedLocation,
594}
595impl ResolvedSpan {
596    #[inline]
597    pub fn is_missing(&self) -> bool {
598        self.start.is_missing() || self.end.is_missing()
599    }
600    pub fn to_span(&self) -> Span {
601        Span::new(self.start.to_location(), self.end.to_location())
602    }
603}
604impl Display for ResolvedSpan {
605    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
606        if self.is_missing() {
607            f.write_str("ResolvedSpan::MISSING")
608        } else {
609            write!(f, "{}..", self.start)?;
610            if self.start.line() != self.end.line() {
611                write!(f, "{}:", self.end.line().unwrap())?;
612            }
613            write!(f, "{}", self.end.column().unwrap())
614        }
615    }
616}
617pub struct LocationIndex {
618    line_index: line_index::LineIndex,
619}
620impl LocationIndex {
621    pub fn new(text: &str) -> Self {
622        LocationIndex {
623            line_index: line_index::LineIndex::new(text),
624        }
625    }
626    pub fn resolve(&self, span: Span) -> Result<ResolvedSpan, LocationResolveError> {
627        Ok(ResolvedSpan {
628            start: self.resolve_location(span.start())?,
629            end: self.resolve_location(span.end())?,
630        })
631    }
632    pub fn resolve_location(
633        &self,
634        loc: Location,
635    ) -> Result<ResolvedLocation, LocationResolveError> {
636        let Ok(byte_offset) = loc.byte_offset() else {
637            return Ok(ResolvedLocation::MISSING);
638        };
639        let byte_offset = u32::try_from(byte_offset).map_err(|_| LocationResolveError {
640            location: loc,
641            _reason_overflowed_a_usize: (),
642        })?;
643        Ok(ResolvedLocation {
644            original: Location::from_byte(byte_offset),
645            resolved: self.line_index.line_col(TextSize::new(byte_offset)),
646        })
647    }
648}
649#[derive(Debug, thiserror::Error)]
650#[error("Cannot resolve {location}: overflowed a usize")]
651pub struct LocationResolveError {
652    location: Location,
653    _reason_overflowed_a_usize: (),
654}
655
656mod sealed {
657    use std::fmt::{Debug, Display};
658
659    /// An internal trait for unsigned primitive integers.
660    ///
661    /// Used for [`Location::from_byte`] and [`Span::from_byte_range`].
662    ///
663    /// Unlike [`num_traits::PrimInt`], this is is restricted to primitive integers only.
664    pub trait PrimUInt: Display + Debug + num_traits::PrimInt + num_traits::Unsigned {}
665    macro_rules! impl_prim_uints {
666        ($($target:ident),+ $(,)?) => {
667            $(impl PrimUInt for $target {})*
668        };
669    }
670    impl_prim_uints!(u8, u16, u32, u64, usize, u128);
671}