Skip to main content

rustledger_core/
span.rs

1//! Source location tracking.
2
3use serde::{Deserialize, Serialize};
4use std::fmt;
5use std::ops::Range;
6
/// A span in the source code, represented as a byte range.
///
/// The range is half-open: `start` is inclusive and `end` is exclusive,
/// so `Span { start: 3, end: 7 }` covers four bytes. Both fields are
/// public and the type can be built via struct literal, so nothing in
/// the type itself enforces `start <= end`.
///
/// # `#[non_exhaustive]` policy
///
/// Deliberately NOT `#[non_exhaustive]`, unlike
/// `rustledger_parser::{ParseResult, ParseError, ParseErrorKind}`.
/// `Span` is constructed via struct literal in hundreds of call sites
/// across the workspace (every parser rule, every test fixture, every
/// LSP/FFI/loader path that synthesizes a location). Marking it
/// non-exhaustive would force a workspace-wide migration to
/// [`Span::new`] for zero practical benefit — the struct has carried
/// the same two fields since the project's inception and there is no
/// realistic future field that would justify breaking that surface.
/// If a future need arises (e.g., `line: Option<u32>` for faster LSP
/// position lookups), the right move is to add a sibling type with
/// `non_exhaustive` rather than retrofit it onto `Span`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}
34
35impl Span {
36    /// The zero span (`0..0`). Used as the location for programmatically
37    /// synthesized values that have no source representation. Pair with
38    /// [`SYNTHESIZED_FILE_ID`] on the containing [`Spanned`] to make the
39    /// "no source" intent unambiguous.
40    ///
41    /// ```
42    /// use rustledger_core::Span;
43    /// assert_eq!(Span::ZERO, Span::new(0, 0));
44    /// assert!(Span::ZERO.is_empty());
45    /// ```
46    pub const ZERO: Self = Self { start: 0, end: 0 };
47
48    /// Create a new span.
49    #[must_use]
50    pub const fn new(start: usize, end: usize) -> Self {
51        Self { start, end }
52    }
53
54    /// Create a span from a range.
55    #[must_use]
56    pub const fn from_range(range: Range<usize>) -> Self {
57        Self {
58            start: range.start,
59            end: range.end,
60        }
61    }
62
63    /// Get the length of this span in bytes.
64    #[must_use]
65    pub const fn len(&self) -> usize {
66        self.end - self.start
67    }
68
69    /// Check if the span is empty.
70    #[must_use]
71    pub const fn is_empty(&self) -> bool {
72        self.start == self.end
73    }
74
75    /// Merge this span with another, returning a span that covers both.
76    #[must_use]
77    pub fn merge(&self, other: &Self) -> Self {
78        Self {
79            start: self.start.min(other.start),
80            end: self.end.max(other.end),
81        }
82    }
83
84    /// Get the source text for this span.
85    #[must_use]
86    pub fn text<'a>(&self, source: &'a str) -> &'a str {
87        &source[self.start..self.end]
88    }
89
90    /// Convert to a byte-offset `Range<usize>` for downstream span consumers.
91    #[must_use]
92    pub const fn into_range(self) -> Range<usize> {
93        self.start..self.end
94    }
95}
96
97impl From<Range<usize>> for Span {
98    fn from(range: Range<usize>) -> Self {
99        Self::from_range(range)
100    }
101}
102
103impl From<Span> for Range<usize> {
104    fn from(span: Span) -> Self {
105        span.start..span.end
106    }
107}
108
109impl fmt::Display for Span {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        write!(f, "{}..{}", self.start, self.end)
112    }
113}
114
/// Sentinel `file_id` indicating a directive was synthesized by a plugin
/// rather than parsed from a source file.
///
/// The value is `u16::MAX` (65,535), the largest number the `u16`
/// `file_id` field of [`Spanned`] can hold.
///
/// Regular source files get sequential IDs starting at 0 (see
/// `rustledger_loader::SourceMap::add_file`), so this sentinel is safely out
/// of the normal range. Code that formats error locations or looks up files
/// in a `SourceMap` should treat this as "no source location" and, where
/// appropriate, hint to the user that a plugin generated the directive.
///
/// See issue #896.
pub const SYNTHESIZED_FILE_ID: u16 = u16::MAX;
126
/// A value with an associated source location (span and file).
///
/// `PartialEq` / `Eq` / `Hash` are implemented manually to delegate to
/// the inner value only — two `Spanned<T>` values are considered equal
/// when their `T`s are equal, regardless of where they came from in
/// source. This matches the principle that "what" a value is should
/// not depend on where it lives. Consumers that genuinely need
/// location-sensitive equality compare `.span` and `.file_id`
/// explicitly.
///
/// Note: the rkyv-archived form (`ArchivedSpanned<T>`, present under the
/// `rkyv` feature) does **not** automatically receive `PartialEq` /
/// `Eq`. The host doesn't compare archived values today; if a future
/// code path needs to, add `rkyv(compare = (PartialEq))` to the derive
/// attribute below or hand-roll a manual impl on the archived type.
///
/// # `#[non_exhaustive]` policy
///
/// Deliberately NOT `#[non_exhaustive]`, for the same reason as
/// [`Span`]: it is constructed via struct literal in hundreds of
/// call sites and the field set is intentionally minimal and stable.
/// Add fields cautiously; if a new field is genuinely needed, prefer
/// a sibling/wrapper type over modifying this one in place.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
pub struct Spanned<T> {
    /// The value.
    pub value: T,
    /// The source span (byte offsets within the file).
    pub span: Span,
    /// The source file ID (index into `SourceMap`).
    /// Uses `u16` to minimize struct size (max 65,535 files).
    /// The value `u16::MAX` is reserved as [`SYNTHESIZED_FILE_ID`] and
    /// means "no source file".
    pub file_id: u16,
}
164
165impl<T> Spanned<T> {
166    /// Create a new spanned value with `file_id` defaulting to 0.
167    ///
168    /// Use `with_file_id` to set the correct file ID after creation.
169    #[must_use]
170    pub const fn new(value: T, span: Span) -> Self {
171        Self {
172            value,
173            span,
174            file_id: 0,
175        }
176    }
177
178    /// Wrap a value that was programmatically synthesized (no source
179    /// representation). Uses [`Span::ZERO`] and [`SYNTHESIZED_FILE_ID`]
180    /// so downstream consumers can detect "no source" without sentinel
181    /// checks on the inner value's fields.
182    ///
183    /// Used by plugin-synthesized AST nodes, test fixtures, CLI commands
184    /// that build directives in-memory, and any other producer that does
185    /// not parse from source bytes.
186    #[must_use]
187    pub const fn synthesized(value: T) -> Self {
188        Self {
189            value,
190            span: Span::ZERO,
191            file_id: SYNTHESIZED_FILE_ID,
192        }
193    }
194
195    /// Set the file ID for this spanned value.
196    ///
197    /// Accepts `usize` for API convenience but stores as `u16` internally.
198    ///
199    /// # Panics
200    ///
201    /// Debug builds will panic if `file_id` exceeds `u16::MAX` (65,535).
202    #[must_use]
203    pub fn with_file_id(mut self, file_id: usize) -> Self {
204        debug_assert!(
205            u16::try_from(file_id).is_ok(),
206            "file_id {} exceeds u16::MAX; at most {} files are supported",
207            file_id,
208            u16::MAX
209        );
210        self.file_id = file_id as u16;
211        self
212    }
213
214    /// Map the inner value, preserving span and `file_id`.
215    #[must_use]
216    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Spanned<U> {
217        Spanned {
218            value: f(self.value),
219            span: self.span,
220            file_id: self.file_id,
221        }
222    }
223
224    /// Get a reference to the inner value.
225    #[must_use]
226    pub const fn inner(&self) -> &T {
227        &self.value
228    }
229
230    /// Unwrap the spanned value, discarding the span and `file_id`.
231    #[must_use]
232    pub fn into_inner(self) -> T {
233        self.value
234    }
235}
236
237impl<T: fmt::Display> fmt::Display for Spanned<T> {
238    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239        write!(f, "{}", self.value)
240    }
241}
242
243impl<T: PartialEq> PartialEq for Spanned<T> {
244    fn eq(&self, other: &Self) -> bool {
245        self.value == other.value
246    }
247}
248
249impl<T: Eq> Eq for Spanned<T> {}
250
251impl<T: std::hash::Hash> std::hash::Hash for Spanned<T> {
252    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
253        self.value.hash(state);
254    }
255}
256
257/// `Spanned<T>` is a transparent wrapper that adds source location to a
258/// value. Following the convention used by other transparent wrappers in
259/// the standard library (`Box<T>`, `Rc<T>`, `Cow<'_, T>`, `MutexGuard<T>`),
260/// it implements `Deref` so callers can read inner fields and call inner
261/// methods without spelling `.value` everywhere. Consumers that genuinely
262/// need to inspect the source location reach for `.span`, `.file_id`, or
263/// `.value` (for ownership) explicitly.
264impl<T> std::ops::Deref for Spanned<T> {
265    type Target = T;
266
267    fn deref(&self) -> &T {
268        &self.value
269    }
270}
271
272impl<T> std::ops::DerefMut for Spanned<T> {
273    fn deref_mut(&mut self) -> &mut T {
274        &mut self.value
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// `Span::new` stores both offsets exactly as given.
    #[test]
    fn test_span_new() {
        let Span { start, end } = Span::new(10, 20);
        assert_eq!(start, 10);
        assert_eq!(end, 20);
    }

    /// `Span::from_range` copies the range bounds.
    #[test]
    fn test_span_from_range() {
        let Span { start, end } = Span::from_range(5..15);
        assert_eq!(start, 5);
        assert_eq!(end, 15);
    }

    #[test]
    fn test_span_len() {
        assert_eq!(Span::new(10, 25).len(), 15);
    }

    #[test]
    fn test_span_is_empty() {
        assert!(Span::new(5, 5).is_empty());
        assert!(!Span::new(5, 10).is_empty());
    }

    #[test]
    fn test_span_merge() {
        let base = Span::new(10, 20);

        // Overlapping neighbour to the right.
        let overlapping = base.merge(&Span::new(15, 30));
        assert_eq!(overlapping.start, 10);
        assert_eq!(overlapping.end, 30);

        // Disjoint neighbour to the left: the gap is covered too.
        let disjoint = base.merge(&Span::new(5, 8));
        assert_eq!(disjoint.start, 5);
        assert_eq!(disjoint.end, 20);
    }

    #[test]
    fn test_span_text() {
        let source = "hello world";
        assert_eq!(Span::new(0, 5).text(source), "hello");
        assert_eq!(Span::new(6, 11).text(source), "world");
    }

    #[test]
    fn test_span_into_range() {
        let range: Range<usize> = Span::new(3, 7).into_range();
        assert_eq!(range, 3..7);
    }

    #[test]
    fn test_span_from_impl() {
        let span: Span = (5..10).into();
        assert_eq!((span.start, span.end), (5, 10));
    }

    #[test]
    fn test_range_from_span() {
        let range: Range<usize> = Span::new(2, 8).into();
        assert_eq!(range, 2..8);
    }

    #[test]
    fn test_span_display() {
        assert_eq!(Span::new(10, 20).to_string(), "10..20");
    }

    #[test]
    fn test_spanned_new() {
        let location = Span::new(0, 5);
        let spanned = Spanned::new("value", location);
        assert_eq!(spanned.value, "value");
        assert_eq!(spanned.span, location);
    }

    #[test]
    fn test_spanned_map() {
        let doubled = Spanned::new(5, Span::new(0, 1)).map(|x| x * 2);
        assert_eq!(doubled.value, 10);
        assert_eq!(doubled.span, Span::new(0, 1));
    }

    #[test]
    fn test_spanned_inner() {
        let spanned = Spanned::new("test", Span::new(0, 4));
        assert_eq!(*spanned.inner(), "test");
    }

    #[test]
    fn test_spanned_into_inner() {
        let owned = Spanned::new(String::from("owned"), Span::new(0, 5)).into_inner();
        assert_eq!(owned, "owned");
    }

    #[test]
    fn test_spanned_display() {
        let spanned = Spanned::new(42, Span::new(0, 2));
        assert_eq!(spanned.to_string(), "42");
    }

    #[test]
    fn test_spanned_with_file_id() {
        let location = Span::new(0, 5);
        let spanned = Spanned::new("value", location).with_file_id(3);
        assert_eq!(spanned.value, "value");
        assert_eq!(spanned.span, location);
        assert_eq!(spanned.file_id, 3);
    }

    #[test]
    fn test_spanned_eq_ignores_location() {
        // Equality and hashing on Spanned<T> look at the inner value
        // only: identical content at different source locations compares
        // equal. Location-sensitive callers compare .span / .file_id
        // themselves.
        use std::collections::HashSet;

        let a = Spanned::new("x", Span::new(0, 1)).with_file_id(0);
        let b = Spanned::new("x", Span::new(100, 200)).with_file_id(7);
        let c = Spanned::new("y", Span::new(0, 1)).with_file_id(0);

        assert_eq!(a, b, "different locations, same value → equal");
        assert_ne!(a, c, "same location, different value → not equal");

        let set: HashSet<Spanned<&str>> = [a, b].into_iter().collect();
        assert_eq!(set.len(), 1, "Hash also delegates to inner value");
    }

    #[test]
    fn test_span_zero_constant() {
        let zero = Span::ZERO;
        assert_eq!(zero, Span::new(0, 0));
        assert!(zero.is_empty());
    }

    #[test]
    fn test_spanned_synthesized_uses_synth_file_id_and_zero_span() {
        // Values built in code carry Span::ZERO + SYNTHESIZED_FILE_ID, so
        // "no source" is detectable without inspecting the inner value.
        let Spanned { span, file_id, .. } = Spanned::synthesized("anything");
        assert_eq!(span, Span::ZERO);
        assert_eq!(file_id, SYNTHESIZED_FILE_ID);
    }

    /// `ShiftSpans` on a `Spanned<T>` shifts the outer span AND
    /// recurses into the inner value. Pins the contract that
    /// compound type impls inherit shifting via their fields'
    /// `ShiftSpans` impls.
    #[test]
    fn test_shift_spans_recurses_through_spanned() {
        let shift_by_three = |s: &mut Span| {
            s.start += 3;
            s.end += 3;
        };

        let mut sp = Spanned::new(Span::new(10, 20), Span::new(100, 200));
        sp.shift_spans(&shift_by_three);

        // The wrapper's own span moved.
        assert_eq!(sp.span, Span::new(103, 203));
        // The wrapped Span moved as well, through Span's own impl.
        assert_eq!(sp.value, Span::new(13, 23));
    }
}
455
456/// Shift every `Span` reachable inside `self` by applying `shift`.
457///
458/// Used by the parser at the public `parse()` boundary to map
459/// inner-parser spans (in BOM-stripped coordinates) back to the
460/// caller's frame when a leading BOM was stripped before
461/// tokenization.
462///
463/// **Architectural discipline (round-18).** Pre-round-18, span
464/// shifting was a single monolithic function in the parser that did
465/// named-field destructure on every `Directive` variant. That caught
466/// added fields but missed added Spanned-bearing VARIANTS of a nested
467/// type (e.g., a future `MetaValue::String(Spanned<String>)` would
468/// silently bypass shifting because the destructure binds `meta: _`).
469/// Round 18 propagates the discipline into the type system: every
470/// type reachable from `Directive` either implements `ShiftSpans` to
471/// delegate into its fields (compound types) or implements it as a
472/// no-op (leaf types with no spans). Adding a new field or new
473/// Spanned-bearing variant requires updating the type's own impl —
474/// the parser's shift call doesn't change.
475///
476/// Implementors must recurse into every field that COULD contain
477/// (transitively) a Span. The provided impls for `Vec<T>`,
478/// `Option<T>`, `Box<T>`, and `Spanned<T>` handle the common
479/// compound shapes; concrete leaf types handle themselves.
480pub trait ShiftSpans {
481    /// Apply `shift` to every `Span` reachable in `self`.
482    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F);
483}
484
485impl ShiftSpans for Span {
486    // `clippy::use_self` would suggest `&mut Self` in the closure
487    // bound, but the trait's `F: Fn(&mut Span)` requires the literal
488    // type — substituting Self in the impl breaks bound matching.
489    #[allow(clippy::use_self, reason = "trait bound names the literal type Span")]
490    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
491        shift(self);
492    }
493}
494
495impl<T: ShiftSpans> ShiftSpans for Spanned<T> {
496    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
497        shift(&mut self.span);
498        self.value.shift_spans(shift);
499    }
500}
501
502impl<T: ShiftSpans> ShiftSpans for Vec<T> {
503    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
504        for item in self {
505            item.shift_spans(shift);
506        }
507    }
508}
509
510impl<T: ShiftSpans> ShiftSpans for Option<T> {
511    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
512        if let Some(v) = self {
513            v.shift_spans(shift);
514        }
515    }
516}
517
518impl<T: ShiftSpans + ?Sized> ShiftSpans for Box<T> {
519    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
520        (**self).shift_spans(shift);
521    }
522}
523
/// Helper macro for declaring `ShiftSpans` no-op impls on leaf types.
///
/// Accepts a comma-separated list of types (trailing comma allowed) and
/// emits one empty `ShiftSpans` impl per type.
///
/// A macro is used instead of a blanket `impl<T: NoSpans> ShiftSpans
/// for T` so that every "this type has no spans" decision is explicit
/// and grep-able. A contributor who later adds a `Spanned<U>` field to
/// one of these types will see the no-op impl and must choose between
/// keeping it (silently no-op) or replacing it with a recursing impl.
/// A blanket impl keyed on a marker trait would hide that decision
/// behind a single marker impl.
#[macro_export]
macro_rules! impl_shift_spans_noop {
    ($($leaf:ty),* $(,)?) => {
        $(
            impl $crate::ShiftSpans for $leaf {
                #[inline]
                fn shift_spans<F>(&mut self, _shift: &F)
                where
                    F: Fn(&mut $crate::Span),
                {
                }
            }
        )*
    };
}
544
// No-op impls for the primitive-ish leaf types that appear in
// directive payloads but never carry Span values themselves.
// Covers `String`, `bool`, and the 8- to 64-bit and pointer-sized
// integers; any other leaf type needs its own `ShiftSpans` impl.
impl_shift_spans_noop!(
    String, bool, u8, u16, u32, u64, i8, i16, i32, i64, usize, isize,
);