rustledger_core/span.rs
1//! Source location tracking.
2
3use serde::{Deserialize, Serialize};
4use std::fmt;
5use std::ops::Range;
6
/// A span in the source code, represented as a half-open byte range
/// (`start..end`).
///
/// Both fields are public and [`Span::new`] performs no validation, so the
/// type itself does not enforce `start <= end`.
///
/// # `#[non_exhaustive]` policy
///
/// Deliberately NOT `#[non_exhaustive]`, unlike
/// `rustledger_parser::{ParseResult, ParseError, ParseErrorKind}`.
/// `Span` is constructed via struct literal in hundreds of call sites
/// across the workspace (every parser rule, every test fixture, every
/// LSP/FFI/loader path that synthesizes a location). Marking it
/// non-exhaustive would force a workspace-wide migration to
/// [`Span::new`] for zero practical benefit — the struct has carried
/// the same two fields since the project's inception and there is no
/// realistic future field that would justify breaking that surface.
/// If a future need arises (e.g., `line: Option<u32>` for faster LSP
/// position lookups), the right move is to add a sibling type with
/// `non_exhaustive` rather than retrofit it onto `Span`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}
34
35impl Span {
36 /// The zero span (`0..0`). Used as the location for programmatically
37 /// synthesized values that have no source representation. Pair with
38 /// [`SYNTHESIZED_FILE_ID`] on the containing [`Spanned`] to make the
39 /// "no source" intent unambiguous.
40 ///
41 /// ```
42 /// use rustledger_core::Span;
43 /// assert_eq!(Span::ZERO, Span::new(0, 0));
44 /// assert!(Span::ZERO.is_empty());
45 /// ```
46 pub const ZERO: Self = Self { start: 0, end: 0 };
47
48 /// Create a new span.
49 #[must_use]
50 pub const fn new(start: usize, end: usize) -> Self {
51 Self { start, end }
52 }
53
54 /// Create a span from a range.
55 #[must_use]
56 pub const fn from_range(range: Range<usize>) -> Self {
57 Self {
58 start: range.start,
59 end: range.end,
60 }
61 }
62
63 /// Get the length of this span in bytes.
64 #[must_use]
65 pub const fn len(&self) -> usize {
66 self.end - self.start
67 }
68
69 /// Check if the span is empty.
70 #[must_use]
71 pub const fn is_empty(&self) -> bool {
72 self.start == self.end
73 }
74
75 /// Merge this span with another, returning a span that covers both.
76 #[must_use]
77 pub fn merge(&self, other: &Self) -> Self {
78 Self {
79 start: self.start.min(other.start),
80 end: self.end.max(other.end),
81 }
82 }
83
84 /// Get the source text for this span.
85 #[must_use]
86 pub fn text<'a>(&self, source: &'a str) -> &'a str {
87 &source[self.start..self.end]
88 }
89
90 /// Convert to a byte-offset `Range<usize>` for downstream span consumers.
91 #[must_use]
92 pub const fn into_range(self) -> Range<usize> {
93 self.start..self.end
94 }
95}
96
97impl From<Range<usize>> for Span {
98 fn from(range: Range<usize>) -> Self {
99 Self::from_range(range)
100 }
101}
102
103impl From<Span> for Range<usize> {
104 fn from(span: Span) -> Self {
105 span.start..span.end
106 }
107}
108
109impl fmt::Display for Span {
110 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111 write!(f, "{}..{}", self.start, self.end)
112 }
113}
114
/// Sentinel `file_id` indicating a directive was synthesized by a plugin
/// rather than parsed from a source file.
///
/// Regular source files get sequential IDs starting at 0 (see
/// `rustledger_loader::SourceMap::add_file`), so this sentinel is safely out
/// of the normal range. Code that formats error locations or looks up files
/// in a `SourceMap` should treat this as "no source location" and, where
/// appropriate, hint to the user that a plugin generated the directive.
///
/// Note that [`Spanned::with_file_id`] accepts any value that fits in a
/// `u16`, including 65,535, so a real file assigned that ID would be
/// indistinguishable from this sentinel.
///
/// See issue #896.
pub const SYNTHESIZED_FILE_ID: u16 = u16::MAX;
126
/// A value with an associated source location (span and file).
///
/// `PartialEq` / `Eq` / `Hash` are implemented manually to delegate to
/// the inner value only — two `Spanned<T>` values are considered equal
/// when their `T`s are equal, regardless of where they came from in
/// source. This matches the principle that "what" a value is should
/// not depend on where it lives. Consumers that genuinely need
/// location-sensitive equality compare `.span` and `.file_id`
/// explicitly.
///
/// Note: the rkyv-archived form (`ArchivedSpanned<T>`, present under the
/// `rkyv` feature) does **not** automatically receive `PartialEq` /
/// `Eq`. The host doesn't compare archived values today; if a future
/// code path needs to, add `rkyv(compare = (PartialEq))` to the derive
/// attribute below or hand-roll a manual impl on the archived type.
///
/// # `#[non_exhaustive]` policy
///
/// Deliberately NOT `#[non_exhaustive]`, for the same reason as
/// [`Span`]: it is constructed via struct literal in hundreds of
/// call sites and the field set is intentionally minimal and stable.
/// Add fields cautiously; if a new field is genuinely needed, prefer
/// a sibling/wrapper type over modifying this one in place.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
pub struct Spanned<T> {
    /// The value.
    pub value: T,
    /// The source span (byte offsets within the file).
    pub span: Span,
    /// The source file ID (index into `SourceMap`).
    /// Uses `u16` to minimize struct size (max 65,535 files).
    /// Holds [`SYNTHESIZED_FILE_ID`] for values built with
    /// [`Spanned::synthesized`].
    pub file_id: u16,
}
164
165impl<T> Spanned<T> {
166 /// Create a new spanned value with `file_id` defaulting to 0.
167 ///
168 /// Use `with_file_id` to set the correct file ID after creation.
169 #[must_use]
170 pub const fn new(value: T, span: Span) -> Self {
171 Self {
172 value,
173 span,
174 file_id: 0,
175 }
176 }
177
178 /// Wrap a value that was programmatically synthesized (no source
179 /// representation). Uses [`Span::ZERO`] and [`SYNTHESIZED_FILE_ID`]
180 /// so downstream consumers can detect "no source" without sentinel
181 /// checks on the inner value's fields.
182 ///
183 /// Used by plugin-synthesized AST nodes, test fixtures, CLI commands
184 /// that build directives in-memory, and any other producer that does
185 /// not parse from source bytes.
186 #[must_use]
187 pub const fn synthesized(value: T) -> Self {
188 Self {
189 value,
190 span: Span::ZERO,
191 file_id: SYNTHESIZED_FILE_ID,
192 }
193 }
194
195 /// Set the file ID for this spanned value.
196 ///
197 /// Accepts `usize` for API convenience but stores as `u16` internally.
198 ///
199 /// # Panics
200 ///
201 /// Debug builds will panic if `file_id` exceeds `u16::MAX` (65,535).
202 #[must_use]
203 pub fn with_file_id(mut self, file_id: usize) -> Self {
204 debug_assert!(
205 u16::try_from(file_id).is_ok(),
206 "file_id {} exceeds u16::MAX; at most {} files are supported",
207 file_id,
208 u16::MAX
209 );
210 self.file_id = file_id as u16;
211 self
212 }
213
214 /// Map the inner value, preserving span and `file_id`.
215 #[must_use]
216 pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Spanned<U> {
217 Spanned {
218 value: f(self.value),
219 span: self.span,
220 file_id: self.file_id,
221 }
222 }
223
224 /// Get a reference to the inner value.
225 #[must_use]
226 pub const fn inner(&self) -> &T {
227 &self.value
228 }
229
230 /// Unwrap the spanned value, discarding the span and `file_id`.
231 #[must_use]
232 pub fn into_inner(self) -> T {
233 self.value
234 }
235}
236
237impl<T: fmt::Display> fmt::Display for Spanned<T> {
238 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239 write!(f, "{}", self.value)
240 }
241}
242
243impl<T: PartialEq> PartialEq for Spanned<T> {
244 fn eq(&self, other: &Self) -> bool {
245 self.value == other.value
246 }
247}
248
249impl<T: Eq> Eq for Spanned<T> {}
250
251impl<T: std::hash::Hash> std::hash::Hash for Spanned<T> {
252 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
253 self.value.hash(state);
254 }
255}
256
257/// `Spanned<T>` is a transparent wrapper that adds source location to a
258/// value. Following the convention used by other transparent wrappers in
259/// the standard library (`Box<T>`, `Rc<T>`, `Cow<'_, T>`, `MutexGuard<T>`),
260/// it implements `Deref` so callers can read inner fields and call inner
261/// methods without spelling `.value` everywhere. Consumers that genuinely
262/// need to inspect the source location reach for `.span`, `.file_id`, or
263/// `.value` (for ownership) explicitly.
264impl<T> std::ops::Deref for Spanned<T> {
265 type Target = T;
266
267 fn deref(&self) -> &T {
268 &self.value
269 }
270}
271
272impl<T> std::ops::DerefMut for Spanned<T> {
273 fn deref_mut(&mut self) -> &mut T {
274 &mut self.value
275 }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    // ----- Span -----

    #[test]
    fn test_span_new() {
        let Span { start, end } = Span::new(10, 20);
        assert_eq!(start, 10);
        assert_eq!(end, 20);
    }

    #[test]
    fn test_span_from_range() {
        let Span { start, end } = Span::from_range(5..15);
        assert_eq!(start, 5);
        assert_eq!(end, 15);
    }

    #[test]
    fn test_span_len() {
        assert_eq!(Span::new(10, 25).len(), 15);
    }

    #[test]
    fn test_span_is_empty() {
        assert!(Span::new(5, 5).is_empty());
        assert!(!Span::new(5, 10).is_empty());
    }

    #[test]
    fn test_span_merge() {
        let base = Span::new(10, 20);

        // Overlapping spans: the union runs from the lower start to the higher end.
        let overlapping = base.merge(&Span::new(15, 30));
        assert_eq!((overlapping.start, overlapping.end), (10, 30));

        // Non-overlapping spans: the gap between them is covered as well.
        let disjoint = base.merge(&Span::new(5, 8));
        assert_eq!((disjoint.start, disjoint.end), (5, 20));
    }

    #[test]
    fn test_span_text() {
        let source = "hello world";
        assert_eq!(Span::new(0, 5).text(source), "hello");
        assert_eq!(Span::new(6, 11).text(source), "world");
    }

    #[test]
    fn test_span_into_range() {
        let range: Range<usize> = Span::new(3, 7).into_range();
        assert_eq!(range, 3..7);
    }

    #[test]
    fn test_span_from_impl() {
        let Span { start, end } = Span::from(5..10);
        assert_eq!(start, 5);
        assert_eq!(end, 10);
    }

    #[test]
    fn test_range_from_span() {
        let range = Range::<usize>::from(Span::new(2, 8));
        assert_eq!(range, 2..8);
    }

    #[test]
    fn test_span_display() {
        assert_eq!(Span::new(10, 20).to_string(), "10..20");
    }

    // ----- Spanned -----

    #[test]
    fn test_spanned_new() {
        let Spanned { value, span, .. } = Spanned::new("value", Span::new(0, 5));
        assert_eq!(value, "value");
        assert_eq!(span, Span::new(0, 5));
    }

    #[test]
    fn test_spanned_map() {
        let doubled = Spanned::new(5, Span::new(0, 1)).map(|n| n * 2);
        assert_eq!(doubled.value, 10);
        assert_eq!(doubled.span, Span::new(0, 1));
    }

    #[test]
    fn test_spanned_inner() {
        let wrapped = Spanned::new("test", Span::new(0, 4));
        assert_eq!(*wrapped.inner(), "test");
    }

    #[test]
    fn test_spanned_into_inner() {
        let owned: String = Spanned::new(String::from("owned"), Span::new(0, 5)).into_inner();
        assert_eq!(owned, "owned");
    }

    #[test]
    fn test_spanned_display() {
        let wrapped = Spanned::new(42, Span::new(0, 2));
        assert_eq!(format!("{wrapped}"), "42");
    }

    #[test]
    fn test_spanned_with_file_id() {
        let Spanned {
            value,
            span,
            file_id,
        } = Spanned::new("value", Span::new(0, 5)).with_file_id(3);
        assert_eq!(value, "value");
        assert_eq!(span, Span::new(0, 5));
        assert_eq!(file_id, 3);
    }

    #[test]
    fn test_spanned_eq_ignores_location() {
        // `Spanned<T>` compares and hashes by its inner value alone, so the
        // same content at two different source locations is one value.
        // Location-sensitive callers compare `.span` / `.file_id` themselves.
        use std::collections::HashSet;
        let original = Spanned::new("x", Span::new(0, 1)).with_file_id(0);
        let relocated = Spanned::new("x", Span::new(100, 200)).with_file_id(7);
        let different = Spanned::new("y", Span::new(0, 1)).with_file_id(0);
        assert_eq!(original, relocated, "different locations, same value → equal");
        assert_ne!(original, different, "same location, different value → not equal");
        let set: HashSet<Spanned<&str>> = vec![original, relocated].into_iter().collect();
        assert_eq!(set.len(), 1, "Hash also delegates to inner value");
    }

    #[test]
    fn test_span_zero_constant() {
        let zero = Span::ZERO;
        assert_eq!(zero, Span::new(0, 0));
        assert!(zero.is_empty());
    }

    #[test]
    fn test_spanned_synthesized_uses_synth_file_id_and_zero_span() {
        // Values built in code carry `Span::ZERO` and `SYNTHESIZED_FILE_ID`,
        // which lets consumers recognise "no source" without inspecting the
        // inner value.
        let Spanned { span, file_id, .. } = Spanned::synthesized("anything");
        assert_eq!(span, Span::ZERO);
        assert_eq!(file_id, SYNTHESIZED_FILE_ID);
    }

    /// Shifting a `Spanned<T>` moves its own span AND recurses into the
    /// wrapped value. This pins the contract that compound types get
    /// shifting through the `ShiftSpans` impls of their fields.
    #[test]
    fn test_shift_spans_recurses_through_spanned() {
        let shift_by_three = |s: &mut Span| {
            s.start += 3;
            s.end += 3;
        };
        let mut nested = Spanned::new(Span::new(10, 20), Span::new(100, 200));
        nested.shift_spans(&shift_by_three);
        // The outer location moved...
        assert_eq!(nested.span, Span::new(103, 203));
        // ...and so did the inner `Span` value, through `Span`'s own impl.
        assert_eq!(nested.value, Span::new(13, 23));
    }
}
455
/// Shift every `Span` reachable inside `self` by applying `shift`.
///
/// Used by the parser at the public `parse()` boundary to map
/// inner-parser spans (in BOM-stripped coordinates) back to the
/// caller's frame when a leading BOM was stripped before
/// tokenization.
///
/// **Architectural discipline (round-18).** Pre-round-18, span
/// shifting was a single monolithic function in the parser that did
/// named-field destructure on every `Directive` variant. That caught
/// added fields but missed added Spanned-bearing VARIANTS of a nested
/// type (e.g., a future `MetaValue::String(Spanned<String>)` would
/// silently bypass shifting because the destructure binds `meta: _`).
/// Round 18 propagates the discipline into the type system: every
/// type reachable from `Directive` either implements `ShiftSpans` to
/// delegate into its fields (compound types) or implements it as a
/// no-op (leaf types with no spans). Adding a new field or new
/// Spanned-bearing variant requires updating the type's own impl —
/// the parser's shift call doesn't change.
///
/// Implementors must recurse into every field that COULD contain
/// (transitively) a Span. The provided impls for `Vec<T>`,
/// `Option<T>`, `Box<T>`, and `Spanned<T>` handle the common
/// compound shapes; concrete leaf types handle themselves.
pub trait ShiftSpans {
    /// Apply `shift` to every `Span` reachable in `self`.
    ///
    /// `shift` receives each span by mutable reference and edits it in
    /// place.
    fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F);
}
484
485impl ShiftSpans for Span {
486 // `clippy::use_self` would suggest `&mut Self` in the closure
487 // bound, but the trait's `F: Fn(&mut Span)` requires the literal
488 // type — substituting Self in the impl breaks bound matching.
489 #[allow(clippy::use_self, reason = "trait bound names the literal type Span")]
490 fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
491 shift(self);
492 }
493}
494
495impl<T: ShiftSpans> ShiftSpans for Spanned<T> {
496 fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
497 shift(&mut self.span);
498 self.value.shift_spans(shift);
499 }
500}
501
502impl<T: ShiftSpans> ShiftSpans for Vec<T> {
503 fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
504 for item in self {
505 item.shift_spans(shift);
506 }
507 }
508}
509
510impl<T: ShiftSpans> ShiftSpans for Option<T> {
511 fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
512 if let Some(v) = self {
513 v.shift_spans(shift);
514 }
515 }
516}
517
518impl<T: ShiftSpans + ?Sized> ShiftSpans for Box<T> {
519 fn shift_spans<F: Fn(&mut Span)>(&mut self, shift: &F) {
520 (**self).shift_spans(shift);
521 }
522}
523
/// Declares no-op `ShiftSpans` impls for leaf types that carry no spans.
///
/// Takes a comma-separated list of types (a trailing comma is allowed) and
/// emits one empty impl per type.
///
/// A macro is used instead of a blanket `impl<T: NoSpans> ShiftSpans for T`
/// so that every "this type has no spans" decision is written out and easy
/// to grep for. A contributor who later adds a `Spanned<U>` field to one of
/// these types will run into the no-op impl and must decide whether to keep
/// it (silently no-op) or replace it with a recursing impl. A blanket impl
/// behind a marker trait would hide that decision in a single marker impl.
#[macro_export]
macro_rules! impl_shift_spans_noop {
    ($($leaf:ty),* $(,)?) => {
        $(
            impl $crate::ShiftSpans for $leaf {
                #[inline]
                fn shift_spans<F>(&mut self, _shift: &F)
                where
                    F: Fn(&mut $crate::Span),
                {
                    // Leaf type: there is nothing to shift.
                }
            }
        )*
    };
}
544
// No-op impls for the primitive-ish leaf types that appear in
// directive payloads but never carry Span values themselves.
// This list covers `String`, `bool`, and the 8- to 64-bit and
// pointer-sized integers.
impl_shift_spans_noop!(
    String, bool, u8, u16, u32, u64, i8, i16, i32, i64, usize, isize,
);