Skip to main content

marque_ism/
span.rs

// SPDX-FileCopyrightText: 2026 Knitli Inc.
//
// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0

//! Byte-offset spans into source buffers — zero-copy position tracking.
6
/// A half-open byte range `start..end` into the original source buffer.
///
/// A `Span` stores offsets only: it never owns or copies data, and the bytes
/// are always borrowed from the buffer handed to the accessor methods.
///
/// Both fields are public, so a struct literal can produce an inverted span
/// (`start > end`) that [`Span::new`] would have rejected. The accessors are
/// written so that such a span behaves the same in debug and release builds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset of the first byte covered by the span (inclusive).
    pub start: usize,
    /// Offset one past the last byte covered by the span (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span covering `start..end`.
    ///
    /// # Panics
    ///
    /// Panics in both debug and release builds if `start > end`, because
    /// such a span will inevitably panic later at slice time and the early
    /// panic gives a better error message.
    #[inline]
    pub fn new(start: usize, end: usize) -> Self {
        assert!(
            start <= end,
            "Span::new: start ({start}) must not exceed end ({end})"
        );
        Self { start, end }
    }

    /// Number of bytes covered by the span.
    ///
    /// Returns `0` for an inverted span built through the public fields.
    /// A plain subtraction would panic on overflow in debug builds and wrap
    /// to a huge value in release builds, so the subtraction saturates.
    #[inline]
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Whether the span covers no bytes, i.e. `self.len() == 0`.
    ///
    /// Defined through [`Span::len`] so the two can never disagree, even
    /// for an inverted span.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Borrow the span's bytes from `source`.
    ///
    /// # Panics
    ///
    /// Panics if the span is out of bounds for `source` (or inverted) —
    /// use [`Span::try_as_slice`] when the caller cannot guarantee bounds.
    #[inline]
    pub fn as_slice<'a>(&self, source: &'a [u8]) -> &'a [u8] {
        &source[self.start..self.end]
    }

    /// Borrow the span's bytes from `source`, returning `None` if the
    /// span lies outside the buffer (or is inverted) instead of panicking.
    #[inline]
    pub fn try_as_slice<'a>(&self, source: &'a [u8]) -> Option<&'a [u8]> {
        source.get(self.start..self.end)
    }

    /// Extract the spanned bytes as a UTF-8 string slice.
    ///
    /// Callers that know the source is ASCII can use `.unwrap()` in tests
    /// or `.expect("...")` with context.
    ///
    /// # Errors
    ///
    /// Returns `Err` if the span does not cover valid UTF-8.
    ///
    /// # Panics
    ///
    /// Panics if the span is out of bounds for `source`, because the bytes
    /// are fetched with [`Span::as_slice`].
    #[inline]
    pub fn as_str<'a>(&self, source: &'a [u8]) -> Result<&'a str, std::str::Utf8Error> {
        std::str::from_utf8(self.as_slice(source))
    }
}
63
/// Kind of classification-marking candidate, as decided by scanner heuristics.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MarkingType {
    /// Portion marking such as `(TS//SI//NF)`: parenthesized and typically
    /// found at the start of a paragraph.
    Portion,
    /// Banner marking such as `TOP SECRET//SENSITIVE INTELLIGENCE//NOFORN`,
    /// standing alone on its own line.
    Banner,
    /// Classification Authority Block spanning several lines
    /// (Classified By / Derived From / Declassify On).
    Cab,
    /// Document page break, detected from `\f` (form feed) or the
    /// `\n\n\n+` heuristic.
    ///
    /// The candidate carries a zero-length span at the boundary offset.
    /// The engine uses it to reset its `PageContext` so banner/CAB rules on
    /// the next page see a fresh aggregate (Phase 3, plan §Task 1).
    PageBreak,
}
79
80/// A scanner-identified candidate with its type and source span.
81#[derive(Debug, Clone, Copy)]
82pub struct MarkingCandidate {
83    pub span: Span,
84    pub kind: MarkingType,
85}
86
/// Document zone: the part of the document structure in which a marking appears.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Zone {
    /// The marking appears in the document header.
    Header,
    /// The marking appears in the document footer.
    Footer,
    /// The marking appears in the document body.
    Body,
    /// The marking appears in the Classification Authority Block
    /// (Classified By / Derived From / Declassify On).
    Cab,
}
96
/// Coarse location within the document, used by banner detection heuristics.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DocumentPosition {
    /// At the start of the document.
    Start,
    /// Within the body of the document.
    Body,
    /// At the end of the document.
    End,
}
104
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// Five-byte fixture shared by the slicing tests.
    const HELLO: &[u8] = b"hello";

    // --- Span::new / Span::len -------------------------------------------

    #[test]
    fn span_new_accepts_equal_bounds() {
        assert_eq!(Span::new(5, 5).len(), 0);
    }

    #[test]
    fn span_new_accepts_normal_range() {
        assert_eq!(Span::new(2, 7).len(), 5);
    }

    #[test]
    fn span_len_calculates_correctly() {
        // (start, end, expected length)
        let cases = [
            (0, 0, 0),
            (0, 10, 10),
            (5, 10, 5),
            (100, 100, 0),
            (100, 250, 150),
        ];
        for (start, end, expected) in cases {
            assert_eq!(Span::new(start, end).len(), expected);
        }
    }

    #[test]
    #[should_panic(expected = "Span::new")]
    fn span_new_panics_on_inverted_bounds() {
        let _inverted = Span::new(7, 2);
    }

    // --- Span::try_as_slice ----------------------------------------------

    #[test]
    fn try_as_slice_returns_none_when_out_of_bounds() {
        assert_eq!(Span::new(2, 100).try_as_slice(HELLO), None);
    }

    #[test]
    fn try_as_slice_returns_bytes_when_in_bounds() {
        let expected: &[u8] = b"ell";
        assert_eq!(Span::new(1, 4).try_as_slice(HELLO), Some(expected));
    }

    // --- Span::as_str ----------------------------------------------------

    #[test]
    fn as_str_returns_utf8_slice() {
        assert_eq!(Span::new(0, 3).as_str(b"abc"), Ok("abc"));
    }

    // --- Span::is_empty --------------------------------------------------

    #[test]
    fn span_is_empty_returns_true_when_bounds_are_equal() {
        assert!(Span::new(42, 42).is_empty());
    }

    #[test]
    fn span_is_empty_returns_false_when_bounds_differ() {
        assert!(!Span::new(42, 43).is_empty());
    }

    // --- Span::as_slice --------------------------------------------------

    #[test]
    fn as_slice_returns_bytes_when_in_bounds() {
        assert_eq!(Span::new(1, 4).as_slice(HELLO), b"ell");
    }

    #[test]
    #[should_panic]
    fn as_slice_panics_when_end_out_of_bounds() {
        let _ = Span::new(2, 100).as_slice(HELLO);
    }

    #[test]
    #[should_panic]
    fn as_slice_panics_when_start_out_of_bounds() {
        let _ = Span::new(100, 101).as_slice(HELLO);
    }
}