// marque_ism/span.rs

//! Byte-offset spans into source buffers — zero-copy position tracking.

/// A half-open byte range `start..end` into the original source buffer.
///
/// A `Span` never owns data: it is only a pair of offsets that is resolved
/// against a caller-supplied buffer via [`Span::as_slice`],
/// [`Span::try_as_slice`] or [`Span::as_str`].
///
/// Both fields are public, so a value built with a struct literal (or
/// mutated after construction) can bypass the `start <= end` check made by
/// [`Span::new`]. [`Span::len`] and [`Span::is_empty`] stay well-defined for
/// such inverted spans instead of underflowing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset of the first byte covered by the span (inclusive).
    pub start: usize,
    /// Offset one past the last byte covered by the span (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span covering `start..end`.
    ///
    /// # Panics
    ///
    /// Panics in both debug and release builds if `start > end`, because
    /// such a span will inevitably panic later at slice time and the early
    /// panic gives a better error message.
    #[inline]
    pub fn new(start: usize, end: usize) -> Self {
        assert!(
            start <= end,
            "Span::new: start ({start}) must not exceed end ({end})"
        );
        Self { start, end }
    }

    /// Number of bytes covered by the span.
    ///
    /// Returns `0` for an inverted span (`start > end`, only reachable by
    /// writing the public fields directly) rather than underflowing, which
    /// would panic in debug builds and wrap to a huge length in release.
    #[inline]
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the span covers no bytes.
    ///
    /// An inverted span counts as empty, which keeps this in agreement with
    /// [`Span::len`] returning `0` and with `std::ops::Range::is_empty`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }

    /// Borrow the span's bytes from `source`.
    ///
    /// # Panics
    ///
    /// Panics if the span is out of bounds for `source` — use
    /// [`Span::try_as_slice`] when the caller cannot guarantee bounds.
    #[inline]
    pub fn as_slice<'a>(&self, source: &'a [u8]) -> &'a [u8] {
        &source[self.start..self.end]
    }

    /// Borrow the span's bytes from `source`, returning `None` if the
    /// span lies outside the buffer instead of panicking.
    #[inline]
    pub fn try_as_slice<'a>(&self, source: &'a [u8]) -> Option<&'a [u8]> {
        source.get(self.start..self.end)
    }

    /// Extract the spanned bytes as a UTF-8 string slice.
    ///
    /// Callers that know the source is ASCII can use `.unwrap()` in tests
    /// or `.expect("...")` with context.
    ///
    /// # Errors
    ///
    /// Returns `Err` if the span does not cover valid UTF-8.
    ///
    /// # Panics
    ///
    /// Panics if the span is out of bounds for `source`, because the bytes
    /// are obtained through [`Span::as_slice`].
    #[inline]
    pub fn as_str<'a>(&self, source: &'a [u8]) -> Result<&'a str, std::str::Utf8Error> {
        std::str::from_utf8(self.as_slice(source))
    }
}

/// Classification marking candidate type, determined by scanner heuristics.
///
/// Derives `Hash` alongside `Eq` so the kind can be used directly as a
/// `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MarkingType {
    /// `(TS//SI//NF)` — parenthesized, typically at paragraph start.
    Portion,
    /// `TOP SECRET//SENSITIVE INTELLIGENCE//NOFORN` — standalone line.
    Banner,
    /// Multi-line Classification Authority Block (Classified By / Derived From / Declassify On).
    Cab,
    /// Document page break — `\f` (form feed) or `\n\n\n+` heuristic.
    /// Carries a zero-length span at the boundary offset. The engine uses
    /// this to reset its `PageContext` so banner/CAB rules on the next page
    /// see a fresh aggregate (Phase 3, plan §Task 1).
    PageBreak,
}

76/// A scanner-identified candidate with its type and source span.
77#[derive(Debug, Clone, Copy)]
78pub struct MarkingCandidate {
79    pub span: Span,
80    pub kind: MarkingType,
81}
82
/// Document zone — where in the document structure a marking appears.
///
/// Derives `Hash` alongside `Eq` so a zone can be used directly as a
/// `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Zone {
    Header,
    Footer,
    Body,
    /// Classification Authority Block (Classified By / Derived From / Declassify On).
    Cab,
}

/// Coarse position within the document (for banner detection heuristics).
///
/// Derives `Hash` alongside `Eq` so a position can be used directly as a
/// `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DocumentPosition {
    Start,
    Body,
    End,
}

/// Unit tests for [`Span`]: construction invariants, bounds-checked and
/// panicking slicing, and UTF-8 extraction.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn span_new_accepts_equal_bounds() {
        let s = Span::new(5, 5);
        assert!(s.is_empty());
        assert_eq!(s.len(), 0);
    }

    #[test]
    fn span_new_accepts_normal_range() {
        let s = Span::new(2, 7);
        assert!(!s.is_empty());
        assert_eq!(s.len(), 5);
    }

    #[test]
    #[should_panic(expected = "Span::new")]
    fn span_new_panics_on_inverted_bounds() {
        let _ = Span::new(7, 2);
    }

    #[test]
    fn as_slice_returns_bytes_when_in_bounds() {
        let buf = b"hello";
        let s = Span::new(1, 4);
        assert_eq!(s.as_slice(buf), &b"ell"[..]);
    }

    #[test]
    #[should_panic]
    fn as_slice_panics_when_out_of_bounds() {
        let buf = b"hello";
        let _ = Span::new(2, 100).as_slice(buf);
    }

    #[test]
    fn try_as_slice_returns_none_when_out_of_bounds() {
        let buf = b"hello";
        let s = Span::new(2, 100);
        assert!(s.try_as_slice(buf).is_none());
    }

    #[test]
    fn try_as_slice_returns_bytes_when_in_bounds() {
        let buf = b"hello";
        let s = Span::new(1, 4);
        assert_eq!(s.try_as_slice(buf), Some(&b"ell"[..]));
    }

    #[test]
    fn as_str_returns_utf8_slice() {
        let buf = b"abc";
        let s = Span::new(0, 3);
        assert_eq!(s.as_str(buf).unwrap(), "abc");
    }

    #[test]
    fn as_str_returns_sub_slice_for_interior_span() {
        let buf = b"hello";
        let s = Span::new(1, 4);
        assert_eq!(s.as_str(buf).unwrap(), "ell");
    }

    #[test]
    fn as_str_returns_err_on_invalid_utf8() {
        let buf = b"\xff\xfe";
        let s = Span::new(0, 2);
        assert!(s.as_str(buf).is_err());
    }
}