// cortex_core/summary.rs

//! Span-level summary provenance primitives for the schema v2 cutover.
//!
//! These types are additive compatibility shapes while [`crate::SCHEMA_VERSION`]
//! remains 1. Persisting them on `Episode`/`Memory` rows happens in Lane S2.

use std::fmt;

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

use crate::EventId;

11/// Coarse source authority for summary-span folding.
12#[derive(
13    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
14)]
15#[serde(rename_all = "snake_case")]
16pub enum SourceAuthority {
17    /// Derived, tool, runtime, external, missing, or failed-verify source.
18    Derived,
19    /// Verified child-agent source.
20    Agent,
21    /// Verified user or manual-correction source.
22    User,
23}
24
25/// A cited byte span inside a summary-like string.
26#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
27pub struct SummarySpan {
28    /// Inclusive byte offset where this span starts.
29    pub byte_start: u32,
30    /// Exclusive byte offset where this span ends.
31    pub byte_end: u32,
32    /// Source event ids whose payload contributed to this span.
33    pub derived_from_event_ids: Vec<EventId>,
34    /// Cached authority fold for the cited source events.
35    pub max_source_authority: SourceAuthority,
36}
37
38/// Span validation failure.
39#[derive(Debug, Clone, PartialEq, Eq)]
40pub enum SummarySpanError {
41    /// A range does not satisfy start <= end <= summary length.
42    OutOfBounds {
43        /// Index of the offending span.
44        index: usize,
45    },
46    /// A range boundary splits a UTF-8 scalar.
47    InvalidUtf8Boundary {
48        /// Index of the offending span.
49        index: usize,
50    },
51    /// A span starts before the prior span ends.
52    OverlapOrUnordered {
53        /// Index of the offending span.
54        index: usize,
55    },
56    /// Non-whitespace summary text has no covering span.
57    UncoveredNonWhitespace {
58        /// Byte offset of the uncovered character.
59        byte: usize,
60    },
61    /// Cached max authority disagrees with recomputed authority.
62    AuthorityMismatch {
63        /// Index of the offending span.
64        index: usize,
65        /// Recomputed authority.
66        expected: SourceAuthority,
67        /// Stored authority.
68        observed: SourceAuthority,
69    },
70}
71
72impl SummarySpanError {
73    /// Stable invariant name for operator-facing verification.
74    #[must_use]
75    pub const fn invariant(&self) -> &'static str {
76        match self {
77            Self::OutOfBounds { .. } => "summary_span.range.in_bounds",
78            Self::InvalidUtf8Boundary { .. } => "summary_span.range.utf8_boundary",
79            Self::OverlapOrUnordered { .. } => "summary_span.range.ordered_non_overlapping",
80            Self::UncoveredNonWhitespace { .. } => "summary_span.coverage.non_whitespace",
81            Self::AuthorityMismatch { .. } => "summary_span.authority.cache_matches_fold",
82        }
83    }
84}
85
86/// Validate summary spans against ADR 0015 structural invariants.
87///
88/// `authority_fold` recomputes the authority for one span from its
89/// `derived_from_event_ids`. The cached `max_source_authority` must match it.
90pub fn validate_summary_spans<F>(
91    summary: &str,
92    spans: &[SummarySpan],
93    mut authority_fold: F,
94) -> Result<(), SummarySpanError>
95where
96    F: FnMut(&[EventId]) -> SourceAuthority,
97{
98    if summary.trim().is_empty() && spans.is_empty() {
99        return Ok(());
100    }
101
102    let len = summary.len();
103    let mut previous_end = 0usize;
104    let mut covered = vec![false; len];
105
106    for (index, span) in spans.iter().enumerate() {
107        let start = span.byte_start as usize;
108        let end = span.byte_end as usize;
109        if start > end || end > len {
110            return Err(SummarySpanError::OutOfBounds { index });
111        }
112        if !summary.is_char_boundary(start) || !summary.is_char_boundary(end) {
113            return Err(SummarySpanError::InvalidUtf8Boundary { index });
114        }
115        if start < previous_end {
116            return Err(SummarySpanError::OverlapOrUnordered { index });
117        }
118
119        let expected = authority_fold(&span.derived_from_event_ids);
120        if span.max_source_authority != expected {
121            return Err(SummarySpanError::AuthorityMismatch {
122                index,
123                expected,
124                observed: span.max_source_authority,
125            });
126        }
127
128        for slot in covered.iter_mut().take(end).skip(start) {
129            *slot = true;
130        }
131        previous_end = end;
132    }
133
134    for (byte, ch) in summary.char_indices() {
135        if !ch.is_whitespace() && !covered[byte] {
136            return Err(SummarySpanError::UncoveredNonWhitespace { byte });
137        }
138    }
139
140    Ok(())
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed event id shared by every fixture span.
    fn event_id() -> EventId {
        "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".parse().unwrap()
    }

    /// Builds a span over `byte_start..byte_end` citing the fixture event.
    fn span(byte_start: u32, byte_end: u32, authority: SourceAuthority) -> SummarySpan {
        SummarySpan {
            byte_start,
            byte_end,
            derived_from_event_ids: vec![event_id()],
            max_source_authority: authority,
        }
    }

    /// Authority fold that treats every source as `Derived`.
    fn derived(_: &[EventId]) -> SourceAuthority {
        SourceAuthority::Derived
    }

    #[test]
    fn summary_spans_validate_full_non_whitespace_coverage() {
        let spans = vec![span(0, 5, SourceAuthority::Derived)];

        validate_summary_spans("hello", &spans, derived).unwrap();
    }

    #[test]
    fn summary_spans_allow_uncovered_whitespace() {
        let spans = vec![
            span(0, 5, SourceAuthority::Derived),
            span(6, 11, SourceAuthority::Derived),
        ];

        validate_summary_spans("hello world", &spans, derived).unwrap();
    }

    #[test]
    fn summary_spans_accept_blank_summary_without_spans() {
        validate_summary_spans(" \n\t", &[], derived).unwrap();
    }

    #[test]
    fn summary_spans_reject_uncovered_claim_text() {
        let spans = vec![span(0, 5, SourceAuthority::Derived)];

        let err = validate_summary_spans("hello world", &spans, derived)
            .expect_err("world is uncovered");
        assert_eq!(err.invariant(), "summary_span.coverage.non_whitespace");
        assert_eq!(err, SummarySpanError::UncoveredNonWhitespace { byte: 6 });
    }

    #[test]
    fn summary_spans_reject_authority_cache_mismatch() {
        let spans = vec![span(0, 5, SourceAuthority::User)];

        let err = validate_summary_spans("hello", &spans, derived)
            .expect_err("cache uplift must be rejected");
        assert_eq!(err.invariant(), "summary_span.authority.cache_matches_fold");
    }

    #[test]
    fn summary_spans_reject_non_utf8_boundary() {
        let spans = vec![span(0, 1, SourceAuthority::Derived)];

        let err = validate_summary_spans("é", &spans, derived)
            .expect_err("split utf8 scalar must fail");
        assert_eq!(err.invariant(), "summary_span.range.utf8_boundary");
    }

    #[test]
    fn summary_spans_reject_out_of_bounds_range() {
        let spans = vec![span(0, 6, SourceAuthority::Derived)];

        let err = validate_summary_spans("hello", &spans, derived)
            .expect_err("end past the summary must fail");
        assert_eq!(err.invariant(), "summary_span.range.in_bounds");
    }

    #[test]
    fn summary_spans_reject_overlapping_ranges() {
        let spans = vec![
            span(0, 3, SourceAuthority::Derived),
            span(2, 5, SourceAuthority::Derived),
        ];

        let err = validate_summary_spans("hello", &spans, derived)
            .expect_err("second span starts inside the first");
        assert_eq!(err, SummarySpanError::OverlapOrUnordered { index: 1 });
    }
}