use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::EventId;
// Authority rank attached to the source material behind a summary span.
//
// The derived `PartialOrd`/`Ord` follow declaration order, so
// `Derived < Agent < User`. With `rename_all = "snake_case"` the serialized
// names are `derived`, `agent` and `user`.
//
// NOTE(review): plain `//` comments are used here on purpose. This type derives
// `JsonSchema`, and schemars copies `///` doc comments into the generated schema
// as descriptions, so doc comments would change the emitted schema.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
#[serde(rename_all = "snake_case")]
pub enum SourceAuthority {
// Lowest rank in the derived ordering.
Derived,
// Middle rank in the derived ordering.
Agent,
// Highest rank in the derived ordering.
User,
}
// A byte range of a summary string together with the events it was derived from.
//
// `validate_summary_spans` treats `byte_start..byte_end` as a half-open byte
// range into the summary and requires both ends to be UTF-8 char boundaries.
//
// NOTE(review): plain `//` comments are used here on purpose. This type derives
// `JsonSchema`, and schemars copies `///` doc comments into the generated schema
// as descriptions, so doc comments would change the emitted schema.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct SummarySpan {
// Inclusive start offset, in bytes, into the summary.
pub byte_start: u32,
// Exclusive end offset, in bytes, into the summary.
pub byte_end: u32,
// Event ids handed to the authority fold when the span is validated.
pub derived_from_event_ids: Vec<EventId>,
// Authority stored on the span; validation requires it to equal the value the
// authority fold returns for `derived_from_event_ids`.
pub max_source_authority: SourceAuthority,
}
/// Reasons `validate_summary_spans` rejects a set of spans.
///
/// Every `index` field is the position of the offending span in the slice that
/// was validated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SummarySpanError {
/// The span's start is greater than its end, or its end is past the end of
/// the summary.
OutOfBounds {
/// Position of the offending span.
index: usize,
},
/// The span's start or end does not fall on a UTF-8 char boundary of the
/// summary.
InvalidUtf8Boundary {
/// Position of the offending span.
index: usize,
},
/// The span starts before the end of the span that precedes it in the slice.
OverlapOrUnordered {
/// Position of the offending span (the later of the two).
index: usize,
},
/// A non-whitespace character of the summary is not covered by any span.
UncoveredNonWhitespace {
/// Byte offset in the summary at which the uncovered character starts.
byte: usize,
},
/// The authority stored on the span differs from the value computed by the
/// authority fold for the span's event ids.
AuthorityMismatch {
/// Position of the offending span.
index: usize,
/// Value returned by the authority fold.
expected: SourceAuthority,
/// Value stored in the span's `max_source_authority` field.
observed: SourceAuthority,
},
}
impl SummarySpanError {
#[must_use]
pub const fn invariant(&self) -> &'static str {
match self {
Self::OutOfBounds { .. } => "summary_span.range.in_bounds",
Self::InvalidUtf8Boundary { .. } => "summary_span.range.utf8_boundary",
Self::OverlapOrUnordered { .. } => "summary_span.range.ordered_non_overlapping",
Self::UncoveredNonWhitespace { .. } => "summary_span.coverage.non_whitespace",
Self::AuthorityMismatch { .. } => "summary_span.authority.cache_matches_fold",
}
}
}
/// Checks that `spans` are well-formed byte ranges of `summary`, that each
/// span's stored authority matches `authority_fold`, and that every
/// non-whitespace character of `summary` lies inside some span.
///
/// Spans are examined in slice order. `authority_fold` is called once per span,
/// with that span's `derived_from_event_ids`, after the span has passed its
/// range checks. Zero-length spans (`byte_start == byte_end`) are accepted and
/// cover nothing. An empty or whitespace-only summary with no spans is valid.
///
/// # Errors
///
/// Per-span checks run first, in this order for each span, and the first
/// failure is returned:
///
/// * [`SummarySpanError::OutOfBounds`] – start is after end, or end is past the
///   end of `summary`.
/// * [`SummarySpanError::InvalidUtf8Boundary`] – start or end is not a char
///   boundary of `summary`.
/// * [`SummarySpanError::OverlapOrUnordered`] – the span starts before the
///   previous span ended.
/// * [`SummarySpanError::AuthorityMismatch`] – `max_source_authority` differs
///   from the value returned by `authority_fold`.
///
/// Only when every span passes is coverage checked:
///
/// * [`SummarySpanError::UncoveredNonWhitespace`] – carries the byte offset of
///   the first non-whitespace character that no span covers.
pub fn validate_summary_spans<F>(
    summary: &str,
    spans: &[SummarySpan],
    mut authority_fold: F,
) -> Result<(), SummarySpanError>
where
    F: FnMut(&[EventId]) -> SourceAuthority,
{
    let len = summary.len();

    // Pass 1: range, ordering and authority checks for every span.
    let mut previous_end = 0usize;
    for (index, span) in spans.iter().enumerate() {
        // u32 -> usize is a widening conversion on 32- and 64-bit targets.
        let start = span.byte_start as usize;
        let end = span.byte_end as usize;
        if start > end || end > len {
            return Err(SummarySpanError::OutOfBounds { index });
        }
        if !summary.is_char_boundary(start) || !summary.is_char_boundary(end) {
            return Err(SummarySpanError::InvalidUtf8Boundary { index });
        }
        if start < previous_end {
            return Err(SummarySpanError::OverlapOrUnordered { index });
        }
        let expected = authority_fold(&span.derived_from_event_ids);
        if span.max_source_authority != expected {
            return Err(SummarySpanError::AuthorityMismatch {
                index,
                expected,
                observed: span.max_source_authority,
            });
        }
        previous_end = end;
    }

    // Pass 2: coverage. Pass 1 established that the spans are in bounds, on char
    // boundaries, ordered and non-overlapping, so the uncovered text is exactly
    // the gaps before, between and after them. Scanning those gaps directly
    // avoids allocating a per-byte coverage map, and the slicing below cannot
    // panic. A `(len, len)` sentinel closes the final gap.
    let bounds = spans
        .iter()
        .map(|span| (span.byte_start as usize, span.byte_end as usize))
        .chain(std::iter::once((len, len)));
    let mut gap_start = 0usize;
    for (gap_end, next_gap_start) in bounds {
        let gap = &summary[gap_start..gap_end];
        if let Some((offset, _)) = gap.char_indices().find(|&(_, ch)| !ch.is_whitespace()) {
            return Err(SummarySpanError::UncoveredNonWhitespace {
                byte: gap_start + offset,
            });
        }
        gap_start = next_gap_start;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the fixed event id literal used as provenance in these tests.
    fn event_id() -> EventId {
        "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".parse().unwrap()
    }

    /// Builds a span over `byte_start..byte_end` backed by a single event id.
    fn span(byte_start: u32, byte_end: u32, authority: SourceAuthority) -> SummarySpan {
        SummarySpan {
            byte_start,
            byte_end,
            derived_from_event_ids: vec![event_id()],
            max_source_authority: authority,
        }
    }

    /// Authority fold that reports `Derived` for every span.
    fn always_derived(_: &[EventId]) -> SourceAuthority {
        SourceAuthority::Derived
    }

    #[test]
    fn summary_spans_validate_full_non_whitespace_coverage() {
        let spans = [span(0, 5, SourceAuthority::Derived)];
        assert_eq!(validate_summary_spans("hello", &spans, always_derived), Ok(()));
    }

    #[test]
    fn summary_spans_accept_whitespace_between_spans() {
        let spans = [
            span(0, 5, SourceAuthority::Derived),
            span(6, 11, SourceAuthority::Derived),
        ];
        assert_eq!(
            validate_summary_spans("hello world", &spans, always_derived),
            Ok(())
        );
    }

    #[test]
    fn summary_spans_accept_blank_summary_without_spans() {
        assert_eq!(validate_summary_spans("", &[], always_derived), Ok(()));
        assert_eq!(validate_summary_spans(" \n\t", &[], always_derived), Ok(()));
    }

    #[test]
    fn summary_spans_reject_uncovered_claim_text() {
        let spans = [span(0, 5, SourceAuthority::Derived)];
        let err = validate_summary_spans("hello world", &spans, always_derived)
            .expect_err("world is uncovered");
        // 'w' is the first uncovered non-whitespace character, at byte 6.
        assert_eq!(err, SummarySpanError::UncoveredNonWhitespace { byte: 6 });
        assert_eq!(err.invariant(), "summary_span.coverage.non_whitespace");
    }

    #[test]
    fn summary_spans_reject_authority_cache_mismatch() {
        let spans = [span(0, 5, SourceAuthority::User)];
        let err = validate_summary_spans("hello", &spans, always_derived)
            .expect_err("cache uplift must be rejected");
        assert_eq!(
            err,
            SummarySpanError::AuthorityMismatch {
                index: 0,
                expected: SourceAuthority::Derived,
                observed: SourceAuthority::User,
            }
        );
        assert_eq!(err.invariant(), "summary_span.authority.cache_matches_fold");
    }

    #[test]
    fn summary_spans_reject_non_utf8_boundary() {
        // 'é' is two bytes long, so byte 1 falls inside the scalar.
        let spans = [span(0, 1, SourceAuthority::Derived)];
        let err = validate_summary_spans("é", &spans, always_derived)
            .expect_err("split utf8 scalar must fail");
        assert_eq!(err, SummarySpanError::InvalidUtf8Boundary { index: 0 });
        assert_eq!(err.invariant(), "summary_span.range.utf8_boundary");
    }

    #[test]
    fn summary_spans_reject_out_of_bounds_range() {
        let past_end = [span(0, 6, SourceAuthority::Derived)];
        let err = validate_summary_spans("hello", &past_end, always_derived)
            .expect_err("end past the summary must fail");
        assert_eq!(err, SummarySpanError::OutOfBounds { index: 0 });
        assert_eq!(err.invariant(), "summary_span.range.in_bounds");

        let inverted = [span(3, 2, SourceAuthority::Derived)];
        let err = validate_summary_spans("hello", &inverted, always_derived)
            .expect_err("start after end must fail");
        assert_eq!(err, SummarySpanError::OutOfBounds { index: 0 });
    }

    #[test]
    fn summary_spans_reject_overlapping_spans() {
        let spans = [
            span(0, 3, SourceAuthority::Derived),
            span(2, 5, SourceAuthority::Derived),
        ];
        let err = validate_summary_spans("hello", &spans, always_derived)
            .expect_err("second span starts inside the first");
        assert_eq!(err, SummarySpanError::OverlapOrUnordered { index: 1 });
        assert_eq!(err.invariant(), "summary_span.range.ordered_non_overlapping");
    }
}