use marque_capco::{CapcoMarking, CapcoScheme};
use marque_core::Parser;
use marque_ism::{
CapcoTokenSet, Classification, IsmAttributes, MarkingClassification,
span::{MarkingCandidate, MarkingType, Span},
};
use marque_scheme::ambiguity::Parsed;
use marque_scheme::recognizer::{ParseContext, Recognizer};
/// Stateless recognizer handle.
///
/// The type is a unit struct: it holds no configuration, so every value is
/// interchangeable and copying it is free.
#[derive(Debug, Default, Clone, Copy)]
pub struct StrictRecognizer;

impl StrictRecognizer {
    /// Builds a recognizer.
    ///
    /// Usable in `const` contexts; equivalent to `StrictRecognizer::default()`.
    pub const fn new() -> Self {
        StrictRecognizer
    }
}
impl Recognizer<CapcoScheme> for StrictRecognizer {
    /// Parses `bytes` with the strict parser and reports the outcome.
    ///
    /// Returns `Parsed::Unambiguous` wrapping the parsed marking on success.
    /// Returns `Parsed::Ambiguous` with an empty candidate list when:
    ///
    /// * no marking type can be inferred (no non-whitespace byte in `bytes`),
    /// * the parser returns an error, or
    /// * the parsed classification is `Us(Restricted)`.
    ///
    /// The parse context argument is not consulted.
    fn recognize(&self, bytes: &[u8], _cx: &ParseContext) -> Parsed<CapcoMarking> {
        // Single place that spells out the zero-candidate rejection value.
        let no_candidates = || -> Parsed<CapcoMarking> {
            Parsed::Ambiguous {
                candidates: Vec::new(),
            }
        };

        let kind = match infer_marking_type(bytes) {
            Some(kind) => kind,
            None => return no_candidates(),
        };

        // Only portion input has its leading ASCII whitespace skipped before
        // parsing; banner and CAB input is handed to the parser untouched.
        let is_portion = matches!(kind, MarkingType::Portion);
        let offset = if is_portion {
            bytes
                .iter()
                .position(|b| !b.is_ascii_whitespace())
                .unwrap_or(bytes.len())
        } else {
            0
        };
        let body = &bytes[offset..];

        let tokens = CapcoTokenSet;
        let parser = Parser::new(&tokens);
        let candidate = MarkingCandidate {
            span: Span::new(0, body.len()),
            kind,
        };

        let Ok(mut parsed) = parser.parse(&candidate, body) else {
            return no_candidates();
        };

        // The parser saw `body`, so its token spans are relative to the
        // trimmed slice; move them back by the number of skipped bytes.
        if offset > 0 {
            shift_token_spans(&mut parsed.attrs, offset);
        }

        let marking = CapcoMarking::new(parsed.attrs);
        if is_us_restricted(&marking) {
            no_candidates()
        } else {
            Parsed::Unambiguous(marking)
        }
    }
}
/// Reports whether `marking` carries the `Us(Restricted)` classification.
///
/// Returns `true` only when the classification field is
/// `Some(MarkingClassification::Us(Classification::Restricted))`; a missing
/// classification or any other value yields `false`.
pub(crate) fn is_us_restricted(marking: &CapcoMarking) -> bool {
    let classification = &marking.0.classification;
    matches!(
        classification,
        Some(MarkingClassification::Us(Classification::Restricted))
    )
}
/// Moves every recorded token span in `attrs` forward by `delta`.
///
/// Both the start and the end of each span are increased by `delta`.
/// A `delta` of zero leaves `attrs` untouched.
pub(crate) fn shift_token_spans(attrs: &mut IsmAttributes, delta: usize) {
    if delta != 0 {
        attrs.token_spans.iter_mut().for_each(|entry| {
            let start = entry.span.start + delta;
            let end = entry.span.end + delta;
            entry.span = Span::new(start, end);
        });
    }
}
/// Guesses the marking type from the shape of the input's head.
///
/// Returns `None` when `bytes` contains no byte other than ASCII whitespace.
/// Otherwise the first non-whitespace byte decides:
///
/// * `(` gives `MarkingType::Portion`;
/// * anything else gives `MarkingType::Cab` if `is_cab_head` accepts the
///   whole input, and `MarkingType::Banner` if it does not.
fn infer_marking_type(bytes: &[u8]) -> Option<MarkingType> {
    let lead = *bytes.iter().find(|b| !b.is_ascii_whitespace())?;
    let kind = match lead {
        b'(' => MarkingType::Portion,
        _ if is_cab_head(bytes) => MarkingType::Cab,
        _ => MarkingType::Banner,
    };
    Some(kind)
}
/// Reports whether the input begins with one of the CAB authority-line labels.
///
/// `bytes` must be valid UTF-8 in its entirety, otherwise the answer is
/// `false`. Leading whitespace is skipped (as defined by `str::trim_start`)
/// and the remainder is compared, case-sensitively, against the labels
/// `Classified By:`, `Derived From:` and `Declassify On:`.
fn is_cab_head(bytes: &[u8]) -> bool {
    const CAB_HEADS: [&str; 3] = ["Classified By:", "Derived From:", "Declassify On:"];

    match std::str::from_utf8(bytes) {
        Ok(text) => {
            let head = text.trim_start();
            CAB_HEADS.iter().any(|label| head.starts_with(label))
        }
        Err(_) => false,
    }
}
// Unit tests for marking-type inference, the strict recognizer's
// accept/reject decisions, and the span-shifting helper.
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
use super::*;
// A leading `(`, optionally preceded by whitespace, infers a portion marking.
#[test]
fn infer_marking_type_portion_on_leading_paren() {
assert_eq!(infer_marking_type(b"(TS//SI)"), Some(MarkingType::Portion));
assert_eq!(infer_marking_type(b" (S//NF)"), Some(MarkingType::Portion));
}
// Input that starts with an authority-line label infers a CAB.
#[test]
fn infer_marking_type_cab_on_authority_head() {
assert_eq!(
infer_marking_type(b"Classified By: X\nDerived From: Y"),
Some(MarkingType::Cab)
);
assert_eq!(
infer_marking_type(b"Declassify On: 20350101"),
Some(MarkingType::Cab)
);
}
// `Reason:` is not among the labels `is_cab_head` checks, so the input
// falls through to the banner default.
#[test]
fn infer_marking_type_bare_reason_prefix_is_not_cab() {
assert_eq!(
infer_marking_type(b"Reason: 1.4(c)"),
Some(MarkingType::Banner),
);
}
// Anything that is neither a portion nor a CAB head is treated as a banner.
#[test]
fn infer_marking_type_banner_otherwise() {
assert_eq!(
infer_marking_type(b"TOP SECRET//NOFORN"),
Some(MarkingType::Banner)
);
}
// Empty and whitespace-only input has no first non-whitespace byte, so no
// marking type is inferred.
#[test]
fn infer_marking_type_empty_input_returns_none() {
assert_eq!(infer_marking_type(b""), None);
assert_eq!(infer_marking_type(b" "), None);
}
// A well-formed portion marking resolves to a single marking.
#[test]
fn strict_recognizer_resolves_portion_unambiguously() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"(S//NF)", &cx) {
Parsed::Unambiguous(_) => {}
other => panic!("expected Unambiguous, got {other:?}"),
}
}
// A bare `(R)` portion must come back as Ambiguous with no candidates.
#[test]
fn strict_recognizer_rejects_bare_restricted_portion() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"(R)", &cx) {
Parsed::Ambiguous { candidates } => assert!(
candidates.is_empty(),
"bare (R) must be zero-candidate, got {} candidates",
candidates.len()
),
Parsed::Unambiguous(m) => panic!(
"bare (R) must be rejected, got Unambiguous({:?})",
m.0.classification
),
}
}
// Adding a dissemination control does not make `(R//NF)` acceptable.
#[test]
fn strict_recognizer_rejects_restricted_with_dissem_only() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"(R//NF)", &cx) {
Parsed::Ambiguous { candidates } => assert!(
candidates.is_empty(),
"(R//NF) must be zero-candidate, got {} candidates",
candidates.len()
),
Parsed::Unambiguous(m) => panic!(
"(R//NF) must be rejected — `Us(Restricted)` with dissem \
control but no FGI marker is invalid; got Unambiguous({:?})",
m.0.classification
),
}
}
// Same rejection for banner-shaped input (no leading parenthesis).
#[test]
fn strict_recognizer_rejects_restricted_with_rel_to_only() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"R//USA, GBR", &cx) {
Parsed::Ambiguous { candidates } => assert!(
candidates.is_empty(),
"R//USA, GBR must be zero-candidate, got {} candidates",
candidates.len()
),
Parsed::Unambiguous(m) => panic!(
"R//USA, GBR must be rejected — banner-shape \
`Us(Restricted)` with REL TO but no FGI marker is \
invalid; got Unambiguous({:?})",
m.0.classification
),
}
}
// An FGI marker block elsewhere in the banner must not rescue an input
// that the test expects to be rejected as `Us(Restricted)`.
#[test]
fn strict_recognizer_rejects_us_restricted_with_fgi_marker() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"RESTRICTED//FGI DEU//NOFORN", &cx) {
Parsed::Ambiguous { candidates } => assert!(
candidates.is_empty(),
"RESTRICTED//FGI DEU//NOFORN must be zero-candidate, \
got {} candidates",
candidates.len()
),
Parsed::Unambiguous(m) => panic!(
"RESTRICTED//FGI DEU//NOFORN must be rejected — an FGI \
marker block does not redeem a Us(Restricted) \
classification; got Unambiguous({:?}, fgi_marker={:?})",
m.0.classification, m.0.fgi_marker
),
}
}
// `(//FGI R//NF)` must be accepted, and the resulting marking must not
// satisfy the `is_us_restricted` predicate.
#[test]
fn strict_recognizer_accepts_fgi_axis_restricted() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"(//FGI R//NF)", &cx) {
Parsed::Unambiguous(m) => {
assert!(
!is_us_restricted(&m),
"FGI-axis RESTRICTED must not match the bare-`Us(Restricted)` predicate; \
classification = {:?}",
m.0.classification,
);
}
other => panic!("expected Unambiguous for `(//FGI R//NF)`, got {other:?}"),
}
}
// Negative control for the predicate: a SECRET portion is not restricted.
#[test]
fn is_us_restricted_distinguishes_us_secret() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
let Parsed::Unambiguous(m) = rx.recognize(b"(S)", &cx) else {
panic!("(S) must parse to a SECRET portion");
};
assert!(
!is_us_restricted(&m),
"Us(Secret) must not match the bare-RESTRICTED predicate",
);
}
// A portion with no closing parenthesis is expected to come back as
// Ambiguous with no candidates.
#[test]
fn strict_recognizer_returns_zero_candidate_on_parse_failure() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
match rx.recognize(b"(S//NF", &cx) {
Parsed::Ambiguous { candidates } => assert!(candidates.is_empty()),
other => panic!("expected zero-candidate Ambiguous, got {other:?}"),
}
}
// Shifting by zero must leave every token span unchanged.
#[test]
fn shift_token_spans_is_identity_for_zero_delta() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
let Parsed::Unambiguous(mut marking) = rx.recognize(b"(S//NF)", &cx) else {
panic!("strict parse should succeed");
};
let before: Vec<Span> = marking.0.token_spans.iter().map(|t| t.span).collect();
shift_token_spans(&mut marking.0, 0);
let after: Vec<Span> = marking.0.token_spans.iter().map(|t| t.span).collect();
assert_eq!(before, after);
}
// Shifting by 100 must add 100 to both ends of every token span.
// NOTE(review): `zip` stops at the shorter list and the loop body never runs
// if the parse recorded no token spans, so this test can pass vacuously —
// consider also asserting that the lists are non-empty and of equal length.
#[test]
fn shift_token_spans_shifts_by_delta() {
let rx = StrictRecognizer::new();
let cx = ParseContext::default();
let Parsed::Unambiguous(mut marking) = rx.recognize(b"(S//NF)", &cx) else {
panic!("strict parse should succeed");
};
let before: Vec<(usize, usize)> = marking
.0
.token_spans
.iter()
.map(|t| (t.span.start, t.span.end))
.collect();
shift_token_spans(&mut marking.0, 100);
let after: Vec<(usize, usize)> = marking
.0
.token_spans
.iter()
.map(|t| (t.span.start, t.span.end))
.collect();
for (b, a) in before.iter().zip(after.iter()) {
assert_eq!(a.0, b.0 + 100);
assert_eq!(a.1, b.1 + 100);
}
}
// Compile-time check only: the recognizer, and boxed / `Arc`-wrapped trait
// objects of `Recognizer<CapcoScheme>`, must be `Send + Sync`.
#[test]
fn strict_recognizer_is_send_sync_as_trait_object() {
fn assert_send_sync<T: Send + Sync + ?Sized>() {}
assert_send_sync::<StrictRecognizer>();
assert_send_sync::<std::sync::Arc<dyn Recognizer<CapcoScheme>>>();
assert_send_sync::<Box<dyn Recognizer<CapcoScheme>>>();
}
}