use crate::error::CoreError;
use marque_ism::attrs::{
AeaMarking, Classification, CountryCode, DeclassExemption, DissemControl, FgiClassification,
FgiMarker, ForeignClassification, IsmAttributes, JointClassification, MarkingClassification,
NatoClassification, NonIcDissem, SarCompartment, SarIndicator, SarMarking, SarProgram,
SciCompartment, SciControl, SciControlBare, SciControlSystem, SciMarking, TokenKind, TokenSpan,
};
use marque_ism::date::IsmDate;
use marque_ism::is_bare_cve_value;
use marque_ism::span::{MarkingCandidate, MarkingType, Span};
use marque_ism::token_set::TokenSet;
use std::str::FromStr;
/// Result of parsing one marking candidate.
#[derive(Debug)]
pub struct ParsedMarking {
/// Attributes extracted from the marking text.
pub attrs: IsmAttributes,
/// Span of the candidate the marking was parsed from (copied from the candidate).
pub source_span: Span,
/// Kind of marking that was parsed (portion, banner or CAB).
pub kind: MarkingType,
}
/// Parser that turns marking candidates into [`ParsedMarking`] values.
pub struct Parser<'t> {
/// Borrowed token set; consulted when recognising country codes in `REL TO` blocks.
tokens: &'t dyn TokenSet,
}
impl<'t> Parser<'t> {
pub fn new(tokens: &'t dyn TokenSet) -> Self {
Self { tokens }
}
/// Parses the marking described by `candidate` out of `source`.
///
/// # Errors
///
/// * [`CoreError::InvalidUtf8`] when the candidate span does not yield a
///   string from `source`.
/// * [`CoreError::MalformedMarking`] when the candidate is a page break, or
///   when the kind-specific parser rejects the text.
pub fn parse(
    &self,
    candidate: &MarkingCandidate,
    source: &[u8],
) -> Result<ParsedMarking, CoreError> {
    // The text is extracted first so an invalid span is reported before the
    // candidate kind is even looked at.
    let Ok(text) = candidate.span.as_str(source) else {
        return Err(CoreError::InvalidUtf8(candidate.span));
    };
    match candidate.kind {
        MarkingType::PageBreak => Err(CoreError::MalformedMarking(String::from(
            "page-break candidate must not be parsed",
        ))),
        MarkingType::Cab => self.parse_cab(text, candidate),
        MarkingType::Banner => self.parse_banner(text, candidate),
        MarkingType::Portion => self.parse_portion(text, candidate),
    }
}
/// Parses a parenthesised portion marking such as `(SECRET//NF)`.
///
/// Returns [`CoreError::MalformedMarking`] carrying the full text when the
/// surrounding parentheses are missing.
fn parse_portion(
    &self,
    text: &str,
    candidate: &MarkingCandidate,
) -> Result<ParsedMarking, CoreError> {
    let Some(inner) = text
        .strip_prefix('(')
        .and_then(|after_open| after_open.strip_suffix(')'))
    else {
        return Err(CoreError::MalformedMarking(text.to_owned()));
    };
    // The inner text starts one byte after the opening parenthesis.
    let inner_offset = candidate.span.start + 1;
    self.parse_marking_string(inner, MarkingType::Portion, inner_offset)
        .map(|attrs| ParsedMarking {
            attrs,
            source_span: candidate.span,
            kind: MarkingType::Portion,
        })
}
/// Parses a banner line, ignoring surrounding whitespace.
///
/// Token offsets are shifted by the amount of leading whitespace so they
/// still point into the original source.
fn parse_banner(
    &self,
    text: &str,
    candidate: &MarkingCandidate,
) -> Result<ParsedMarking, CoreError> {
    let leading_ws = text.len() - text.trim_start().len();
    let body = text.trim();
    let body_offset = candidate.span.start + leading_ws;
    self.parse_marking_string(body, MarkingType::Banner, body_offset)
        .map(|attrs| ParsedMarking {
            attrs,
            source_span: candidate.span,
            kind: MarkingType::Banner,
        })
}
/// Parses a classification authority block, one `Label: value` line at a time.
///
/// Recognised labels are `Classified By:`, `Derived From:` and
/// `Declassify On:`; lines that start with none of them are ignored. A
/// `Declassify On:` value is tried as a declassification exemption first and
/// as a date second; a value that is neither leaves `declassify_on` as `None`.
fn parse_cab(
    &self,
    text: &str,
    candidate: &MarkingCandidate,
) -> Result<ParsedMarking, CoreError> {
    let mut attrs = IsmAttributes::default();
    for line in text.lines() {
        if let Some(rest) = line.strip_prefix("Classified By:") {
            attrs.classified_by = Some(rest.trim().into());
            continue;
        }
        if let Some(rest) = line.strip_prefix("Derived From:") {
            attrs.derived_from = Some(rest.trim().into());
            continue;
        }
        let Some(rest) = line.strip_prefix("Declassify On:") else {
            continue;
        };
        let value = rest.trim();
        match DeclassExemption::parse(value) {
            Some(exemption) => attrs.declass_exemption = Some(exemption),
            None => attrs.declassify_on = IsmDate::from_str(value).ok(),
        }
    }
    Ok(ParsedMarking {
        attrs,
        source_span: candidate.span,
        kind: MarkingType::Cab,
    })
}
/// Parses a `//`-separated marking string into [`IsmAttributes`].
///
/// `s` is the marking text without portion parentheses or surrounding
/// whitespace, and `s_offset` is the offset that is added to every position
/// inside `s` when building token spans. `context` is currently unused (see
/// the `let _ = context;` at the end).
///
/// Every non-empty block between separators produces at least one
/// [`TokenSpan`] (with one exception noted at the FGI branch below), and every
/// `//` separator produces a `Separator` span. The spans are sorted by start
/// offset before being stored.
///
/// # Errors
///
/// Returns [`CoreError::MalformedMarking`] when `s` is empty.
fn parse_marking_string(
&self,
s: &str,
context: MarkingType,
s_offset: usize,
) -> Result<IsmAttributes, CoreError> {
let mut attrs = IsmAttributes::default();
if s.is_empty() {
return Err(CoreError::MalformedMarking(s.to_owned()));
}
// Locate every `//` separator and derive the (start, end) byte range of the
// block before, between and after them, relative to `s`.
let separators: Vec<usize> = s.match_indices("//").map(|(i, _)| i).collect();
let mut block_ranges: Vec<(usize, usize)> = Vec::with_capacity(separators.len() + 1);
let mut prev_end = 0usize;
for &sep_start in &separators {
block_ranges.push((prev_end, sep_start));
prev_end = sep_start + 2; }
block_ranges.push((prev_end, s.len()));
// Accumulators; they are moved into `attrs` once all blocks are processed.
let mut token_spans: Vec<TokenSpan> = Vec::new();
let mut sci: Vec<SciControl> = Vec::new();
let mut sci_markings: Vec<SciMarking> = Vec::new();
let mut sar_captured = false;
let mut aea: Vec<AeaMarking> = Vec::new();
let mut dissem: Vec<DissemControl> = Vec::new();
let mut non_ic: Vec<NonIcDissem> = Vec::new();
let mut rel_to: Vec<CountryCode> = Vec::new();
// A marking that starts with `//` has an empty first block and is treated as
// non-US: its classification is then read from block index 1.
let is_non_us = s.starts_with("//");
for (idx, &(rel_start, rel_end)) in block_ranges.iter().enumerate() {
let raw = &s[rel_start..rel_end];
let trimmed = raw.trim();
if trimmed.is_empty() {
continue;
}
// Span of the trimmed block, shifted by `s_offset`.
let trim_lead = raw.len() - raw.trim_start().len();
let abs_start = s_offset + rel_start + trim_lead;
let abs_end = abs_start + trimmed.len();
let span = Span::new(abs_start, abs_end);
// US marking: the first block is the classification. The span is tagged as
// `Classification` even when the text is not a recognised level, in which
// case `attrs.classification` is `None`.
if idx == 0 && !is_non_us {
attrs.classification = parse_classification(trimmed).map(MarkingClassification::Us);
token_spans.push(TokenSpan {
kind: TokenKind::Classification,
span,
text: trimmed.into(),
});
continue;
}
// Non-US marking: block 1 is tried as NATO, then JOINT, then FGI; anything
// else is recorded as `Unknown`.
if idx == 1 && is_non_us {
if let Some(nato) = parse_nato_classification(trimmed) {
attrs.classification = Some(MarkingClassification::Nato(nato));
} else if let Some(joint) = parse_joint_classification(trimmed) {
attrs.classification = Some(MarkingClassification::Joint(joint));
} else if let Some(fgi) = parse_fgi_classification(trimmed) {
attrs.classification = Some(MarkingClassification::Fgi(fgi));
} else {
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span,
text: trimmed.into(),
});
continue;
}
token_spans.push(TokenSpan {
kind: TokenKind::Classification,
span,
text: trimmed.into(),
});
continue;
}
// SAR block: only the first one that parses is captured; a later SAR block,
// or one that fails to parse, is recorded as `Unknown`.
if trimmed.starts_with("SAR-") || trimmed.starts_with("SPECIAL ACCESS REQUIRED-") {
if sar_captured {
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span,
text: trimmed.into(),
});
continue;
}
if let Some((marking, sar_spans)) = parse_sar_category(trimmed, abs_start) {
attrs.sar_markings = Some(marking);
token_spans.extend(sar_spans);
sar_captured = true;
continue;
}
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span,
text: trimmed.into(),
});
continue;
}
// From here on the first branch whose condition holds wins, so the order of
// the branches is significant.
if trimmed.starts_with("REL TO") || trimmed.starts_with("REL ") {
// REL TO block: one span for the whole block plus the per-entry spans that
// `parse_rel_to_with_spans` appends.
token_spans.push(TokenSpan {
kind: TokenKind::RelToBlock,
span,
text: trimmed.into(),
});
let parsed =
parse_rel_to_with_spans(trimmed, abs_start, self.tokens, &mut token_spans);
rel_to.extend(parsed.countries);
dissem.extend(parsed.trailing_dissem);
non_ic.extend(parsed.trailing_non_ic);
} else if (trimmed.contains('-')
|| trimmed.contains('/')
|| is_bare_cve_value(trimmed)
|| (is_valid_custom_control(trimmed)
&& trimmed.bytes().any(|b| b.is_ascii_digit())
&& !is_known_non_sci_token(trimmed)
&& !is_declass_date(trimmed)))
&& let Some(markings) = parse_sci_block(trimmed, abs_start, &mut token_spans)
{
// Structured SCI block. This branch is taken only when the block both looks
// like SCI and `parse_sci_block` accepts it; otherwise the chain continues.
for marking in &markings {
if let Some(ctrl) = marking.canonical_enum {
sci.push(ctrl);
}
}
sci_markings.extend(markings);
} else if let Some(ctrl) = SciControl::parse(trimmed) {
sci.push(ctrl);
token_spans.push(TokenSpan {
kind: TokenKind::SciControl,
span,
text: trimmed.into(),
});
} else if trimmed.starts_with("FGI")
&& matches!(attrs.classification, Some(MarkingClassification::Us(_)))
{
// FGI marker inside a US marking.
// NOTE(review): when `parse_fgi_marker` returns `None` (e.g. text that starts
// with "FGI" but is neither "FGI" nor "FGI <codes>"), nothing is recorded for
// this block, not even an `Unknown` span. Confirm that this is intended.
if let Some(marker) = parse_fgi_marker(trimmed) {
attrs.fgi_marker = Some(marker);
token_spans.push(TokenSpan {
kind: TokenKind::FgiMarker,
span,
text: trimmed.into(),
});
}
} else if let Some(ctrl) =
DissemControl::parse(trimmed).or_else(|| parse_dissem_full_form(trimmed))
{
dissem.push(ctrl);
token_spans.push(TokenSpan {
kind: TokenKind::DissemControl,
span,
text: trimmed.into(),
});
} else if let Some(nic) = parse_non_ic_full_form(trimmed) {
non_ic.push(nic);
token_spans.push(TokenSpan {
kind: TokenKind::NonIcDissem,
span,
text: trimmed.into(),
});
} else if let Some(aea_marking) = AeaMarking::parse(trimmed) {
aea.push(aea_marking);
token_spans.push(TokenSpan {
kind: TokenKind::AeaMarking,
span,
text: trimmed.into(),
});
} else if let Some(exemption) = DeclassExemption::parse(trimmed) {
attrs.declass_exemption = Some(exemption);
token_spans.push(TokenSpan {
kind: TokenKind::DeclassExemption,
span,
text: trimmed.into(),
});
} else if is_declass_date(trimmed) {
attrs.declassify_on = IsmDate::from_str(trimmed).ok();
token_spans.push(TokenSpan {
kind: TokenKind::DeclassDate,
span,
text: trimmed.into(),
});
} else if let Some(foreign) = try_parse_foreign_classification(trimmed) {
// A foreign classification appearing after a US classification turns the
// classification into a `Conflict` whose `us` level is the higher of the US
// level and the foreign equivalent. Without a US classification the block is
// recorded as `Unknown`.
if let Some(MarkingClassification::Us(us_level)) = attrs.classification {
let foreign_equiv = match &foreign {
ForeignClassification::Nato(n) => n.us_equivalent(),
ForeignClassification::Fgi(f) => f.level,
ForeignClassification::Joint(j) => j.level,
};
let max_level = us_level.max(foreign_equiv);
attrs.classification = Some(MarkingClassification::Conflict {
us: max_level,
foreign: Box::new(foreign),
});
token_spans.push(TokenSpan {
kind: TokenKind::Classification,
span,
text: trimmed.into(),
});
} else {
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span,
text: trimmed.into(),
});
}
} else if trimmed.contains('/') && !trimmed.starts_with("REL") {
// Fallback for a block holding several `/`-separated tokens: classify each
// sub-token on its own, then accept the block only if all recognised
// sub-tokens belong to the same category.
#[derive(Clone, Copy, PartialEq, Eq)]
enum SubKind {
Sci,
Dissem,
NonIc,
Aea,
Unknown,
}
// One classified sub-token; exactly the `Option` matching `kind` is `Some`
// (all are `None` for `Unknown`).
struct SubResult<'a> {
kind: SubKind,
tok: &'a str,
span: Span,
sci: Option<SciControl>,
dissem: Option<DissemControl>,
nic: Option<NonIcDissem>,
aea: Option<AeaMarking>,
}
let mut results: Vec<SubResult<'_>> = Vec::new();
for (sub_off, sub_tok) in split_slash_with_offsets(trimmed) {
let sub_abs_start = abs_start + sub_off;
let sub_span = Span::new(sub_abs_start, sub_abs_start + sub_tok.len());
if let Some(ctrl) = SciControl::parse(sub_tok) {
results.push(SubResult {
kind: SubKind::Sci,
tok: sub_tok,
span: sub_span,
sci: Some(ctrl),
dissem: None,
nic: None,
aea: None,
});
} else if let Some(ctrl) =
DissemControl::parse(sub_tok).or_else(|| parse_dissem_full_form(sub_tok))
{
results.push(SubResult {
kind: SubKind::Dissem,
tok: sub_tok,
span: sub_span,
sci: None,
dissem: Some(ctrl),
nic: None,
aea: None,
});
} else if let Some(nic) = parse_non_ic_full_form(sub_tok) {
results.push(SubResult {
kind: SubKind::NonIc,
tok: sub_tok,
span: sub_span,
sci: None,
dissem: None,
nic: Some(nic),
aea: None,
});
} else if let Some(aea_marking) = AeaMarking::parse(sub_tok) {
results.push(SubResult {
kind: SubKind::Aea,
tok: sub_tok,
span: sub_span,
sci: None,
dissem: None,
nic: None,
aea: Some(aea_marking),
});
} else {
results.push(SubResult {
kind: SubKind::Unknown,
tok: sub_tok,
span: sub_span,
sci: None,
dissem: None,
nic: None,
aea: None,
});
}
}
// Category of the first recognised sub-token, if any, and whether every
// other recognised sub-token shares it (`Unknown` sub-tokens are ignored).
let first_parsed_kind = results
.iter()
.find(|r| r.kind != SubKind::Unknown)
.map(|r| r.kind);
let all_same_category = first_parsed_kind.is_some_and(|first| {
results
.iter()
.filter(|r| r.kind != SubKind::Unknown)
.all(|r| r.kind == first)
});
if first_parsed_kind.is_some() && !all_same_category {
// Mixed categories: reject the whole block as a single `Unknown` span.
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span,
text: trimmed.into(),
});
} else {
// Homogeneous (or entirely unrecognised): emit one span per sub-token. The
// `unwrap` calls rely on the `kind`/`Option` pairing established above.
for r in results {
match r.kind {
SubKind::Sci => {
sci.push(r.sci.unwrap());
token_spans.push(TokenSpan {
kind: TokenKind::SciControl,
span: r.span,
text: r.tok.into(),
});
}
SubKind::Dissem => {
dissem.push(r.dissem.unwrap());
token_spans.push(TokenSpan {
kind: TokenKind::DissemControl,
span: r.span,
text: r.tok.into(),
});
}
SubKind::NonIc => {
non_ic.push(r.nic.unwrap());
token_spans.push(TokenSpan {
kind: TokenKind::NonIcDissem,
span: r.span,
text: r.tok.into(),
});
}
SubKind::Aea => {
aea.push(r.aea.unwrap());
token_spans.push(TokenSpan {
kind: TokenKind::AeaMarking,
span: r.span,
text: r.tok.into(),
});
}
SubKind::Unknown => {
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span: r.span,
text: r.tok.into(),
});
}
}
}
}
} else {
// Nothing recognised this block.
token_spans.push(TokenSpan {
kind: TokenKind::Unknown,
span,
text: trimmed.into(),
});
}
}
// Move the accumulated values into the attribute set.
attrs.sci_controls = sci.into_boxed_slice();
attrs.sci_markings = sci_markings.into_boxed_slice();
attrs.aea_markings = aea.into_boxed_slice();
attrs.dissem_controls = dissem.into_boxed_slice();
attrs.non_ic_dissem = non_ic.into_boxed_slice();
attrs.rel_to = rel_to.into_boxed_slice();
// Record every `//` separator as its own span, then order all spans by start
// offset (unstable sort: spans with equal starts keep no particular order).
for &sep_start in &separators {
token_spans.push(TokenSpan {
kind: TokenKind::Separator,
span: Span::new(s_offset + sep_start, s_offset + sep_start + 2),
text: "//".into(),
});
}
token_spans.sort_unstable_by_key(|ts| ts.span.start);
attrs.token_spans = token_spans.into_boxed_slice();
// `context` is accepted but not used by the current implementation.
let _ = context;
Ok(attrs)
}
}
/// Maps an exact US classification string (abbreviated or spelled out) to its
/// [`Classification`] level; any other text yields `None`.
fn parse_classification(s: &str) -> Option<Classification> {
    let level = match s {
        "TOP SECRET" | "TS" => Classification::TopSecret,
        "SECRET" | "S" => Classification::Secret,
        "CONFIDENTIAL" | "C" => Classification::Confidential,
        "RESTRICTED" | "R" => Classification::Restricted,
        "UNCLASSIFIED" | "U" => Classification::Unclassified,
        _ => return None,
    };
    Some(level)
}
/// Parses one block as a list of `/`-separated SCI markings.
///
/// Each chunk has the shape `CONTROL[-COMPARTMENT[ SUB ...]]...`: the control
/// system is the text before the first `-`, compartments are separated by `-`,
/// and sub-compartments follow their compartment separated by single spaces.
///
/// `base` is the offset added to every position inside `text` when building
/// spans. Spans are collected locally and appended to `tokens` only when the
/// whole block parses, so a `None` result leaves `tokens` untouched.
///
/// Returns `None` when `text` is empty or any chunk, compartment or
/// sub-compartment is malformed.
fn parse_sci_block(
text: &str,
base: usize,
tokens: &mut Vec<TokenSpan>,
) -> Option<Vec<SciMarking>> {
if text.is_empty() {
return None;
}
let mut local_tokens: Vec<TokenSpan> = Vec::new();
let mut markings: Vec<SciMarking> = Vec::new();
let mut chunk_start = 0usize;
// Split on '/' while remembering each chunk's byte offset within `text`.
let chunks: Vec<(usize, &str)> = {
let mut v = Vec::new();
for (i, ch) in text.char_indices() {
if ch == '/' {
v.push((chunk_start, &text[chunk_start..i]));
chunk_start = i + 1;
}
}
v.push((chunk_start, &text[chunk_start..]));
v
};
for (chunk_off, chunk) in chunks {
// Empty chunks (e.g. from a leading, trailing or doubled '/') and chunks
// without a control system before the first '-' invalidate the whole block.
if chunk.is_empty() {
return None;
}
if chunk.starts_with('-') {
return None;
}
let (ctrl_str, rest_opt) = match chunk.find('-') {
Some(i) => (&chunk[..i], Some(&chunk[i + 1..])),
None => (chunk, None),
};
if ctrl_str.is_empty() {
return None;
}
// A control system is either one `SciControlBare` recognises or a custom one:
// 2-5 uppercase/digit characters that are not a known non-SCI token.
let system: SciControlSystem = if let Some(bare) = SciControlBare::parse(ctrl_str) {
SciControlSystem::Published(bare)
} else if is_valid_custom_control(ctrl_str) && !is_known_non_sci_token(ctrl_str) {
SciControlSystem::Custom(ctrl_str.into())
} else {
return None;
};
// Two spans start at the chunk offset: one covering the whole chunk and one
// covering only the control-system part.
let chunk_abs = base + chunk_off;
local_tokens.push(TokenSpan {
kind: TokenKind::SciControl,
span: Span::new(chunk_abs, chunk_abs + chunk.len()),
text: chunk.into(),
});
let ctrl_abs = base + chunk_off;
local_tokens.push(TokenSpan {
kind: TokenKind::SciSystem,
span: Span::new(ctrl_abs, ctrl_abs + ctrl_str.len()),
text: ctrl_str.into(),
});
let mut compartments: Vec<SciCompartment> = Vec::new();
if let Some(rest) = rest_opt {
// `rest` begins right after the first '-', hence the `+ 1`.
let rest_abs_base = base + chunk_off + ctrl_str.len() + 1; let mut seg_start = 0usize;
let mut seg_offs: Vec<(usize, &str)> = Vec::new();
for (i, ch) in rest.char_indices() {
if ch == '-' {
seg_offs.push((seg_start, &rest[seg_start..i]));
seg_start = i + 1;
}
}
seg_offs.push((seg_start, &rest[seg_start..]));
for (seg_off, seg) in seg_offs {
if seg.is_empty() {
return None; }
// First space-separated word is the compartment id, the rest are
// sub-compartments. `split` always yields at least one item, so the
// `unwrap` cannot panic.
let mut parts = seg.split(' ');
let comp_id = parts.next().unwrap(); if comp_id.is_empty() || !is_alnum_upper(comp_id) {
return None;
}
let comp_abs = rest_abs_base + seg_off;
local_tokens.push(TokenSpan {
kind: TokenKind::SciCompartment,
span: Span::new(comp_abs, comp_abs + comp_id.len()),
text: comp_id.into(),
});
let mut subs: Vec<Box<str>> = Vec::new();
// `sub_cursor` is the offset of the next sub-compartment within `seg`; it
// skips the compartment id and one separating space. Consecutive spaces
// produce an empty sub-compartment, which is rejected below.
let mut sub_cursor = comp_id.len() + 1; for sub in parts {
if sub.is_empty() || !is_alnum_upper(sub) {
return None;
}
let sub_abs = rest_abs_base + seg_off + sub_cursor;
local_tokens.push(TokenSpan {
kind: TokenKind::SciSubCompartment,
span: Span::new(sub_abs, sub_abs + sub.len()),
text: sub.into(),
});
subs.push(sub.into());
sub_cursor += sub.len() + 1;
}
compartments.push(SciCompartment::new(comp_id.into(), subs.into_boxed_slice()));
}
}
// Canonical enum: with no compartments, look up the bare control; otherwise
// look up "CONTROL-COMPARTMENT" using the first compartment, but only when
// that compartment has no sub-compartments.
let canonical_enum = if compartments.is_empty() {
SciControl::parse(ctrl_str)
} else {
compartments
.first()
.filter(|c| c.sub_compartments.is_empty())
.and_then(|c| {
let composite = format!("{}-{}", ctrl_str, c.identifier);
SciControl::parse(&composite)
})
};
markings.push(SciMarking::new(
system,
compartments.into_boxed_slice(),
canonical_enum,
));
}
// Success: publish the spans collected for this block.
tokens.extend(local_tokens);
Some(markings)
}
/// True when `s` is 2 to 5 bytes long and consists only of ASCII uppercase
/// letters and digits.
fn is_valid_custom_control(s: &str) -> bool {
    matches!(s.len(), 2..=5) && is_alnum_upper(s)
}
/// True when `s` is non-empty and every byte is an ASCII uppercase letter or
/// an ASCII digit.
fn is_alnum_upper(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    s.bytes()
        .all(|byte| matches!(byte, b'A'..=b'Z' | b'0'..=b'9'))
}
/// True when `s` is recognised as a dissemination control (short or full
/// form), a non-IC dissemination marking, an AEA marking or a declassification
/// exemption. The checks run in that order and stop at the first match.
fn is_known_non_sci_token(s: &str) -> bool {
    if DissemControl::parse(s).is_some() {
        return true;
    }
    if parse_dissem_full_form(s).is_some() {
        return true;
    }
    if parse_non_ic_full_form(s).is_some() {
        return true;
    }
    if AeaMarking::parse(s).is_some() {
        return true;
    }
    DeclassExemption::parse(s).is_some()
}
/// Maps an exact NATO classification string, spelled out or abbreviated, to
/// its [`NatoClassification`]; any other text yields `None`.
fn parse_nato_classification(s: &str) -> Option<NatoClassification> {
    // Arms are grouped per variant: spelled-out form first, abbreviations after.
    let nato = match s {
        "COSMIC TOP SECRET ATOMAL" | "CTSA" | "CTS-A" => {
            NatoClassification::CosmicTopSecretAtomal
        }
        "COSMIC TOP SECRET-BOHEMIA" | "CTS-B" => NatoClassification::CosmicTopSecretBohemia,
        "COSMIC TOP SECRET-BALK" | "CTS-BALK" => NatoClassification::CosmicTopSecretBalk,
        "COSMIC TOP SECRET" | "CTS" => NatoClassification::CosmicTopSecret,
        "NATO SECRET ATOMAL" | "NSAT" | "NS-A" => NatoClassification::NatoSecretAtomal,
        "NATO SECRET" | "NS" => NatoClassification::NatoSecret,
        "NATO CONFIDENTIAL ATOMAL" | "NCA" | "NC-A" => {
            NatoClassification::NatoConfidentialAtomal
        }
        "NATO CONFIDENTIAL" | "NC" => NatoClassification::NatoConfidential,
        "NATO RESTRICTED" | "NR" => NatoClassification::NatoRestricted,
        "NATO UNCLASSIFIED" | "NU" => NatoClassification::NatoUnclassified,
        _ => return None,
    };
    Some(nato)
}
fn parse_joint_classification(s: &str) -> Option<JointClassification> {
let rest = s.strip_prefix("JOINT ")?;
let mut tokens = rest.split_whitespace();
let first = tokens.next()?;
let (level, remaining_start) = if first == "TOP" {
let mut peek_tokens = rest.split_whitespace();
peek_tokens.next(); if peek_tokens.next() == Some("SECRET") {
let level = parse_classification("TOP SECRET")?;
let after_ts = rest.find("SECRET").map(|i| i + "SECRET".len())?;
(level, after_ts)
} else {
return None; }
} else {
let level = parse_classification(first)?;
let after_level = rest.find(first).map(|i| i + first.len())?;
(level, after_level)
};
let country_str = rest[remaining_start..].trim();
let mut countries = Vec::new();
for token in country_str.split_whitespace() {
if token.len() == 3 {
if let Some(t) = CountryCode::try_new(token.as_bytes()) {
countries.push(t);
}
}
}
if countries.is_empty() {
return None; }
Some(JointClassification {
level,
countries: countries.into(),
})
}
/// Parses `[FGI] <country> ... <level>` into an [`FgiClassification`].
///
/// The level is the trailing word, or the trailing pair `TOP SECRET` when at
/// least one word precedes it. Words before the level must each be the literal
/// `FGI` (skipped) or a 3-byte code accepted by `CountryCode`; anything else
/// makes the result `None`. Fewer than two words is also `None`.
fn parse_fgi_classification(s: &str) -> Option<FgiClassification> {
    let words: Vec<&str> = s.split_whitespace().collect();
    if words.len() < 2 {
        return None;
    }
    let (level, country_words) = match words.as_slice() {
        [head @ .., "TOP", "SECRET"] if !head.is_empty() => {
            (parse_classification("TOP SECRET")?, head)
        }
        [head @ .., last] => (parse_classification(last)?, head),
        // Unreachable after the length check; present for exhaustiveness.
        [] => return None,
    };
    let mut countries = Vec::with_capacity(country_words.len());
    for &word in country_words {
        if word == "FGI" {
            continue;
        }
        if word.len() != 3 {
            return None;
        }
        countries.push(CountryCode::try_new(word.as_bytes())?);
    }
    Some(FgiClassification {
        countries: countries.into(),
        level,
    })
}
fn parse_fgi_marker(s: &str) -> Option<FgiMarker> {
if s == "FGI" {
return Some(FgiMarker {
countries: Box::new([]),
});
}
let rest = s.strip_prefix("FGI ")?;
let mut countries = Vec::new();
for token in rest.split_whitespace() {
if token.len() == 3 {
if let Some(t) = CountryCode::try_new(token.as_bytes()) {
countries.push(t);
}
}
}
Some(FgiMarker {
countries: countries.into(),
})
}
/// Tries `s` as a NATO, then JOINT, then FGI classification and wraps the
/// first success in the matching [`ForeignClassification`] variant.
fn try_parse_foreign_classification(s: &str) -> Option<ForeignClassification> {
    parse_nato_classification(s)
        .map(ForeignClassification::Nato)
        .or_else(|| parse_joint_classification(s).map(ForeignClassification::Joint))
        .or_else(|| parse_fgi_classification(s).map(ForeignClassification::Fgi))
}
/// Resolves a banner-form or title-form dissemination control: the text is
/// converted to its portion form (banner lookup first, title lookup second)
/// and that portion form is parsed as a [`DissemControl`].
fn parse_dissem_full_form(s: &str) -> Option<DissemControl> {
    marque_ism::marking_forms::banner_to_portion(s)
        .or_else(|| marque_ism::marking_forms::title_to_portion(s))
        .and_then(|portion| DissemControl::parse(portion))
}
/// Parses a non-IC dissemination marking, first from `s` as written and, if
/// that fails, from the portion form that `title_to_portion` yields for `s`.
fn parse_non_ic_full_form(s: &str) -> Option<NonIcDissem> {
    if let Some(direct) = NonIcDissem::parse(s) {
        return Some(direct);
    }
    let portion = marque_ism::marking_forms::title_to_portion(s)?;
    NonIcDissem::parse(portion)
}
/// Values extracted from a `REL TO` block by `parse_rel_to_with_spans`.
struct RelToParseResult {
/// Country codes accepted from the comma-separated list, in source order.
countries: Vec<CountryCode>,
/// Dissemination controls found after a `/` inside a list entry.
trailing_dissem: Vec<DissemControl>,
/// Non-IC dissemination markings found after a `/` inside a list entry.
trailing_non_ic: Vec<NonIcDissem>,
}
/// Parses the contents of a `REL TO` (or `REL`) block and records a token span
/// for every entry.
///
/// `block` is the trimmed block text including its `REL TO`/`REL` prefix and
/// `block_offset` is the offset added to every position inside `block` when
/// building spans. Entries are separated by commas. An entry may carry
/// `/`-separated dissemination or non-IC controls after the country code
/// (e.g. `GBR/NF`); those are returned separately from the countries.
///
/// Spans appended to `token_spans`:
/// * `RelToTrigraph` for each entry that `tokens.is_trigraph` recognises and
///   `CountryCode` accepts,
/// * `DissemControl` / `NonIcDissem` for recognised trailing controls,
/// * `Unknown` for anything else that is non-empty.
///
/// Every span covers exactly the trimmed token text, including when whitespace
/// surrounds a trailing control (`USA/ NF`, `USA/NF /OC`).
fn parse_rel_to_with_spans(
    block: &str,
    block_offset: usize,
    tokens: &dyn TokenSet,
    token_spans: &mut Vec<TokenSpan>,
) -> RelToParseResult {
    // Number of bytes taken by the `REL TO` / `REL` prefix (0 when absent).
    let prefix_skip = if let Some(rest) = block.strip_prefix("REL TO") {
        block.len() - rest.len()
    } else if let Some(rest) = block.strip_prefix("REL") {
        block.len() - rest.len()
    } else {
        0
    };
    let after_rel = &block[prefix_skip..];
    let mut countries: Vec<CountryCode> = Vec::new();
    let mut trailing_dissem: Vec<DissemControl> = Vec::new();
    let mut trailing_non_ic: Vec<NonIcDissem> = Vec::new();
    // Offset of the current raw (untrimmed) entry within `after_rel`.
    let mut cursor = 0usize;
    for entry in after_rel.split(',') {
        let entry_start_in_after = cursor;
        cursor += entry.len() + 1;
        let trim_lead = entry.len() - entry.trim_start().len();
        let trimmed = entry.trim();
        if trimmed.is_empty() {
            continue;
        }
        let abs_start = block_offset + prefix_skip + entry_start_in_after + trim_lead;
        if let Some(slash_pos) = trimmed.find('/') {
            // Entry of the form `<country>/<control>[/<control>...]`.
            let country_part = trimmed[..slash_pos].trim();
            if !country_part.is_empty() {
                if tokens.is_trigraph(country_part) {
                    if let Some(t) = CountryCode::try_new(country_part.as_bytes()) {
                        countries.push(t);
                        token_spans.push(TokenSpan {
                            kind: TokenKind::RelToTrigraph,
                            span: Span::new(abs_start, abs_start + country_part.len()),
                            text: country_part.into(),
                        });
                    }
                } else {
                    token_spans.push(TokenSpan {
                        kind: TokenKind::Unknown,
                        span: Span::new(abs_start, abs_start + country_part.len()),
                        text: country_part.into(),
                    });
                }
            }
            // The tail is deliberately left untrimmed: `tail_base` is the offset
            // of the byte right after the slash, so whitespace there must be
            // counted per part for the spans to line up with the source.
            let tail = &trimmed[slash_pos + 1..];
            let tail_base = abs_start + slash_pos + 1;
            let mut tail_cursor = 0usize;
            for raw_part in tail.split('/') {
                let part_trim_lead = raw_part.len() - raw_part.trim_start().len();
                let part_abs = tail_base + tail_cursor + part_trim_lead;
                // Advance by the raw length so trailing whitespace in this part
                // does not shift the spans of the parts that follow.
                tail_cursor += raw_part.len() + 1;
                let part = raw_part.trim();
                if part.is_empty() {
                    continue;
                }
                if let Some(ctrl) =
                    DissemControl::parse(part).or_else(|| parse_dissem_full_form(part))
                {
                    trailing_dissem.push(ctrl);
                    token_spans.push(TokenSpan {
                        kind: TokenKind::DissemControl,
                        span: Span::new(part_abs, part_abs + part.len()),
                        text: part.into(),
                    });
                } else if let Some(nic) = parse_non_ic_full_form(part) {
                    trailing_non_ic.push(nic);
                    token_spans.push(TokenSpan {
                        kind: TokenKind::NonIcDissem,
                        span: Span::new(part_abs, part_abs + part.len()),
                        text: part.into(),
                    });
                } else {
                    token_spans.push(TokenSpan {
                        kind: TokenKind::Unknown,
                        span: Span::new(part_abs, part_abs + part.len()),
                        text: part.into(),
                    });
                }
            }
            continue;
        }
        // Plain entry: must be a code the token set recognises.
        if !tokens.is_trigraph(trimmed) {
            token_spans.push(TokenSpan {
                kind: TokenKind::Unknown,
                span: Span::new(abs_start, abs_start + trimmed.len()),
                text: trimmed.into(),
            });
            continue;
        }
        let Some(t) = CountryCode::try_new(trimmed.as_bytes()) else {
            continue;
        };
        countries.push(t);
        token_spans.push(TokenSpan {
            kind: TokenKind::RelToTrigraph,
            span: Span::new(abs_start, abs_start + trimmed.len()),
            text: trimmed.into(),
        });
    }
    RelToParseResult {
        countries,
        trailing_dissem,
        trailing_non_ic,
    }
}
/// True when `s` is exactly 4 or 8 ASCII digits and `IsmDate` accepts it.
fn is_declass_date(s: &str) -> bool {
    let length_ok = s.len() == 4 || s.len() == 8;
    let digits_only = s.bytes().all(|byte| byte.is_ascii_digit());
    length_ok && digits_only && IsmDate::from_str(s).is_ok()
}
/// Splits `s` on `/` and returns each non-blank piece, trimmed, together with
/// the byte offset of its first non-whitespace character within `s`.
fn split_slash_with_offsets(s: &str) -> Vec<(usize, &str)> {
    s.split('/')
        // Pair every raw piece with its starting offset; `+ 1` skips the slash.
        .scan(0usize, |next_start, raw| {
            let start = *next_start;
            *next_start += raw.len() + 1;
            Some((start, raw))
        })
        .filter_map(|(start, raw)| {
            let body = raw.trim();
            let leading_ws = raw.len() - raw.trim_start().len();
            (!body.is_empty()).then(|| (start + leading_ws, body))
        })
        .collect()
}
/// Parses a SAR block (`SAR-...` or `SPECIAL ACCESS REQUIRED-...`) into a
/// [`SarMarking`] plus the token spans it is made of.
///
/// Programs are separated by `/`. `base` is the offset added to positions
/// inside `block_text` when building spans. Returns `None` when the text
/// contains `//`, lacks a SAR indicator, has nothing after the indicator, or
/// any program fails to parse.
fn parse_sar_category(block_text: &str, base: usize) -> Option<(SarMarking, Vec<TokenSpan>)> {
    if block_text.contains("//") {
        return None;
    }
    let (indicator, indicator_lit, rest) =
        if let Some(rest) = block_text.strip_prefix("SPECIAL ACCESS REQUIRED-") {
            (SarIndicator::Full, "SPECIAL ACCESS REQUIRED-", rest)
        } else if let Some(rest) = block_text.strip_prefix("SAR-") {
            (SarIndicator::Abbrev, "SAR-", rest)
        } else {
            return None;
        };
    if rest.is_empty() {
        return None;
    }
    let mut spans: Vec<TokenSpan> = vec![TokenSpan {
        kind: TokenKind::SarIndicator,
        span: Span::new(base, base + indicator_lit.len()),
        text: indicator_lit.into(),
    }];
    let mut programs: Vec<SarProgram> = Vec::new();
    // Offset of the program chunk currently being parsed.
    let mut program_base = base + indicator_lit.len();
    for prog_chunk in rest.split('/') {
        programs.push(parse_sar_program(
            prog_chunk,
            program_base,
            indicator,
            &mut spans,
        )?);
        // Step over this chunk and the slash that follows it.
        program_base += prog_chunk.len() + 1;
    }
    if programs.is_empty() {
        return None;
    }
    let marking = SarMarking::new(indicator, programs.into_boxed_slice());
    Some((marking, spans))
}
/// Parses one SAR program chunk of the form
/// `PROGRAM[-COMPARTMENT[ SUB ...]]...` and appends its spans to `spans`.
///
/// `base` is the offset added to positions inside `chunk`. With the
/// abbreviated indicator the program id must be 2 or 3 ASCII alphanumerics;
/// with the full indicator it may contain only ASCII uppercase letters and
/// spaces and must not be all spaces. Compartment and sub-compartment ids must
/// be non-empty ASCII alphanumerics.
///
/// Returns `None` on any malformed part. Spans pushed before the failure was
/// detected remain in `spans`.
fn parse_sar_program(
    chunk: &str,
    base: usize,
    indicator: SarIndicator,
    spans: &mut Vec<TokenSpan>,
) -> Option<SarProgram> {
    if chunk.is_empty() {
        return None;
    }
    let is_alnum_id = |id: &str| !id.is_empty() && id.bytes().all(|b| b.is_ascii_alphanumeric());

    // First dash-separated piece is the program id; the rest are compartments.
    let mut pieces = split_with_offsets(chunk, '-').into_iter();
    let (prog_off, prog_id) = pieces.next()?;
    if prog_id.is_empty() {
        return None;
    }
    let prog_shape_ok = match indicator {
        SarIndicator::Abbrev => {
            matches!(prog_id.len(), 2 | 3) && prog_id.bytes().all(|b| b.is_ascii_alphanumeric())
        }
        SarIndicator::Full => {
            let only_upper_or_space = prog_id.bytes().all(|b| matches!(b, b' ' | b'A'..=b'Z'));
            let only_spaces = prog_id.bytes().all(|b| b == b' ');
            only_upper_or_space && !only_spaces
        }
    };
    if !prog_shape_ok {
        return None;
    }
    let prog_start = base + prog_off;
    spans.push(TokenSpan {
        kind: TokenKind::SarProgram,
        span: Span::new(prog_start, prog_start + prog_id.len()),
        text: prog_id.into(),
    });

    let mut compartments: Vec<SarCompartment> = Vec::with_capacity(pieces.len());
    for (seg_off, seg) in pieces {
        if seg.is_empty() {
            return None;
        }
        // Within a segment the first word is the compartment id and the
        // remaining words are sub-compartments. Splitting always yields at
        // least one word, so `next()` cannot come back empty here.
        let mut words = split_with_offsets(seg, ' ').into_iter();
        let (comp_rel_off, comp_id) = words.next()?;
        if !is_alnum_id(comp_id) {
            return None;
        }
        let comp_start = base + seg_off + comp_rel_off;
        spans.push(TokenSpan {
            kind: TokenKind::SarCompartment,
            span: Span::new(comp_start, comp_start + comp_id.len()),
            text: comp_id.into(),
        });
        let mut subs: Vec<Box<str>> = Vec::with_capacity(words.len());
        for (sub_rel_off, sub_id) in words {
            if !is_alnum_id(sub_id) {
                return None;
            }
            let sub_start = base + seg_off + sub_rel_off;
            spans.push(TokenSpan {
                kind: TokenKind::SarSubCompartment,
                span: Span::new(sub_start, sub_start + sub_id.len()),
                text: sub_id.into(),
            });
            subs.push(sub_id.into());
        }
        compartments.push(SarCompartment::new(comp_id.into(), subs.into_boxed_slice()));
    }
    Some(SarProgram::new(
        prog_id.into(),
        compartments.into_boxed_slice(),
    ))
}
/// Splits `s` on `delim` and pairs every piece (empty ones included) with its
/// byte offset within `s`.
fn split_with_offsets(s: &str, delim: char) -> Vec<(usize, &str)> {
    let delim_width = delim.len_utf8();
    let mut next_start = 0usize;
    s.split(delim)
        .map(|piece| {
            let start = next_start;
            next_start += piece.len() + delim_width;
            (start, piece)
        })
        .collect()
}
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
use super::*;
use marque_ism::span::{MarkingCandidate, MarkingType, Span};
use marque_ism::token_set::CapcoTokenSet;
/// Builds a candidate of `kind` whose span starts at `offset` and is as long
/// as `text`.
fn make_candidate(text: &[u8], kind: MarkingType, offset: usize) -> MarkingCandidate {
    let end = offset + text.len();
    let span = Span::new(offset, end);
    MarkingCandidate { span, kind }
}
/// Parses `text` as a banner starting at offset 0; panics if parsing fails.
fn parse_banner(text: &str) -> ParsedMarking {
    let bytes = text.as_bytes();
    let candidate = make_candidate(bytes, MarkingType::Banner, 0);
    let token_set = CapcoTokenSet;
    Parser::new(&token_set)
        .parse(&candidate, bytes)
        .expect("parse should succeed")
}
/// Parses `text` as a portion marking starting at offset 0; panics if parsing
/// fails.
fn parse_portion(text: &str) -> ParsedMarking {
    let bytes = text.as_bytes();
    let candidate = make_candidate(bytes, MarkingType::Portion, 0);
    let token_set = CapcoTokenSet;
    Parser::new(&token_set)
        .parse(&candidate, bytes)
        .expect("parse should succeed")
}
// A `25X1` block in a banner must land in `declass_exemption`.
#[test]
fn banner_with_declass_exemption_populates_attrs() {
    use marque_ism::DeclassExemption;
    let attrs = parse_banner("SECRET//25X1//NOFORN").attrs;
    assert!(
        attrs.declass_exemption.is_some(),
        "declass_exemption should be populated when 25X1 appears in banner"
    );
    assert_eq!(attrs.declass_exemption, Some(DeclassExemption::X25x1));
}
// A `50X1-HUM` block in a portion marking must land in `declass_exemption`.
#[test]
fn portion_with_declass_exemption_populates_attrs() {
    let exemption = parse_portion("(SECRET//50X1-HUM)").attrs.declass_exemption;
    assert!(exemption.is_some());
}
// A YYYYMMDD block in a banner must be parsed into `declassify_on`.
#[test]
fn banner_with_declass_date_populates_attrs() {
    let expected = Some(marque_ism::IsmDate::Date(2030, 12, 31));
    let attrs = parse_banner("SECRET//20301231//NOFORN").attrs;
    assert_eq!(
        attrs.declassify_on, expected,
        "declassify_on should be populated when YYYYMMDD appears in banner"
    );
}
// A bare four-digit year is accepted as a declassification date.
#[test]
fn banner_with_four_digit_year_populates_attrs() {
    let attrs = parse_banner("SECRET//2035").attrs;
    let expected = Some(marque_ism::IsmDate::Year(2035));
    assert_eq!(attrs.declassify_on, expected);
}
// Without any declassification block both declass fields stay empty.
#[test]
fn banner_without_declass_leaves_fields_none() {
    let attrs = parse_banner("TOP SECRET//SI//NOFORN").attrs;
    assert!(attrs.declassify_on.is_none());
    assert!(attrs.declass_exemption.is_none());
}
// Eight digits forming a real calendar date are accepted.
#[test]
fn is_declass_date_accepts_yyyymmdd() {
    let accepted = is_declass_date("20301231");
    assert!(accepted);
}
// Four digits (year only) are accepted.
#[test]
fn is_declass_date_accepts_yyyy() {
    let accepted = is_declass_date("2035");
    assert!(accepted);
}
// Any non-digit character disqualifies the candidate.
#[test]
fn is_declass_date_rejects_non_digit() {
    for candidate in ["2030X231", "YYYYMMDD"] {
        assert!(!is_declass_date(candidate));
    }
}
// Only lengths 4 and 8 are considered; 6 and 9 digits are rejected.
#[test]
fn is_declass_date_rejects_wrong_length() {
    for candidate in ["203012", "203012311"] {
        assert!(!is_declass_date(candidate));
    }
}
// Well-formed digit strings that are not real calendar dates are rejected.
#[test]
fn is_declass_date_rejects_impossible_calendar_dates() {
    for candidate in ["20301340", "20300100", "20030231", "20030431"] {
        assert!(!is_declass_date(candidate));
    }
}
// Every token kind in a simple banner is recorded, and each span maps back to
// the exact source text.
#[test]
fn token_spans_track_offsets_in_banner() {
    let src = b"TOP SECRET//SI//NF";
    let parsed = parse_banner("TOP SECRET//SI//NF");
    let spans = &parsed.attrs.token_spans;
    for expected_kind in [
        TokenKind::Separator,
        TokenKind::Classification,
        TokenKind::SciControl,
        TokenKind::DissemControl,
    ] {
        assert!(spans.iter().any(|t| t.kind == expected_kind));
    }
    for (kind, expected_text) in [
        (TokenKind::Classification, "TOP SECRET"),
        (TokenKind::SciControl, "SI"),
        (TokenKind::DissemControl, "NF"),
    ] {
        let token = spans.iter().find(|t| t.kind == kind).unwrap();
        assert_eq!(token.span.as_str(src).unwrap(), expected_text);
    }
}
// Portion spans are offset past the opening parenthesis.
#[test]
fn token_spans_strip_paren_in_portion() {
    let src = b"(SECRET//NF)";
    let parsed = parse_portion("(SECRET//NF)");
    let spans = &parsed.attrs.token_spans;

    let classification = spans
        .iter()
        .find(|t| t.kind == TokenKind::Classification)
        .unwrap();
    assert_eq!(classification.span.start, 1);
    assert_eq!(classification.span.end, 7);
    assert_eq!(classification.span.as_str(src).unwrap(), "SECRET");

    let dissem = spans
        .iter()
        .find(|t| t.kind == TokenKind::DissemControl)
        .unwrap();
    assert_eq!(dissem.span.start, 9);
    assert_eq!(dissem.span.end, 11);
}
// An unrecognised block yields exactly one `Unknown` span covering it.
#[test]
fn token_spans_record_unknown_token() {
    let parsed = parse_banner("SECRET//XYZZY//NOFORN");
    let mut unknowns = parsed
        .attrs
        .token_spans
        .iter()
        .filter(|t| t.kind == TokenKind::Unknown);
    let first = unknowns.next();
    // Exactly one unknown span: a first one exists and nothing follows it.
    assert_eq!(first.is_some() as usize + unknowns.count(), 1);
    assert_eq!(
        first.unwrap().span.as_str(b"SECRET//XYZZY//NOFORN").unwrap(),
        "XYZZY"
    );
}
// Each REL TO country gets its own span pointing at the right source text.
#[test]
fn token_spans_record_rel_to_trigraphs() {
    let src = b"SECRET//REL TO USA, GBR, AUS";
    let parsed = parse_banner("SECRET//REL TO USA, GBR, AUS");
    let texts: Vec<&str> = parsed
        .attrs
        .token_spans
        .iter()
        .filter(|t| t.kind == TokenKind::RelToTrigraph)
        .map(|t| t.span.as_str(src).unwrap())
        .collect();
    assert_eq!(texts.len(), 3);
    assert_eq!(texts[0], "USA");
    assert_eq!(texts[1], "GBR");
    assert_eq!(texts[2], "AUS");
}
// The FVEY tetragraph is kept in `rel_to` alongside trigraphs.
#[test]
fn rel_to_preserves_tetragraph_fvey() {
    let attrs = parse_banner("SECRET//REL TO USA, FVEY, GBR").attrs;
    let codes: Vec<&str> = attrs.rel_to.iter().map(|code| code.as_str()).collect();
    assert_eq!(
        codes,
        vec!["USA", "FVEY", "GBR"],
        "FVEY tetragraph must land in rel_to (issue #183 silent-drop fix)"
    );
}
// NATO is kept in `rel_to` even though it is not expanded into members.
#[test]
fn rel_to_preserves_opaque_tetragraph_nato() {
    let attrs = parse_banner("SECRET//REL TO USA, NATO, GBR").attrs;
    let codes: Vec<&str> = attrs.rel_to.iter().map(|code| code.as_str()).collect();
    assert_eq!(
        codes,
        vec!["USA", "NATO", "GBR"],
        "NATO is in CVE TRIGRAPHS recognition set; rel_to must preserve it \
         even though membership expansion is deferred to Phase F"
    );
}
/// The two-byte code `EU` survives parsing and appears in `rel_to`.
#[test]
fn rel_to_preserves_two_byte_eu() {
    let banner = parse_banner("SECRET//REL TO USA, EU");
    let listed = banner
        .attrs
        .rel_to
        .iter()
        .map(|code| code.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(
        listed,
        ["USA", "EU"],
        "EU (2-byte CVE entry) must round-trip through the parser"
    );
}
/// The long, underscore-containing code `AUSTRALIA_GROUP` survives parsing
/// and appears in `rel_to`.
#[test]
fn rel_to_preserves_long_australia_group() {
    let banner = parse_banner("SECRET//REL TO USA, AUSTRALIA_GROUP");
    let listed = banner
        .attrs
        .rel_to
        .iter()
        .map(|code| code.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(
        listed,
        ["USA", "AUSTRALIA_GROUP"],
        "AUSTRALIA_GROUP (15-byte CVE entry, contains underscore) \
         must round-trip through the parser"
    );
}
/// `RelToTrigraph` spans must cover each code's real byte length rather than
/// a fixed three bytes: `FVEY` is 4 bytes and `AUSTRALIA_GROUP` is 15.
#[test]
fn rel_to_token_span_widens_to_actual_code_length() {
    let src = b"SECRET//REL TO USA, FVEY, AUSTRALIA_GROUP";
    let parsed = parse_banner("SECRET//REL TO USA, FVEY, AUSTRALIA_GROUP");
    let trigraph_spans: Vec<&TokenSpan> = parsed
        .attrs
        .token_spans
        .iter()
        .filter(|t| t.kind == TokenKind::RelToTrigraph)
        .collect();

    // Pin the count before indexing: a missing span would otherwise surface
    // as an opaque index-out-of-bounds panic, and an extra (spurious) span
    // would go unnoticed. Mirrors `token_spans_record_rel_to_trigraphs`.
    assert_eq!(
        trigraph_spans.len(),
        3,
        "expected one RelToTrigraph span per listed code, got: {trigraph_spans:?}"
    );

    assert_eq!(trigraph_spans[0].span.as_str(src).unwrap(), "USA");
    assert_eq!(trigraph_spans[1].span.as_str(src).unwrap(), "FVEY");
    assert_eq!(
        trigraph_spans[2].span.as_str(src).unwrap(),
        "AUSTRALIA_GROUP"
    );
}
/// An unrecognized entry (`XYZQ`) in a `REL TO` list is left out of
/// `rel_to`; the recognized neighbours are kept and parsing still succeeds.
#[test]
fn rel_to_drops_unrecognized_token_silently() {
    let banner = parse_banner("SECRET//REL TO USA, XYZQ, GBR");
    let listed = banner
        .attrs
        .rel_to
        .iter()
        .map(|code| code.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(listed, ["USA", "GBR"]);
}
/// The `//` between blocks is recorded as a `Separator` token span.
#[test]
fn token_spans_record_separators() {
    let text = "SECRET//NF";
    let parsed = parse_banner(text);
    let separators: Vec<&TokenSpan> = parsed
        .attrs
        .token_spans
        .iter()
        .filter(|tok| tok.kind == TokenKind::Separator)
        .collect();
    assert_eq!(separators.len(), 1);

    let covered = separators[0].span.as_str(text.as_bytes()).unwrap();
    assert_eq!(covered, "//");
}
/// Table-driven check: every spelled-out NATO banner classification maps to
/// its `NatoClassification` variant.
#[test]
fn nato_banner_parses_all_variants() {
    let cases = [
        ("//NATO UNCLASSIFIED", NatoClassification::NatoUnclassified),
        ("//NATO RESTRICTED", NatoClassification::NatoRestricted),
        ("//NATO CONFIDENTIAL", NatoClassification::NatoConfidential),
        (
            "//NATO CONFIDENTIAL ATOMAL",
            NatoClassification::NatoConfidentialAtomal,
        ),
        ("//NATO SECRET", NatoClassification::NatoSecret),
        ("//NATO SECRET ATOMAL", NatoClassification::NatoSecretAtomal),
        ("//COSMIC TOP SECRET", NatoClassification::CosmicTopSecret),
        (
            "//COSMIC TOP SECRET ATOMAL",
            NatoClassification::CosmicTopSecretAtomal,
        ),
        (
            "//COSMIC TOP SECRET-BOHEMIA",
            NatoClassification::CosmicTopSecretBohemia,
        ),
        (
            "//COSMIC TOP SECRET-BALK",
            NatoClassification::CosmicTopSecretBalk,
        ),
    ];

    for (input, expected) in cases {
        let want = Some(MarkingClassification::Nato(expected));
        let got = parse_banner(input);
        assert_eq!(got.attrs.classification, want, "failed for banner: {input}");
    }
}
/// Table-driven check: every abbreviated NATO portion marking, including the
/// alternate spellings listed here, maps to its `NatoClassification` variant.
#[test]
fn nato_portion_parses_all_variants() {
    let cases = [
        ("(//NU)", NatoClassification::NatoUnclassified),
        ("(//NR)", NatoClassification::NatoRestricted),
        ("(//NC)", NatoClassification::NatoConfidential),
        ("(//NCA)", NatoClassification::NatoConfidentialAtomal),
        ("(//NC-A)", NatoClassification::NatoConfidentialAtomal),
        ("(//NS)", NatoClassification::NatoSecret),
        ("(//NSAT)", NatoClassification::NatoSecretAtomal),
        ("(//NS-A)", NatoClassification::NatoSecretAtomal),
        ("(//CTS)", NatoClassification::CosmicTopSecret),
        ("(//CTSA)", NatoClassification::CosmicTopSecretAtomal),
        ("(//CTS-A)", NatoClassification::CosmicTopSecretAtomal),
        ("(//CTS-B)", NatoClassification::CosmicTopSecretBohemia),
        ("(//CTS-BALK)", NatoClassification::CosmicTopSecretBalk),
    ];

    for (input, expected) in cases {
        let want = Some(MarkingClassification::Nato(expected));
        let got = parse_portion(input);
        assert_eq!(got.attrs.classification, want, "failed for portion: {input}");
    }
}
/// A NATO classification followed by a `REL TO` block yields both the NATO
/// classification and the release list.
#[test]
fn nato_banner_with_rel_to() {
    let parsed = parse_banner("//NATO SECRET//REL TO USA, GBR");

    let expected = Some(MarkingClassification::Nato(NatoClassification::NatoSecret));
    assert_eq!(parsed.attrs.classification, expected);

    let rel_to = &parsed.attrs.rel_to;
    assert_eq!(rel_to.len(), 2);
    assert_eq!(rel_to[0], CountryCode::USA);
}
/// `//JOINT S USA GBR` parses as a JOINT classification at Secret with the
/// two listed countries in order.
#[test]
fn joint_banner_parses_correctly() {
    let parsed = parse_banner("//JOINT S USA GBR");
    let joint = match &parsed.attrs.classification {
        Some(MarkingClassification::Joint(joint)) => joint,
        other => panic!("expected Joint, got: {other:?}"),
    };

    assert_eq!(joint.level, Classification::Secret);
    assert_eq!(joint.countries.len(), 2);
    assert_eq!(joint.countries[0], CountryCode::USA);
    assert_eq!(joint.countries[1].as_str(), "GBR");
}
/// The two-word level `TOP SECRET` is accepted in a JOINT banner and is not
/// mistaken for country codes.
#[test]
fn joint_banner_parses_top_secret_multi_word_level() {
    let parsed = parse_banner("//JOINT TOP SECRET USA GBR");
    let joint = match &parsed.attrs.classification {
        Some(MarkingClassification::Joint(joint)) => joint,
        other => panic!("expected Joint(TopSecret), got: {other:?}"),
    };

    assert_eq!(joint.level, Classification::TopSecret);
    assert_eq!(joint.countries.len(), 2);
    assert_eq!(joint.countries[0], CountryCode::USA);
    assert_eq!(joint.countries[1].as_str(), "GBR");
}
/// `TOP` without a following `SECRET` is not a level, so the banner must not
/// come back as a JOINT classification.
#[test]
fn joint_banner_rejects_bare_top_without_secret() {
    let parsed = parse_banner("//JOINT TOP USA GBR");
    let parsed_as_joint = matches!(
        parsed.attrs.classification,
        Some(MarkingClassification::Joint(_))
    );
    assert!(
        !parsed_as_joint,
        "bare TOP must not parse as a JOINT classification"
    );
}
/// A JOINT portion with three countries followed by a `REL TO` block yields
/// a Top Secret JOINT classification and a three-entry release list.
#[test]
fn joint_portion_with_rel_to() {
    let parsed = parse_portion("(//JOINT TS USA AUS GBR//REL TO USA, AUS, GBR)");
    let joint = match &parsed.attrs.classification {
        Some(MarkingClassification::Joint(joint)) => joint,
        other => panic!("expected Joint, got: {other:?}"),
    };

    assert_eq!(joint.level, Classification::TopSecret);
    assert_eq!(joint.countries.len(), 3);
    assert_eq!(parsed.attrs.rel_to.len(), 3);
}
/// `//GBR S` parses as an FGI classification: Secret, one country (`GBR`).
#[test]
fn fgi_single_country_parses() {
    let parsed = parse_portion("(//GBR S//NF)");
    let fgi = match &parsed.attrs.classification {
        Some(MarkingClassification::Fgi(fgi)) => fgi,
        other => panic!("expected Fgi, got: {other:?}"),
    };

    assert_eq!(fgi.level, Classification::Secret);
    assert_eq!(fgi.countries.len(), 1);
    assert_eq!(fgi.countries[0].as_str(), "GBR");
}
/// `//GBR DEU TS` parses as an FGI classification: Top Secret, two countries.
#[test]
fn fgi_multiple_countries_parses() {
    let parsed = parse_banner("//GBR DEU TS//NF");
    let fgi = match &parsed.attrs.classification {
        Some(MarkingClassification::Fgi(fgi)) => fgi,
        other => panic!("expected Fgi, got: {other:?}"),
    };

    assert_eq!(fgi.level, Classification::TopSecret);
    assert_eq!(fgi.countries.len(), 2);
}
/// The literal `FGI` in the country position is a placeholder: the result is
/// an FGI classification with an empty country list.
#[test]
fn fgi_placeholder_country_parses() {
    let parsed = parse_portion("(//FGI S//NF)");
    let fgi = match &parsed.attrs.classification {
        Some(MarkingClassification::Fgi(fgi)) => fgi,
        other => panic!("expected Fgi, got: {other:?}"),
    };

    assert_eq!(fgi.level, Classification::Secret);
    assert!(
        fgi.countries.is_empty(),
        "FGI placeholder should have no countries"
    );
}
/// A mixed-case country code (`Gbr`) must not be accepted as an FGI
/// classification.
#[test]
fn fgi_non_uppercase_trigraph_rejected() {
    let parsed = parse_banner("//Gbr S//NF");
    let parsed_as_fgi = matches!(
        parsed.attrs.classification,
        Some(MarkingClassification::Fgi(_))
    );
    assert!(
        !parsed_as_fgi,
        "Gbr should not parse as a valid FGI classification: {:?}",
        parsed.attrs.classification,
    );
}
/// `//FGI` with no classification level must not yield a foreign (non-US)
/// classification: the result is either absent or a US classification.
#[test]
fn fgi_no_level_is_error() {
    let parsed = parse_banner("//FGI//NF");
    let absent_or_us = matches!(
        parsed.attrs.classification,
        None | Some(MarkingClassification::Us(_))
    );
    assert!(
        absent_or_us,
        "bare FGI with no level should not produce a valid non-US classification: {:?}",
        parsed.attrs.classification,
    );
}
/// In a US marking, an `FGI DEU` block is recorded as an FGI marker carrying
/// the country, while the classification stays US Secret.
#[test]
fn fgi_marker_in_us_marking() {
    let parsed = parse_banner("SECRET//FGI DEU//NOFORN");
    let attrs = &parsed.attrs;

    let expected = Some(MarkingClassification::Us(Classification::Secret));
    assert_eq!(attrs.classification, expected);

    let fgi_marker = attrs.fgi_marker.as_ref().expect("should have FGI marker");
    assert_eq!(fgi_marker.countries.len(), 1);
    assert_eq!(fgi_marker.countries[0].as_str(), "DEU");
}
/// A bare `FGI` block in a US marking produces an FGI marker with no
/// countries.
#[test]
fn fgi_marker_no_countries() {
    let parsed = parse_banner("SECRET//FGI//NOFORN");
    let attrs = &parsed.attrs;

    let expected = Some(MarkingClassification::Us(Classification::Secret));
    assert_eq!(attrs.classification, expected);

    let fgi_marker = attrs.fgi_marker.as_ref().expect("should have FGI marker");
    assert!(fgi_marker.countries.is_empty());
}
/// A banner carrying both a US and a NATO classification is reported as a
/// `Conflict` holding both sides.
#[test]
fn conflict_us_and_nato() {
    let parsed = parse_banner("SECRET//NATO SECRET//NOFORN");
    let (us, foreign) = match &parsed.attrs.classification {
        Some(MarkingClassification::Conflict { us, foreign }) => (us, foreign),
        other => panic!("expected Conflict, got: {other:?}"),
    };

    assert_eq!(*us, Classification::Secret);
    assert!(matches!(
        foreign.as_ref(),
        ForeignClassification::Nato(NatoClassification::NatoSecret)
    ));
}
/// When the foreign side is COSMIC TOP SECRET, the `us` level recorded in
/// the `Conflict` is Top Secret even though the banner said SECRET.
#[test]
fn conflict_level_escalation() {
    let parsed = parse_banner("SECRET//COSMIC TOP SECRET//NOFORN");
    let (us, foreign) = match &parsed.attrs.classification {
        Some(MarkingClassification::Conflict { us, foreign }) => (us, foreign),
        other => panic!("expected Conflict with escalation, got: {other:?}"),
    };

    assert_eq!(*us, Classification::TopSecret);
    assert!(matches!(
        foreign.as_ref(),
        ForeignClassification::Nato(NatoClassification::CosmicTopSecret)
    ));
}
/// The banner word `RESTRICTED` parses as the US `Restricted` level.
#[test]
fn restricted_classification_parses() {
    let expected = Some(MarkingClassification::Us(Classification::Restricted));
    let parsed = parse_banner("RESTRICTED//NF");
    assert_eq!(parsed.attrs.classification, expected);
}
/// The portion abbreviation `R` parses as the US `Restricted` level.
#[test]
fn restricted_portion_parses() {
    let expected = Some(MarkingClassification::Us(Classification::Restricted));
    let parsed = parse_portion("(R//NF)");
    assert_eq!(parsed.attrs.classification, expected);
}
/// Banner form `LIMDIS` is recorded as `NonIcDissem::Limdis`.
#[test]
fn non_ic_dissem_limdis_banner_form() {
    let parsed = parse_banner("UNCLASSIFIED//LIMDIS");
    let non_ic = &parsed.attrs.non_ic_dissem;
    assert_eq!(non_ic.len(), 1);
    assert_eq!(non_ic[0], NonIcDissem::Limdis);
}
/// Portion form `DS` maps to the same `NonIcDissem::Limdis` value as the
/// banner form.
#[test]
fn non_ic_dissem_ds_portion_form() {
    let parsed = parse_portion("(U//DS)");
    let non_ic = &parsed.attrs.non_ic_dissem;
    assert_eq!(non_ic.len(), 1);
    assert_eq!(non_ic[0], NonIcDissem::Limdis);
}
/// `LES-NF` is a single non-IC control (`LesNf`) that reports carrying
/// NOFORN.
#[test]
fn non_ic_dissem_les_nf() {
    let parsed = parse_portion("(U//LES-NF)");
    let non_ic = &parsed.attrs.non_ic_dissem;
    assert_eq!(non_ic.len(), 1);
    assert_eq!(non_ic[0], NonIcDissem::LesNf);
    assert!(non_ic[0].carries_noforn());
}
/// Banner form `SBU NOFORN` is recorded as the single control `SbuNf`.
#[test]
fn non_ic_dissem_sbu_nf_banner() {
    let parsed = parse_banner("UNCLASSIFIED//SBU NOFORN");
    let non_ic = &parsed.attrs.non_ic_dissem;
    assert_eq!(non_ic.len(), 1);
    assert_eq!(non_ic[0], NonIcDissem::SbuNf);
}
/// `SSI` must land in `non_ic_dissem` only; `dissem_controls` stays empty.
#[test]
fn non_ic_dissem_not_confused_with_ic_dissem() {
    let parsed = parse_portion("(U//SSI)");
    let attrs = &parsed.attrs;
    assert!(attrs.dissem_controls.is_empty());
    assert_eq!(attrs.non_ic_dissem.len(), 1);
    assert_eq!(attrs.non_ic_dissem[0], NonIcDissem::Ssi);
}
/// A marking with one IC dissemination block (`NF`) and one non-IC block
/// (`DS`) yields exactly one entry in each list.
#[test]
fn non_ic_dissem_alongside_ic_dissem() {
    let parsed = parse_portion("(C//NF//DS)");
    let attrs = &parsed.attrs;
    assert_eq!(attrs.dissem_controls.len(), 1);
    assert_eq!(attrs.non_ic_dissem.len(), 1);
}
/// A bare `RD` block yields one AEA marking equal to `Rd` with a default
/// `RdBlock`.
#[test]
fn aea_rd_parses() {
    let parsed = parse_banner("TOP SECRET//RD//NOFORN");
    let aea = &parsed.attrs.aea_markings;
    assert_eq!(aea.len(), 1);
    assert_eq!(aea[0], AeaMarking::Rd(marque_ism::RdBlock::default()));
}
/// `RD-CNWDI` sets the CNWDI flag on the RD block and leaves SIGMA empty.
#[test]
fn aea_rd_cnwdi_compound() {
    let parsed = parse_banner("SECRET//RD-CNWDI//NOFORN");
    assert_eq!(parsed.attrs.aea_markings.len(), 1);

    let rd = match &parsed.attrs.aea_markings[0] {
        AeaMarking::Rd(rd) => rd,
        other => panic!("expected Rd with CNWDI, got: {other:?}"),
    };
    assert!(rd.cnwdi);
    assert!(rd.sigma.is_empty());
}
/// `RD-SIGMA 20` records SIGMA number 20 and does not set CNWDI.
#[test]
fn aea_rd_sigma_compound() {
    let parsed = parse_banner("SECRET//RD-SIGMA 20//NOFORN");
    assert_eq!(parsed.attrs.aea_markings.len(), 1);

    let rd = match &parsed.attrs.aea_markings[0] {
        AeaMarking::Rd(rd) => rd,
        other => panic!("expected Rd with SIGMA, got: {other:?}"),
    };
    assert!(!rd.cnwdi);
    assert_eq!(&*rd.sigma, &[20]);
}
/// `RD-CNWDI-SIGMA 18 20` sets CNWDI and records both SIGMA numbers in
/// order.
#[test]
fn aea_rd_cnwdi_sigma_compound() {
    let parsed = parse_banner("SECRET//RD-CNWDI-SIGMA 18 20//NOFORN");
    assert_eq!(parsed.attrs.aea_markings.len(), 1);

    let rd = match &parsed.attrs.aea_markings[0] {
        AeaMarking::Rd(rd) => rd,
        other => panic!("expected Rd with CNWDI+SIGMA, got: {other:?}"),
    };
    assert!(rd.cnwdi);
    assert_eq!(&*rd.sigma, &[18, 20]);
}
/// The portion abbreviation `SG` is accepted for SIGMA: `RD-SG 14` records
/// SIGMA number 14.
#[test]
fn aea_rd_sigma_portion() {
    let parsed = parse_portion("(TS//RD-SG 14//NF)");
    assert_eq!(parsed.attrs.aea_markings.len(), 1);

    let rd = match &parsed.attrs.aea_markings[0] {
        AeaMarking::Rd(rd) => rd,
        other => panic!("expected Rd with SG, got: {other:?}"),
    };
    assert_eq!(&*rd.sigma, &[14]);
}
/// A bare `FRD` block yields one AEA marking equal to `Frd` with a default
/// `FrdBlock`.
#[test]
fn aea_frd_parses() {
    let parsed = parse_portion("(S//FRD//NF)");
    let aea = &parsed.attrs.aea_markings;
    assert_eq!(aea.len(), 1);
    assert_eq!(aea[0], AeaMarking::Frd(marque_ism::FrdBlock::default()));
}
/// `FRD-SIGMA 14` records SIGMA number 14 on the FRD block.
#[test]
fn aea_frd_sigma_compound() {
    let parsed = parse_banner("SECRET//FRD-SIGMA 14//NOFORN");
    assert_eq!(parsed.attrs.aea_markings.len(), 1);

    let frd = match &parsed.attrs.aea_markings[0] {
        AeaMarking::Frd(frd) => frd,
        other => panic!("expected Frd with SIGMA, got: {other:?}"),
    };
    assert_eq!(&*frd.sigma, &[14]);
}
/// Banner form `DOD UCNI` is recorded as `AeaMarking::DodUcni`.
#[test]
fn aea_dod_ucni_parses() {
    let parsed = parse_banner("UNCLASSIFIED//DOD UCNI");
    let aea = &parsed.attrs.aea_markings;
    assert_eq!(aea.len(), 1);
    assert_eq!(aea[0], AeaMarking::DodUcni);
}
/// Portion form `DCNI` maps to the same `AeaMarking::DodUcni` value as the
/// banner form.
#[test]
fn aea_dcni_portion_parses() {
    let parsed = parse_portion("(U//DCNI)");
    let aea = &parsed.attrs.aea_markings;
    assert_eq!(aea.len(), 1);
    assert_eq!(aea[0], AeaMarking::DodUcni);
}
/// `TFNI` is recorded as `AeaMarking::Tfni`.
#[test]
fn aea_tfni_parses() {
    let parsed = parse_banner("SECRET//TFNI//NOFORN");
    let aea = &parsed.attrs.aea_markings;
    assert_eq!(aea.len(), 1);
    assert_eq!(aea[0], AeaMarking::Tfni);
}
/// The shorthand `RD-N` sets the CNWDI flag on the RD block.
#[test]
fn aea_rd_n_shorthand() {
    let parsed = parse_portion("(S//RD-N//NF)");
    assert_eq!(parsed.attrs.aea_markings.len(), 1);

    let rd = match &parsed.attrs.aea_markings[0] {
        AeaMarking::Rd(rd) => rd,
        other => panic!("expected Rd with CNWDI from RD-N, got: {other:?}"),
    };
    assert!(rd.cnwdi);
}
/// A single-slash list inside one block (`SI/TK`) yields two SCI controls
/// and leaves no `Unknown` token spans behind.
#[test]
fn slash_separated_sci_in_single_block_parses() {
    use marque_ism::SciControl;

    let parsed = parse_portion("(TS//SI/TK//NF)");
    assert_eq!(
        parsed.attrs.sci_controls.as_ref(),
        &[SciControl::Si, SciControl::Tk],
        "SI/TK block must yield two SCI controls"
    );

    let any_unknown = parsed
        .attrs
        .token_spans
        .iter()
        .any(|tok| tok.kind == TokenKind::Unknown);
    assert!(
        !any_unknown,
        "no Unknown spans expected: {:?}",
        parsed.attrs.token_spans
    );
}
/// The banner form of `SI/TK` also yields the two SCI controls in order.
#[test]
fn slash_separated_sci_banner_parses() {
    use marque_ism::SciControl;

    let banner = parse_banner("TOP SECRET//SI/TK//NOFORN");
    let controls = banner.attrs.sci_controls.as_ref();
    assert_eq!(controls, &[SciControl::Si, SciControl::Tk]);
}
/// A single-slash list of dissemination controls (`NF/RELIDO`) yields both
/// controls.
#[test]
fn slash_separated_dissem_in_single_block_parses() {
    use marque_ism::DissemControl;

    let parsed = parse_banner("SECRET//SI//NF/RELIDO");
    let controls: Vec<DissemControl> = parsed.attrs.dissem_controls.to_vec();

    let has_nf = controls.contains(&DissemControl::Nf);
    let has_relido = controls.contains(&DissemControl::Relido);
    assert!(has_nf, "must contain NF");
    assert!(has_relido, "must contain RELIDO");
}
/// An unrecognized block in a portion marking produces at least one
/// `Unknown` token span.
#[test]
fn unrecognized_slash_token_emits_unknown() {
    let parsed = parse_portion("(S//XYZZY)");
    let saw_unknown = parsed
        .attrs
        .token_spans
        .iter()
        .any(|tok| tok.kind == TokenKind::Unknown);
    assert!(saw_unknown, "XYZZY must produce Unknown span");
}
/// A bare `SI` block populates both views: the flat `sci_controls` list and
/// a structured `sci_markings` entry with no compartments.
#[test]
fn sci_bare_single_still_parses_via_structural_path() {
    use marque_ism::{SciControl, SciControlBare, SciControlSystem};

    let parsed = parse_portion("(U//SI//NF)");
    let attrs = &parsed.attrs;
    assert_eq!(attrs.sci_controls.as_ref(), &[SciControl::Si]);
    assert_eq!(attrs.sci_markings.len(), 1);

    let marking = &attrs.sci_markings[0];
    assert_eq!(
        marking.system,
        SciControlSystem::Published(SciControlBare::Si)
    );
    assert!(marking.compartments.is_empty());
    assert_eq!(marking.canonical_enum, Some(SciControl::Si));
}
/// `SI-G` parses as system `SI` with one compartment `G` (no
/// sub-compartments) and resolves to the canonical `SciControl::SiG`.
#[test]
fn sci_published_compound_si_g_parses() {
    use marque_ism::{SciControl, SciControlBare, SciControlSystem};

    let parsed = parse_banner("SECRET//SI-G//NOFORN");
    let marking = &parsed.attrs.sci_markings[0];
    assert_eq!(
        marking.system,
        SciControlSystem::Published(SciControlBare::Si)
    );
    assert_eq!(marking.compartments.len(), 1);

    let compartment = &marking.compartments[0];
    assert_eq!(compartment.identifier.as_ref(), "G");
    assert!(compartment.sub_compartments.is_empty());

    assert_eq!(marking.canonical_enum, Some(SciControl::SiG));
    assert_eq!(parsed.attrs.sci_controls.as_ref(), &[SciControl::SiG]);
}
/// `HCS-P` parses as system `HCS` with the single compartment `P` and
/// resolves to the canonical `SciControl::HcsP`.
#[test]
fn sci_published_compound_hcs_p_parses() {
    use marque_ism::{SciControl, SciControlBare, SciControlSystem};
    let parsed = parse_banner("TOP SECRET//HCS-P//NOFORN");

    // Pin the marking count before indexing so a missing marking fails with
    // a readable assertion and a spurious extra marking is not ignored.
    assert_eq!(parsed.attrs.sci_markings.len(), 1);
    let m = &parsed.attrs.sci_markings[0];
    assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Hcs));

    // Same guard for the compartment list, matching the SI-G tests.
    assert_eq!(m.compartments.len(), 1);
    assert_eq!(m.compartments[0].identifier.as_ref(), "P");
    assert_eq!(m.canonical_enum, Some(SciControl::HcsP));
}
/// A bare `TK` block parses as system `TK` with no compartments and resolves
/// to the canonical `SciControl::Tk`.
#[test]
fn sci_bare_tk_parses() {
    use marque_ism::{SciControl, SciControlBare, SciControlSystem};

    let parsed = parse_banner("SECRET//TK//NOFORN");
    let marking = &parsed.attrs.sci_markings[0];
    assert_eq!(
        marking.system,
        SciControlSystem::Published(SciControlBare::Tk)
    );
    assert!(marking.compartments.is_empty());
    assert_eq!(marking.canonical_enum, Some(SciControl::Tk));
}
/// `SI/TK` produces two structured SCI markings as well as the two flat
/// controls.
#[test]
fn sci_multi_system_si_tk_parses() {
    use marque_ism::SciControl;

    let parsed = parse_portion("(TS//SI/TK//NF)");
    let attrs = &parsed.attrs;
    assert_eq!(
        attrs.sci_controls.as_ref(),
        &[SciControl::Si, SciControl::Tk]
    );
    assert_eq!(attrs.sci_markings.len(), 2);
}
/// A compartment with a sub-compartment (`SI-G ABCD`) has no canonical enum
/// value, so `canonical_enum` is `None` and `sci_controls` stays empty.
#[test]
fn sci_compound_with_sub_compartment_sets_canonical_none() {
    use marque_ism::{SciControlBare, SciControlSystem};

    let parsed = parse_banner("SECRET//SI-G ABCD//NOFORN");
    assert_eq!(parsed.attrs.sci_markings.len(), 1);

    let marking = &parsed.attrs.sci_markings[0];
    assert_eq!(
        marking.system,
        SciControlSystem::Published(SciControlBare::Si)
    );
    assert_eq!(marking.compartments.len(), 1);

    let compartment = &marking.compartments[0];
    assert_eq!(compartment.identifier.as_ref(), "G");
    assert_eq!(compartment.sub_compartments.len(), 1);
    assert_eq!(compartment.sub_compartments[0].as_ref(), "ABCD");

    assert_eq!(marking.canonical_enum, None);
    assert!(parsed.attrs.sci_controls.is_empty());
}
/// Full worked example: a custom numeric system (`123`) followed by `SI`
/// with two compartments (`G` with sub-compartments `ABCD`, `DEFG`; `MMM`
/// with sub-compartment `AACD`). Nothing in the banner may be `Unknown`.
#[test]
fn sci_capco_canonical_example_parses() {
    use marque_ism::{SciControlBare, SciControlSystem};

    let parsed = parse_banner("TOP SECRET//123/SI-G ABCD DEFG-MMM AACD//ORCON/NOFORN");
    assert_eq!(parsed.attrs.sci_markings.len(), 2);

    // First marking: the custom system `123`, no compartments.
    let custom = &parsed.attrs.sci_markings[0];
    assert!(matches!(&custom.system, SciControlSystem::Custom(s) if s.as_ref() == "123"));
    assert!(custom.compartments.is_empty());
    assert_eq!(custom.canonical_enum, None);

    // Second marking: published system `SI` with two compartments.
    let si = &parsed.attrs.sci_markings[1];
    assert_eq!(si.system, SciControlSystem::Published(SciControlBare::Si));
    assert_eq!(si.compartments.len(), 2);

    let g = &si.compartments[0];
    assert_eq!(g.identifier.as_ref(), "G");
    assert_eq!(g.sub_compartments.len(), 2);
    assert_eq!(g.sub_compartments[0].as_ref(), "ABCD");
    assert_eq!(g.sub_compartments[1].as_ref(), "DEFG");

    let mmm = &si.compartments[1];
    assert_eq!(mmm.identifier.as_ref(), "MMM");
    assert_eq!(mmm.sub_compartments.len(), 1);
    assert_eq!(mmm.sub_compartments[0].as_ref(), "AACD");

    assert_eq!(si.canonical_enum, None);

    let any_unknown = parsed
        .attrs
        .token_spans
        .iter()
        .any(|tok| tok.kind == TokenKind::Unknown);
    assert!(
        !any_unknown,
        "canonical example must not produce Unknown tokens; got: {:?}",
        parsed.attrs.token_spans
    );
}
/// Calling `parse_sci_block` directly on `"99"` yields one custom-system
/// marking with no compartments and no canonical enum.
#[test]
fn sci_custom_numeric_99_direct_parse() {
    use marque_ism::SciControlSystem;

    let mut spans = Vec::new();
    let markings = parse_sci_block("99", 0, &mut spans).expect("99 must parse");
    assert_eq!(markings.len(), 1);

    let only = &markings[0];
    assert!(matches!(&only.system, SciControlSystem::Custom(s) if s.as_ref() == "99"));
    assert!(only.compartments.is_empty());
    assert_eq!(only.canonical_enum, None);
}
/// Structurally malformed SCI blocks must be rejected (`None`) by
/// `parse_sci_block`: trailing/leading/doubled hyphen, empty input,
/// lowercase text, and a trailing slash.
#[test]
fn sci_structural_rejections_return_none() {
    // Table-driven so a new malformed shape is a one-line addition and a
    // failure names the offending input.
    for input in ["SI-", "-SI", "", "si-g", "SI--G", "SI/"] {
        // Fresh token buffer per case so one input cannot affect the next.
        let mut tokens = Vec::new();
        assert!(
            parse_sci_block(input, 0, &mut tokens).is_none(),
            "malformed SCI block {input:?} must be rejected"
        );
    }
}
/// A single-slash block mixing an SCI control with a dissemination control
/// (`SI/NF`) must surface an `Unknown` token span.
#[test]
fn sci_mixed_category_slash_block_falls_through() {
    let parsed = parse_banner("SECRET//SI/NF");
    let spans = &parsed.attrs.token_spans;
    let saw_unknown = spans.iter().any(|tok| tok.kind == TokenKind::Unknown);
    assert!(
        saw_unknown,
        "SI/NF must surface as Unknown for E004; got: {:?}",
        parsed.attrs.token_spans
    );
}
/// Arbitrary uppercase words after a compartment are accepted as
/// sub-compartments: `SI-G WEIRD FOO` gives `G` with `WEIRD` and `FOO`.
#[test]
fn sci_weird_sub_compartment_parses() {
    use marque_ism::{SciControlBare, SciControlSystem};

    let parsed = parse_banner("SECRET//SI-G WEIRD FOO//NOFORN");
    let marking = &parsed.attrs.sci_markings[0];
    assert_eq!(
        marking.system,
        SciControlSystem::Published(SciControlBare::Si)
    );
    assert_eq!(marking.compartments.len(), 1);

    let compartment = &marking.compartments[0];
    assert_eq!(compartment.identifier.as_ref(), "G");
    assert_eq!(compartment.sub_compartments.len(), 2);
    assert_eq!(compartment.sub_compartments[0].as_ref(), "WEIRD");
    assert_eq!(compartment.sub_compartments[1].as_ref(), "FOO");
}
/// Test helper: parses `text` as a CAB candidate and returns the result.
///
/// Panics with "CAB parse should succeed" if the parser returns an error.
fn parse_cab_text(text: &str) -> ParsedMarking {
    let bytes = text.as_bytes();
    let candidate = make_candidate(bytes, MarkingType::Cab, 0);
    let token_set = CapcoTokenSet;
    Parser::new(&token_set)
        .parse(&candidate, bytes)
        .expect("CAB parse should succeed")
}
/// A `Declassify On:` value in `YYYYMMDD` form becomes an `IsmDate::Date`
/// and leaves `declass_exemption` unset.
#[test]
fn cab_declassify_on_yyyymmdd_populates_declassify_on() {
    let parsed = parse_cab_text("Classified By: Jane Doe\nDeclassify On: 20301231");
    let attrs = &parsed.attrs;
    assert_eq!(
        attrs.declassify_on,
        Some(marque_ism::IsmDate::Date(2030, 12, 31)),
        "YYYYMMDD in CAB should set declassify_on to Date"
    );
    assert!(attrs.declass_exemption.is_none());
}
/// A four-digit `Declassify On:` value becomes an `IsmDate::Year`.
#[test]
fn cab_declassify_on_yyyy_populates_declassify_on() {
    let parsed = parse_cab_text("Declassify On: 2035");
    let expected = Some(marque_ism::IsmDate::Year(2035));
    assert_eq!(
        parsed.attrs.declassify_on, expected,
        "YYYY in CAB should set declassify_on to Year"
    );
}
/// A hyphenated `YYYY-MM-DD` value is accepted and becomes an
/// `IsmDate::Date`.
#[test]
fn cab_declassify_on_iso_date_populates_declassify_on() {
    let parsed = parse_cab_text("Declassify On: 2030-12-31");
    let expected = Some(marque_ism::IsmDate::Date(2030, 12, 31));
    assert_eq!(
        parsed.attrs.declassify_on, expected,
        "YYYY-MM-DD in CAB should set declassify_on to Date"
    );
}
/// An exemption code (`50X1-HUM`) populates `declass_exemption` and leaves
/// `declassify_on` unset.
#[test]
fn cab_declassify_on_exemption_sets_exemption_not_date() {
    let parsed = parse_cab_text("Declassify On: 50X1-HUM");
    let attrs = &parsed.attrs;
    assert!(
        attrs.declassify_on.is_none(),
        "exemption code must not set declassify_on"
    );
    assert!(
        attrs.declass_exemption.is_some(),
        "exemption code must set declass_exemption"
    );
}
/// An unrecognized `Declassify On:` value does not fail the parse; both the
/// date and the exemption stay `None`.
#[test]
fn cab_declassify_on_invalid_date_silently_ignored() {
    let parsed = parse_cab_text("Declassify On: UNRECOGNIZED");
    let attrs = &parsed.attrs;
    assert!(
        attrs.declassify_on.is_none(),
        "unrecognized Declassify On value should leave declassify_on as None"
    );
    assert!(attrs.declass_exemption.is_none());
}
/// All three CAB lines are captured: `Classified By`, `Derived From`, and
/// `Declassify On`.
#[test]
fn cab_classified_by_and_derived_from_populated() {
    let parsed = parse_cab_text(
        "Classified By: Jane Doe\nDerived From: SCG-2024\nDeclassify On: 20301231",
    );
    let attrs = &parsed.attrs;

    assert_eq!(
        attrs.classified_by.as_deref(),
        Some("Jane Doe"),
        "classified_by should be populated"
    );
    assert_eq!(
        attrs.derived_from.as_deref(),
        Some("SCG-2024"),
        "derived_from should be populated"
    );
    assert_eq!(
        attrs.declassify_on,
        Some(marque_ism::IsmDate::Date(2030, 12, 31))
    );
}
/// With no `Declassify On:` line, neither the date nor the exemption is set.
#[test]
fn cab_without_declassify_on_leaves_both_none() {
    let parsed = parse_cab_text("Classified By: Jane Doe\nDerived From: SCG-2024");
    let attrs = &parsed.attrs;
    assert!(attrs.declassify_on.is_none());
    assert!(attrs.declass_exemption.is_none());
}
/// A `YYYYMMDD` block inside a portion marking sets `declassify_on` to an
/// `IsmDate::Date`.
#[test]
fn portion_with_yyyymmdd_sets_declassify_on() {
    let expected = Some(marque_ism::IsmDate::Date(2030, 12, 31));
    let parsed = parse_portion("(SECRET//20301231//NOFORN)");
    assert_eq!(
        parsed.attrs.declassify_on, expected,
        "YYYYMMDD in portion should set declassify_on"
    );
}
/// A four-digit block inside a portion marking sets `declassify_on` to an
/// `IsmDate::Year`.
#[test]
fn portion_with_yyyy_sets_declassify_on() {
    let expected = Some(marque_ism::IsmDate::Year(2035));
    let parsed = parse_portion("(SECRET//2035)");
    assert_eq!(
        parsed.attrs.declassify_on, expected,
        "YYYY in portion should set declassify_on"
    );
}
/// 29 February 2003 does not exist (2003 is not a leap year) and must be
/// rejected.
#[test]
fn is_declass_date_rejects_leap_day_non_leap_year() {
    let accepted = is_declass_date("20030229");
    assert!(!accepted);
}
/// 29 February is valid in leap years: 2004 (divisible by 4) and 2000
/// (divisible by 400).
#[test]
fn is_declass_date_accepts_leap_day_in_leap_year() {
    for leap_day in ["20040229", "20000229"] {
        assert!(is_declass_date(leap_day));
    }
}
/// Day `00` is not a valid day of the month and must be rejected.
#[test]
fn is_declass_date_rejects_day_zero() {
    let accepted = is_declass_date("20030100");
    assert!(!accepted);
}
}
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod sar_parse_tests {
//! Tests for the SAR (Special Access Required) category.
//!
//! The first group calls `parse_sar_category` directly on a single block;
//! the second group (after `make_banner`) runs a whole banner through
//! `Parser::parse` and checks `sar_markings` and the recorded token spans.
use super::*;
use marque_ism::span::{MarkingCandidate, MarkingType, Span};
use marque_ism::token_set::CapcoTokenSet;
/// `SAR-BP`: abbreviated indicator, one program, no compartments, and
/// exactly one indicator span plus one program span.
#[test]
fn single_program_no_compartments() {
let (marking, spans) = parse_sar_category("SAR-BP", 0).expect("grammar accepts SAR-BP");
assert_eq!(marking.indicator, SarIndicator::Abbrev);
assert_eq!(marking.programs.len(), 1);
assert_eq!(&*marking.programs[0].identifier, "BP");
assert_eq!(marking.programs[0].compartments.len(), 0);
assert_eq!(
spans
.iter()
.filter(|s| s.kind == TokenKind::SarIndicator)
.count(),
1
);
assert_eq!(
spans
.iter()
.filter(|s| s.kind == TokenKind::SarProgram)
.count(),
1
);
}
/// Programs are separated by a single `/`; three programs, none with
/// compartments, are returned in input order.
#[test]
fn three_programs_no_compartments() {
let (marking, _) =
parse_sar_category("SAR-BP/CD/XR", 0).expect("grammar accepts three programs");
assert_eq!(marking.programs.len(), 3);
let ids: Vec<&str> = marking.programs.iter().map(|p| &*p.identifier).collect();
assert_eq!(ids, vec!["BP", "CD", "XR"]);
for p in marking.programs.iter() {
assert_eq!(p.compartments.len(), 0);
}
}
/// A hyphen after the program introduces a compartment (`BP` -> `J12`).
#[test]
fn program_with_single_compartment() {
let (marking, _) = parse_sar_category("SAR-BP-J12", 0).expect("grammar accepts");
assert_eq!(marking.programs.len(), 1);
let p = &marking.programs[0];
assert_eq!(&*p.identifier, "BP");
assert_eq!(p.compartments.len(), 1);
assert_eq!(&*p.compartments[0].identifier, "J12");
assert_eq!(p.compartments[0].sub_compartments.len(), 0);
}
/// A space after a compartment introduces a sub-compartment
/// (`J12` -> `J54`).
#[test]
fn program_with_compartment_and_sub_compartment() {
let (marking, _) = parse_sar_category("SAR-BP-J12 J54", 0).expect("grammar accepts");
let p = &marking.programs[0];
assert_eq!(p.compartments.len(), 1);
let c = &p.compartments[0];
assert_eq!(&*c.identifier, "J12");
assert_eq!(c.sub_compartments.len(), 1);
assert_eq!(&*c.sub_compartments[0], "J54");
}
/// Full multi-program example: three programs, each with compartments and
/// sub-compartments, plus the byte offsets of the first two token spans.
#[test]
fn canonical_h5_p100_multi_program_example() {
let block = "SAR-BP-J12 J54-K15/CD-YYY 456 689/XR-XRA RB";
let (marking, spans) = parse_sar_category(block, 0).expect("grammar accepts");
assert_eq!(marking.indicator, SarIndicator::Abbrev);
assert_eq!(marking.programs.len(), 3);
// Program BP: compartments J12 (sub J54) and K15 (no subs).
let bp = &marking.programs[0];
assert_eq!(&*bp.identifier, "BP");
assert_eq!(bp.compartments.len(), 2);
assert_eq!(&*bp.compartments[0].identifier, "J12");
assert_eq!(
bp.compartments[0]
.sub_compartments
.iter()
.map(|s| &**s)
.collect::<Vec<_>>(),
vec!["J54"]
);
assert_eq!(&*bp.compartments[1].identifier, "K15");
assert_eq!(bp.compartments[1].sub_compartments.len(), 0);
// Program CD: compartment YYY with subs 456 and 689.
let cd = &marking.programs[1];
assert_eq!(&*cd.identifier, "CD");
assert_eq!(cd.compartments.len(), 1);
assert_eq!(&*cd.compartments[0].identifier, "YYY");
assert_eq!(
cd.compartments[0]
.sub_compartments
.iter()
.map(|s| &**s)
.collect::<Vec<_>>(),
vec!["456", "689"]
);
// Program XR: compartment XRA with sub RB.
let xr = &marking.programs[2];
assert_eq!(&*xr.identifier, "XR");
assert_eq!(xr.compartments.len(), 1);
assert_eq!(&*xr.compartments[0].identifier, "XRA");
assert_eq!(
xr.compartments[0]
.sub_compartments
.iter()
.map(|s| &**s)
.collect::<Vec<_>>(),
vec!["RB"]
);
// The indicator span includes the trailing hyphen: bytes 0..4 = "SAR-".
let indicator = spans
.iter()
.find(|s| s.kind == TokenKind::SarIndicator)
.unwrap();
assert_eq!(indicator.span, Span::new(0, 4));
assert_eq!(&*indicator.text, "SAR-");
// The first program span follows immediately: bytes 4..6 = "BP".
let first_prog = spans
.iter()
.find(|s| s.kind == TokenKind::SarProgram)
.unwrap();
assert_eq!(first_prog.span, Span::new(4, 6));
assert_eq!(&*first_prog.text, "BP");
}
/// The spelled-out indicator accepts a program name containing a space;
/// the indicator span covers all 24 bytes of `SPECIAL ACCESS REQUIRED-`.
#[test]
fn full_form_single_program_with_space() {
let (marking, spans) =
parse_sar_category("SPECIAL ACCESS REQUIRED-BUTTER POPCORN", 0).unwrap();
assert_eq!(marking.indicator, SarIndicator::Full);
assert_eq!(marking.programs.len(), 1);
assert_eq!(&*marking.programs[0].identifier, "BUTTER POPCORN");
assert_eq!(marking.programs[0].compartments.len(), 0);
let indicator = spans
.iter()
.find(|s| s.kind == TokenKind::SarIndicator)
.unwrap();
assert_eq!(&*indicator.text, "SPECIAL ACCESS REQUIRED-");
assert_eq!(indicator.span, Span::new(0, 24));
}
/// In the full form, a hyphen after the two-word program name still
/// introduces a compartment, and a space after that a sub-compartment.
#[test]
fn full_form_with_compartment_and_sub() {
let (marking, _spans) =
parse_sar_category("SPECIAL ACCESS REQUIRED-BUTTER POPCORN-J12 J54", 0)
.expect("grammar accepts full form with compartment");
assert_eq!(marking.indicator, SarIndicator::Full);
assert_eq!(marking.programs.len(), 1);
let prog = &marking.programs[0];
assert_eq!(&*prog.identifier, "BUTTER POPCORN");
assert_eq!(prog.compartments.len(), 1);
assert_eq!(&*prog.compartments[0].identifier, "J12");
assert_eq!(prog.compartments[0].sub_compartments.len(), 1);
assert_eq!(&*prog.compartments[0].sub_compartments[0], "J54");
}
/// A full-form program name made of digits is rejected.
// NOTE(review): the name also mentions hyphens, but only the digit case is
// exercised here.
#[test]
fn full_form_rejects_digits_or_hyphens_in_nickname() {
assert!(parse_sar_category("SPECIAL ACCESS REQUIRED-123", 0).is_none());
}
/// `//` is not valid inside a SAR block.
#[test]
fn rejects_double_slash_inside_block() {
assert!(parse_sar_category("SAR-BP//CD", 0).is_none());
}
/// The indicator alone, without the hyphen, is rejected.
#[test]
fn rejects_missing_hyphen() {
assert!(parse_sar_category("SAR", 0).is_none());
}
/// The indicator with nothing after the hyphen is rejected.
#[test]
fn rejects_empty_program() {
assert!(parse_sar_category("SAR-", 0).is_none());
}
/// Empty input is rejected.
#[test]
fn rejects_empty_string() {
assert!(parse_sar_category("", 0).is_none());
}
/// Blocks that do not start with a SAR indicator are rejected.
#[test]
fn rejects_non_sar_prefix() {
assert!(parse_sar_category("NOFORN", 0).is_none());
assert!(parse_sar_category("SI", 0).is_none());
}
/// Abbreviated program identifiers of one character (`B`) or four
/// characters (`BPCD`) are rejected.
#[test]
fn rejects_program_id_out_of_2_3_length() {
assert!(parse_sar_category("SAR-B", 0).is_none());
assert!(parse_sar_category("SAR-BPCD", 0).is_none());
}
/// Test helper: parses the whole of `text` as a banner candidate using
/// `CapcoTokenSet`. Panics with "parse succeeds" if parsing fails.
fn make_banner(text: &str) -> ParsedMarking {
let source = text.as_bytes();
let tokens = CapcoTokenSet;
let parser = Parser::new(&tokens);
let candidate = MarkingCandidate {
span: Span::new(0, source.len()),
kind: MarkingType::Banner,
};
parser.parse(&candidate, source).expect("parse succeeds")
}
/// A SAR block in a banner populates `sar_markings`, emits SAR token
/// kinds, and does not stop the following `NOFORN` from being recognized.
#[test]
fn banner_dispatch_populates_sar_markings() {
let parsed = make_banner("TOP SECRET//SAR-BP//NOFORN");
let sar = parsed
.attrs
.sar_markings
.as_ref()
.expect("SAR block must populate sar_markings");
assert_eq!(sar.programs.len(), 1);
assert_eq!(&*sar.programs[0].identifier, "BP");
let kinds: Vec<TokenKind> = parsed.attrs.token_spans.iter().map(|t| t.kind).collect();
assert!(kinds.contains(&TokenKind::SarIndicator));
assert!(kinds.contains(&TokenKind::SarProgram));
assert!(
parsed
.attrs
.dissem_controls
.contains(&marque_ism::DissemControl::Nf),
"NOFORN must still be recognized after the SAR block"
);
}
/// The multi-program example inside a full banner: three programs, and the
/// indicator span is in banner coordinates.
#[test]
fn banner_dispatch_multi_program_canonical() {
let parsed = make_banner("SECRET//SAR-BP-J12 J54-K15/CD-YYY 456 689/XR-XRA RB//NOFORN");
let sar = parsed.attrs.sar_markings.as_ref().expect("sar present");
assert_eq!(sar.programs.len(), 3);
let ids: Vec<&str> = sar.programs.iter().map(|p| &*p.identifier).collect();
assert_eq!(ids, vec!["BP", "CD", "XR"]);
let src = parsed
.attrs
.token_spans
.iter()
.find(|t| t.kind == TokenKind::SarIndicator)
.expect("SarIndicator span present");
assert_eq!(&*src.text, "SAR-");
// "SECRET//" occupies bytes 0..8, so "SAR-" sits at 8..12.
assert_eq!(src.span, Span::new(8, 12));
}
/// Only the first SAR block populates `sar_markings`; a second one is
/// recorded as an `Unknown` token whose text is the whole block.
#[test]
fn second_sar_block_becomes_unknown() {
let parsed = make_banner("SECRET//SAR-BP//SAR-CD//NOFORN");
let sar = parsed
.attrs
.sar_markings
.as_ref()
.expect("first SAR block populates sar_markings");
assert_eq!(sar.programs.len(), 1);
assert_eq!(&*sar.programs[0].identifier, "BP");
let unknown_texts: Vec<&str> = parsed
.attrs
.token_spans
.iter()
.filter(|t| t.kind == TokenKind::Unknown)
.map(|t| &*t.text)
.collect();
assert!(
unknown_texts.contains(&"SAR-CD"),
"duplicate SAR block must be recorded as Unknown, got: {unknown_texts:?}",
);
}
}