1use crate::error::CoreError;
23use marque_ism::attrs::{
24 AeaMarking, Classification, CountryCode, DeclassExemption, DissemControl, FgiClassification,
25 FgiMarker, ForeignClassification, IsmAttributes, JointClassification, MarkingClassification,
26 NatoClassification, NonIcDissem, SarCompartment, SarIndicator, SarMarking, SarProgram,
27 SciCompartment, SciControl, SciControlBare, SciControlSystem, SciMarking, TokenKind, TokenSpan,
28};
29use marque_ism::date::IsmDate;
30use marque_ism::is_bare_cve_value;
31use marque_ism::span::{MarkingCandidate, MarkingType, Span};
32use marque_ism::token_set::TokenSet;
33use std::str::FromStr;
34
/// Outcome of successfully parsing one marking candidate.
#[derive(Debug)]
pub struct ParsedMarking {
    /// Attributes extracted from the marking text.
    pub attrs: IsmAttributes,
    /// Span of the candidate in the source buffer (copied from the candidate unchanged).
    pub source_span: Span,
    /// The marking type the candidate was parsed as.
    pub kind: MarkingType,
}
42
/// Marking parser that borrows a [`TokenSet`] for the lifetime `'t`.
pub struct Parser<'t> {
    /// Borrowed token set; passed to the `REL TO` block parser, which uses it for
    /// trigraph checks.
    tokens: &'t dyn TokenSet,
}
47
48impl<'t> Parser<'t> {
49 pub fn new(tokens: &'t dyn TokenSet) -> Self {
50 Self { tokens }
51 }
52
53 pub fn parse(
55 &self,
56 candidate: &MarkingCandidate,
57 source: &[u8],
58 ) -> Result<ParsedMarking, CoreError> {
59 let text = candidate
60 .span
61 .as_str(source)
62 .map_err(|_| CoreError::InvalidUtf8(candidate.span))?;
63 match candidate.kind {
64 MarkingType::Portion => self.parse_portion(text, candidate),
65 MarkingType::Banner => self.parse_banner(text, candidate),
66 MarkingType::Cab => self.parse_cab(text, candidate),
67 MarkingType::PageBreak => Err(CoreError::MalformedMarking(
72 "page-break candidate must not be parsed".to_owned(),
73 )),
74 }
75 }
76
77 fn parse_portion(
78 &self,
79 text: &str,
80 candidate: &MarkingCandidate,
81 ) -> Result<ParsedMarking, CoreError> {
82 let inner = text
86 .strip_prefix('(')
87 .and_then(|s| s.strip_suffix(')'))
88 .ok_or_else(|| CoreError::MalformedMarking(text.to_owned()))?;
89
90 let attrs =
91 self.parse_marking_string(inner, MarkingType::Portion, candidate.span.start + 1)?;
92 Ok(ParsedMarking {
93 attrs,
94 source_span: candidate.span,
95 kind: MarkingType::Portion,
96 })
97 }
98
99 fn parse_banner(
100 &self,
101 text: &str,
102 candidate: &MarkingCandidate,
103 ) -> Result<ParsedMarking, CoreError> {
104 let trimmed = text.trim_start();
109 let lead_ws = text.len() - trimmed.len();
110 let trimmed = trimmed.trim_end();
111 let attrs = self.parse_marking_string(
112 trimmed,
113 MarkingType::Banner,
114 candidate.span.start + lead_ws,
115 )?;
116 Ok(ParsedMarking {
117 attrs,
118 source_span: candidate.span,
119 kind: MarkingType::Banner,
120 })
121 }
122
123 fn parse_cab(
124 &self,
125 text: &str,
126 candidate: &MarkingCandidate,
127 ) -> Result<ParsedMarking, CoreError> {
128 let mut attrs = IsmAttributes::default();
130
131 for line in text.lines() {
132 if let Some(val) = line.strip_prefix("Classified By:") {
133 attrs.classified_by = Some(val.trim().into());
134 } else if let Some(val) = line.strip_prefix("Derived From:") {
135 attrs.derived_from = Some(val.trim().into());
136 } else if let Some(val) = line.strip_prefix("Declassify On:") {
137 let s = val.trim();
138 if let Some(exemption) = DeclassExemption::parse(s) {
139 attrs.declass_exemption = Some(exemption);
140 } else {
141 attrs.declassify_on = IsmDate::from_str(s).ok();
146 }
147 }
148 }
149
150 Ok(ParsedMarking {
151 attrs,
152 source_span: candidate.span,
153 kind: MarkingType::Cab,
154 })
155 }
156
157 fn parse_marking_string(
165 &self,
166 s: &str,
167 context: MarkingType,
168 s_offset: usize,
169 ) -> Result<IsmAttributes, CoreError> {
170 let mut attrs = IsmAttributes::default();
171
172 if s.is_empty() {
173 return Err(CoreError::MalformedMarking(s.to_owned()));
174 }
175
176 let separators: Vec<usize> = s.match_indices("//").map(|(i, _)| i).collect();
181 let mut block_ranges: Vec<(usize, usize)> = Vec::with_capacity(separators.len() + 1);
182 let mut prev_end = 0usize;
183 for &sep_start in &separators {
184 block_ranges.push((prev_end, sep_start));
185 prev_end = sep_start + 2; }
187 block_ranges.push((prev_end, s.len()));
188
189 let mut token_spans: Vec<TokenSpan> = Vec::new();
190
191 let mut sci: Vec<SciControl> = Vec::new();
192 let mut sci_markings: Vec<SciMarking> = Vec::new();
193 let mut sar_captured = false;
198 let mut aea: Vec<AeaMarking> = Vec::new();
199 let mut dissem: Vec<DissemControl> = Vec::new();
200 let mut non_ic: Vec<NonIcDissem> = Vec::new();
201 let mut rel_to: Vec<CountryCode> = Vec::new();
202
203 let is_non_us = s.starts_with("//");
207
208 for (idx, &(rel_start, rel_end)) in block_ranges.iter().enumerate() {
209 let raw = &s[rel_start..rel_end];
210 let trimmed = raw.trim();
211 if trimmed.is_empty() {
212 continue;
213 }
214 let trim_lead = raw.len() - raw.trim_start().len();
215 let abs_start = s_offset + rel_start + trim_lead;
216 let abs_end = abs_start + trimmed.len();
217 let span = Span::new(abs_start, abs_end);
218
219 if idx == 0 && !is_non_us {
223 attrs.classification = parse_classification(trimmed).map(MarkingClassification::Us);
224 token_spans.push(TokenSpan {
225 kind: TokenKind::Classification,
226 span,
227 text: trimmed.into(),
228 });
229 continue;
230 }
231
232 if idx == 1 && is_non_us {
236 if let Some(nato) = parse_nato_classification(trimmed) {
237 attrs.classification = Some(MarkingClassification::Nato(nato));
238 } else if let Some(joint) = parse_joint_classification(trimmed) {
239 attrs.classification = Some(MarkingClassification::Joint(joint));
240 } else if let Some(fgi) = parse_fgi_classification(trimmed) {
241 attrs.classification = Some(MarkingClassification::Fgi(fgi));
242 } else {
243 token_spans.push(TokenSpan {
245 kind: TokenKind::Unknown,
246 span,
247 text: trimmed.into(),
248 });
249 continue;
250 }
251 token_spans.push(TokenSpan {
252 kind: TokenKind::Classification,
253 span,
254 text: trimmed.into(),
255 });
256 continue;
257 }
258
259 if trimmed.starts_with("SAR-") || trimmed.starts_with("SPECIAL ACCESS REQUIRED-") {
267 if sar_captured {
268 token_spans.push(TokenSpan {
272 kind: TokenKind::Unknown,
273 span,
274 text: trimmed.into(),
275 });
276 continue;
277 }
278 if let Some((marking, sar_spans)) = parse_sar_category(trimmed, abs_start) {
279 attrs.sar_markings = Some(marking);
280 token_spans.extend(sar_spans);
281 sar_captured = true;
282 continue;
283 }
284 token_spans.push(TokenSpan {
287 kind: TokenKind::Unknown,
288 span,
289 text: trimmed.into(),
290 });
291 continue;
292 }
293
294 if trimmed.starts_with("REL TO") || trimmed.starts_with("REL ") {
295 token_spans.push(TokenSpan {
298 kind: TokenKind::RelToBlock,
299 span,
300 text: trimmed.into(),
301 });
302 let parsed =
303 parse_rel_to_with_spans(trimmed, abs_start, self.tokens, &mut token_spans);
304 rel_to.extend(parsed.countries);
305 dissem.extend(parsed.trailing_dissem);
306 non_ic.extend(parsed.trailing_non_ic);
307 } else if (trimmed.contains('-')
308 || trimmed.contains('/')
309 || is_bare_cve_value(trimmed)
310 || (is_valid_custom_control(trimmed)
317 && trimmed.bytes().any(|b| b.is_ascii_digit())
318 && !is_known_non_sci_token(trimmed)
319 && !is_declass_date(trimmed)))
320 && let Some(markings) = parse_sci_block(trimmed, abs_start, &mut token_spans)
321 {
322 for marking in &markings {
329 if let Some(ctrl) = marking.canonical_enum {
330 sci.push(ctrl);
331 }
332 }
333 sci_markings.extend(markings);
334 } else if let Some(ctrl) = SciControl::parse(trimmed) {
335 sci.push(ctrl);
336 token_spans.push(TokenSpan {
337 kind: TokenKind::SciControl,
338 span,
339 text: trimmed.into(),
340 });
341 } else if trimmed.starts_with("FGI")
342 && matches!(attrs.classification, Some(MarkingClassification::Us(_)))
343 {
344 if let Some(marker) = parse_fgi_marker(trimmed) {
346 attrs.fgi_marker = Some(marker);
347 token_spans.push(TokenSpan {
348 kind: TokenKind::FgiMarker,
349 span,
350 text: trimmed.into(),
351 });
352 }
353 } else if let Some(ctrl) =
354 DissemControl::parse(trimmed).or_else(|| parse_dissem_full_form(trimmed))
355 {
356 dissem.push(ctrl);
357 token_spans.push(TokenSpan {
358 kind: TokenKind::DissemControl,
359 span,
360 text: trimmed.into(),
361 });
362 } else if let Some(nic) = parse_non_ic_full_form(trimmed) {
363 non_ic.push(nic);
364 token_spans.push(TokenSpan {
365 kind: TokenKind::NonIcDissem,
366 span,
367 text: trimmed.into(),
368 });
369 } else if let Some(aea_marking) = AeaMarking::parse(trimmed) {
370 aea.push(aea_marking);
371 token_spans.push(TokenSpan {
372 kind: TokenKind::AeaMarking,
373 span,
374 text: trimmed.into(),
375 });
376 } else if let Some(exemption) = DeclassExemption::parse(trimmed) {
377 attrs.declass_exemption = Some(exemption);
378 token_spans.push(TokenSpan {
379 kind: TokenKind::DeclassExemption,
380 span,
381 text: trimmed.into(),
382 });
383 } else if is_declass_date(trimmed) {
384 attrs.declassify_on = IsmDate::from_str(trimmed).ok();
385 token_spans.push(TokenSpan {
386 kind: TokenKind::DeclassDate,
387 span,
388 text: trimmed.into(),
389 });
390 } else if let Some(foreign) = try_parse_foreign_classification(trimmed) {
391 if let Some(MarkingClassification::Us(us_level)) = attrs.classification {
394 let foreign_equiv = match &foreign {
395 ForeignClassification::Nato(n) => n.us_equivalent(),
396 ForeignClassification::Fgi(f) => f.level,
397 ForeignClassification::Joint(j) => j.level,
398 };
399 let max_level = us_level.max(foreign_equiv);
400 attrs.classification = Some(MarkingClassification::Conflict {
401 us: max_level,
402 foreign: Box::new(foreign),
403 });
404 token_spans.push(TokenSpan {
405 kind: TokenKind::Classification,
406 span,
407 text: trimmed.into(),
408 });
409 } else {
410 token_spans.push(TokenSpan {
412 kind: TokenKind::Unknown,
413 span,
414 text: trimmed.into(),
415 });
416 }
417 } else if trimmed.contains('/') && !trimmed.starts_with("REL") {
418 #[derive(Clone, Copy, PartialEq, Eq)]
427 enum SubKind {
428 Sci,
429 Dissem,
430 NonIc,
431 Aea,
432 Unknown,
433 }
434
435 struct SubResult<'a> {
436 kind: SubKind,
437 tok: &'a str,
438 span: Span,
439 sci: Option<SciControl>,
441 dissem: Option<DissemControl>,
442 nic: Option<NonIcDissem>,
443 aea: Option<AeaMarking>,
444 }
445
446 let mut results: Vec<SubResult<'_>> = Vec::new();
447 for (sub_off, sub_tok) in split_slash_with_offsets(trimmed) {
448 let sub_abs_start = abs_start + sub_off;
449 let sub_span = Span::new(sub_abs_start, sub_abs_start + sub_tok.len());
450 if let Some(ctrl) = SciControl::parse(sub_tok) {
451 results.push(SubResult {
452 kind: SubKind::Sci,
453 tok: sub_tok,
454 span: sub_span,
455 sci: Some(ctrl),
456 dissem: None,
457 nic: None,
458 aea: None,
459 });
460 } else if let Some(ctrl) =
461 DissemControl::parse(sub_tok).or_else(|| parse_dissem_full_form(sub_tok))
462 {
463 results.push(SubResult {
464 kind: SubKind::Dissem,
465 tok: sub_tok,
466 span: sub_span,
467 sci: None,
468 dissem: Some(ctrl),
469 nic: None,
470 aea: None,
471 });
472 } else if let Some(nic) = parse_non_ic_full_form(sub_tok) {
473 results.push(SubResult {
474 kind: SubKind::NonIc,
475 tok: sub_tok,
476 span: sub_span,
477 sci: None,
478 dissem: None,
479 nic: Some(nic),
480 aea: None,
481 });
482 } else if let Some(aea_marking) = AeaMarking::parse(sub_tok) {
483 results.push(SubResult {
484 kind: SubKind::Aea,
485 tok: sub_tok,
486 span: sub_span,
487 sci: None,
488 dissem: None,
489 nic: None,
490 aea: Some(aea_marking),
491 });
492 } else {
493 results.push(SubResult {
494 kind: SubKind::Unknown,
495 tok: sub_tok,
496 span: sub_span,
497 sci: None,
498 dissem: None,
499 nic: None,
500 aea: None,
501 });
502 }
503 }
504
505 let first_parsed_kind = results
510 .iter()
511 .find(|r| r.kind != SubKind::Unknown)
512 .map(|r| r.kind);
513 let all_same_category = first_parsed_kind.is_some_and(|first| {
514 results
515 .iter()
516 .filter(|r| r.kind != SubKind::Unknown)
517 .all(|r| r.kind == first)
518 });
519
520 if first_parsed_kind.is_some() && !all_same_category {
521 token_spans.push(TokenSpan {
524 kind: TokenKind::Unknown,
525 span,
526 text: trimmed.into(),
527 });
528 } else {
529 for r in results {
531 match r.kind {
532 SubKind::Sci => {
533 sci.push(r.sci.unwrap());
534 token_spans.push(TokenSpan {
535 kind: TokenKind::SciControl,
536 span: r.span,
537 text: r.tok.into(),
538 });
539 }
540 SubKind::Dissem => {
541 dissem.push(r.dissem.unwrap());
542 token_spans.push(TokenSpan {
543 kind: TokenKind::DissemControl,
544 span: r.span,
545 text: r.tok.into(),
546 });
547 }
548 SubKind::NonIc => {
549 non_ic.push(r.nic.unwrap());
550 token_spans.push(TokenSpan {
551 kind: TokenKind::NonIcDissem,
552 span: r.span,
553 text: r.tok.into(),
554 });
555 }
556 SubKind::Aea => {
557 aea.push(r.aea.unwrap());
558 token_spans.push(TokenSpan {
559 kind: TokenKind::AeaMarking,
560 span: r.span,
561 text: r.tok.into(),
562 });
563 }
564 SubKind::Unknown => {
565 token_spans.push(TokenSpan {
568 kind: TokenKind::Unknown,
569 span: r.span,
570 text: r.tok.into(),
571 });
572 }
573 }
574 }
575 }
576 } else {
577 token_spans.push(TokenSpan {
578 kind: TokenKind::Unknown,
579 span,
580 text: trimmed.into(),
581 });
582 }
583 }
584
585 attrs.sci_controls = sci.into_boxed_slice();
586 attrs.sci_markings = sci_markings.into_boxed_slice();
587 attrs.aea_markings = aea.into_boxed_slice();
592 attrs.dissem_controls = dissem.into_boxed_slice();
593 attrs.non_ic_dissem = non_ic.into_boxed_slice();
594 attrs.rel_to = rel_to.into_boxed_slice();
595 for &sep_start in &separators {
599 token_spans.push(TokenSpan {
600 kind: TokenKind::Separator,
601 span: Span::new(s_offset + sep_start, s_offset + sep_start + 2),
602 text: "//".into(),
603 });
604 }
605 token_spans.sort_unstable_by_key(|ts| ts.span.start);
606 attrs.token_spans = token_spans.into_boxed_slice();
607
608 let _ = context; Ok(attrs)
611 }
612}
613
614fn parse_classification(s: &str) -> Option<Classification> {
625 match s {
626 "TS" | "TOP SECRET" => Some(Classification::TopSecret),
627 "S" | "SECRET" => Some(Classification::Secret),
628 "C" | "CONFIDENTIAL" => Some(Classification::Confidential),
629 "R" | "RESTRICTED" => Some(Classification::Restricted),
630 "U" | "UNCLASSIFIED" => Some(Classification::Unclassified),
631 _ => None,
632 }
633}
634
635fn parse_sci_block(
662 text: &str,
663 base: usize,
664 tokens: &mut Vec<TokenSpan>,
665) -> Option<Vec<SciMarking>> {
666 if text.is_empty() {
667 return None;
668 }
669
670 let mut local_tokens: Vec<TokenSpan> = Vec::new();
673 let mut markings: Vec<SciMarking> = Vec::new();
674
675 let mut chunk_start = 0usize;
678 let chunks: Vec<(usize, &str)> = {
679 let mut v = Vec::new();
680 for (i, ch) in text.char_indices() {
681 if ch == '/' {
682 v.push((chunk_start, &text[chunk_start..i]));
683 chunk_start = i + 1;
684 }
685 }
686 v.push((chunk_start, &text[chunk_start..]));
687 v
688 };
689
690 for (chunk_off, chunk) in chunks {
691 if chunk.is_empty() {
694 return None;
695 }
696 if chunk.starts_with('-') {
698 return None;
699 }
700
701 let (ctrl_str, rest_opt) = match chunk.find('-') {
704 Some(i) => (&chunk[..i], Some(&chunk[i + 1..])),
705 None => (chunk, None),
706 };
707
708 if ctrl_str.is_empty() {
709 return None;
710 }
711
712 let system: SciControlSystem = if let Some(bare) = SciControlBare::parse(ctrl_str) {
718 SciControlSystem::Published(bare)
719 } else if is_valid_custom_control(ctrl_str) && !is_known_non_sci_token(ctrl_str) {
720 SciControlSystem::Custom(ctrl_str.into())
721 } else {
722 return None;
723 };
724
725 let chunk_abs = base + chunk_off;
733 local_tokens.push(TokenSpan {
734 kind: TokenKind::SciControl,
735 span: Span::new(chunk_abs, chunk_abs + chunk.len()),
736 text: chunk.into(),
737 });
738 let ctrl_abs = base + chunk_off;
740 local_tokens.push(TokenSpan {
741 kind: TokenKind::SciSystem,
742 span: Span::new(ctrl_abs, ctrl_abs + ctrl_str.len()),
743 text: ctrl_str.into(),
744 });
745
746 let mut compartments: Vec<SciCompartment> = Vec::new();
750 if let Some(rest) = rest_opt {
751 let rest_abs_base = base + chunk_off + ctrl_str.len() + 1; let mut seg_start = 0usize;
755 let mut seg_offs: Vec<(usize, &str)> = Vec::new();
756 for (i, ch) in rest.char_indices() {
757 if ch == '-' {
758 seg_offs.push((seg_start, &rest[seg_start..i]));
759 seg_start = i + 1;
760 }
761 }
762 seg_offs.push((seg_start, &rest[seg_start..]));
763
764 for (seg_off, seg) in seg_offs {
765 if seg.is_empty() {
766 return None; }
768 let mut parts = seg.split(' ');
771 let comp_id = parts.next().unwrap(); if comp_id.is_empty() || !is_alnum_upper(comp_id) {
773 return None;
774 }
775
776 let comp_abs = rest_abs_base + seg_off;
777 local_tokens.push(TokenSpan {
778 kind: TokenKind::SciCompartment,
779 span: Span::new(comp_abs, comp_abs + comp_id.len()),
780 text: comp_id.into(),
781 });
782
783 let mut subs: Vec<Box<str>> = Vec::new();
784 let mut sub_cursor = comp_id.len() + 1; for sub in parts {
787 if sub.is_empty() || !is_alnum_upper(sub) {
788 return None;
789 }
790 let sub_abs = rest_abs_base + seg_off + sub_cursor;
791 local_tokens.push(TokenSpan {
792 kind: TokenKind::SciSubCompartment,
793 span: Span::new(sub_abs, sub_abs + sub.len()),
794 text: sub.into(),
795 });
796 subs.push(sub.into());
797 sub_cursor += sub.len() + 1;
798 }
799
800 compartments.push(SciCompartment::new(comp_id.into(), subs.into_boxed_slice()));
801 }
802 }
803
804 let canonical_enum = if compartments.is_empty() {
811 SciControl::parse(ctrl_str)
812 } else {
813 compartments
814 .first()
815 .filter(|c| c.sub_compartments.is_empty())
816 .and_then(|c| {
817 let composite = format!("{}-{}", ctrl_str, c.identifier);
818 SciControl::parse(&composite)
819 })
820 };
821
822 markings.push(SciMarking::new(
823 system,
824 compartments.into_boxed_slice(),
825 canonical_enum,
826 ));
827 }
828
829 tokens.extend(local_tokens);
830 Some(markings)
831}
832
833fn is_valid_custom_control(s: &str) -> bool {
837 let len = s.len();
838 (2..=5).contains(&len) && is_alnum_upper(s)
839}
840
/// Returns `true` when `s` is non-empty and every byte is an upper-case ASCII letter or
/// an ASCII digit.
fn is_alnum_upper(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    s.bytes().all(|b| matches!(b, b'A'..=b'Z' | b'0'..=b'9'))
}
847
848fn is_known_non_sci_token(s: &str) -> bool {
854 DissemControl::parse(s).is_some()
855 || parse_dissem_full_form(s).is_some()
856 || parse_non_ic_full_form(s).is_some()
857 || AeaMarking::parse(s).is_some()
858 || DeclassExemption::parse(s).is_some()
859}
860
861fn parse_nato_classification(s: &str) -> Option<NatoClassification> {
868 match s {
870 "COSMIC TOP SECRET ATOMAL" => Some(NatoClassification::CosmicTopSecretAtomal),
872 "COSMIC TOP SECRET-BOHEMIA" => Some(NatoClassification::CosmicTopSecretBohemia),
873 "COSMIC TOP SECRET-BALK" => Some(NatoClassification::CosmicTopSecretBalk),
874 "COSMIC TOP SECRET" => Some(NatoClassification::CosmicTopSecret),
875 "NATO SECRET ATOMAL" => Some(NatoClassification::NatoSecretAtomal),
876 "NATO SECRET" => Some(NatoClassification::NatoSecret),
877 "NATO CONFIDENTIAL ATOMAL" => Some(NatoClassification::NatoConfidentialAtomal),
878 "NATO CONFIDENTIAL" => Some(NatoClassification::NatoConfidential),
879 "NATO RESTRICTED" => Some(NatoClassification::NatoRestricted),
880 "NATO UNCLASSIFIED" => Some(NatoClassification::NatoUnclassified),
881 "CTSA" | "CTS-A" => Some(NatoClassification::CosmicTopSecretAtomal),
883 "CTS-B" => Some(NatoClassification::CosmicTopSecretBohemia),
884 "CTS-BALK" => Some(NatoClassification::CosmicTopSecretBalk),
885 "CTS" => Some(NatoClassification::CosmicTopSecret),
886 "NSAT" | "NS-A" => Some(NatoClassification::NatoSecretAtomal),
887 "NS" => Some(NatoClassification::NatoSecret),
888 "NCA" | "NC-A" => Some(NatoClassification::NatoConfidentialAtomal),
889 "NC" => Some(NatoClassification::NatoConfidential),
890 "NR" => Some(NatoClassification::NatoRestricted),
891 "NU" => Some(NatoClassification::NatoUnclassified),
892 _ => None,
893 }
894}
895
896fn parse_joint_classification(s: &str) -> Option<JointClassification> {
901 let rest = s.strip_prefix("JOINT ")?;
902 let mut tokens = rest.split_whitespace();
903
904 let first = tokens.next()?;
907 let (level, remaining_start) = if first == "TOP" {
908 let mut peek_tokens = rest.split_whitespace();
910 peek_tokens.next(); if peek_tokens.next() == Some("SECRET") {
912 let level = parse_classification("TOP SECRET")?;
913 let after_ts = rest.find("SECRET").map(|i| i + "SECRET".len())?;
915 (level, after_ts)
916 } else {
917 return None; }
919 } else {
920 let level = parse_classification(first)?;
921 let after_level = rest.find(first).map(|i| i + first.len())?;
922 (level, after_level)
923 };
924
925 let country_str = rest[remaining_start..].trim();
932 let mut countries = Vec::new();
933 for token in country_str.split_whitespace() {
934 if token.len() == 3 {
935 if let Some(t) = CountryCode::try_new(token.as_bytes()) {
936 countries.push(t);
937 }
938 }
939 }
940
941 if countries.is_empty() {
942 return None; }
944
945 Some(JointClassification {
946 level,
947 countries: countries.into(),
948 })
949}
950
951fn parse_fgi_classification(s: &str) -> Option<FgiClassification> {
960 let tokens: Vec<&str> = s.split_whitespace().collect();
961 if tokens.len() < 2 {
962 return None; }
964
965 let (level, country_end) = if tokens.len() >= 3
967 && tokens[tokens.len() - 2] == "TOP"
968 && tokens[tokens.len() - 1] == "SECRET"
969 {
970 (parse_classification("TOP SECRET")?, tokens.len() - 2)
971 } else {
972 (
973 parse_classification(tokens[tokens.len() - 1])?,
974 tokens.len() - 1,
975 )
976 };
977
978 let mut countries = Vec::new();
980 for &token in &tokens[..country_end] {
981 if token == "FGI" {
982 continue;
984 }
985 if token.len() == 3 {
986 let t = CountryCode::try_new(token.as_bytes())?;
987 countries.push(t);
988 } else {
989 return None; }
991 }
992
993 Some(FgiClassification {
994 countries: countries.into(),
995 level,
996 })
997}
998
999fn parse_fgi_marker(s: &str) -> Option<FgiMarker> {
1005 if s == "FGI" {
1006 return Some(FgiMarker {
1007 countries: Box::new([]),
1008 });
1009 }
1010
1011 let rest = s.strip_prefix("FGI ")?;
1012 let mut countries = Vec::new();
1013 for token in rest.split_whitespace() {
1014 if token.len() == 3 {
1015 if let Some(t) = CountryCode::try_new(token.as_bytes()) {
1016 countries.push(t);
1017 }
1018 }
1019 }
1021
1022 Some(FgiMarker {
1023 countries: countries.into(),
1024 })
1025}
1026
1027fn try_parse_foreign_classification(s: &str) -> Option<ForeignClassification> {
1033 if let Some(nato) = parse_nato_classification(s) {
1034 Some(ForeignClassification::Nato(nato))
1035 } else if let Some(joint) = parse_joint_classification(s) {
1036 Some(ForeignClassification::Joint(joint))
1037 } else {
1038 parse_fgi_classification(s).map(ForeignClassification::Fgi)
1039 }
1040}
1041
1042fn parse_dissem_full_form(s: &str) -> Option<DissemControl> {
1054 let portion = marque_ism::marking_forms::banner_to_portion(s)
1063 .or_else(|| marque_ism::marking_forms::title_to_portion(s))?;
1064 DissemControl::parse(portion)
1065}
1066
1067fn parse_non_ic_full_form(s: &str) -> Option<NonIcDissem> {
1073 NonIcDissem::parse(s).or_else(|| {
1074 let portion = marque_ism::marking_forms::title_to_portion(s)?;
1075 NonIcDissem::parse(portion)
1076 })
1077}
1078
/// Values collected from one `REL TO` block by `parse_rel_to_with_spans`.
struct RelToParseResult {
    /// Country codes accepted from the comma-separated entries.
    countries: Vec<CountryCode>,
    /// Dissemination controls found after a `/` inside an entry.
    trailing_dissem: Vec<DissemControl>,
    /// Non-IC dissemination controls found after a `/` inside an entry.
    trailing_non_ic: Vec<NonIcDissem>,
}
1090
1091fn parse_rel_to_with_spans(
1105 block: &str,
1106 block_offset: usize,
1107 tokens: &dyn TokenSet,
1108 token_spans: &mut Vec<TokenSpan>,
1109) -> RelToParseResult {
1110 let prefix_skip = if let Some(rest) = block.strip_prefix("REL TO") {
1114 block.len() - rest.len()
1115 } else if let Some(rest) = block.strip_prefix("REL") {
1116 block.len() - rest.len()
1117 } else {
1118 0
1119 };
1120 let after_rel = &block[prefix_skip..];
1121
1122 let mut countries: Vec<CountryCode> = Vec::new();
1123 let mut trailing_dissem: Vec<DissemControl> = Vec::new();
1124 let mut trailing_non_ic: Vec<NonIcDissem> = Vec::new();
1125 let mut cursor = 0usize;
1129 for entry in after_rel.split(',') {
1130 let entry_start_in_after = cursor;
1131 cursor += entry.len() + 1;
1137
1138 let trim_lead = entry.len() - entry.trim_start().len();
1139 let trimmed = entry.trim();
1140 if trimmed.is_empty() {
1141 continue;
1142 }
1143 let abs_start = block_offset + prefix_skip + entry_start_in_after + trim_lead;
1144
1145 if let Some(slash_pos) = trimmed.find('/') {
1151 let country_part = trimmed[..slash_pos].trim();
1152 let tail = trimmed[slash_pos + 1..].trim();
1153
1154 if !country_part.is_empty() {
1156 if tokens.is_trigraph(country_part) {
1157 if let Some(t) = CountryCode::try_new(country_part.as_bytes()) {
1158 countries.push(t);
1159 token_spans.push(TokenSpan {
1160 kind: TokenKind::RelToTrigraph,
1161 span: Span::new(abs_start, abs_start + country_part.len()),
1162 text: country_part.into(),
1163 });
1164 }
1165 } else {
1166 token_spans.push(TokenSpan {
1167 kind: TokenKind::Unknown,
1168 span: Span::new(abs_start, abs_start + country_part.len()),
1169 text: country_part.into(),
1170 });
1171 }
1172 }
1173
1174 let tail_base = abs_start + slash_pos + 1;
1176 let mut tail_cursor = 0usize;
1177 for part in tail.split('/') {
1178 let part_trim_lead = part.len() - part.trim_start().len();
1179 let part = part.trim();
1180 let part_abs = tail_base + tail_cursor + part_trim_lead;
1181 tail_cursor += part.len() + part_trim_lead + 1; if part.is_empty() {
1183 continue;
1184 }
1185 if let Some(ctrl) =
1186 DissemControl::parse(part).or_else(|| parse_dissem_full_form(part))
1187 {
1188 trailing_dissem.push(ctrl);
1189 token_spans.push(TokenSpan {
1190 kind: TokenKind::DissemControl,
1191 span: Span::new(part_abs, part_abs + part.len()),
1192 text: part.into(),
1193 });
1194 } else if let Some(nic) = parse_non_ic_full_form(part) {
1195 trailing_non_ic.push(nic);
1196 token_spans.push(TokenSpan {
1197 kind: TokenKind::NonIcDissem,
1198 span: Span::new(part_abs, part_abs + part.len()),
1199 text: part.into(),
1200 });
1201 } else {
1202 token_spans.push(TokenSpan {
1203 kind: TokenKind::Unknown,
1204 span: Span::new(part_abs, part_abs + part.len()),
1205 text: part.into(),
1206 });
1207 }
1208 }
1209 continue;
1210 }
1211
1212 if !tokens.is_trigraph(trimmed) {
1213 token_spans.push(TokenSpan {
1232 kind: TokenKind::Unknown,
1233 span: Span::new(abs_start, abs_start + trimmed.len()),
1234 text: trimmed.into(),
1235 });
1236 continue;
1237 }
1238 let Some(t) = CountryCode::try_new(trimmed.as_bytes()) else {
1248 continue;
1249 };
1250 countries.push(t);
1251 token_spans.push(TokenSpan {
1252 kind: TokenKind::RelToTrigraph,
1253 span: Span::new(abs_start, abs_start + trimmed.len()),
1254 text: trimmed.into(),
1255 });
1256 }
1257 RelToParseResult {
1258 countries,
1259 trailing_dissem,
1260 trailing_non_ic,
1261 }
1262}
1263
1264fn is_declass_date(s: &str) -> bool {
1282 let bytes = s.as_bytes();
1283 if !matches!(bytes.len(), 4 | 8) || !bytes.iter().all(u8::is_ascii_digit) {
1284 return false;
1285 }
1286 IsmDate::from_str(s).is_ok()
1287}
1288
/// Splits `s` on `/` and returns each non-blank part, trimmed, paired with the byte
/// offset in `s` at which the trimmed text begins. Blank parts are omitted.
fn split_slash_with_offsets(s: &str) -> Vec<(usize, &str)> {
    let mut next_start = 0usize;
    s.split('/')
        .filter_map(|part| {
            let part_start = next_start;
            next_start += part.len() + 1; // step over the part and its '/'

            let without_lead = part.trim_start();
            let lead = part.len() - without_lead.len();
            let body = without_lead.trim_end();
            (!body.is_empty()).then_some((part_start + lead, body))
        })
        .collect()
}
1307
1308fn parse_sar_category(block_text: &str, base: usize) -> Option<(SarMarking, Vec<TokenSpan>)> {
1347 if block_text.contains("//") {
1351 return None;
1352 }
1353
1354 let (indicator, indicator_lit) = if block_text.starts_with("SPECIAL ACCESS REQUIRED-") {
1357 (SarIndicator::Full, "SPECIAL ACCESS REQUIRED-")
1358 } else if block_text.starts_with("SAR-") {
1359 (SarIndicator::Abbrev, "SAR-")
1360 } else {
1361 return None;
1362 };
1363 let rest_offset = indicator_lit.len();
1364 let rest = &block_text[rest_offset..];
1365 if rest.is_empty() {
1366 return None;
1367 }
1368
1369 let mut spans: Vec<TokenSpan> = Vec::new();
1370
1371 spans.push(TokenSpan {
1375 kind: TokenKind::SarIndicator,
1376 span: Span::new(base, base + indicator_lit.len()),
1377 text: indicator_lit.into(),
1378 });
1379
1380 let mut programs: Vec<SarProgram> = Vec::new();
1381
1382 let mut chunk_offset = rest_offset; for (i, prog_chunk) in rest.split('/').enumerate() {
1386 if i > 0 {
1387 chunk_offset += 1; }
1389 let program_base = base + chunk_offset;
1390
1391 let program = parse_sar_program(prog_chunk, program_base, indicator, &mut spans)?;
1392 programs.push(program);
1393 chunk_offset += prog_chunk.len();
1394 }
1395
1396 if programs.is_empty() {
1397 return None;
1398 }
1399
1400 Some((
1401 SarMarking::new(indicator, programs.into_boxed_slice()),
1402 spans,
1403 ))
1404}
1405
1406fn parse_sar_program(
1428 chunk: &str,
1429 base: usize,
1430 indicator: SarIndicator,
1431 spans: &mut Vec<TokenSpan>,
1432) -> Option<SarProgram> {
1433 if chunk.is_empty() {
1434 return None;
1435 }
1436
1437 let mut segments = split_with_offsets(chunk, '-');
1441 if segments.is_empty() {
1442 return None;
1443 }
1444
1445 let (prog_off, prog_id) = segments.remove(0);
1447 if prog_id.is_empty() {
1448 return None;
1449 }
1450 let prog_shape_ok = match indicator {
1451 SarIndicator::Abbrev => {
1453 (2..=3).contains(&prog_id.len()) && prog_id.bytes().all(|b| b.is_ascii_alphanumeric())
1454 }
1455 SarIndicator::Full => {
1458 prog_id.bytes().all(|b| b == b' ' || b.is_ascii_uppercase())
1459 && prog_id.bytes().any(|b| b != b' ')
1460 }
1461 };
1462 if !prog_shape_ok {
1463 return None;
1464 }
1465 spans.push(TokenSpan {
1466 kind: TokenKind::SarProgram,
1467 span: Span::new(base + prog_off, base + prog_off + prog_id.len()),
1468 text: prog_id.into(),
1469 });
1470
1471 let mut compartments: Vec<SarCompartment> = Vec::with_capacity(segments.len());
1474 for (seg_off, seg) in segments {
1475 if seg.is_empty() {
1476 return None;
1477 }
1478 let mut parts = split_with_offsets(seg, ' ');
1480 let (comp_rel_off, comp_id) = parts.remove(0);
1481 if comp_id.is_empty() || !comp_id.bytes().all(|b| b.is_ascii_alphanumeric()) {
1482 return None;
1483 }
1484 let comp_abs_off = seg_off + comp_rel_off;
1485 spans.push(TokenSpan {
1486 kind: TokenKind::SarCompartment,
1487 span: Span::new(base + comp_abs_off, base + comp_abs_off + comp_id.len()),
1488 text: comp_id.into(),
1489 });
1490
1491 let mut subs: Vec<Box<str>> = Vec::with_capacity(parts.len());
1492 for (sub_rel_off, sub_id) in parts {
1493 if sub_id.is_empty() || !sub_id.bytes().all(|b| b.is_ascii_alphanumeric()) {
1494 return None;
1495 }
1496 let sub_abs_off = seg_off + sub_rel_off;
1497 spans.push(TokenSpan {
1498 kind: TokenKind::SarSubCompartment,
1499 span: Span::new(base + sub_abs_off, base + sub_abs_off + sub_id.len()),
1500 text: sub_id.into(),
1501 });
1502 subs.push(sub_id.into());
1503 }
1504
1505 compartments.push(SarCompartment::new(comp_id.into(), subs.into_boxed_slice()));
1506 }
1507
1508 Some(SarProgram::new(
1509 prog_id.into(),
1510 compartments.into_boxed_slice(),
1511 ))
1512}
1513
/// Splits `s` on `delim`, pairing every part (including empty ones) with the byte offset
/// in `s` at which it starts.
fn split_with_offsets(s: &str, delim: char) -> Vec<(usize, &str)> {
    let step = delim.len_utf8();
    let mut next_start = 0usize;
    s.split(delim)
        .map(|part| {
            let start = next_start;
            next_start += part.len() + step;
            (start, part)
        })
        .collect()
}
1527
1528#[cfg(test)]
1529#[cfg_attr(coverage_nightly, coverage(off))]
1530mod tests {
1531 use super::*;
1532 use marque_ism::span::{MarkingCandidate, MarkingType, Span};
1533 use marque_ism::token_set::CapcoTokenSet;
1534
1535 fn make_candidate(text: &[u8], kind: MarkingType, offset: usize) -> MarkingCandidate {
1536 MarkingCandidate {
1537 span: Span::new(offset, offset + text.len()),
1538 kind,
1539 }
1540 }
1541
1542 fn parse_banner(text: &str) -> ParsedMarking {
1543 let source = text.as_bytes();
1544 let tokens = CapcoTokenSet;
1545 let parser = Parser::new(&tokens);
1546 let candidate = make_candidate(source, MarkingType::Banner, 0);
1547 parser
1548 .parse(&candidate, source)
1549 .expect("parse should succeed")
1550 }
1551
1552 fn parse_portion(text: &str) -> ParsedMarking {
1553 let source = text.as_bytes();
1554 let tokens = CapcoTokenSet;
1555 let parser = Parser::new(&tokens);
1556 let candidate = make_candidate(source, MarkingType::Portion, 0);
1557 parser
1558 .parse(&candidate, source)
1559 .expect("parse should succeed")
1560 }
1561
1562 #[test]
1565 fn banner_with_declass_exemption_populates_attrs() {
1566 let parsed = parse_banner("SECRET//25X1//NOFORN");
1569 assert!(
1570 parsed.attrs.declass_exemption.is_some(),
1571 "declass_exemption should be populated when 25X1 appears in banner"
1572 );
1573 use marque_ism::DeclassExemption;
1574 assert_eq!(
1575 parsed.attrs.declass_exemption,
1576 Some(DeclassExemption::X25x1)
1577 );
1578 }
1579
1580 #[test]
1581 fn portion_with_declass_exemption_populates_attrs() {
1582 let parsed = parse_portion("(SECRET//50X1-HUM)");
1583 assert!(parsed.attrs.declass_exemption.is_some());
1584 }
1585
1586 #[test]
1589 fn banner_with_declass_date_populates_attrs() {
1590 let parsed = parse_banner("SECRET//20301231//NOFORN");
1591 assert_eq!(
1592 parsed.attrs.declassify_on,
1593 Some(marque_ism::IsmDate::Date(2030, 12, 31)),
1594 "declassify_on should be populated when YYYYMMDD appears in banner"
1595 );
1596 }
1597
1598 #[test]
1599 fn banner_with_four_digit_year_populates_attrs() {
1600 let parsed = parse_banner("SECRET//2035");
1601 assert_eq!(
1602 parsed.attrs.declassify_on,
1603 Some(marque_ism::IsmDate::Year(2035))
1604 );
1605 }
1606
1607 #[test]
1610 fn banner_without_declass_leaves_fields_none() {
1611 let parsed = parse_banner("TOP SECRET//SI//NOFORN");
1612 assert!(parsed.attrs.declassify_on.is_none());
1613 assert!(parsed.attrs.declass_exemption.is_none());
1614 }
1615
1616 #[test]
1619 fn is_declass_date_accepts_yyyymmdd() {
1620 assert!(is_declass_date("20301231"));
1621 }
1622
1623 #[test]
1624 fn is_declass_date_accepts_yyyy() {
1625 assert!(is_declass_date("2035"));
1626 }
1627
1628 #[test]
1629 fn is_declass_date_rejects_non_digit() {
1630 assert!(!is_declass_date("2030X231"));
1631 assert!(!is_declass_date("YYYYMMDD"));
1632 }
1633
1634 #[test]
1635 fn is_declass_date_rejects_wrong_length() {
1636 assert!(!is_declass_date("203012"));
1637 assert!(!is_declass_date("203012311"));
1638 }
1639
    /// Eight-digit strings that are all digits but do not name a real
    /// calendar day must be rejected.
    #[test]
    fn is_declass_date_rejects_impossible_calendar_dates() {
        // Month 13 (with day 40) does not exist.
        assert!(!is_declass_date("20301340"));
        // Day 00 is never a valid day of the month.
        assert!(!is_declass_date("20300100"));
        // February never has 31 days.
        assert!(!is_declass_date("20030231"));
        // April has only 30 days.
        assert!(!is_declass_date("20030431"));
    }
1651
    /// Every token kind present in the banner must appear in `token_spans`,
    /// and each recorded span must slice the source back to the token text.
    #[test]
    fn token_spans_track_offsets_in_banner() {
        let parsed = parse_banner("TOP SECRET//SI//NF");
        let kinds: Vec<TokenKind> = parsed.attrs.token_spans.iter().map(|t| t.kind).collect();
        // One span of each expected kind, including the `//` separators.
        assert!(kinds.contains(&TokenKind::Separator));
        assert!(kinds.contains(&TokenKind::Classification));
        assert!(kinds.contains(&TokenKind::SciControl));
        assert!(kinds.contains(&TokenKind::DissemControl));

        // Same bytes the banner was parsed from (the helper parses at offset
        // 0), so spans can be sliced directly against this buffer.
        let src = b"TOP SECRET//SI//NF";
        let cls = parsed
            .attrs
            .token_spans
            .iter()
            .find(|t| t.kind == TokenKind::Classification)
            .unwrap();
        // The multi-word classification is covered by a single span.
        assert_eq!(cls.span.as_str(src).unwrap(), "TOP SECRET");

        let sci = parsed
            .attrs
            .token_spans
            .iter()
            .find(|t| t.kind == TokenKind::SciControl)
            .unwrap();
        assert_eq!(sci.span.as_str(src).unwrap(), "SI");

        let dissem = parsed
            .attrs
            .token_spans
            .iter()
            .find(|t| t.kind == TokenKind::DissemControl)
            .unwrap();
        assert_eq!(dissem.span.as_str(src).unwrap(), "NF");
    }
1690
1691 #[test]
1692 fn token_spans_strip_paren_in_portion() {
1693 let parsed = parse_portion("(SECRET//NF)");
1694 let src = b"(SECRET//NF)";
1695 let cls = parsed
1696 .attrs
1697 .token_spans
1698 .iter()
1699 .find(|t| t.kind == TokenKind::Classification)
1700 .unwrap();
1701 assert_eq!(cls.span.start, 1);
1703 assert_eq!(cls.span.end, 7);
1704 assert_eq!(cls.span.as_str(src).unwrap(), "SECRET");
1705
1706 let dissem = parsed
1707 .attrs
1708 .token_spans
1709 .iter()
1710 .find(|t| t.kind == TokenKind::DissemControl)
1711 .unwrap();
1712 assert_eq!(dissem.span.start, 9);
1714 assert_eq!(dissem.span.end, 11);
1715 }
1716
1717 #[test]
1718 fn token_spans_record_unknown_token() {
1719 let parsed = parse_banner("SECRET//XYZZY//NOFORN");
1720 let unknowns: Vec<&TokenSpan> = parsed
1721 .attrs
1722 .token_spans
1723 .iter()
1724 .filter(|t| t.kind == TokenKind::Unknown)
1725 .collect();
1726 assert_eq!(unknowns.len(), 1);
1727 assert_eq!(
1728 unknowns[0].span.as_str(b"SECRET//XYZZY//NOFORN").unwrap(),
1729 "XYZZY"
1730 );
1731 }
1732
1733 #[test]
1734 fn token_spans_record_rel_to_trigraphs() {
1735 let parsed = parse_banner("SECRET//REL TO USA, GBR, AUS");
1736 let trigraphs: Vec<&TokenSpan> = parsed
1737 .attrs
1738 .token_spans
1739 .iter()
1740 .filter(|t| t.kind == TokenKind::RelToTrigraph)
1741 .collect();
1742 assert_eq!(trigraphs.len(), 3);
1743 let src = b"SECRET//REL TO USA, GBR, AUS";
1744 assert_eq!(trigraphs[0].span.as_str(src).unwrap(), "USA");
1745 assert_eq!(trigraphs[1].span.as_str(src).unwrap(), "GBR");
1746 assert_eq!(trigraphs[2].span.as_str(src).unwrap(), "AUS");
1747 }
1748
1749 #[test]
1759 fn rel_to_preserves_tetragraph_fvey() {
1760 let parsed = parse_banner("SECRET//REL TO USA, FVEY, GBR");
1761 let codes: Vec<&str> = parsed.attrs.rel_to.iter().map(|c| c.as_str()).collect();
1762 assert_eq!(
1763 codes,
1764 vec!["USA", "FVEY", "GBR"],
1765 "FVEY tetragraph must land in rel_to (issue #183 silent-drop fix)"
1766 );
1767 }
1768
1769 #[test]
1770 fn rel_to_preserves_opaque_tetragraph_nato() {
1771 let parsed = parse_banner("SECRET//REL TO USA, NATO, GBR");
1772 let codes: Vec<&str> = parsed.attrs.rel_to.iter().map(|c| c.as_str()).collect();
1773 assert_eq!(
1774 codes,
1775 vec!["USA", "NATO", "GBR"],
1776 "NATO is in CVE TRIGRAPHS recognition set; rel_to must preserve it \
1777 even though membership expansion is deferred to Phase F"
1778 );
1779 }
1780
1781 #[test]
1782 fn rel_to_preserves_two_byte_eu() {
1783 let parsed = parse_banner("SECRET//REL TO USA, EU");
1784 let codes: Vec<&str> = parsed.attrs.rel_to.iter().map(|c| c.as_str()).collect();
1785 assert_eq!(
1786 codes,
1787 vec!["USA", "EU"],
1788 "EU (2-byte CVE entry) must round-trip through the parser"
1789 );
1790 }
1791
1792 #[test]
1793 fn rel_to_preserves_long_australia_group() {
1794 let parsed = parse_banner("SECRET//REL TO USA, AUSTRALIA_GROUP");
1795 let codes: Vec<&str> = parsed.attrs.rel_to.iter().map(|c| c.as_str()).collect();
1796 assert_eq!(
1797 codes,
1798 vec!["USA", "AUSTRALIA_GROUP"],
1799 "AUSTRALIA_GROUP (15-byte CVE entry, contains underscore) \
1800 must round-trip through the parser"
1801 );
1802 }
1803
1804 #[test]
1805 fn rel_to_token_span_widens_to_actual_code_length() {
1806 let parsed = parse_banner("SECRET//REL TO USA, FVEY, AUSTRALIA_GROUP");
1812 let trigraph_spans: Vec<&TokenSpan> = parsed
1813 .attrs
1814 .token_spans
1815 .iter()
1816 .filter(|t| t.kind == TokenKind::RelToTrigraph)
1817 .collect();
1818 let src = b"SECRET//REL TO USA, FVEY, AUSTRALIA_GROUP";
1819 assert_eq!(trigraph_spans[0].span.as_str(src).unwrap(), "USA");
1820 assert_eq!(trigraph_spans[1].span.as_str(src).unwrap(), "FVEY");
1821 assert_eq!(
1822 trigraph_spans[2].span.as_str(src).unwrap(),
1823 "AUSTRALIA_GROUP"
1824 );
1825 }
1826
1827 #[test]
1828 fn rel_to_drops_unrecognized_token_silently() {
1829 let parsed = parse_banner("SECRET//REL TO USA, XYZQ, GBR");
1834 let codes: Vec<&str> = parsed.attrs.rel_to.iter().map(|c| c.as_str()).collect();
1835 assert_eq!(codes, vec!["USA", "GBR"]);
1836 }
1837
1838 #[test]
1839 fn token_spans_record_separators() {
1840 let parsed = parse_banner("SECRET//NF");
1841 let seps: Vec<&TokenSpan> = parsed
1842 .attrs
1843 .token_spans
1844 .iter()
1845 .filter(|t| t.kind == TokenKind::Separator)
1846 .collect();
1847 assert_eq!(seps.len(), 1);
1848 let src = b"SECRET//NF";
1849 assert_eq!(seps[0].span.as_str(src).unwrap(), "//");
1850 }
1851
1852 #[test]
1857 fn nato_banner_parses_all_variants() {
1858 for (input, expected) in [
1859 ("//NATO UNCLASSIFIED", NatoClassification::NatoUnclassified),
1860 ("//NATO RESTRICTED", NatoClassification::NatoRestricted),
1861 ("//NATO CONFIDENTIAL", NatoClassification::NatoConfidential),
1862 (
1863 "//NATO CONFIDENTIAL ATOMAL",
1864 NatoClassification::NatoConfidentialAtomal,
1865 ),
1866 ("//NATO SECRET", NatoClassification::NatoSecret),
1867 ("//NATO SECRET ATOMAL", NatoClassification::NatoSecretAtomal),
1868 ("//COSMIC TOP SECRET", NatoClassification::CosmicTopSecret),
1869 (
1870 "//COSMIC TOP SECRET ATOMAL",
1871 NatoClassification::CosmicTopSecretAtomal,
1872 ),
1873 (
1874 "//COSMIC TOP SECRET-BOHEMIA",
1875 NatoClassification::CosmicTopSecretBohemia,
1876 ),
1877 (
1878 "//COSMIC TOP SECRET-BALK",
1879 NatoClassification::CosmicTopSecretBalk,
1880 ),
1881 ] {
1882 let parsed = parse_banner(input);
1883 assert_eq!(
1884 parsed.attrs.classification,
1885 Some(MarkingClassification::Nato(expected)),
1886 "failed for banner: {input}"
1887 );
1888 }
1889 }
1890
1891 #[test]
1892 fn nato_portion_parses_all_variants() {
1893 for (input, expected) in [
1894 ("(//NU)", NatoClassification::NatoUnclassified),
1895 ("(//NR)", NatoClassification::NatoRestricted),
1896 ("(//NC)", NatoClassification::NatoConfidential),
1897 ("(//NCA)", NatoClassification::NatoConfidentialAtomal),
1898 ("(//NC-A)", NatoClassification::NatoConfidentialAtomal),
1899 ("(//NS)", NatoClassification::NatoSecret),
1900 ("(//NSAT)", NatoClassification::NatoSecretAtomal),
1901 ("(//NS-A)", NatoClassification::NatoSecretAtomal),
1902 ("(//CTS)", NatoClassification::CosmicTopSecret),
1903 ("(//CTSA)", NatoClassification::CosmicTopSecretAtomal),
1904 ("(//CTS-A)", NatoClassification::CosmicTopSecretAtomal),
1905 ("(//CTS-B)", NatoClassification::CosmicTopSecretBohemia),
1906 ("(//CTS-BALK)", NatoClassification::CosmicTopSecretBalk),
1907 ] {
1908 let parsed = parse_portion(input);
1909 assert_eq!(
1910 parsed.attrs.classification,
1911 Some(MarkingClassification::Nato(expected)),
1912 "failed for portion: {input}"
1913 );
1914 }
1915 }
1916
1917 #[test]
1918 fn nato_banner_with_rel_to() {
1919 let parsed = parse_banner("//NATO SECRET//REL TO USA, GBR");
1920 assert_eq!(
1921 parsed.attrs.classification,
1922 Some(MarkingClassification::Nato(NatoClassification::NatoSecret)),
1923 );
1924 assert_eq!(parsed.attrs.rel_to.len(), 2);
1925 assert_eq!(parsed.attrs.rel_to[0], CountryCode::USA);
1926 }
1927
1928 #[test]
1929 fn joint_banner_parses_correctly() {
1930 let parsed = parse_banner("//JOINT S USA GBR");
1931 match &parsed.attrs.classification {
1932 Some(MarkingClassification::Joint(j)) => {
1933 assert_eq!(j.level, Classification::Secret);
1934 assert_eq!(j.countries.len(), 2);
1935 assert_eq!(j.countries[0], CountryCode::USA);
1936 assert_eq!(j.countries[1].as_str(), "GBR");
1937 }
1938 other => panic!("expected Joint, got: {other:?}"),
1939 }
1940 }
1941
1942 #[test]
1943 fn joint_banner_parses_top_secret_multi_word_level() {
1944 let parsed = parse_banner("//JOINT TOP SECRET USA GBR");
1949 match &parsed.attrs.classification {
1950 Some(MarkingClassification::Joint(j)) => {
1951 assert_eq!(j.level, Classification::TopSecret);
1952 assert_eq!(j.countries.len(), 2);
1953 assert_eq!(j.countries[0], CountryCode::USA);
1954 assert_eq!(j.countries[1].as_str(), "GBR");
1955 }
1956 other => panic!("expected Joint(TopSecret), got: {other:?}"),
1957 }
1958 }
1959
1960 #[test]
1961 fn joint_banner_rejects_bare_top_without_secret() {
1962 let parsed = parse_banner("//JOINT TOP USA GBR");
1967 assert!(
1968 !matches!(
1969 parsed.attrs.classification,
1970 Some(MarkingClassification::Joint(_))
1971 ),
1972 "bare TOP must not parse as a JOINT classification"
1973 );
1974 }
1975
1976 #[test]
1977 fn joint_portion_with_rel_to() {
1978 let parsed = parse_portion("(//JOINT TS USA AUS GBR//REL TO USA, AUS, GBR)");
1979 match &parsed.attrs.classification {
1980 Some(MarkingClassification::Joint(j)) => {
1981 assert_eq!(j.level, Classification::TopSecret);
1982 assert_eq!(j.countries.len(), 3);
1983 }
1984 other => panic!("expected Joint, got: {other:?}"),
1985 }
1986 assert_eq!(parsed.attrs.rel_to.len(), 3);
1987 }
1988
1989 #[test]
1990 fn fgi_single_country_parses() {
1991 let parsed = parse_portion("(//GBR S//NF)");
1992 match &parsed.attrs.classification {
1993 Some(MarkingClassification::Fgi(f)) => {
1994 assert_eq!(f.level, Classification::Secret);
1995 assert_eq!(f.countries.len(), 1);
1996 assert_eq!(f.countries[0].as_str(), "GBR");
1997 }
1998 other => panic!("expected Fgi, got: {other:?}"),
1999 }
2000 }
2001
2002 #[test]
2003 fn fgi_multiple_countries_parses() {
2004 let parsed = parse_banner("//GBR DEU TS//NF");
2005 match &parsed.attrs.classification {
2006 Some(MarkingClassification::Fgi(f)) => {
2007 assert_eq!(f.level, Classification::TopSecret);
2008 assert_eq!(f.countries.len(), 2);
2009 }
2010 other => panic!("expected Fgi, got: {other:?}"),
2011 }
2012 }
2013
2014 #[test]
2015 fn fgi_placeholder_country_parses() {
2016 let parsed = parse_portion("(//FGI S//NF)");
2018 match &parsed.attrs.classification {
2019 Some(MarkingClassification::Fgi(f)) => {
2020 assert_eq!(f.level, Classification::Secret);
2021 assert!(
2022 f.countries.is_empty(),
2023 "FGI placeholder should have no countries"
2024 );
2025 }
2026 other => panic!("expected Fgi, got: {other:?}"),
2027 }
2028 }
2029
2030 #[test]
2031 fn fgi_non_uppercase_trigraph_rejected() {
2032 let parsed = parse_banner("//Gbr S//NF");
2039 assert!(
2040 !matches!(
2041 parsed.attrs.classification,
2042 Some(MarkingClassification::Fgi(_))
2043 ),
2044 "Gbr should not parse as a valid FGI classification: {:?}",
2045 parsed.attrs.classification,
2046 );
2047 }
2048
    /// A bare `//FGI` block with no classification level must not yield a
    /// non-US classification.
    ///
    /// Despite the test name, the parse itself is expected to succeed: the
    /// `parse_banner` helper panics on a parser error. What is checked is
    /// that `classification` is either absent or a `Us(_)` value.
    #[test]
    fn fgi_no_level_is_error() {
        let parsed = parse_banner("//FGI//NF");
        assert!(
            parsed.attrs.classification.is_none()
                || matches!(
                    parsed.attrs.classification,
                    Some(MarkingClassification::Us(_))
                ),
            "bare FGI with no level should not produce a valid non-US classification: {:?}",
            parsed.attrs.classification,
        );
    }
2063
2064 #[test]
2065 fn fgi_marker_in_us_marking() {
2066 let parsed = parse_banner("SECRET//FGI DEU//NOFORN");
2067 assert_eq!(
2068 parsed.attrs.classification,
2069 Some(MarkingClassification::Us(Classification::Secret)),
2070 );
2071 let marker = parsed
2072 .attrs
2073 .fgi_marker
2074 .as_ref()
2075 .expect("should have FGI marker");
2076 assert_eq!(marker.countries.len(), 1);
2077 assert_eq!(marker.countries[0].as_str(), "DEU");
2078 }
2079
2080 #[test]
2081 fn fgi_marker_no_countries() {
2082 let parsed = parse_banner("SECRET//FGI//NOFORN");
2083 assert_eq!(
2084 parsed.attrs.classification,
2085 Some(MarkingClassification::Us(Classification::Secret)),
2086 );
2087 let marker = parsed
2088 .attrs
2089 .fgi_marker
2090 .as_ref()
2091 .expect("should have FGI marker");
2092 assert!(marker.countries.is_empty());
2093 }
2094
2095 #[test]
2096 fn conflict_us_and_nato() {
2097 let parsed = parse_banner("SECRET//NATO SECRET//NOFORN");
2098 match &parsed.attrs.classification {
2099 Some(MarkingClassification::Conflict { us, foreign }) => {
2100 assert_eq!(*us, Classification::Secret);
2101 assert!(matches!(
2102 foreign.as_ref(),
2103 ForeignClassification::Nato(NatoClassification::NatoSecret)
2104 ));
2105 }
2106 other => panic!("expected Conflict, got: {other:?}"),
2107 }
2108 }
2109
    /// A banner carrying both a US level and a NATO level must parse as a
    /// `Conflict`.
    ///
    /// The banner writes US `SECRET` next to `COSMIC TOP SECRET`; the
    /// expected `Conflict` reports the US side as `TopSecret`, i.e. higher
    /// than the level written for the US block.
    #[test]
    fn conflict_level_escalation() {
        let parsed = parse_banner("SECRET//COSMIC TOP SECRET//NOFORN");
        match &parsed.attrs.classification {
            Some(MarkingClassification::Conflict { us, foreign }) => {
                // US side is reported at the escalated level.
                assert_eq!(*us, Classification::TopSecret);
                // The foreign side keeps the NATO classification as written.
                assert!(matches!(
                    foreign.as_ref(),
                    ForeignClassification::Nato(NatoClassification::CosmicTopSecret)
                ));
            }
            other => panic!("expected Conflict with escalation, got: {other:?}"),
        }
    }
2125
2126 #[test]
2127 fn restricted_classification_parses() {
2128 let parsed = parse_banner("RESTRICTED//NF");
2129 assert_eq!(
2130 parsed.attrs.classification,
2131 Some(MarkingClassification::Us(Classification::Restricted)),
2132 );
2133 }
2134
2135 #[test]
2136 fn restricted_portion_parses() {
2137 let parsed = parse_portion("(R//NF)");
2138 assert_eq!(
2139 parsed.attrs.classification,
2140 Some(MarkingClassification::Us(Classification::Restricted)),
2141 );
2142 }
2143
2144 #[test]
2149 fn non_ic_dissem_limdis_banner_form() {
2150 let parsed = parse_banner("UNCLASSIFIED//LIMDIS");
2151 assert_eq!(parsed.attrs.non_ic_dissem.len(), 1);
2152 assert_eq!(parsed.attrs.non_ic_dissem[0], NonIcDissem::Limdis,);
2153 }
2154
2155 #[test]
2156 fn non_ic_dissem_ds_portion_form() {
2157 let parsed = parse_portion("(U//DS)");
2158 assert_eq!(parsed.attrs.non_ic_dissem.len(), 1);
2159 assert_eq!(parsed.attrs.non_ic_dissem[0], NonIcDissem::Limdis);
2160 }
2161
2162 #[test]
2163 fn non_ic_dissem_les_nf() {
2164 let parsed = parse_portion("(U//LES-NF)");
2165 assert_eq!(parsed.attrs.non_ic_dissem.len(), 1);
2166 assert_eq!(parsed.attrs.non_ic_dissem[0], NonIcDissem::LesNf);
2167 assert!(parsed.attrs.non_ic_dissem[0].carries_noforn());
2168 }
2169
2170 #[test]
2171 fn non_ic_dissem_sbu_nf_banner() {
2172 let parsed = parse_banner("UNCLASSIFIED//SBU NOFORN");
2173 assert_eq!(parsed.attrs.non_ic_dissem.len(), 1);
2174 assert_eq!(parsed.attrs.non_ic_dissem[0], NonIcDissem::SbuNf);
2175 }
2176
2177 #[test]
2178 fn non_ic_dissem_not_confused_with_ic_dissem() {
2179 let parsed = parse_portion("(U//SSI)");
2181 assert!(parsed.attrs.dissem_controls.is_empty());
2182 assert_eq!(parsed.attrs.non_ic_dissem.len(), 1);
2183 assert_eq!(parsed.attrs.non_ic_dissem[0], NonIcDissem::Ssi);
2184 }
2185
2186 #[test]
2187 fn non_ic_dissem_alongside_ic_dissem() {
2188 let parsed = parse_portion("(C//NF//DS)");
2190 assert_eq!(parsed.attrs.dissem_controls.len(), 1); assert_eq!(parsed.attrs.non_ic_dissem.len(), 1); }
2193
2194 #[test]
2199 fn aea_rd_parses() {
2200 let parsed = parse_banner("TOP SECRET//RD//NOFORN");
2201 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2202 assert_eq!(
2203 parsed.attrs.aea_markings[0],
2204 AeaMarking::Rd(marque_ism::RdBlock::default()),
2205 );
2206 }
2207
2208 #[test]
2209 fn aea_rd_cnwdi_compound() {
2210 let parsed = parse_banner("SECRET//RD-CNWDI//NOFORN");
2212 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2213 match &parsed.attrs.aea_markings[0] {
2214 AeaMarking::Rd(rd) => {
2215 assert!(rd.cnwdi);
2216 assert!(rd.sigma.is_empty());
2217 }
2218 other => panic!("expected Rd with CNWDI, got: {other:?}"),
2219 }
2220 }
2221
2222 #[test]
2223 fn aea_rd_sigma_compound() {
2224 let parsed = parse_banner("SECRET//RD-SIGMA 20//NOFORN");
2226 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2227 match &parsed.attrs.aea_markings[0] {
2228 AeaMarking::Rd(rd) => {
2229 assert!(!rd.cnwdi);
2230 assert_eq!(&*rd.sigma, &[20]);
2231 }
2232 other => panic!("expected Rd with SIGMA, got: {other:?}"),
2233 }
2234 }
2235
2236 #[test]
2237 fn aea_rd_cnwdi_sigma_compound() {
2238 let parsed = parse_banner("SECRET//RD-CNWDI-SIGMA 18 20//NOFORN");
2239 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2240 match &parsed.attrs.aea_markings[0] {
2241 AeaMarking::Rd(rd) => {
2242 assert!(rd.cnwdi);
2243 assert_eq!(&*rd.sigma, &[18, 20]);
2244 }
2245 other => panic!("expected Rd with CNWDI+SIGMA, got: {other:?}"),
2246 }
2247 }
2248
2249 #[test]
2250 fn aea_rd_sigma_portion() {
2251 let parsed = parse_portion("(TS//RD-SG 14//NF)");
2253 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2254 match &parsed.attrs.aea_markings[0] {
2255 AeaMarking::Rd(rd) => {
2256 assert_eq!(&*rd.sigma, &[14]);
2257 }
2258 other => panic!("expected Rd with SG, got: {other:?}"),
2259 }
2260 }
2261
2262 #[test]
2263 fn aea_frd_parses() {
2264 let parsed = parse_portion("(S//FRD//NF)");
2265 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2266 assert_eq!(
2267 parsed.attrs.aea_markings[0],
2268 AeaMarking::Frd(marque_ism::FrdBlock::default()),
2269 );
2270 }
2271
2272 #[test]
2273 fn aea_frd_sigma_compound() {
2274 let parsed = parse_banner("SECRET//FRD-SIGMA 14//NOFORN");
2275 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2276 match &parsed.attrs.aea_markings[0] {
2277 AeaMarking::Frd(frd) => {
2278 assert_eq!(&*frd.sigma, &[14]);
2279 }
2280 other => panic!("expected Frd with SIGMA, got: {other:?}"),
2281 }
2282 }
2283
2284 #[test]
2285 fn aea_dod_ucni_parses() {
2286 let parsed = parse_banner("UNCLASSIFIED//DOD UCNI");
2287 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2288 assert_eq!(parsed.attrs.aea_markings[0], AeaMarking::DodUcni);
2289 }
2290
2291 #[test]
2292 fn aea_dcni_portion_parses() {
2293 let parsed = parse_portion("(U//DCNI)");
2294 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2295 assert_eq!(parsed.attrs.aea_markings[0], AeaMarking::DodUcni);
2296 }
2297
2298 #[test]
2299 fn aea_tfni_parses() {
2300 let parsed = parse_banner("SECRET//TFNI//NOFORN");
2301 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2302 assert_eq!(parsed.attrs.aea_markings[0], AeaMarking::Tfni);
2303 }
2304
2305 #[test]
2306 fn aea_rd_n_shorthand() {
2307 let parsed = parse_portion("(S//RD-N//NF)");
2309 assert_eq!(parsed.attrs.aea_markings.len(), 1);
2310 match &parsed.attrs.aea_markings[0] {
2311 AeaMarking::Rd(rd) => assert!(rd.cnwdi),
2312 other => panic!("expected Rd with CNWDI from RD-N, got: {other:?}"),
2313 }
2314 }
2315
2316 #[test]
2319 fn slash_separated_sci_in_single_block_parses() {
2320 use marque_ism::SciControl;
2323 let parsed = parse_portion("(TS//SI/TK//NF)");
2324 assert_eq!(
2325 parsed.attrs.sci_controls.as_ref(),
2326 &[SciControl::Si, SciControl::Tk],
2327 "SI/TK block must yield two SCI controls"
2328 );
2329 assert!(
2331 parsed
2332 .attrs
2333 .token_spans
2334 .iter()
2335 .all(|t| t.kind != TokenKind::Unknown),
2336 "no Unknown spans expected: {:?}",
2337 parsed.attrs.token_spans
2338 );
2339 }
2340
2341 #[test]
2342 fn slash_separated_sci_banner_parses() {
2343 use marque_ism::SciControl;
2345 let parsed = parse_banner("TOP SECRET//SI/TK//NOFORN");
2346 assert_eq!(
2347 parsed.attrs.sci_controls.as_ref(),
2348 &[SciControl::Si, SciControl::Tk],
2349 );
2350 }
2351
2352 #[test]
2353 fn slash_separated_dissem_in_single_block_parses() {
2354 use marque_ism::DissemControl;
2356 let parsed = parse_banner("SECRET//SI//NF/RELIDO");
2357 let dissem: Vec<DissemControl> = parsed.attrs.dissem_controls.to_vec();
2358 assert!(dissem.contains(&DissemControl::Nf), "must contain NF");
2359 assert!(
2360 dissem.contains(&DissemControl::Relido),
2361 "must contain RELIDO"
2362 );
2363 }
2364
2365 #[test]
2366 fn unrecognized_slash_token_emits_unknown() {
2367 let parsed = parse_portion("(S//XYZZY)");
2369 assert!(
2370 parsed
2371 .attrs
2372 .token_spans
2373 .iter()
2374 .any(|t| t.kind == TokenKind::Unknown),
2375 "XYZZY must produce Unknown span"
2376 );
2377 }
2378
2379 #[test]
2384 fn sci_bare_single_still_parses_via_structural_path() {
2385 use marque_ism::{SciControl, SciControlBare, SciControlSystem};
2389 let parsed = parse_portion("(U//SI//NF)");
2390 assert_eq!(parsed.attrs.sci_controls.as_ref(), &[SciControl::Si]);
2391 assert_eq!(parsed.attrs.sci_markings.len(), 1);
2392 let m = &parsed.attrs.sci_markings[0];
2393 assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Si));
2394 assert!(m.compartments.is_empty());
2395 assert_eq!(m.canonical_enum, Some(SciControl::Si));
2396 }
2397
2398 #[test]
2399 fn sci_published_compound_si_g_parses() {
2400 use marque_ism::{SciControl, SciControlBare, SciControlSystem};
2402 let parsed = parse_banner("SECRET//SI-G//NOFORN");
2403 let m = &parsed.attrs.sci_markings[0];
2404 assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Si));
2405 assert_eq!(m.compartments.len(), 1);
2406 assert_eq!(m.compartments[0].identifier.as_ref(), "G");
2407 assert!(m.compartments[0].sub_compartments.is_empty());
2408 assert_eq!(m.canonical_enum, Some(SciControl::SiG));
2409 assert_eq!(parsed.attrs.sci_controls.as_ref(), &[SciControl::SiG]);
2410 }
2411
2412 #[test]
2413 fn sci_published_compound_hcs_p_parses() {
2414 use marque_ism::{SciControl, SciControlBare, SciControlSystem};
2415 let parsed = parse_banner("TOP SECRET//HCS-P//NOFORN");
2416 let m = &parsed.attrs.sci_markings[0];
2417 assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Hcs));
2418 assert_eq!(m.compartments[0].identifier.as_ref(), "P");
2419 assert_eq!(m.canonical_enum, Some(SciControl::HcsP));
2420 }
2421
2422 #[test]
2423 fn sci_bare_tk_parses() {
2424 use marque_ism::{SciControl, SciControlBare, SciControlSystem};
2425 let parsed = parse_banner("SECRET//TK//NOFORN");
2426 let m = &parsed.attrs.sci_markings[0];
2427 assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Tk));
2428 assert!(m.compartments.is_empty());
2429 assert_eq!(m.canonical_enum, Some(SciControl::Tk));
2430 }
2431
2432 #[test]
2433 fn sci_multi_system_si_tk_parses() {
2434 use marque_ism::SciControl;
2436 let parsed = parse_portion("(TS//SI/TK//NF)");
2437 assert_eq!(
2438 parsed.attrs.sci_controls.as_ref(),
2439 &[SciControl::Si, SciControl::Tk]
2440 );
2441 assert_eq!(parsed.attrs.sci_markings.len(), 2);
2442 }
2443
2444 #[test]
2445 fn sci_compound_with_sub_compartment_sets_canonical_none() {
2446 use marque_ism::{SciControlBare, SciControlSystem};
2450 let parsed = parse_banner("SECRET//SI-G ABCD//NOFORN");
2451 assert_eq!(parsed.attrs.sci_markings.len(), 1);
2452 let m = &parsed.attrs.sci_markings[0];
2453 assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Si));
2454 assert_eq!(m.compartments.len(), 1);
2455 assert_eq!(m.compartments[0].identifier.as_ref(), "G");
2456 assert_eq!(m.compartments[0].sub_compartments.len(), 1);
2457 assert_eq!(m.compartments[0].sub_compartments[0].as_ref(), "ABCD");
2458 assert_eq!(m.canonical_enum, None);
2459 assert!(parsed.attrs.sci_controls.is_empty());
2461 }
2462
2463 #[test]
2464 fn sci_capco_canonical_example_parses() {
2465 use marque_ism::{SciControlBare, SciControlSystem};
2468 let parsed = parse_banner("TOP SECRET//123/SI-G ABCD DEFG-MMM AACD//ORCON/NOFORN");
2469 assert_eq!(parsed.attrs.sci_markings.len(), 2);
2470 let m0 = &parsed.attrs.sci_markings[0];
2472 assert!(matches!(&m0.system, SciControlSystem::Custom(s) if s.as_ref() == "123"));
2473 assert!(m0.compartments.is_empty());
2474 assert_eq!(m0.canonical_enum, None);
2475 let m1 = &parsed.attrs.sci_markings[1];
2477 assert_eq!(m1.system, SciControlSystem::Published(SciControlBare::Si));
2478 assert_eq!(m1.compartments.len(), 2);
2479 assert_eq!(m1.compartments[0].identifier.as_ref(), "G");
2480 assert_eq!(m1.compartments[0].sub_compartments.len(), 2);
2481 assert_eq!(m1.compartments[0].sub_compartments[0].as_ref(), "ABCD");
2482 assert_eq!(m1.compartments[0].sub_compartments[1].as_ref(), "DEFG");
2483 assert_eq!(m1.compartments[1].identifier.as_ref(), "MMM");
2484 assert_eq!(m1.compartments[1].sub_compartments.len(), 1);
2485 assert_eq!(m1.compartments[1].sub_compartments[0].as_ref(), "AACD");
2486 assert_eq!(m1.canonical_enum, None);
2488 let sci_block_has_unknown = parsed
2490 .attrs
2491 .token_spans
2492 .iter()
2493 .any(|t| t.kind == TokenKind::Unknown);
2494 assert!(
2495 !sci_block_has_unknown,
2496 "canonical example must not produce Unknown tokens; got: {:?}",
2497 parsed.attrs.token_spans
2498 );
2499 }
2500
2501 #[test]
2502 fn sci_custom_numeric_99_direct_parse() {
2503 use marque_ism::SciControlSystem;
2507 let mut tokens = Vec::new();
2508 let result = parse_sci_block("99", 0, &mut tokens).expect("99 must parse");
2509 assert_eq!(result.len(), 1);
2510 assert!(matches!(&result[0].system, SciControlSystem::Custom(s) if s.as_ref() == "99"));
2511 assert!(result[0].compartments.is_empty());
2512 assert_eq!(result[0].canonical_enum, None);
2513 }
2514
2515 #[test]
2516 fn sci_structural_rejections_return_none() {
2517 let mut tokens = Vec::new();
2519 assert!(parse_sci_block("SI-", 0, &mut tokens).is_none());
2520 let mut tokens = Vec::new();
2522 assert!(parse_sci_block("-SI", 0, &mut tokens).is_none());
2523 let mut tokens = Vec::new();
2525 assert!(parse_sci_block("", 0, &mut tokens).is_none());
2526 let mut tokens = Vec::new();
2528 assert!(parse_sci_block("si-g", 0, &mut tokens).is_none());
2529 let mut tokens = Vec::new();
2531 assert!(parse_sci_block("SI--G", 0, &mut tokens).is_none());
2532 let mut tokens = Vec::new();
2534 assert!(parse_sci_block("SI/", 0, &mut tokens).is_none());
2535 }
2536
2537 #[test]
2538 fn sci_mixed_category_slash_block_falls_through() {
2539 let parsed = parse_banner("SECRET//SI/NF");
2543 let has_unknown_block = parsed
2547 .attrs
2548 .token_spans
2549 .iter()
2550 .any(|t| t.kind == TokenKind::Unknown);
2551 assert!(
2552 has_unknown_block,
2553 "SI/NF must surface as Unknown for E004; got: {:?}",
2554 parsed.attrs.token_spans
2555 );
2556 }
2557
2558 #[test]
2559 fn sci_weird_sub_compartment_parses() {
2560 use marque_ism::{SciControlBare, SciControlSystem};
2563 let parsed = parse_banner("SECRET//SI-G WEIRD FOO//NOFORN");
2564 let m = &parsed.attrs.sci_markings[0];
2565 assert_eq!(m.system, SciControlSystem::Published(SciControlBare::Si));
2566 assert_eq!(m.compartments.len(), 1);
2567 assert_eq!(m.compartments[0].identifier.as_ref(), "G");
2568 assert_eq!(m.compartments[0].sub_compartments.len(), 2);
2569 assert_eq!(m.compartments[0].sub_compartments[0].as_ref(), "WEIRD");
2570 assert_eq!(m.compartments[0].sub_compartments[1].as_ref(), "FOO");
2571 }
2572
2573 fn parse_cab_text(text: &str) -> ParsedMarking {
2578 let source = text.as_bytes();
2579 let tokens = CapcoTokenSet;
2580 let parser = Parser::new(&tokens);
2581 let candidate = make_candidate(source, MarkingType::Cab, 0);
2582 parser
2583 .parse(&candidate, source)
2584 .expect("CAB parse should succeed")
2585 }
2586
2587 #[test]
2588 fn cab_declassify_on_yyyymmdd_populates_declassify_on() {
2589 let text = "Classified By: Jane Doe\nDeclassify On: 20301231";
2590 let parsed = parse_cab_text(text);
2591 assert_eq!(
2592 parsed.attrs.declassify_on,
2593 Some(marque_ism::IsmDate::Date(2030, 12, 31)),
2594 "YYYYMMDD in CAB should set declassify_on to Date"
2595 );
2596 assert!(parsed.attrs.declass_exemption.is_none());
2597 }
2598
2599 #[test]
2600 fn cab_declassify_on_yyyy_populates_declassify_on() {
2601 let text = "Declassify On: 2035";
2602 let parsed = parse_cab_text(text);
2603 assert_eq!(
2604 parsed.attrs.declassify_on,
2605 Some(marque_ism::IsmDate::Year(2035)),
2606 "YYYY in CAB should set declassify_on to Year"
2607 );
2608 }
2609
2610 #[test]
2611 fn cab_declassify_on_iso_date_populates_declassify_on() {
2612 let text = "Declassify On: 2030-12-31";
2614 let parsed = parse_cab_text(text);
2615 assert_eq!(
2616 parsed.attrs.declassify_on,
2617 Some(marque_ism::IsmDate::Date(2030, 12, 31)),
2618 "YYYY-MM-DD in CAB should set declassify_on to Date"
2619 );
2620 }
2621
2622 #[test]
2623 fn cab_declassify_on_exemption_sets_exemption_not_date() {
2624 let text = "Declassify On: 50X1-HUM";
2626 let parsed = parse_cab_text(text);
2627 assert!(
2628 parsed.attrs.declassify_on.is_none(),
2629 "exemption code must not set declassify_on"
2630 );
2631 assert!(
2632 parsed.attrs.declass_exemption.is_some(),
2633 "exemption code must set declass_exemption"
2634 );
2635 }
2636
2637 #[test]
2638 fn cab_declassify_on_invalid_date_silently_ignored() {
2639 let text = "Declassify On: UNRECOGNIZED";
2641 let parsed = parse_cab_text(text);
2642 assert!(
2643 parsed.attrs.declassify_on.is_none(),
2644 "unrecognized Declassify On value should leave declassify_on as None"
2645 );
2646 assert!(parsed.attrs.declass_exemption.is_none());
2647 }
2648
/// A three-line CAB (`Classified By`, `Derived From`, `Declassify On`) must
/// populate `classified_by`, `derived_from` and `declassify_on` together.
#[test]
fn cab_classified_by_and_derived_from_populated() {
    let parsed = parse_cab_text(
        "Classified By: Jane Doe\nDerived From: SCG-2024\nDeclassify On: 20301231",
    );
    let attrs = &parsed.attrs;

    let classified_by = attrs.classified_by.as_deref();
    assert_eq!(
        classified_by,
        Some("Jane Doe"),
        "classified_by should be populated"
    );

    let derived_from = attrs.derived_from.as_deref();
    assert_eq!(
        derived_from,
        Some("SCG-2024"),
        "derived_from should be populated"
    );

    let expected_date = Some(marque_ism::IsmDate::Date(2030, 12, 31));
    assert_eq!(attrs.declassify_on, expected_date);
}
2668
/// A CAB with no `Declassify On:` line must leave both `declassify_on` and
/// `declass_exemption` as `None`.
#[test]
fn cab_without_declassify_on_leaves_both_none() {
    let parsed = parse_cab_text("Classified By: Jane Doe\nDerived From: SCG-2024");
    let attrs = &parsed.attrs;
    assert!(attrs.declassify_on.is_none());
    assert!(attrs.declass_exemption.is_none());
}
2676
/// An eight-digit `YYYYMMDD` segment inside a portion marking must be parsed
/// into `IsmDate::Date` on `declassify_on`.
#[test]
fn portion_with_yyyymmdd_sets_declassify_on() {
    let expected = Some(marque_ism::IsmDate::Date(2030, 12, 31));
    let parsed = parse_portion("(SECRET//20301231//NOFORN)");
    assert_eq!(
        parsed.attrs.declassify_on, expected,
        "YYYYMMDD in portion should set declassify_on"
    );
}
2692
/// A four-digit `YYYY` segment inside a portion marking must be parsed into
/// `IsmDate::Year` on `declassify_on`.
#[test]
fn portion_with_yyyy_sets_declassify_on() {
    let expected = Some(marque_ism::IsmDate::Year(2035));
    let parsed = parse_portion("(SECRET//2035)");
    assert_eq!(
        parsed.attrs.declassify_on, expected,
        "YYYY in portion should set declassify_on"
    );
}
2702
/// 29 February 2003 does not exist (2003 is not a leap year), so the string
/// must not be accepted as a declassification date.
#[test]
fn is_declass_date_rejects_leap_day_non_leap_year() {
    let accepted = is_declass_date("20030229");
    assert!(!accepted);
}
2708
/// 29 February must be accepted in leap years: 2004 (divisible by 4) and
/// 2000 (a century year divisible by 400).
#[test]
fn is_declass_date_accepts_leap_day_in_leap_year() {
    let ordinary_leap_year = is_declass_date("20040229");
    assert!(ordinary_leap_year);

    let century_leap_year = is_declass_date("20000229");
    assert!(century_leap_year);
}
2714
/// A day-of-month of `00` is never valid, so `20030100` must be rejected.
#[test]
fn is_declass_date_rejects_day_zero() {
    let accepted = is_declass_date("20030100");
    assert!(!accepted);
}
2719}
2720
/// Tests for SAR category parsing: direct calls to `parse_sar_category` for the
/// abbreviated (`SAR-`) and full (`SPECIAL ACCESS REQUIRED-`) forms, followed by
/// banner-level checks that go through `Parser::parse`.
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod sar_parse_tests {
    use super::*;
    use marque_ism::span::{MarkingCandidate, MarkingType, Span};
    use marque_ism::token_set::CapcoTokenSet;

    /// `SAR-BP` yields the abbreviated indicator, a single program `BP` with no
    /// compartments, and exactly one indicator span and one program span.
    #[test]
    fn single_program_no_compartments() {
        let (sar, token_spans) =
            parse_sar_category("SAR-BP", 0).expect("grammar accepts SAR-BP");
        assert_eq!(sar.indicator, SarIndicator::Abbrev);
        assert_eq!(sar.programs.len(), 1);

        let only_program = &sar.programs[0];
        assert_eq!(&*only_program.identifier, "BP");
        assert_eq!(only_program.compartments.len(), 0);

        let indicator_count = token_spans
            .iter()
            .filter(|entry| entry.kind == TokenKind::SarIndicator)
            .count();
        assert_eq!(indicator_count, 1);

        let program_count = token_spans
            .iter()
            .filter(|entry| entry.kind == TokenKind::SarProgram)
            .count();
        assert_eq!(program_count, 1);
    }

    /// Slash-separated programs (`BP/CD/XR`) are returned in input order, each
    /// without compartments.
    #[test]
    fn three_programs_no_compartments() {
        let (sar, _) =
            parse_sar_category("SAR-BP/CD/XR", 0).expect("grammar accepts three programs");
        assert_eq!(sar.programs.len(), 3);

        let identifiers: Vec<&str> = sar
            .programs
            .iter()
            .map(|program| &*program.identifier)
            .collect();
        assert_eq!(identifiers, vec!["BP", "CD", "XR"]);

        for program in sar.programs.iter() {
            assert_eq!(program.compartments.len(), 0);
        }
    }

    /// A hyphen after the program identifier introduces a compartment
    /// (`BP-J12`); that compartment has no sub-compartments.
    #[test]
    fn program_with_single_compartment() {
        let (sar, _) = parse_sar_category("SAR-BP-J12", 0).expect("grammar accepts");
        assert_eq!(sar.programs.len(), 1);

        let program = &sar.programs[0];
        assert_eq!(&*program.identifier, "BP");
        assert_eq!(program.compartments.len(), 1);

        let compartment = &program.compartments[0];
        assert_eq!(&*compartment.identifier, "J12");
        assert_eq!(compartment.sub_compartments.len(), 0);
    }

    /// A space-separated token after a compartment (`J12 J54`) is recorded as a
    /// sub-compartment of that compartment.
    #[test]
    fn program_with_compartment_and_sub_compartment() {
        let (sar, _) = parse_sar_category("SAR-BP-J12 J54", 0).expect("grammar accepts");
        let program = &sar.programs[0];
        assert_eq!(program.compartments.len(), 1);

        let compartment = &program.compartments[0];
        assert_eq!(&*compartment.identifier, "J12");
        assert_eq!(compartment.sub_compartments.len(), 1);
        assert_eq!(&*compartment.sub_compartments[0], "J54");
    }

    /// Parses a three-program block and checks every program's compartments and
    /// sub-compartments, plus the spans and text of the indicator and the first
    /// program token.
    #[test]
    fn canonical_h5_p100_multi_program_example() {
        let (sar, token_spans) =
            parse_sar_category("SAR-BP-J12 J54-K15/CD-YYY 456 689/XR-XRA RB", 0)
                .expect("grammar accepts");

        assert_eq!(sar.indicator, SarIndicator::Abbrev);
        assert_eq!(sar.programs.len(), 3);

        // BP: compartment J12 (sub-compartment J54) and compartment K15 (none).
        let bp = &sar.programs[0];
        assert_eq!(&*bp.identifier, "BP");
        assert_eq!(bp.compartments.len(), 2);
        let j12 = &bp.compartments[0];
        assert_eq!(&*j12.identifier, "J12");
        let j12_subs = j12
            .sub_compartments
            .iter()
            .map(|sub| &**sub)
            .collect::<Vec<_>>();
        assert_eq!(j12_subs, vec!["J54"]);
        let k15 = &bp.compartments[1];
        assert_eq!(&*k15.identifier, "K15");
        assert_eq!(k15.sub_compartments.len(), 0);

        // CD: compartment YYY with sub-compartments 456 and 689.
        let cd = &sar.programs[1];
        assert_eq!(&*cd.identifier, "CD");
        assert_eq!(cd.compartments.len(), 1);
        let yyy = &cd.compartments[0];
        assert_eq!(&*yyy.identifier, "YYY");
        let yyy_subs = yyy
            .sub_compartments
            .iter()
            .map(|sub| &**sub)
            .collect::<Vec<_>>();
        assert_eq!(yyy_subs, vec!["456", "689"]);

        // XR: compartment XRA with sub-compartment RB.
        let xr = &sar.programs[2];
        assert_eq!(&*xr.identifier, "XR");
        assert_eq!(xr.compartments.len(), 1);
        let xra = &xr.compartments[0];
        assert_eq!(&*xra.identifier, "XRA");
        let xra_subs = xra
            .sub_compartments
            .iter()
            .map(|sub| &**sub)
            .collect::<Vec<_>>();
        assert_eq!(xra_subs, vec!["RB"]);

        // The indicator covers the leading "SAR-"; the first program follows it.
        let indicator_span = token_spans
            .iter()
            .find(|entry| entry.kind == TokenKind::SarIndicator)
            .unwrap();
        assert_eq!(indicator_span.span, Span::new(0, 4));
        assert_eq!(&*indicator_span.text, "SAR-");

        let first_program_span = token_spans
            .iter()
            .find(|entry| entry.kind == TokenKind::SarProgram)
            .unwrap();
        assert_eq!(first_program_span.span, Span::new(4, 6));
        assert_eq!(&*first_program_span.text, "BP");
    }

    /// The full-form indicator accepts a program nickname that contains a space
    /// (`BUTTER POPCORN`); the indicator span covers the 24-byte prefix.
    #[test]
    fn full_form_single_program_with_space() {
        let (sar, token_spans) =
            parse_sar_category("SPECIAL ACCESS REQUIRED-BUTTER POPCORN", 0).unwrap();
        assert_eq!(sar.indicator, SarIndicator::Full);
        assert_eq!(sar.programs.len(), 1);

        let only_program = &sar.programs[0];
        assert_eq!(&*only_program.identifier, "BUTTER POPCORN");
        assert_eq!(only_program.compartments.len(), 0);

        let indicator_span = token_spans
            .iter()
            .find(|entry| entry.kind == TokenKind::SarIndicator)
            .unwrap();
        assert_eq!(&*indicator_span.text, "SPECIAL ACCESS REQUIRED-");
        assert_eq!(indicator_span.span, Span::new(0, 24));
    }

    /// The full form also accepts a compartment and sub-compartment after the
    /// nickname (`BUTTER POPCORN-J12 J54`).
    #[test]
    fn full_form_with_compartment_and_sub() {
        let (sar, _token_spans) =
            parse_sar_category("SPECIAL ACCESS REQUIRED-BUTTER POPCORN-J12 J54", 0)
                .expect("grammar accepts full form with compartment");
        assert_eq!(sar.indicator, SarIndicator::Full);
        assert_eq!(sar.programs.len(), 1);

        let program = &sar.programs[0];
        assert_eq!(&*program.identifier, "BUTTER POPCORN");
        assert_eq!(program.compartments.len(), 1);

        let compartment = &program.compartments[0];
        assert_eq!(&*compartment.identifier, "J12");
        assert_eq!(compartment.sub_compartments.len(), 1);
        assert_eq!(&*compartment.sub_compartments[0], "J54");
    }

    /// A full-form nickname made only of digits (`123`) is rejected.
    // NOTE(review): the test name also mentions hyphens, but only the digit case
    // is asserted here.
    #[test]
    fn full_form_rejects_digits_or_hyphens_in_nickname() {
        let outcome = parse_sar_category("SPECIAL ACCESS REQUIRED-123", 0);
        assert!(outcome.is_none());
    }

    /// A `//` inside the text handed to `parse_sar_category` is rejected.
    #[test]
    fn rejects_double_slash_inside_block() {
        let outcome = parse_sar_category("SAR-BP//CD", 0);
        assert!(outcome.is_none());
    }

    /// A bare `SAR` with no trailing hyphen is rejected.
    #[test]
    fn rejects_missing_hyphen() {
        let outcome = parse_sar_category("SAR", 0);
        assert!(outcome.is_none());
    }

    /// `SAR-` with nothing after the hyphen is rejected.
    #[test]
    fn rejects_empty_program() {
        let outcome = parse_sar_category("SAR-", 0);
        assert!(outcome.is_none());
    }

    /// The empty string is rejected.
    #[test]
    fn rejects_empty_string() {
        let outcome = parse_sar_category("", 0);
        assert!(outcome.is_none());
    }

    /// Inputs that do not start with a SAR indicator (`NOFORN`, `SI`) are rejected.
    #[test]
    fn rejects_non_sar_prefix() {
        let dissem_like = parse_sar_category("NOFORN", 0);
        assert!(dissem_like.is_none());

        let sci_like = parse_sar_category("SI", 0);
        assert!(sci_like.is_none());
    }

    /// Abbreviated program identifiers of one character (`B`) or four characters
    /// (`BPCD`) are rejected.
    #[test]
    fn rejects_program_id_out_of_2_3_length() {
        let too_short = parse_sar_category("SAR-B", 0);
        assert!(too_short.is_none());

        let too_long = parse_sar_category("SAR-BPCD", 0);
        assert!(too_long.is_none());
    }

    /// Parses `text` as a banner candidate spanning the whole input, using
    /// `CapcoTokenSet`, and panics with "parse succeeds" if parsing fails.
    fn make_banner(text: &str) -> ParsedMarking {
        let bytes = text.as_bytes();
        let whole_input = MarkingCandidate {
            span: Span::new(0, bytes.len()),
            kind: MarkingType::Banner,
        };
        let token_set = CapcoTokenSet;
        let parser = Parser::new(&token_set);
        parser.parse(&whole_input, bytes).expect("parse succeeds")
    }

    /// A SAR block inside a banner fills `sar_markings`, records SAR token spans,
    /// and does not stop the trailing `NOFORN` from being recognized.
    #[test]
    fn banner_dispatch_populates_sar_markings() {
        let parsed = make_banner("TOP SECRET//SAR-BP//NOFORN");
        let sar = parsed
            .attrs
            .sar_markings
            .as_ref()
            .expect("SAR block must populate sar_markings");
        assert_eq!(sar.programs.len(), 1);
        assert_eq!(&*sar.programs[0].identifier, "BP");

        // True when at least one recorded token span has the requested kind.
        let has_kind = |wanted: TokenKind| {
            parsed
                .attrs
                .token_spans
                .iter()
                .any(|entry| entry.kind == wanted)
        };
        assert!(has_kind(TokenKind::SarIndicator));
        assert!(has_kind(TokenKind::SarProgram));

        let noforn_seen = parsed
            .attrs
            .dissem_controls
            .contains(&marque_ism::DissemControl::Nf);
        assert!(
            noforn_seen,
            "NOFORN must still be recognized after the SAR block"
        );
    }

    /// The multi-program block parses inside a banner, and the indicator span is
    /// reported relative to the banner start rather than the SAR block.
    #[test]
    fn banner_dispatch_multi_program_canonical() {
        let parsed = make_banner("SECRET//SAR-BP-J12 J54-K15/CD-YYY 456 689/XR-XRA RB//NOFORN");
        let sar = parsed.attrs.sar_markings.as_ref().expect("sar present");
        assert_eq!(sar.programs.len(), 3);

        let identifiers: Vec<&str> = sar
            .programs
            .iter()
            .map(|program| &*program.identifier)
            .collect();
        assert_eq!(identifiers, vec!["BP", "CD", "XR"]);

        // "SECRET//" occupies the first 8 bytes, so "SAR-" is expected at 8..12.
        let indicator_span = parsed
            .attrs
            .token_spans
            .iter()
            .find(|entry| entry.kind == TokenKind::SarIndicator)
            .expect("SarIndicator span present");
        assert_eq!(&*indicator_span.text, "SAR-");
        assert_eq!(indicator_span.span, Span::new(8, 12));
    }

    /// Only the first SAR block in a banner populates `sar_markings`; a second
    /// one (`SAR-CD`) is recorded as an `Unknown` token span.
    #[test]
    fn second_sar_block_becomes_unknown() {
        let parsed = make_banner("SECRET//SAR-BP//SAR-CD//NOFORN");
        let sar = parsed
            .attrs
            .sar_markings
            .as_ref()
            .expect("first SAR block populates sar_markings");
        assert_eq!(sar.programs.len(), 1);
        assert_eq!(&*sar.programs[0].identifier, "BP");

        // Keep this local's name: the assertion message below captures it inline.
        let unknown_texts: Vec<&str> = parsed
            .attrs
            .token_spans
            .iter()
            .filter(|entry| entry.kind == TokenKind::Unknown)
            .map(|entry| &*entry.text)
            .collect();
        let duplicate_recorded = unknown_texts.contains(&"SAR-CD");
        assert!(
            duplicate_recorded,
            "duplicate SAR block must be recorded as Unknown, got: {unknown_texts:?}",
        );
    }
}