marque_ism/attrs.rs
1//! `IsmAttributes` — the canonical in-memory representation of a classification marking.
2//!
3//! Mirrors the IC ISM XML attribute model. Every source format (free text, XML, web forms)
4//! normalizes into this struct before rule validation.
5//!
6//! # Type design
7//! Multi-value fields use `Box<[T]>` rather than `Vec<T>` to avoid over-allocation
8//! after parsing. Most markings have 0–4 values per field.
9//!
10//! # Classification systems
11//!
12//! A marking carries exactly one classification system: US, FGI (non-US),
13//! NATO, or JOINT. This is represented by [`MarkingClassification`]. Non-US
14//! classifications start with `//` (the US classification slot is empty).
15//!
16//! When the parser encounters two classification systems in one marking
17//! (e.g., `SECRET//NATO SECRET//NOFORN`), it resolves to
18//! [`MarkingClassification::Conflict`] — US wins at the greater of the two
19//! levels, and the foreign part is preserved for rule-generated fixes.
20//!
21//! # Code generation
22//! CVE enum variants (`SciControl`, `DissemControl`, `DeclassExemption`, `SarIdentifier`)
23//! are generated by `build.rs` from ODNI CVE XML files and re-exported from
24//! `crate::generated::values`.
25
26use crate::generated::values;
27use crate::span::Span;
28
29// Re-export generated enum types for convenience.
30pub use values::{DeclassExemption, DissemControl, SarIdentifier, SciControl};
31
/// Canonical in-memory representation of a classification marking.
///
/// Produced by `marque-core::parser` from scanner candidates.
/// Consumed by `marque-rules::Rule` implementations for validation.
///
/// The derived `Default` yields an "empty" marking: every `Option` field is
/// `None` and every boxed slice is empty.
///
/// # Block ordering (CAPCO)
///
/// Marking fields are declared in CAPCO block sequence:
/// Classification → SCI → SAR → AEA → FGI marker → Dissem (incl. REL TO)
///
/// The remaining fields hold CAB data and parser bookkeeping.
#[non_exhaustive]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IsmAttributes {
    /// The marking's classification system and level.
    /// `None` means parsing failed to identify a classification.
    pub classification: Option<MarkingClassification>,

    /// SCI controls (e.g., SI, TK, HCS-P). Ordered per CAPCO block ordering.
    pub sci_controls: Box<[SciControl]>,

    /// Special Access Required identifiers.
    pub sar_identifiers: Box<[SarIdentifier]>,

    /// Atomic Energy Act markings (CAPCO Register §6).
    ///
    /// Includes RD, FRD, CNWDI, TFNI, SIGMA, and UCNI variants.
    /// Positioned between SAR and FGI in CAPCO block ordering.
    pub aea_markings: Box<[AeaMarking]>,

    /// FGI block in US-classified markings: `FGI` or `FGI [LIST]`.
    ///
    /// Present when a US-classified document references foreign government
    /// information. This is the *marker* in the banner/portion — distinct
    /// from [`MarkingClassification::Fgi`], which means the marking IS
    /// foreign-classified.
    ///
    /// `None` when no FGI marker is present.
    pub fgi_marker: Option<FgiMarker>,

    /// Dissemination controls (e.g., NOFORN, RELIDO, ORCON, FISA).
    pub dissem_controls: Box<[DissemControl]>,

    /// Non-IC dissemination controls (e.g., LIMDIS, SBU, LES, SSI).
    ///
    /// Separate authority framework (CAPCO Register §9), distinct from IC
    /// dissem controls. In classified documents these are generally portion-
    /// only and stripped from banners, but some values propagate to the
    /// classified banner; see [`NonIcDissem::propagates_to_classified_banner`]
    /// for the authoritative rule. On unclassified pages they propagate to
    /// the banner. LES-NF and SBU-NF carry NOFORN treatment even when
    /// stripped (see [`NonIcDissem::carries_noforn`]).
    pub non_ic_dissem: Box<[NonIcDissem]>,

    /// REL TO country trigraphs. USA must be present and first if non-empty.
    ///
    /// Structurally part of the dissem block (comma-delimited), but kept as
    /// a typed field for E002 and REL TO validation rules.
    pub rel_to: Box<[Trigraph]>,

    /// Declassification date from CAB (free text, e.g., "20331231").
    pub declassify_on: Option<Box<str>>,

    /// Free-text "Classified By" identifier from CAB.
    pub classified_by: Option<Box<str>>,

    /// Free-text "Derived From" source from CAB.
    pub derived_from: Option<Box<str>>,

    /// Declassification exemption code from CAB (e.g., 25X1, 50X1-HUM).
    pub declass_exemption: Option<DeclassExemption>,

    /// Per-token byte spans into the *original source buffer*, recorded by
    /// the parser as it walks the marking string. Phase 3 added this so
    /// rules can point at the exact offending byte range instead of the
    /// whole marking. Empty for CAB markings (CAB parsing is line-structured
    /// and doesn't go through the token-walking path).
    ///
    /// Indexing convention: `token_spans` is in document order. To find the
    /// span for the Nth `DissemControl`, walk the slice and pick the Nth
    /// entry whose `kind == TokenKind::DissemControl`.
    pub token_spans: Box<[TokenSpan]>,
}
113
114impl IsmAttributes {
115 /// Convenience accessor: returns the US classification level if this
116 /// marking uses the US or Conflict classification system.
117 ///
118 /// Returns `None` for pure FGI, NATO, or JOINT markings (use
119 /// `self.classification` directly for those).
120 pub fn us_classification(&self) -> Option<Classification> {
121 match self.classification {
122 Some(MarkingClassification::Us(c)) => Some(c),
123 Some(MarkingClassification::Conflict { us, .. }) => Some(us),
124 _ => None,
125 }
126 }
127}
128
/// One parser-recognized token plus its byte span in the original source.
///
/// Used by Phase 3 rules to surface byte-precise diagnostic spans without
/// re-parsing the source. The `text` field carries the literal token bytes
/// so rules that need the source content (E006, E007, E008 against migration
/// keys) can look up entries without threading `&[u8] source` through every
/// `Rule::check` signature.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Category of the token; rules filter on this.
    pub kind: TokenKind,
    /// Byte range of the token in the original source buffer.
    pub span: Span,
    /// Literal token text as it appeared in the source.
    pub text: Box<str>,
}
142
/// Discriminant for [`TokenSpan`]. Phase 3 rules read these to filter
/// token-span lookups by category.
///
/// The type is `Copy` and `Hash`, so it can be passed by value and used as a
/// map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// Classification level token (S, SECRET, TS, TOP SECRET, ...).
    Classification,
    /// SCI control token (SI, TK, HCS, ...).
    SciControl,
    /// SAR identifier token.
    SarIdentifier,
    /// Atomic Energy Act marking token (RD, FRD, CNWDI, TFNI, SIGMA ##, etc.).
    AeaMarking,
    /// FGI marker token (`FGI`, `FGI DEU`, `FGI DEU GBR`).
    FgiMarker,
    /// Dissemination control token (NOFORN, NF, ORCON, OC, RELIDO, ...).
    DissemControl,
    /// Non-IC dissemination control token (LIMDIS, DS, SBU, LES, SSI, ...).
    NonIcDissem,
    /// REL TO country trigraph (USA, GBR, AUS, ...). One per token, not the
    /// whole REL TO list.
    RelToTrigraph,
    /// The full `REL TO ...` block text. Recorded so E013 can inspect the
    /// raw source for delimiter errors (spaces instead of commas).
    RelToBlock,
    /// Declassification exemption code in CAB or banner (25X1, 50X1-HUM).
    DeclassExemption,
    /// Declassification date in CAB or banner (YYYYMMDD or YYYY).
    DeclassDate,
    /// `//` separator between blocks. Recorded so E004 can detect extra/
    /// missing separator runs.
    Separator,
    /// A non-empty block that did not match any known token kind. E008 fires
    /// one diagnostic per `Unknown` entry.
    Unknown,
}
178
179// ===========================================================================
180// Classification types
181// ===========================================================================
182
/// The classification system and level for a marking.
///
/// A marking has exactly one classification system. When the parser finds
/// two (e.g., `SECRET//NATO SECRET//...`), it resolves to [`Conflict`](Self::Conflict).
///
/// Use [`MarkingClassification::effective_level`] to compare markings from
/// different systems on the single [`Classification`] ladder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkingClassification {
    /// US IC classification.
    Us(Classification),
    /// Non-US (FGI) classification: `//GBR S//...`
    Fgi(FgiClassification),
    /// NATO classification: `//NS//...`
    Nato(NatoClassification),
    /// JOINT classification (US co-owned): `//JOINT S USA GBR//...`
    Joint(JointClassification),
    /// Parser found two classification systems in one marking.
    ///
    /// US wins, upgraded to the greater of the two levels.
    /// The foreign part is preserved so rules can suggest the FGI fix.
    ///
    /// Example: `SECRET//COSMIC TOP SECRET//REL TO USA, NATO`
    /// → `us: TopSecret`, `foreign: Nato(CosmicTopSecret)`
    /// → fix: `TOP SECRET//FGI NATO//REL TO USA, NATO`
    Conflict {
        /// Resolved US classification (max of both levels).
        us: Classification,
        /// The foreign classification that should become an FGI marker.
        /// NOTE(review): boxed, presumably to keep this enum small — confirm.
        foreign: Box<ForeignClassification>,
    },
}
212
213impl MarkingClassification {
214 /// The effective classification level for ordering purposes, regardless of
215 /// classification system.
216 ///
217 /// NATO levels are mapped to their US equivalents via
218 /// [`NatoClassification::us_equivalent`]. All systems use the
219 /// [`Classification`] ladder for comparison so that `Iterator::max()` on
220 /// a mixed set of portions returns the most restrictive level overall.
221 pub fn effective_level(&self) -> Classification {
222 match self {
223 Self::Us(c) => *c,
224 Self::Fgi(f) => f.level,
225 Self::Nato(n) => n.us_equivalent(),
226 Self::Joint(j) => j.level,
227 Self::Conflict { us, .. } => *us,
228 }
229 }
230}
231
232impl Default for MarkingClassification {
233 fn default() -> Self {
234 Self::Us(Classification::Unclassified)
235 }
236}
237
/// The non-US classification in a [`MarkingClassification::Conflict`].
///
/// Preserves enough information for rules to generate the FGI fix:
/// the foreign system, its level, and any associated countries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ForeignClassification {
    /// Country-prefixed (or source-concealed) FGI classification.
    Fgi(FgiClassification),
    /// NATO classification, including any SAP designation.
    Nato(NatoClassification),
    /// JOINT classification with its co-owning countries.
    Joint(JointClassification),
}
248
249// ---------------------------------------------------------------------------
250// Classification level (US ladder + RESTRICTED for foreign interop)
251// ---------------------------------------------------------------------------
252
/// Classification level. Ordered by restrictiveness: U < R < C < S < TS.
///
/// Includes `Restricted` for foreign-origin markings — many non-US
/// classification systems (and NATO) have a RESTRICTED level between
/// UNCLASSIFIED and CONFIDENTIAL.
///
/// The derived `Ord` reflects restrictiveness ordering so that
/// `Iterator::max()` returns the most restrictive level. That ordering comes
/// from the variant declaration order below — do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Classification {
    /// `UNCLASSIFIED` / `U` — least restrictive.
    Unclassified,
    /// `RESTRICTED` / `R` — foreign-origin level between U and C.
    Restricted,
    /// `CONFIDENTIAL` / `C`.
    Confidential,
    /// `SECRET` / `S`.
    Secret,
    /// `TOP SECRET` / `TS` — most restrictive.
    TopSecret,
}
269
270impl Classification {
271 /// Banner form (full words, no abbreviations).
272 pub fn banner_str(self) -> &'static str {
273 match self {
274 Self::Unclassified => "UNCLASSIFIED",
275 Self::Restricted => "RESTRICTED",
276 Self::Confidential => "CONFIDENTIAL",
277 Self::Secret => "SECRET",
278 Self::TopSecret => "TOP SECRET",
279 }
280 }
281
282 /// Portion form (abbreviation used in portion markings).
283 pub fn portion_str(self) -> &'static str {
284 match self {
285 Self::Unclassified => "U",
286 Self::Restricted => "R",
287 Self::Confidential => "C",
288 Self::Secret => "S",
289 Self::TopSecret => "TS",
290 }
291 }
292}
293
294// ---------------------------------------------------------------------------
295// FGI classification (non-US, country-prefixed)
296// ---------------------------------------------------------------------------
297
/// Non-US (FGI) classification.
///
/// Two forms exist:
///
/// - **Source-acknowledged**: country trigraph(s) identify the originator.
///   `//GBR S//REL TO USA, GBR`
/// - **Source-concealed**: `FGI` replaces the country trigraph(s) when
///   the originating country is sensitive. `//FGI S//REL TO USA, GBR`
///   An empty `countries` list indicates source-concealed FGI.
///
/// Countries are space-delimited in the source marking.
///
/// Not to be confused with [`FgiMarker`], which flags foreign government
/// information inside a US-classified marking.
///
/// # Banner aggregation
///
/// If a document contains **any** source-concealed FGI portions alongside
/// source-acknowledged FGI portions, the banner must use `FGI` without
/// country codes — revealing the country list would compromise the
/// concealed source. This rule is enforced at the `PageContext` level
/// during banner validation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FgiClassification {
    /// Originating countries (space-delimited in source).
    /// Empty for source-concealed FGI (`//FGI S//...`).
    pub countries: Box<[Trigraph]>,
    /// Classification level (includes RESTRICTED).
    pub level: Classification,
}
325
326// ---------------------------------------------------------------------------
327// NATO classification
328// ---------------------------------------------------------------------------
329
/// NATO classification ladder with optional SAP designation.
///
/// NATO uses a separate classification system governed by treaty.
/// Not everyone with a US clearance is cleared for NATO; many US systems
/// are not approved for NATO information.
///
/// # NATO SAP markings
///
/// Three NATO SAP programs exist, each with specific constraints:
///
/// - **ATOMAL**: Applies to CTS, NS, and NC levels. Space-separated in
///   banner (`COSMIC TOP SECRET ATOMAL`). Portion marks: CTSA, NSAT, NCA.
///   Alternative portion forms CTS-A, NS-A, NC-A also appear in practice.
/// - **BOHEMIA**: CTS-only. Hyphenated (`COSMIC TOP SECRET-BOHEMIA` → `CTS-B`).
/// - **BALK**: CTS-only, exercise replacement for BOHEMIA.
///   Hyphenated (`COSMIC TOP SECRET-BALK` → `CTS-BALK`).
///
/// Per the CAPCO Register, bare `COSMIC TOP SECRET` requires either
/// BOHEMIA or BALK — standalone CTS without a SAP suffix is an error.
///
/// This enum deliberately does not derive `Ord`; compare levels through
/// [`NatoClassification::base_level`] instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NatoClassification {
    /// Portion mark `NU`.
    NatoUnclassified,
    /// Portion mark `NR`.
    NatoRestricted,
    /// Portion mark `NC`.
    NatoConfidential,
    /// Portion mark `NCA` (alt: `NC-A`).
    NatoConfidentialAtomal,
    /// Portion mark `NS`.
    NatoSecret,
    /// Portion mark `NSAT` (alt: `NS-A`).
    NatoSecretAtomal,
    /// Portion mark `CTS` (requires BOHEMIA or BALK).
    CosmicTopSecret,
    /// Portion mark `CTSA` (alt: `CTS-A`).
    CosmicTopSecretAtomal,
    /// Portion mark `CTS-B`.
    CosmicTopSecretBohemia,
    /// Portion mark `CTS-BALK`.
    CosmicTopSecretBalk,
}
362
363impl NatoClassification {
364 /// Banner form (full words, as used in banner marking lines).
365 pub fn banner_str(self) -> &'static str {
366 match self {
367 Self::NatoUnclassified => "NATO UNCLASSIFIED",
368 Self::NatoRestricted => "NATO RESTRICTED",
369 Self::NatoConfidential => "NATO CONFIDENTIAL",
370 Self::NatoConfidentialAtomal => "NATO CONFIDENTIAL ATOMAL",
371 Self::NatoSecret => "NATO SECRET",
372 Self::NatoSecretAtomal => "NATO SECRET ATOMAL",
373 Self::CosmicTopSecret => "COSMIC TOP SECRET",
374 Self::CosmicTopSecretAtomal => "COSMIC TOP SECRET ATOMAL",
375 Self::CosmicTopSecretBohemia => "COSMIC TOP SECRET-BOHEMIA",
376 Self::CosmicTopSecretBalk => "COSMIC TOP SECRET-BALK",
377 }
378 }
379
380 /// Portion form (primary abbreviation from the CAPCO Register).
381 pub fn portion_str(self) -> &'static str {
382 match self {
383 Self::NatoUnclassified => "NU",
384 Self::NatoRestricted => "NR",
385 Self::NatoConfidential => "NC",
386 Self::NatoConfidentialAtomal => "NCA",
387 Self::NatoSecret => "NS",
388 Self::NatoSecretAtomal => "NSAT",
389 Self::CosmicTopSecret => "CTS",
390 Self::CosmicTopSecretAtomal => "CTSA",
391 Self::CosmicTopSecretBohemia => "CTS-B",
392 Self::CosmicTopSecretBalk => "CTS-BALK",
393 }
394 }
395
396 /// The base classification level (without SAP), for ordering comparisons.
397 pub fn base_level(self) -> NatoLevel {
398 match self {
399 Self::NatoUnclassified => NatoLevel::NatoUnclassified,
400 Self::NatoRestricted => NatoLevel::NatoRestricted,
401 Self::NatoConfidential | Self::NatoConfidentialAtomal => NatoLevel::NatoConfidential,
402 Self::NatoSecret | Self::NatoSecretAtomal => NatoLevel::NatoSecret,
403 Self::CosmicTopSecret
404 | Self::CosmicTopSecretAtomal
405 | Self::CosmicTopSecretBohemia
406 | Self::CosmicTopSecretBalk => NatoLevel::CosmicTopSecret,
407 }
408 }
409
410 /// Map the NATO level to the equivalent US classification for conflict
411 /// resolution (US wins at the greater of the two).
412 pub fn us_equivalent(self) -> Classification {
413 match self.base_level() {
414 NatoLevel::NatoUnclassified => Classification::Unclassified,
415 NatoLevel::NatoRestricted => Classification::Restricted,
416 NatoLevel::NatoConfidential => Classification::Confidential,
417 NatoLevel::NatoSecret => Classification::Secret,
418 NatoLevel::CosmicTopSecret => Classification::TopSecret,
419 }
420 }
421}
422
/// NATO classification level without SAP, for ordering comparisons.
///
/// The derived `Ord` follows declaration order, so variants are listed from
/// least to most restrictive — do not reorder them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NatoLevel {
    /// NATO UNCLASSIFIED.
    NatoUnclassified,
    /// NATO RESTRICTED.
    NatoRestricted,
    /// NATO CONFIDENTIAL (with or without ATOMAL).
    NatoConfidential,
    /// NATO SECRET (with or without ATOMAL).
    NatoSecret,
    /// COSMIC TOP SECRET (any SAP designation).
    CosmicTopSecret,
}
432
433// ---------------------------------------------------------------------------
434// JOINT classification
435// ---------------------------------------------------------------------------
436
/// JOINT classification: US is co-owner with other nations.
///
/// `//JOINT S USA GBR//REL TO USA, GBR`
///
/// Country list is space-delimited (NOT comma-delimited like REL TO).
/// Must include USA. All JOINT participants must also appear in REL TO.
///
/// This type only stores the parsed values; it does not itself enforce the
/// USA / REL TO requirements above.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JointClassification {
    /// Classification level (US ladder, includes RESTRICTED).
    pub level: Classification,
    /// Co-owning countries (space-delimited in source). Must include USA.
    pub countries: Box<[Trigraph]>,
}
450
451// ---------------------------------------------------------------------------
452// Atomic Energy Act markings
453// ---------------------------------------------------------------------------
454
/// Atomic Energy Act information markings (CAPCO Register §6).
///
/// AEA markings appear as a single `//`-delimited block in the marking string,
/// using hyphen separators for compound forms:
/// - `SECRET//RD//NOFORN` — RD alone
/// - `SECRET//RD-CNWDI//NOFORN` — RD with CNWDI modifier
/// - `SECRET//RD-SIGMA 20//NOFORN` — RD with SIGMA compartment
/// - `SECRET//RD-SIGMA 18 20//NOFORN` — RD with multiple SIGMAs
/// - `SECRET//FRD//NOFORN` — FRD alone
/// - `SECRET//FRD-SIGMA 14//NOFORN` — FRD with SIGMA
///
/// Standalone (non-compound) markings:
/// - `UNCLASSIFIED//DOD UCNI` / `(U//DCNI)`
/// - `UNCLASSIFIED//DOE UCNI` / `(U//UCNI)`
/// - `SECRET//TFNI//NOFORN` / `(S//TFNI//NF)`
///
/// # Key rules (CAPCO-2016)
///
/// These rules are listed for reference; this enum only models the parsed
/// block and does not enforce them.
///
/// - RD and FRD always require NOFORN unless a sharing agreement exists
///   (default severity: Error, configurable to Warn via `.marque.toml`)
/// - CNWDI may only be used with TS or S RD (not standalone, not with FRD)
/// - SIGMA 14, 15, 18, 20 may only be used with TS or S RD or FRD
/// - RD takes precedence over FRD and TFNI in both banners and portions
/// - SIGMA numbers must be in numerical order, space-separated
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum AeaMarking {
    /// Compound RD block: `RD`, `RD-CNWDI`, `RD-SIGMA 20`, `RD-CNWDI-SIGMA 18 20`
    Rd(RdBlock),
    /// Compound FRD block: `FRD`, `FRD-SIGMA 14`
    Frd(FrdBlock),
    /// DOD UCNI / DCNI — standalone, unclassified only
    DodUcni,
    /// DOE UCNI / UCNI — standalone, unclassified only
    DoeUcni,
    /// TFNI — standalone
    Tfni,
}
493
/// Restricted Data block with optional modifiers.
///
/// Rendered as `RD`, `RD-CNWDI`, `RD-SIGMA 20`, or `RD-CNWDI-SIGMA 18 20`.
///
/// The derived `Default` is a bare `RD`: no CNWDI and no SIGMA numbers.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct RdBlock {
    /// Whether CNWDI is present. Only valid with TS or S classification.
    pub cnwdi: bool,
    /// SIGMA compartment numbers (14, 15, 18, 20). Must be in numerical order.
    /// Empty if no SIGMA designation.
    pub sigma: Box<[u8]>,
}
514
/// Formerly Restricted Data block with optional SIGMA modifier.
///
/// Rendered as `FRD` or `FRD-SIGMA 14`.
///
/// The derived `Default` is a bare `FRD` with no SIGMA numbers.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct FrdBlock {
    /// SIGMA compartment numbers. Must be in numerical order.
    /// Empty if no SIGMA designation.
    pub sigma: Box<[u8]>,
}
532
533impl AeaMarking {
534 /// Banner-line form.
535 pub fn banner_str(&self) -> String {
536 match self {
537 Self::Rd(rd) => {
538 let mut s = "RD".to_owned();
539 if rd.cnwdi {
540 s.push_str("-CNWDI");
541 }
542 if !rd.sigma.is_empty() {
543 s.push_str("-SIGMA ");
544 let nums: Vec<String> = rd.sigma.iter().map(|n| n.to_string()).collect();
545 s.push_str(&nums.join(" "));
546 }
547 s
548 }
549 Self::Frd(frd) => {
550 let mut s = "FRD".to_owned();
551 if !frd.sigma.is_empty() {
552 s.push_str("-SIGMA ");
553 let nums: Vec<String> = frd.sigma.iter().map(|n| n.to_string()).collect();
554 s.push_str(&nums.join(" "));
555 }
556 s
557 }
558 Self::DodUcni => "DOD UCNI".to_owned(),
559 Self::DoeUcni => "DOE UCNI".to_owned(),
560 Self::Tfni => "TFNI".to_owned(),
561 }
562 }
563
564 /// Portion mark form.
565 pub fn portion_str(&self) -> String {
566 match self {
567 Self::Rd(rd) => {
568 let mut s = "RD".to_owned();
569 if rd.cnwdi {
570 s.push_str("-CNWDI");
571 }
572 if !rd.sigma.is_empty() {
573 s.push_str("-SG ");
574 let nums: Vec<String> = rd.sigma.iter().map(|n| n.to_string()).collect();
575 s.push_str(&nums.join(" "));
576 }
577 s
578 }
579 Self::Frd(frd) => {
580 let mut s = "FRD".to_owned();
581 if !frd.sigma.is_empty() {
582 s.push_str("-SG ");
583 let nums: Vec<String> = frd.sigma.iter().map(|n| n.to_string()).collect();
584 s.push_str(&nums.join(" "));
585 }
586 s
587 }
588 Self::DodUcni => "DCNI".to_owned(),
589 Self::DoeUcni => "UCNI".to_owned(),
590 Self::Tfni => "TFNI".to_owned(),
591 }
592 }
593
594 /// Parse a `//`-delimited AEA block from either banner or portion form.
595 ///
596 /// Handles compound tokens: `RD`, `RD-CNWDI`, `RD-SIGMA 20`,
597 /// `RD-CNWDI-SIGMA 18 20`, `FRD`, `FRD-SIGMA 14`, etc.
598 pub fn parse(s: &str) -> Option<Self> {
599 // Standalone non-compound markings.
600 match s {
601 "DOD UCNI" | "DCNI" => return Some(Self::DodUcni),
602 "DOE UCNI" | "UCNI" => return Some(Self::DoeUcni),
603 "TFNI" | "TRANSCLASSIFIED FOREIGN NUCLEAR INFORMATION" => return Some(Self::Tfni),
604 _ => {}
605 }
606
607 // RD compound block: RD, RD-CNWDI, RD-SIGMA ##, RD-CNWDI-SIGMA ##,
608 // RESTRICTED DATA, RESTRICTED DATA-CNWDI, etc.
609 if s == "RD" || s == "RESTRICTED DATA" {
610 return Some(Self::Rd(RdBlock::default()));
611 }
612 if let Some(rest) = s
613 .strip_prefix("RD-")
614 .or_else(|| s.strip_prefix("RESTRICTED DATA-"))
615 {
616 return Self::parse_rd_modifiers(rest);
617 }
618
619 // FRD compound block: FRD, FRD-SIGMA ##,
620 // FORMERLY RESTRICTED DATA, etc.
621 if s == "FRD" || s == "FORMERLY RESTRICTED DATA" {
622 return Some(Self::Frd(FrdBlock::default()));
623 }
624 if let Some(rest) = s
625 .strip_prefix("FRD-")
626 .or_else(|| s.strip_prefix("FORMERLY RESTRICTED DATA-"))
627 {
628 return Self::parse_frd_modifiers(rest);
629 }
630
631 None
632 }
633
634 /// Parse RD modifiers after the `RD-` prefix.
635 /// Handles: `CNWDI`, `SIGMA ##`, `CNWDI-SIGMA ##`, `SG ##`, `CNWDI-SG ##`.
636 fn parse_rd_modifiers(s: &str) -> Option<Self> {
637 let mut cnwdi = false;
638 let mut rest = s;
639
640 // Check for CNWDI prefix.
641 if let Some(after) = rest.strip_prefix("CNWDI") {
642 cnwdi = true;
643 rest = after.strip_prefix('-').unwrap_or(after);
644 } else if rest == "N" {
645 // DoD shorthand: RD-N means RD-CNWDI (per CAPCO-2016 §6)
646 return Some(Self::Rd(RdBlock {
647 cnwdi: true,
648 sigma: Box::new([]),
649 }));
650 }
651
652 // Check for SIGMA/SG.
653 let sigma = parse_sigma_numbers(rest);
654
655 if rest.is_empty() || !sigma.is_empty() {
656 Some(Self::Rd(RdBlock {
657 cnwdi,
658 sigma: sigma.into(),
659 }))
660 } else {
661 None
662 }
663 }
664
665 /// Parse FRD modifiers after the `FRD-` prefix.
666 /// Handles: `SIGMA ##`, `SG ##`.
667 fn parse_frd_modifiers(s: &str) -> Option<Self> {
668 let sigma = parse_sigma_numbers(s);
669 if !sigma.is_empty() {
670 Some(Self::Frd(FrdBlock {
671 sigma: sigma.into(),
672 }))
673 } else {
674 None
675 }
676 }
677}
678
/// Parse SIGMA/SG numbers from a string like `SIGMA 18 20` or `SG 14`.
///
/// The input must start with `SIGMA ` or `SG ` followed by
/// whitespace-separated decimal numbers that each fit in a `u8`.
///
/// Returns an empty vector when the prefix is missing, when no numbers
/// follow it, or when **any** entry is not a plain run of ASCII digits in
/// range. A partially valid list such as `SIGMA 18 XX` is rejected as a
/// whole rather than being truncated to `[18]`, so callers (which treat an
/// empty result as a parse failure) never accept a malformed block.
fn parse_sigma_numbers(s: &str) -> Vec<u8> {
    let list = match s.strip_prefix("SIGMA ").or_else(|| s.strip_prefix("SG ")) {
        Some(list) => list,
        None => return Vec::new(),
    };

    list.split_whitespace()
        .map(|tok| {
            // `u8::from_str` also accepts a leading `+`; require digits only.
            if tok.bytes().all(|b| b.is_ascii_digit()) {
                tok.parse::<u8>().ok()
            } else {
                None
            }
        })
        .collect::<Option<Vec<u8>>>()
        .unwrap_or_default()
}
692
693impl std::fmt::Display for AeaMarking {
694 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
695 f.write_str(&self.portion_str())
696 }
697}
698
699// ---------------------------------------------------------------------------
700// FGI marker (in US-classified markings)
701// ---------------------------------------------------------------------------
702
/// FGI marker in a US-classified marking: `FGI` or `FGI [LIST]`.
///
/// Appears in the FGI block (after SAR, before dissem controls) when a
/// US-classified document references foreign government information.
///
/// This is NOT the same as [`FgiClassification`] — that represents a
/// marking where the classification itself IS foreign. This marker says
/// "this US-classified marking contains foreign government information."
///
/// An empty `countries` list represents source-concealed FGI (no country
/// attribution). If a document mixes source-concealed and source-acknowledged
/// FGI portions, the banner must use the bare `FGI` form without countries
/// to avoid compromising the concealed source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FgiMarker {
    /// Countries whose information is referenced (space-delimited in source).
    /// Empty for source-concealed FGI, i.e. the bare `FGI` form.
    pub countries: Box<[Trigraph]>,
}
722
723// ===========================================================================
724// Non-IC dissemination controls
725// ===========================================================================
726
/// Non-Intelligence Community dissemination control markings (CAPCO Register §9).
///
/// These operate under a separate authority framework from IC dissem controls.
/// In classified documents, most non-IC dissem controls appear **only in portion
/// markings** — they are stripped from banners. However, some controls propagate
/// to classified banners: LIMDIS (NGA Title 10), LES, LES-NF, and SSI. See
/// [`NonIcDissem::propagates_to_classified_banner`] for the authoritative list.
/// When the page is **unclassified**, all non-IC dissem controls propagate to
/// the banner.
///
/// LES-NF and SBU-NF carry NOFORN treatment even when stripped from the banner.
///
/// Each variant below is documented as
/// `FULL TITLE / banner abbreviation / portion abbreviation`.
///
/// # CUI note
///
/// CUI (Controlled Unclassified Information) is recognized but not validated.
/// Full CUI rule support is planned for a dedicated crate. The IC equivalent
/// (FOUO) remains in active use in the `DissemControl` enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum NonIcDissem {
    /// LIMITED DISTRIBUTION / LIMDIS / DS
    Limdis,
    /// EXCLUSIVE DISTRIBUTION / EXDIS / XD
    Exdis,
    /// NO DISTRIBUTION / NODIS / ND
    Nodis,
    /// SENSITIVE BUT UNCLASSIFIED / SBU / SBU
    Sbu,
    /// SENSITIVE BUT UNCLASSIFIED NOFORN / SBU NOFORN / SBU-NF
    /// Carries NOFORN treatment even when stripped from banner.
    SbuNf,
    /// LAW ENFORCEMENT SENSITIVE / LES / LES
    Les,
    /// LAW ENFORCEMENT SENSITIVE NOFORN / LES NOFORN / LES-NF
    /// Carries NOFORN treatment even when stripped from banner.
    LesNf,
    /// SENSITIVE SECURITY INFORMATION / SSI / SSI
    Ssi,
}
766
767impl NonIcDissem {
768 /// Banner-line abbreviation form.
769 pub fn banner_str(self) -> &'static str {
770 match self {
771 Self::Limdis => "LIMDIS",
772 Self::Exdis => "EXDIS",
773 Self::Nodis => "NODIS",
774 Self::Sbu => "SBU",
775 Self::SbuNf => "SBU NOFORN",
776 Self::Les => "LES",
777 Self::LesNf => "LES NOFORN",
778 Self::Ssi => "SSI",
779 }
780 }
781
782 /// Portion mark abbreviation.
783 pub fn portion_str(self) -> &'static str {
784 match self {
785 Self::Limdis => "DS",
786 Self::Exdis => "XD",
787 Self::Nodis => "ND",
788 Self::Sbu => "SBU",
789 Self::SbuNf => "SBU-NF",
790 Self::Les => "LES",
791 Self::LesNf => "LES-NF",
792 Self::Ssi => "SSI",
793 }
794 }
795
796 /// Parse from either banner or portion form.
797 pub fn parse(s: &str) -> Option<Self> {
798 match s {
799 "LIMDIS" | "DS" => Some(Self::Limdis),
800 "EXDIS" | "XD" => Some(Self::Exdis),
801 "NODIS" | "ND" => Some(Self::Nodis),
802 "SBU" => Some(Self::Sbu),
803 "SBU NOFORN" | "SBU-NF" => Some(Self::SbuNf),
804 "LES" => Some(Self::Les),
805 "LES NOFORN" | "LES-NF" => Some(Self::LesNf),
806 "SSI" => Some(Self::Ssi),
807 _ => None,
808 }
809 }
810
811 /// Returns true if this control carries NOFORN treatment.
812 pub fn carries_noforn(self) -> bool {
813 matches!(self, Self::SbuNf | Self::LesNf)
814 }
815
816 /// Returns true if this control propagates to classified banners.
817 ///
818 /// Most non-IC dissem controls are stripped from banners in classified
819 /// documents. These exceptions propagate:
820 /// - LIMDIS: NGA Title 10 marking, appears in classified banners
821 /// - LES: propagates to banners; LES-NF propagates as NOFORN//LES
822 /// - SSI: propagates to banners
823 pub fn propagates_to_classified_banner(self) -> bool {
824 matches!(self, Self::Limdis | Self::Les | Self::LesNf | Self::Ssi)
825 }
826
827 /// All valid values.
828 pub const ALL: &[NonIcDissem] = &[
829 Self::Limdis,
830 Self::Exdis,
831 Self::Nodis,
832 Self::Sbu,
833 Self::SbuNf,
834 Self::Les,
835 Self::LesNf,
836 Self::Ssi,
837 ];
838}
839
840impl std::fmt::Display for NonIcDissem {
841 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
842 f.write_str(self.portion_str())
843 }
844}
845
846// ===========================================================================
847// Trigraph
848// ===========================================================================
849
/// A 3-character country trigraph (e.g., USA, GBR, AUS).
/// Validated against CVE country code list at rule-check time.
///
/// The inner bytes are private; construction goes through [`Trigraph::try_new`]
/// which enforces ASCII-uppercase invariants so that [`Trigraph::as_str`] can
/// return a `&str` infallibly without panicking at runtime.
///
/// Note that [`Trigraph::try_new`] only checks the *shape* of the code
/// (three letters `A`–`Z`); whether the code names a real country is left to
/// the rule-check stage mentioned above.
///
/// # Limitations
///
/// CAPCO also uses tetragraphs (NATO, FVEY, ACGU) and longer org codes
/// (AUSTRALIA_GROUP). These are present in the CVE TRIGRAPHS list but cannot
/// be represented by this type's 3-byte constraint. A broader `CountryCode`
/// type is planned for a future version.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Trigraph([u8; 3]);
865
866impl Trigraph {
867 /// The always-valid `USA` trigraph constant.
868 pub const USA: Self = Self(*b"USA");
869
870 /// Attempt to construct a trigraph from 3 bytes.
871 ///
872 /// Returns `None` unless every byte is an ASCII uppercase letter
873 /// (`A`–`Z`), which is the invariant enforced by CAPCO for all valid
874 /// country/entity codes.
875 #[inline]
876 pub const fn try_new(bytes: [u8; 3]) -> Option<Self> {
877 let mut i = 0;
878 while i < 3 {
879 if !bytes[i].is_ascii_uppercase() {
880 return None;
881 }
882 i += 1;
883 }
884 Some(Self(bytes))
885 }
886
887 /// Return the trigraph as a string slice.
888 ///
889 /// Infallible because construction via [`Trigraph::try_new`] (or the
890 /// [`Trigraph::USA`] constant) guarantees ASCII-uppercase bytes, which
891 /// are always valid UTF-8.
892 #[inline]
893 pub fn as_str(&self) -> &str {
894 // SAFETY: `Trigraph` can only be constructed via `try_new` or the
895 // `USA` constant, both of which require ASCII uppercase letters.
896 // ASCII is a subset of valid UTF-8.
897 unsafe { std::str::from_utf8_unchecked(&self.0) }
898 }
899}
900
901impl std::fmt::Display for Trigraph {
902 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
903 f.write_str(self.as_str())
904 }
905}
906
#[cfg(test)]
mod tests {
    use super::*;

    // ----- Trigraph -------------------------------------------------------

    /// The `USA` constant renders as the literal string "USA".
    #[test]
    fn trigraph_usa_constant_is_valid() {
        assert_eq!(Trigraph::USA.as_str(), "USA");
    }

    /// Three uppercase ASCII letters are accepted and round-trip via `as_str`.
    #[test]
    fn trigraph_try_new_accepts_uppercase() {
        let t = Trigraph::try_new(*b"GBR").unwrap();
        assert_eq!(t.as_str(), "GBR");
    }

    /// `A` and `Z` are the inclusive endpoints of the accepted byte range.
    #[test]
    fn trigraph_try_new_accepts_range_endpoints() {
        let t = Trigraph::try_new(*b"AZA").unwrap();
        assert_eq!(t.as_str(), "AZA");
    }

    #[test]
    fn trigraph_try_new_rejects_lowercase() {
        assert!(Trigraph::try_new(*b"usa").is_none());
    }

    #[test]
    fn trigraph_try_new_rejects_digits() {
        assert!(Trigraph::try_new(*b"US1").is_none());
    }

    #[test]
    fn trigraph_try_new_rejects_high_bytes() {
        assert!(Trigraph::try_new([0xFF, 0xFF, 0xFF]).is_none());
    }

    /// A single bad byte must be caught regardless of its position. The other
    /// rejection tests all have an invalid *last* byte, so on their own they
    /// would not notice an implementation that skipped the first or middle one.
    #[test]
    fn trigraph_try_new_rejects_single_bad_byte_in_each_position() {
        assert!(Trigraph::try_new(*b"uSA").is_none());
        assert!(Trigraph::try_new(*b"UsA").is_none());
        assert!(Trigraph::try_new(*b"USa").is_none());
    }

    /// `@` (0x40) and `[` (0x5B) sit immediately outside `A`..=`Z`.
    #[test]
    fn trigraph_try_new_rejects_bytes_adjacent_to_uppercase_range() {
        assert!(Trigraph::try_new(*b"@BC").is_none());
        assert!(Trigraph::try_new(*b"AB[").is_none());
    }

    /// With a plain `{}` spec, `Display` output equals `as_str`.
    #[test]
    fn trigraph_display_matches_as_str() {
        let t = Trigraph::try_new(*b"GBR").unwrap();
        assert_eq!(t.to_string(), "GBR");
        assert_eq!(Trigraph::USA.to_string(), Trigraph::USA.as_str());
    }

    // ----- NonIcDissem ----------------------------------------------------

    /// Only LIMDIS, LES, LES-NF and SSI propagate to classified banners.
    #[test]
    fn non_ic_dissem_banner_propagation() {
        assert!(NonIcDissem::Limdis.propagates_to_classified_banner());
        assert!(NonIcDissem::Les.propagates_to_classified_banner());
        assert!(NonIcDissem::LesNf.propagates_to_classified_banner());
        assert!(NonIcDissem::Ssi.propagates_to_classified_banner());

        assert!(!NonIcDissem::Exdis.propagates_to_classified_banner());
        assert!(!NonIcDissem::Nodis.propagates_to_classified_banner());
        assert!(!NonIcDissem::Sbu.propagates_to_classified_banner());
        assert!(!NonIcDissem::SbuNf.propagates_to_classified_banner());
    }

    /// With a plain `{}` spec, `Display` output equals `portion_str` for
    /// every value listed in `ALL`.
    #[test]
    fn non_ic_dissem_display_matches_portion_str() {
        assert_eq!(NonIcDissem::ALL.len(), 8);
        for d in NonIcDissem::ALL {
            assert_eq!(d.to_string(), d.portion_str());
        }
    }

    // ----- Classification -------------------------------------------------

    /// `Ord` on `Classification` sorts from least to most restrictive.
    #[test]
    fn classification_ord_is_restrictiveness() {
        assert!(Classification::Unclassified < Classification::Restricted);
        assert!(Classification::Restricted < Classification::Confidential);
        assert!(Classification::Confidential < Classification::Secret);
        assert!(Classification::Secret < Classification::TopSecret);
    }

    /// NOTE: despite the name, this only checks that the banner and portion
    /// strings are non-empty for each level; it does not parse them back.
    #[test]
    fn classification_banner_portion_round_trip() {
        for c in [
            Classification::Unclassified,
            Classification::Restricted,
            Classification::Confidential,
            Classification::Secret,
            Classification::TopSecret,
        ] {
            assert!(!c.banner_str().is_empty());
            assert!(!c.portion_str().is_empty());
        }
    }

    // ----- NatoClassification ---------------------------------------------

    #[test]
    fn nato_us_equivalent_mapping() {
        assert_eq!(
            NatoClassification::CosmicTopSecret.us_equivalent(),
            Classification::TopSecret,
        );
        assert_eq!(
            NatoClassification::NatoSecret.us_equivalent(),
            Classification::Secret,
        );
        assert_eq!(
            NatoClassification::NatoRestricted.us_equivalent(),
            Classification::Restricted,
        );
    }

    /// NOTE: despite the name, this only checks that the banner and portion
    /// strings are non-empty for each listed variant.
    #[test]
    fn nato_banner_portion_round_trip() {
        for n in [
            NatoClassification::NatoUnclassified,
            NatoClassification::NatoRestricted,
            NatoClassification::NatoConfidential,
            NatoClassification::NatoConfidentialAtomal,
            NatoClassification::NatoSecret,
            NatoClassification::NatoSecretAtomal,
            NatoClassification::CosmicTopSecret,
            NatoClassification::CosmicTopSecretAtomal,
            NatoClassification::CosmicTopSecretBohemia,
            NatoClassification::CosmicTopSecretBalk,
        ] {
            assert!(!n.banner_str().is_empty());
            assert!(!n.portion_str().is_empty());
        }
    }

    // ----- IsmAttributes::us_classification -------------------------------

    #[test]
    fn us_classification_convenience_returns_us() {
        let attrs = IsmAttributes {
            classification: Some(MarkingClassification::Us(Classification::Secret)),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), Some(Classification::Secret));
    }

    #[test]
    fn us_classification_convenience_returns_none_for_nato() {
        let attrs = IsmAttributes {
            classification: Some(MarkingClassification::Nato(NatoClassification::NatoSecret)),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), None);
    }

    /// For a `Conflict`, the accessor reports the `us` level stored in it.
    #[test]
    fn us_classification_convenience_returns_resolved_for_conflict() {
        let attrs = IsmAttributes {
            classification: Some(MarkingClassification::Conflict {
                us: Classification::TopSecret,
                foreign: Box::new(ForeignClassification::Nato(
                    NatoClassification::CosmicTopSecret,
                )),
            }),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), Some(Classification::TopSecret));
    }
}