Skip to main content

marque_ism/
attrs.rs

1//! `IsmAttributes` — the canonical in-memory representation of a classification marking.
2//!
3//! Mirrors the IC ISM XML attribute model. Every source format (free text, XML, web forms)
4//! normalizes into this struct before rule validation.
5//!
6//! # Type design
7//! Multi-value fields use `Box<[T]>` rather than `Vec<T>` to avoid over-allocation
8//! after parsing. Most markings have 0–4 values per field.
9//!
10//! # Classification systems
11//!
12//! A marking carries exactly one classification system: US, FGI (non-US),
13//! NATO, or JOINT. This is represented by [`MarkingClassification`]. Non-US
14//! classifications start with `//` (the US classification slot is empty).
15//!
16//! When the parser encounters two classification systems in one marking
17//! (e.g., `SECRET//NATO SECRET//NOFORN`), it resolves to
18//! [`MarkingClassification::Conflict`] — US wins at the greater of the two
19//! levels, and the foreign part is preserved for rule-generated fixes.
20//!
21//! # Code generation
22//! CVE enum variants (`SciControl`, `DissemControl`, `DeclassExemption`, `SarIdentifier`)
23//! are generated by `build.rs` from ODNI CVE XML files and re-exported from
24//! `crate::generated::values`.
25
26use crate::generated::values;
27use crate::span::Span;
28
29// Re-export generated enum types for convenience.
30pub use values::{DeclassExemption, DissemControl, SarIdentifier, SciControl};
31
/// Canonical in-memory representation of a classification marking.
///
/// Produced by `marque-core::parser` from scanner candidates.
/// Consumed by `marque-rules::Rule` implementations for validation.
///
/// # Block ordering (CAPCO)
///
/// Fields are ordered per CAPCO block sequence:
/// Classification → SCI → SAR → AEA → FGI marker → Dissem (incl. REL TO)
///
/// The CAB-derived fields (`declassify_on`, `classified_by`, `derived_from`,
/// `declass_exemption`) and the parser bookkeeping (`token_spans`) follow
/// the marking blocks.
#[non_exhaustive]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IsmAttributes {
    /// The marking's classification system and level.
    /// `None` means parsing failed to identify a classification.
    pub classification: Option<MarkingClassification>,

    /// SCI controls (e.g., SI, TK, HCS-P). Ordered per CAPCO block ordering.
    pub sci_controls: Box<[SciControl]>,

    /// Special Access Required identifiers.
    pub sar_identifiers: Box<[SarIdentifier]>,

    /// Atomic Energy Act markings (CAPCO Register §6).
    ///
    /// Includes RD, FRD, CNWDI, TFNI, SIGMA, and UCNI variants.
    /// Positioned between SAR and FGI in CAPCO block ordering.
    pub aea_markings: Box<[AeaMarking]>,

    /// FGI block in US-classified markings: `FGI` or `FGI [LIST]`.
    ///
    /// Present when a US-classified document references foreign government
    /// information. This is the *marker* in the banner/portion — distinct
    /// from [`MarkingClassification::Fgi`], which means the marking IS
    /// foreign-classified.
    ///
    /// `None` when no FGI marker is present.
    pub fgi_marker: Option<FgiMarker>,

    /// Dissemination controls (e.g., NOFORN, RELIDO, ORCON, FISA).
    pub dissem_controls: Box<[DissemControl]>,

    /// Non-IC dissemination controls (e.g., LIMDIS, SBU, LES, SSI).
    ///
    /// Separate authority framework (CAPCO Register §9), distinct from IC
    /// dissem controls. In classified documents these are generally portion-
    /// only and stripped from banners, but some values propagate to the
    /// classified banner; see [`NonIcDissem::propagates_to_classified_banner`]
    /// for the authoritative rule. On unclassified pages they propagate to
    /// the banner. LES-NF and SBU-NF carry NOFORN treatment even when
    /// stripped.
    pub non_ic_dissem: Box<[NonIcDissem]>,

    /// REL TO country trigraphs. USA must be present and first if non-empty.
    ///
    /// Structurally part of the dissem block (comma-delimited), but kept as
    /// a typed field for E002 and REL TO validation rules.
    pub rel_to: Box<[Trigraph]>,

    /// Declassification date from CAB (free text, e.g., "20331231").
    pub declassify_on: Option<Box<str>>,

    /// Free-text "Classified By" identifier from CAB.
    pub classified_by: Option<Box<str>>,

    /// Free-text "Derived From" source from CAB.
    pub derived_from: Option<Box<str>>,

    /// Declassification exemption code from CAB (e.g., 25X1, 50X1-HUM).
    pub declass_exemption: Option<DeclassExemption>,

    /// Per-token byte spans into the *original source buffer*, recorded by
    /// the parser as it walks the marking string. Phase 3 added this so
    /// rules can point at the exact offending byte range instead of the
    /// whole marking. Empty for CAB markings (CAB parsing is line-structured
    /// and doesn't go through the token-walking path).
    ///
    /// Indexing convention: `token_spans` is in document order. To find the
    /// span for the Nth `DissemControl`, walk the slice and pick the Nth
    /// entry whose `kind == TokenKind::DissemControl`.
    pub token_spans: Box<[TokenSpan]>,
}
113
114impl IsmAttributes {
115    /// Convenience accessor: returns the US classification level if this
116    /// marking uses the US or Conflict classification system.
117    ///
118    /// Returns `None` for pure FGI, NATO, or JOINT markings (use
119    /// `self.classification` directly for those).
120    pub fn us_classification(&self) -> Option<Classification> {
121        match self.classification {
122            Some(MarkingClassification::Us(c)) => Some(c),
123            Some(MarkingClassification::Conflict { us, .. }) => Some(us),
124            _ => None,
125        }
126    }
127}
128
/// One parser-recognized token plus its byte span in the original source.
///
/// Used by Phase 3 rules to surface byte-precise diagnostic spans without
/// re-parsing the source. The `text` field carries the literal token bytes
/// so rules that need the source content (E006, E007, E008 against migration
/// keys) can look up entries without threading `&[u8] source` through every
/// `Rule::check` signature.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenSpan {
    /// Category of the token; rules filter token-span lookups on this.
    pub kind: TokenKind,
    /// Byte range of the token in the original source buffer.
    pub span: Span,
    /// Literal text of the token as it appeared in the source.
    pub text: Box<str>,
}
142
/// Discriminant for `TokenSpan`: the category of a parser-recognized token.
///
/// Phase 3 rules read these to filter token-span lookups by category
/// (e.g., "the Nth entry whose kind is `DissemControl`").
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// Classification level token (S, SECRET, TS, TOP SECRET, ...).
    Classification,
    /// SCI control token (SI, TK, HCS, ...).
    SciControl,
    /// SAR identifier token.
    SarIdentifier,
    /// Atomic Energy Act marking token (RD, FRD, CNWDI, TFNI, SIGMA ##, etc.).
    AeaMarking,
    /// FGI marker token (`FGI`, `FGI DEU`, `FGI DEU GBR`).
    FgiMarker,
    /// Dissemination control token (NOFORN, NF, ORCON, OC, RELIDO, ...).
    DissemControl,
    /// Non-IC dissemination control token (LIMDIS, DS, SBU, LES, SSI, ...).
    NonIcDissem,
    /// REL TO country trigraph (USA, GBR, AUS, ...). One per token, not the
    /// whole REL TO list.
    RelToTrigraph,
    /// The full `REL TO ...` block text. Recorded so E013 can inspect the
    /// raw source for delimiter errors (spaces instead of commas).
    RelToBlock,
    /// Declassification exemption code in CAB or banner (25X1, 50X1-HUM).
    DeclassExemption,
    /// Declassification date in CAB or banner (YYYYMMDD or YYYY).
    DeclassDate,
    /// `//` separator between blocks. Recorded so E004 can detect extra/
    /// missing separator runs.
    Separator,
    /// A non-empty block that did not match any known token kind. E008 fires
    /// one diagnostic per `Unknown` entry.
    Unknown,
}
178
179// ===========================================================================
180// Classification types
181// ===========================================================================
182
/// The classification system and level for a marking.
///
/// A marking has exactly one classification system. When the parser finds
/// two (e.g., `SECRET//NATO SECRET//...`), it resolves to [`Conflict`](Self::Conflict).
///
/// Use `effective_level` to compare markings across systems on the shared
/// [`Classification`] ladder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkingClassification {
    /// US IC classification.
    Us(Classification),
    /// Non-US (FGI) classification: `//GBR S//...`
    Fgi(FgiClassification),
    /// NATO classification: `//NS//...`
    Nato(NatoClassification),
    /// JOINT classification (US co-owned): `//JOINT S USA GBR//...`
    Joint(JointClassification),
    /// Parser found two classification systems in one marking.
    ///
    /// US wins, upgraded to the greater of the two levels.
    /// The foreign part is preserved so rules can suggest the FGI fix.
    ///
    /// Example: `SECRET//COSMIC TOP SECRET//REL TO USA, NATO`
    /// → `us: TopSecret`, `foreign: Nato(CosmicTopSecret)`
    /// → fix: `TOP SECRET//FGI NATO//REL TO USA, NATO`
    Conflict {
        /// Resolved US classification (max of both levels).
        us: Classification,
        /// The foreign classification that should become an FGI marker.
        foreign: Box<ForeignClassification>,
    },
}
212
213impl MarkingClassification {
214    /// The effective classification level for ordering purposes, regardless of
215    /// classification system.
216    ///
217    /// NATO levels are mapped to their US equivalents via
218    /// [`NatoClassification::us_equivalent`]. All systems use the
219    /// [`Classification`] ladder for comparison so that `Iterator::max()` on
220    /// a mixed set of portions returns the most restrictive level overall.
221    pub fn effective_level(&self) -> Classification {
222        match self {
223            Self::Us(c) => *c,
224            Self::Fgi(f) => f.level,
225            Self::Nato(n) => n.us_equivalent(),
226            Self::Joint(j) => j.level,
227            Self::Conflict { us, .. } => *us,
228        }
229    }
230}
231
232impl Default for MarkingClassification {
233    fn default() -> Self {
234        Self::Us(Classification::Unclassified)
235    }
236}
237
/// The non-US classification in a [`MarkingClassification::Conflict`].
///
/// Preserves enough information for rules to generate the FGI fix:
/// the foreign system, its level, and any associated countries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ForeignClassification {
    /// The conflicting part was a non-US (FGI) classification.
    Fgi(FgiClassification),
    /// The conflicting part was a NATO classification.
    Nato(NatoClassification),
    /// The conflicting part was a JOINT classification.
    Joint(JointClassification),
}
248
249// ---------------------------------------------------------------------------
250// Classification level (US ladder + RESTRICTED for foreign interop)
251// ---------------------------------------------------------------------------
252
/// Classification level, ordered from least to most restrictive:
/// U < R < C < S < TS.
///
/// `Restricted` is included for foreign-origin markings — many non-US
/// classification systems (and NATO) place a RESTRICTED level between
/// UNCLASSIFIED and CONFIDENTIAL.
///
/// The derived `Ord` follows declaration order, so `Iterator::max()`
/// returns the most restrictive level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Classification {
    Unclassified,
    Restricted,
    Confidential,
    Secret,
    TopSecret,
}

impl Classification {
    /// `(banner, portion)` spellings for this level, kept side by side so the
    /// two forms are maintained together.
    fn labels(self) -> (&'static str, &'static str) {
        match self {
            Classification::Unclassified => ("UNCLASSIFIED", "U"),
            Classification::Restricted => ("RESTRICTED", "R"),
            Classification::Confidential => ("CONFIDENTIAL", "C"),
            Classification::Secret => ("SECRET", "S"),
            Classification::TopSecret => ("TOP SECRET", "TS"),
        }
    }

    /// Full-word spelling used on banner lines (never abbreviated).
    pub fn banner_str(self) -> &'static str {
        let (banner, _) = self.labels();
        banner
    }

    /// Abbreviated spelling used in portion markings.
    pub fn portion_str(self) -> &'static str {
        let (_, portion) = self.labels();
        portion
    }
}
293
294// ---------------------------------------------------------------------------
295// FGI classification (non-US, country-prefixed)
296// ---------------------------------------------------------------------------
297
/// Non-US (FGI) classification.
///
/// Two forms exist:
///
/// - **Source-acknowledged**: country trigraph(s) identify the originator.
///   `//GBR S//REL TO USA, GBR`
/// - **Source-concealed**: `FGI` replaces the country trigraph(s) when
///   the originating country is sensitive. `//FGI S//REL TO USA, GBR`
///   An empty `countries` list indicates source-concealed FGI.
///
/// Countries are space-delimited in the source marking.
///
/// # Banner aggregation
///
/// If a document contains **any** source-concealed FGI portions alongside
/// source-acknowledged FGI portions, the banner must use `FGI` without
/// country codes — revealing the country list would compromise the
/// concealed source. This rule is enforced at the `PageContext` level
/// during banner validation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FgiClassification {
    /// Originating countries (space-delimited in source).
    /// Empty for source-concealed FGI (`//FGI S//...`).
    pub countries: Box<[Trigraph]>,
    /// Classification level on the shared [`Classification`] ladder
    /// (includes RESTRICTED).
    pub level: Classification,
}
325
326// ---------------------------------------------------------------------------
327// NATO classification
328// ---------------------------------------------------------------------------
329
/// NATO classification ladder with optional SAP designation.
///
/// NATO uses a separate classification system governed by treaty.
/// Not everyone with a US clearance is cleared for NATO; many US systems
/// are not approved for NATO information.
///
/// # NATO SAP markings
///
/// Three NATO SAP programs exist, each with specific constraints:
///
/// - **ATOMAL**: Applies to CTS, NS, and NC levels. Space-separated in
///   banner (`COSMIC TOP SECRET ATOMAL`). Portion marks: CTSA, NSAT, NCA.
///   Alternative portion forms CTS-A, NS-A, NC-A also appear in practice.
/// - **BOHEMIA**: CTS-only. Hyphenated (`COSMIC TOP SECRET-BOHEMIA` → `CTS-B`).
/// - **BALK**: CTS-only, exercise replacement for BOHEMIA.
///   Hyphenated (`COSMIC TOP SECRET-BALK` → `CTS-BALK`).
///
/// Per the CAPCO Register, bare `COSMIC TOP SECRET` requires either
/// BOHEMIA or BALK — standalone CTS without a SAP suffix is an error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NatoClassification {
    /// NATO UNCLASSIFIED — portion mark `NU`.
    NatoUnclassified,
    /// NATO RESTRICTED — portion mark `NR`.
    NatoRestricted,
    /// NATO CONFIDENTIAL — portion mark `NC`.
    NatoConfidential,
    /// NATO CONFIDENTIAL ATOMAL — portion mark `NCA` (alt: `NC-A`).
    NatoConfidentialAtomal,
    /// NATO SECRET — portion mark `NS`.
    NatoSecret,
    /// NATO SECRET ATOMAL — portion mark `NSAT` (alt: `NS-A`).
    NatoSecretAtomal,
    /// COSMIC TOP SECRET — portion mark `CTS` (requires BOHEMIA or BALK).
    CosmicTopSecret,
    /// COSMIC TOP SECRET ATOMAL — portion mark `CTSA` (alt: `CTS-A`).
    CosmicTopSecretAtomal,
    /// COSMIC TOP SECRET-BOHEMIA — portion mark `CTS-B`.
    CosmicTopSecretBohemia,
    /// COSMIC TOP SECRET-BALK — portion mark `CTS-BALK`.
    CosmicTopSecretBalk,
}
362
363impl NatoClassification {
364    /// Banner form (full words, as used in banner marking lines).
365    pub fn banner_str(self) -> &'static str {
366        match self {
367            Self::NatoUnclassified => "NATO UNCLASSIFIED",
368            Self::NatoRestricted => "NATO RESTRICTED",
369            Self::NatoConfidential => "NATO CONFIDENTIAL",
370            Self::NatoConfidentialAtomal => "NATO CONFIDENTIAL ATOMAL",
371            Self::NatoSecret => "NATO SECRET",
372            Self::NatoSecretAtomal => "NATO SECRET ATOMAL",
373            Self::CosmicTopSecret => "COSMIC TOP SECRET",
374            Self::CosmicTopSecretAtomal => "COSMIC TOP SECRET ATOMAL",
375            Self::CosmicTopSecretBohemia => "COSMIC TOP SECRET-BOHEMIA",
376            Self::CosmicTopSecretBalk => "COSMIC TOP SECRET-BALK",
377        }
378    }
379
380    /// Portion form (primary abbreviation from the CAPCO Register).
381    pub fn portion_str(self) -> &'static str {
382        match self {
383            Self::NatoUnclassified => "NU",
384            Self::NatoRestricted => "NR",
385            Self::NatoConfidential => "NC",
386            Self::NatoConfidentialAtomal => "NCA",
387            Self::NatoSecret => "NS",
388            Self::NatoSecretAtomal => "NSAT",
389            Self::CosmicTopSecret => "CTS",
390            Self::CosmicTopSecretAtomal => "CTSA",
391            Self::CosmicTopSecretBohemia => "CTS-B",
392            Self::CosmicTopSecretBalk => "CTS-BALK",
393        }
394    }
395
396    /// The base classification level (without SAP), for ordering comparisons.
397    pub fn base_level(self) -> NatoLevel {
398        match self {
399            Self::NatoUnclassified => NatoLevel::NatoUnclassified,
400            Self::NatoRestricted => NatoLevel::NatoRestricted,
401            Self::NatoConfidential | Self::NatoConfidentialAtomal => NatoLevel::NatoConfidential,
402            Self::NatoSecret | Self::NatoSecretAtomal => NatoLevel::NatoSecret,
403            Self::CosmicTopSecret
404            | Self::CosmicTopSecretAtomal
405            | Self::CosmicTopSecretBohemia
406            | Self::CosmicTopSecretBalk => NatoLevel::CosmicTopSecret,
407        }
408    }
409
410    /// Map the NATO level to the equivalent US classification for conflict
411    /// resolution (US wins at the greater of the two).
412    pub fn us_equivalent(self) -> Classification {
413        match self.base_level() {
414            NatoLevel::NatoUnclassified => Classification::Unclassified,
415            NatoLevel::NatoRestricted => Classification::Restricted,
416            NatoLevel::NatoConfidential => Classification::Confidential,
417            NatoLevel::NatoSecret => Classification::Secret,
418            NatoLevel::CosmicTopSecret => Classification::TopSecret,
419        }
420    }
421}
422
/// NATO classification level without SAP, for ordering comparisons.
///
/// Variants are declared from least to most restrictive, so the derived
/// `Ord` compares levels in that order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NatoLevel {
    /// NATO UNCLASSIFIED.
    NatoUnclassified,
    /// NATO RESTRICTED.
    NatoRestricted,
    /// NATO CONFIDENTIAL (with or without ATOMAL).
    NatoConfidential,
    /// NATO SECRET (with or without ATOMAL).
    NatoSecret,
    /// COSMIC TOP SECRET (any SAP designation).
    CosmicTopSecret,
}
432
433// ---------------------------------------------------------------------------
434// JOINT classification
435// ---------------------------------------------------------------------------
436
/// JOINT classification: US is co-owner with other nations.
///
/// `//JOINT S USA GBR//REL TO USA, GBR`
///
/// Country list is space-delimited (NOT comma-delimited like REL TO).
/// Must include USA. All JOINT participants must also appear in REL TO.
///
/// These constraints are documented here only; nothing in this type
/// enforces them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JointClassification {
    /// Classification level (US ladder, includes RESTRICTED).
    pub level: Classification,
    /// Co-owning countries (space-delimited in source). Must include USA.
    pub countries: Box<[Trigraph]>,
}
450
451// ---------------------------------------------------------------------------
452// Atomic Energy Act markings
453// ---------------------------------------------------------------------------
454
/// Atomic Energy Act information markings (CAPCO Register §6).
///
/// AEA markings appear as a single `//`-delimited block in the marking string,
/// using hyphen separators for compound forms:
/// - `SECRET//RD//NOFORN` — RD alone
/// - `SECRET//RD-CNWDI//NOFORN` — RD with CNWDI modifier
/// - `SECRET//RD-SIGMA 20//NOFORN` — RD with SIGMA compartment
/// - `SECRET//RD-SIGMA 18 20//NOFORN` — RD with multiple SIGMAs
/// - `SECRET//FRD//NOFORN` — FRD alone
/// - `SECRET//FRD-SIGMA 14//NOFORN` — FRD with SIGMA
///
/// Standalone (non-compound) markings:
/// - `UNCLASSIFIED//DOD UCNI` / `(U//DCNI)`
/// - `UNCLASSIFIED//DOE UCNI` / `(U//UCNI)`
/// - `SECRET//TFNI//NOFORN` / `(S//TFNI//NF)`
///
/// # Key rules (CAPCO-2016)
///
/// - RD and FRD always require NOFORN unless a sharing agreement exists
///   (default severity: Error, configurable to Warn via `.marque.toml`)
/// - CNWDI may only be used with TS or S RD (not standalone, not with FRD)
/// - SIGMA 14, 15, 18, 20 may only be used with TS or S RD or FRD
/// - RD takes precedence over FRD and TFNI in both banners and portions
/// - SIGMA numbers must be in numerical order, space-separated
///
/// These rules are not enforced by this type or by [`AeaMarking::parse`];
/// the parser only checks that the block is syntactically well formed.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum AeaMarking {
    /// Compound RD block: `RD`, `RD-CNWDI`, `RD-SIGMA 20`, `RD-CNWDI-SIGMA 18 20`
    Rd(RdBlock),
    /// Compound FRD block: `FRD`, `FRD-SIGMA 14`
    Frd(FrdBlock),
    /// DOD UCNI / DCNI — standalone, unclassified only
    DodUcni,
    /// DOE UCNI / UCNI — standalone, unclassified only
    DoeUcni,
    /// TFNI — standalone
    Tfni,
}

/// Restricted Data block with optional modifiers.
///
/// Rendered as `RD`, `RD-CNWDI`, `RD-SIGMA 20`, or `RD-CNWDI-SIGMA 18 20`.
/// The default value is a bare `RD` (no CNWDI, no SIGMA).
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct RdBlock {
    /// Whether CNWDI is present. Only valid with TS or S classification.
    pub cnwdi: bool,
    /// SIGMA compartment numbers (14, 15, 18, 20). Must be in numerical order.
    /// Empty if no SIGMA designation.
    pub sigma: Box<[u8]>,
}

/// Formerly Restricted Data block with optional SIGMA modifier.
///
/// Rendered as `FRD` or `FRD-SIGMA 14`.
/// The default value is a bare `FRD` (no SIGMA).
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct FrdBlock {
    /// SIGMA compartment numbers. Must be in numerical order.
    /// Empty if no SIGMA designation.
    pub sigma: Box<[u8]>,
}

impl AeaMarking {
    /// Banner-line form (`SIGMA` spelled out, `DOD UCNI` / `DOE UCNI`).
    pub fn banner_str(&self) -> String {
        match self {
            Self::Rd(rd) => Self::render_compound("RD", rd.cnwdi, "SIGMA", &rd.sigma),
            Self::Frd(frd) => Self::render_compound("FRD", false, "SIGMA", &frd.sigma),
            Self::DodUcni => "DOD UCNI".to_owned(),
            Self::DoeUcni => "DOE UCNI".to_owned(),
            Self::Tfni => "TFNI".to_owned(),
        }
    }

    /// Portion mark form (`SG` for SIGMA, `DCNI` / `UCNI`).
    pub fn portion_str(&self) -> String {
        match self {
            Self::Rd(rd) => Self::render_compound("RD", rd.cnwdi, "SG", &rd.sigma),
            Self::Frd(frd) => Self::render_compound("FRD", false, "SG", &frd.sigma),
            Self::DodUcni => "DCNI".to_owned(),
            Self::DoeUcni => "UCNI".to_owned(),
            Self::Tfni => "TFNI".to_owned(),
        }
    }

    /// Render a compound block: `BASE[-CNWDI][-<sigma_label> n n ...]`.
    ///
    /// The SIGMA suffix is omitted entirely when `sigma` is empty.
    fn render_compound(base: &str, cnwdi: bool, sigma_label: &str, sigma: &[u8]) -> String {
        let mut out = String::from(base);
        if cnwdi {
            out.push_str("-CNWDI");
        }
        if !sigma.is_empty() {
            out.push('-');
            out.push_str(sigma_label);
            for n in sigma {
                out.push(' ');
                out.push_str(&n.to_string());
            }
        }
        out
    }

    /// Parse a `//`-delimited AEA block from either banner or portion form.
    ///
    /// Handles compound tokens: `RD`, `RD-CNWDI`, `RD-SIGMA 20`,
    /// `RD-CNWDI-SIGMA 18 20`, `FRD`, `FRD-SIGMA 14`, etc., plus the
    /// spelled-out `RESTRICTED DATA` / `FORMERLY RESTRICTED DATA` prefixes.
    ///
    /// Returns `None` when `s` is not a recognized AEA block. Matching is
    /// exact: malformed compounds such as `RD-` (dangling hyphen) or
    /// `RD-SIGMA 18 XX` (non-numeric SIGMA entry) are rejected rather than
    /// being silently normalized to a shorter valid marking.
    pub fn parse(s: &str) -> Option<Self> {
        // Standalone non-compound markings.
        match s {
            "DOD UCNI" | "DCNI" => return Some(Self::DodUcni),
            "DOE UCNI" | "UCNI" => return Some(Self::DoeUcni),
            "TFNI" | "TRANSCLASSIFIED FOREIGN NUCLEAR INFORMATION" => return Some(Self::Tfni),
            _ => {}
        }

        // RD compound block: RD, RD-CNWDI, RD-SIGMA ##, RD-CNWDI-SIGMA ##,
        // RESTRICTED DATA, RESTRICTED DATA-CNWDI, etc.
        if s == "RD" || s == "RESTRICTED DATA" {
            return Some(Self::Rd(RdBlock::default()));
        }
        if let Some(rest) = s
            .strip_prefix("RD-")
            .or_else(|| s.strip_prefix("RESTRICTED DATA-"))
        {
            return Self::parse_rd_modifiers(rest);
        }

        // FRD compound block: FRD, FRD-SIGMA ##,
        // FORMERLY RESTRICTED DATA, etc.
        if s == "FRD" || s == "FORMERLY RESTRICTED DATA" {
            return Some(Self::Frd(FrdBlock::default()));
        }
        if let Some(rest) = s
            .strip_prefix("FRD-")
            .or_else(|| s.strip_prefix("FORMERLY RESTRICTED DATA-"))
        {
            return Self::parse_frd_modifiers(rest);
        }

        None
    }

    /// Parse RD modifiers after the `RD-` prefix.
    /// Handles: `CNWDI`, `N`, `SIGMA ##`, `CNWDI-SIGMA ##`, `SG ##`, `CNWDI-SG ##`.
    ///
    /// The hyphen between `CNWDI` and the SIGMA list is mandatory, and an
    /// empty modifier (i.e. the source was `RD-`) is rejected.
    fn parse_rd_modifiers(s: &str) -> Option<Self> {
        // `CNWDI` alone, or the DoD shorthand `N` (RD-N means RD-CNWDI per
        // CAPCO-2016 §6).
        if s == "CNWDI" || s == "N" {
            return Some(Self::Rd(RdBlock {
                cnwdi: true,
                sigma: Box::new([]),
            }));
        }

        // Anything else must end in a SIGMA list, optionally preceded by
        // `CNWDI-`.
        let (cnwdi, sigma_part) = match s.strip_prefix("CNWDI-") {
            Some(after) => (true, after),
            None => (false, s),
        };

        let sigma = parse_sigma_numbers(sigma_part);
        if sigma.is_empty() {
            return None;
        }
        Some(Self::Rd(RdBlock {
            cnwdi,
            sigma: sigma.into(),
        }))
    }

    /// Parse FRD modifiers after the `FRD-` prefix.
    /// Handles: `SIGMA ##`, `SG ##`.
    fn parse_frd_modifiers(s: &str) -> Option<Self> {
        let sigma = parse_sigma_numbers(s);
        if sigma.is_empty() {
            return None;
        }
        Some(Self::Frd(FrdBlock {
            sigma: sigma.into(),
        }))
    }
}

/// Parse SIGMA/SG numbers from a string like `SIGMA 18 20` or `SG 14`.
///
/// Returns an empty vector when `s` does not start with `SIGMA ` / `SG `,
/// when no numbers follow the label, or when *any* entry is not a plain
/// decimal number that fits in a `u8`. Parsing is all-or-nothing so a
/// partially garbled list is never accepted as a shorter valid one.
fn parse_sigma_numbers(s: &str) -> Vec<u8> {
    let list = match s.strip_prefix("SIGMA ").or_else(|| s.strip_prefix("SG ")) {
        Some(list) => list,
        None => return Vec::new(),
    };
    list.split_whitespace()
        .map(|tok| {
            // `u8::from_str` tolerates a leading `+`; require bare digits.
            if tok.bytes().all(|b| b.is_ascii_digit()) {
                tok.parse::<u8>().ok()
            } else {
                None
            }
        })
        .collect::<Option<Vec<u8>>>()
        .unwrap_or_default()
}

/// Displays the marking in its portion-mark form.
impl std::fmt::Display for AeaMarking {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.portion_str())
    }
}
698
699// ---------------------------------------------------------------------------
700// FGI marker (in US-classified markings)
701// ---------------------------------------------------------------------------
702
/// FGI marker in a US-classified marking: `FGI` or `FGI [LIST]`.
///
/// Appears in the FGI block (after SAR, before dissem controls) when a
/// US-classified document references foreign government information.
///
/// This is NOT the same as [`FgiClassification`] — that represents a
/// marking where the classification itself IS foreign. This marker says
/// "this US-classified marking contains foreign government information."
///
/// An empty `countries` list represents source-concealed FGI (no country
/// attribution). If a document mixes source-concealed and source-acknowledged
/// FGI portions, the banner must use the bare `FGI` form without countries
/// to avoid compromising the concealed source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FgiMarker {
    /// Countries (space-delimited in source, e.g. `FGI DEU GBR`).
    /// Empty for source-concealed FGI (bare `FGI`).
    pub countries: Box<[Trigraph]>,
}
722
723// ===========================================================================
724// Non-IC dissemination controls
725// ===========================================================================
726
/// Non-Intelligence Community dissemination control markings (CAPCO Register §9).
///
/// These operate under a separate authority framework from IC dissem controls.
/// In classified documents, most non-IC dissem controls appear **only in portion
/// markings** — they are stripped from banners. However, some controls propagate
/// to classified banners: LIMDIS (NGA Title 10), LES, LES-NF, and SSI. See
/// [`NonIcDissem::propagates_to_classified_banner`] for the authoritative list.
/// When the page is **unclassified**, all non-IC dissem controls propagate to
/// the banner.
///
/// LES-NF and SBU-NF carry NOFORN treatment even when stripped from the banner.
///
/// # CUI note
///
/// CUI (Controlled Unclassified Information) is recognized but not validated.
/// Full CUI rule support is planned for a dedicated crate. The IC equivalent
/// (FOUO) remains in active use in the `DissemControl` enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum NonIcDissem {
    /// LIMITED DISTRIBUTION / LIMDIS / DS
    Limdis,
    /// EXCLUSIVE DISTRIBUTION / EXDIS / XD
    Exdis,
    /// NO DISTRIBUTION / NODIS / ND
    Nodis,
    /// SENSITIVE BUT UNCLASSIFIED / SBU / SBU
    Sbu,
    /// SENSITIVE BUT UNCLASSIFIED NOFORN / SBU NOFORN / SBU-NF
    /// Carries NOFORN treatment even when stripped from banner.
    SbuNf,
    /// LAW ENFORCEMENT SENSITIVE / LES / LES
    Les,
    /// LAW ENFORCEMENT SENSITIVE NOFORN / LES NOFORN / LES-NF
    /// Carries NOFORN treatment even when stripped from banner.
    LesNf,
    /// SENSITIVE SECURITY INFORMATION / SSI / SSI
    Ssi,
}

impl NonIcDissem {
    /// Banner-line abbreviation form.
    pub fn banner_str(self) -> &'static str {
        match self {
            Self::Limdis => "LIMDIS",
            Self::Exdis => "EXDIS",
            Self::Nodis => "NODIS",
            Self::Sbu => "SBU",
            Self::SbuNf => "SBU NOFORN",
            Self::Les => "LES",
            Self::LesNf => "LES NOFORN",
            Self::Ssi => "SSI",
        }
    }

    /// Portion mark abbreviation.
    pub fn portion_str(self) -> &'static str {
        match self {
            Self::Limdis => "DS",
            Self::Exdis => "XD",
            Self::Nodis => "ND",
            Self::Sbu => "SBU",
            Self::SbuNf => "SBU-NF",
            Self::Les => "LES",
            Self::LesNf => "LES-NF",
            Self::Ssi => "SSI",
        }
    }

    /// Parse from either banner or portion form.
    ///
    /// Accepts exactly the strings produced by [`Self::banner_str`] and
    /// [`Self::portion_str`] for the values in [`Self::ALL`], so parsing and
    /// rendering cannot drift apart. Matching is exact (case-sensitive, no
    /// trimming). Returns `None` for anything else.
    pub fn parse(s: &str) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|control| s == control.banner_str() || s == control.portion_str())
    }

    /// Returns true if this control carries NOFORN treatment.
    pub fn carries_noforn(self) -> bool {
        matches!(self, Self::SbuNf | Self::LesNf)
    }

    /// Returns true if this control propagates to classified banners.
    ///
    /// Most non-IC dissem controls are stripped from banners in classified
    /// documents. These exceptions propagate:
    /// - LIMDIS: NGA Title 10 marking, appears in classified banners
    /// - LES: propagates to banners; LES-NF propagates as NOFORN//LES
    /// - SSI: propagates to banners
    pub fn propagates_to_classified_banner(self) -> bool {
        matches!(self, Self::Limdis | Self::Les | Self::LesNf | Self::Ssi)
    }

    /// All valid values, in declaration order.
    ///
    /// The lifetime is spelled out as `'static`: eliding it in an associated
    /// constant is flagged by rustc's
    /// `elided_lifetimes_in_associated_constant` lint.
    pub const ALL: &'static [NonIcDissem] = &[
        Self::Limdis,
        Self::Exdis,
        Self::Nodis,
        Self::Sbu,
        Self::SbuNf,
        Self::Les,
        Self::LesNf,
        Self::Ssi,
    ];
}
839
840impl std::fmt::Display for NonIcDissem {
841    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
842        f.write_str(self.portion_str())
843    }
844}
845
846// ===========================================================================
847// Trigraph
848// ===========================================================================
849
/// A 3-character country trigraph (e.g., USA, GBR, AUS).
///
/// Validated against CVE country code list at rule-check time.
///
/// # Invariant
///
/// All three bytes are ASCII uppercase letters (`A`–`Z`). The inner bytes are
/// private; construction goes through [`Trigraph::try_new`] (or the
/// [`Trigraph::USA`] constant), which enforces this so that
/// [`Trigraph::as_str`] can return a `&str` infallibly without panicking at
/// runtime. `as_str` relies on the invariant for an unchecked UTF-8
/// conversion, so any new constructor must uphold it as well.
///
/// # Limitations
///
/// CAPCO also uses tetragraphs (NATO, FVEY, ACGU) and longer org codes
/// (AUSTRALIA_GROUP). These are present in the CVE TRIGRAPHS list but cannot
/// be represented by this type's 3-byte constraint. A broader `CountryCode`
/// type is planned for a future version.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Trigraph([u8; 3]);
865
866impl Trigraph {
867    /// The always-valid `USA` trigraph constant.
868    pub const USA: Self = Self(*b"USA");
869
870    /// Attempt to construct a trigraph from 3 bytes.
871    ///
872    /// Returns `None` unless every byte is an ASCII uppercase letter
873    /// (`A`–`Z`), which is the invariant enforced by CAPCO for all valid
874    /// country/entity codes.
875    #[inline]
876    pub const fn try_new(bytes: [u8; 3]) -> Option<Self> {
877        let mut i = 0;
878        while i < 3 {
879            if !bytes[i].is_ascii_uppercase() {
880                return None;
881            }
882            i += 1;
883        }
884        Some(Self(bytes))
885    }
886
887    /// Return the trigraph as a string slice.
888    ///
889    /// Infallible because construction via [`Trigraph::try_new`] (or the
890    /// [`Trigraph::USA`] constant) guarantees ASCII-uppercase bytes, which
891    /// are always valid UTF-8.
892    #[inline]
893    pub fn as_str(&self) -> &str {
894        // SAFETY: `Trigraph` can only be constructed via `try_new` or the
895        // `USA` constant, both of which require ASCII uppercase letters.
896        // ASCII is a subset of valid UTF-8.
897        unsafe { std::str::from_utf8_unchecked(&self.0) }
898    }
899}
900
901impl std::fmt::Display for Trigraph {
902    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
903        f.write_str(self.as_str())
904    }
905}
906
#[cfg(test)]
mod tests {
    //! Unit tests for the value types in this module: `Trigraph`
    //! construction/formatting, classification ordering and string tables,
    //! NATO equivalence, `NonIcDissem` parsing/predicates, and the
    //! `IsmAttributes::us_classification` convenience accessor.

    use super::*;

    #[test]
    fn trigraph_usa_constant_is_valid() {
        assert_eq!(Trigraph::USA.as_str(), "USA");
    }

    #[test]
    fn trigraph_try_new_accepts_uppercase() {
        let t = Trigraph::try_new(*b"GBR").unwrap();
        assert_eq!(t.as_str(), "GBR");
    }

    #[test]
    fn trigraph_try_new_rejects_lowercase() {
        assert!(Trigraph::try_new(*b"usa").is_none());
    }

    #[test]
    fn trigraph_try_new_rejects_digits() {
        assert!(Trigraph::try_new(*b"US1").is_none());
    }

    #[test]
    fn trigraph_try_new_rejects_high_bytes() {
        assert!(Trigraph::try_new([0xFF, 0xFF, 0xFF]).is_none());
    }

    #[test]
    fn trigraph_display_matches_as_str() {
        assert_eq!(Trigraph::USA.to_string(), "USA");
        let gbr = Trigraph::try_new(*b"GBR").unwrap();
        assert_eq!(gbr.to_string(), gbr.as_str());
    }

    #[test]
    fn classification_ord_is_restrictiveness() {
        assert!(Classification::Unclassified < Classification::Restricted);
        assert!(Classification::Restricted < Classification::Confidential);
        assert!(Classification::Confidential < Classification::Secret);
        assert!(Classification::Secret < Classification::TopSecret);
    }

    #[test]
    fn classification_banner_portion_round_trip() {
        for c in [
            Classification::Unclassified,
            Classification::Restricted,
            Classification::Confidential,
            Classification::Secret,
            Classification::TopSecret,
        ] {
            assert!(!c.banner_str().is_empty());
            assert!(!c.portion_str().is_empty());
        }
    }

    #[test]
    fn nato_us_equivalent_mapping() {
        assert_eq!(
            NatoClassification::CosmicTopSecret.us_equivalent(),
            Classification::TopSecret,
        );
        assert_eq!(
            NatoClassification::NatoSecret.us_equivalent(),
            Classification::Secret,
        );
        assert_eq!(
            NatoClassification::NatoRestricted.us_equivalent(),
            Classification::Restricted,
        );
    }

    #[test]
    fn nato_banner_portion_round_trip() {
        for n in [
            NatoClassification::NatoUnclassified,
            NatoClassification::NatoRestricted,
            NatoClassification::NatoConfidential,
            NatoClassification::NatoConfidentialAtomal,
            NatoClassification::NatoSecret,
            NatoClassification::NatoSecretAtomal,
            NatoClassification::CosmicTopSecret,
            NatoClassification::CosmicTopSecretAtomal,
            NatoClassification::CosmicTopSecretBohemia,
            NatoClassification::CosmicTopSecretBalk,
        ] {
            assert!(!n.banner_str().is_empty());
            assert!(!n.portion_str().is_empty());
        }
    }

    // -- NonIcDissem --------------------------------------------------------
    // `parse`, `ALL`, and the banner/portion string tables are maintained by
    // hand; these tests keep them in agreement with each other.

    #[test]
    fn non_ic_dissem_all_is_complete_and_in_declaration_order() {
        assert_eq!(NonIcDissem::ALL.len(), 8);
        // Strictly increasing under the derived `Ord` implies both
        // declaration order and the absence of duplicates.
        assert!(NonIcDissem::ALL.windows(2).all(|pair| pair[0] < pair[1]));
    }

    #[test]
    fn non_ic_dissem_parse_round_trips_both_forms() {
        for &control in NonIcDissem::ALL {
            assert_eq!(NonIcDissem::parse(control.banner_str()), Some(control));
            assert_eq!(NonIcDissem::parse(control.portion_str()), Some(control));
        }
    }

    #[test]
    fn non_ic_dissem_parse_rejects_unknown_input() {
        for input in ["", "les", " LES", "LES ", "FOUO", "SBU NF"] {
            assert_eq!(NonIcDissem::parse(input), None, "input: {input:?}");
        }
    }

    #[test]
    fn non_ic_dissem_noforn_and_banner_propagation() {
        for &control in NonIcDissem::ALL {
            let expects_noforn = matches!(control, NonIcDissem::SbuNf | NonIcDissem::LesNf);
            let expects_propagation = matches!(
                control,
                NonIcDissem::Limdis | NonIcDissem::Les | NonIcDissem::LesNf | NonIcDissem::Ssi
            );
            assert_eq!(control.carries_noforn(), expects_noforn, "{control:?}");
            assert_eq!(
                control.propagates_to_classified_banner(),
                expects_propagation,
                "{control:?}"
            );
        }
    }

    #[test]
    fn non_ic_dissem_display_uses_portion_mark() {
        for &control in NonIcDissem::ALL {
            assert_eq!(control.to_string(), control.portion_str());
        }
    }

    #[test]
    fn us_classification_convenience_returns_us() {
        let attrs = IsmAttributes {
            classification: Some(MarkingClassification::Us(Classification::Secret)),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), Some(Classification::Secret));
    }

    #[test]
    fn us_classification_convenience_returns_none_for_nato() {
        let attrs = IsmAttributes {
            classification: Some(MarkingClassification::Nato(NatoClassification::NatoSecret)),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), None);
    }

    #[test]
    fn us_classification_convenience_returns_resolved_for_conflict() {
        let attrs = IsmAttributes {
            classification: Some(MarkingClassification::Conflict {
                us: Classification::TopSecret,
                foreign: Box::new(ForeignClassification::Nato(
                    NatoClassification::CosmicTopSecret,
                )),
            }),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), Some(Classification::TopSecret));
    }
}