Skip to main content

marque_ism/
attrs.rs

1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
5//! `IsmAttributes` — the canonical in-memory representation of a classification marking.
6//!
7//! Mirrors the IC ISM XML attribute model. Every source format (free text, XML, web forms)
8//! normalizes into this struct before rule validation.
9//!
10//! # Type design
11//! Multi-value fields use `Box<[T]>` rather than `Vec<T>` to avoid over-allocation
12//! after parsing. Most markings have 0–4 values per field.
13//!
14//! # Classification systems
15//!
16//! A marking carries exactly one classification system: US, FGI (non-US),
17//! NATO, or JOINT. This is represented by [`MarkingClassification`]. Non-US
18//! classifications start with `//` (the US classification slot is empty).
19//!
20//! When the parser encounters two classification systems in one marking
21//! (e.g., `SECRET//NATO SECRET//NOFORN`), it resolves to
22//! [`MarkingClassification::Conflict`] — US wins at the greater of the two
23//! levels, and the foreign part is preserved for rule-generated fixes.
24//!
25//! # Code generation
26//! CVE enum variants (`SciControl`, `DissemControl`, `DeclassExemption`) are
27//! generated by `build.rs` from ODNI CVE XML files and re-exported from
28//! `crate::generated::values`.
29//!
30//! SAR is NOT code-generated — SAR program identifiers are agency-assigned
31//! codewords, not a closed vocabulary. SAR is modeled structurally via
32//! [`SarMarking`] / [`SarProgram`] / [`SarCompartment`].
33
34use crate::date::IsmDate;
35use crate::generated::values;
36use crate::span::Span;
37
38// Re-export generated enum types for convenience.
39pub use values::{DeclassExemption, DissemControl, SciControl, SciControlBare};
40
41/// Canonical in-memory representation of a classification marking.
42///
43/// Produced by `marque-core::parser` from scanner candidates.
44/// Consumed by `marque-rules::Rule` implementations for validation.
45///
46/// # Block ordering (CAPCO)
47///
48/// Fields are ordered per CAPCO block sequence:
49/// Classification → SCI → SAR → FGI marker → Dissem (incl. REL TO)
50#[non_exhaustive]
51#[derive(Debug, Clone, Default, PartialEq, Eq)]
52pub struct IsmAttributes {
53    /// The marking's classification system and level.
54    /// `None` means parsing failed to identify a classification.
55    pub classification: Option<MarkingClassification>,
56
57    /// SCI controls (e.g., SI, TK, HCS-P). Ordered per CAPCO block ordering.
58    ///
59    /// This is the *enum projection* populated by the parser's CVE exact-match
60    /// path. Retained for back-compat with existing rules (E010, E011). New
61    /// rules that need compartment / sub-compartment structure should read
62    /// [`IsmAttributes::sci_markings`] instead.
63    pub sci_controls: Box<[SciControl]>,
64
65    /// Structural view of SCI category-block entries.
66    ///
67    /// Each entry corresponds to one `/`-separated marking within an SCI
68    /// category block (e.g., `//SI-G/TK-BLFH//` yields two `SciMarking`
69    /// entries). Populated alongside `sci_controls`; `sci_markings` is the
70    /// authoritative source for rules that inspect compartments or
71    /// sub-compartments. See spec 003-sci-compartments.
72    pub sci_markings: Box<[SciMarking]>,
73
74    /// Special Access Required block, if present. Only one SAR block is
75    /// permitted per marking per §A.6; cardinality is `Option`, not `Vec`.
76    /// See [`SarMarking`] for the structural representation.
77    pub sar_markings: Option<SarMarking>,
78
79    /// Atomic Energy Act markings (CAPCO Register §6).
80    ///
81    /// Includes RD, FRD, CNWDI, TFNI, SIGMA, and UCNI variants.
82    /// Positioned between SAR and FGI in CAPCO block ordering.
83    pub aea_markings: Box<[AeaMarking]>,
84
85    /// FGI block in US-classified markings: `FGI` or `FGI [LIST]`.
86    ///
87    /// Present when a US-classified document references foreign government
88    /// information. This is the *marker* in the banner/portion — distinct
89    /// from [`MarkingClassification::Fgi`], which means the marking IS
90    /// foreign-classified.
91    ///
92    /// `None` when no FGI marker is present.
93    pub fgi_marker: Option<FgiMarker>,
94
95    /// Dissemination controls (e.g., NOFORN, RELIDO, ORCON, FISA).
96    pub dissem_controls: Box<[DissemControl]>,
97
98    /// Non-IC dissemination controls (e.g., LIMDIS, SBU, LES, SSI).
99    ///
100    /// Separate authority framework (CAPCO Register §9), distinct from IC
101    /// dissem controls. In classified documents these are generally portion-
102    /// only and stripped from banners, but some values propagate to the
103    /// classified banner; see [`NonIcDissem::propagates_to_classified_banner`]
104    /// for the authoritative rule. On unclassified pages they propagate to
105    /// the banner. LES-NF and SBU-NF carry NOFORN treatment even when
106    /// stripped.
107    pub non_ic_dissem: Box<[NonIcDissem]>,
108
109    /// REL TO country / country-group codes. USA must be present and
110    /// first when the marking targets a US release.
111    ///
112    /// Holds the full CAPCO country-code surface — trigraphs (`USA`,
113    /// `GBR`), tetragraphs / country-group codes (`FVEY`, `ACGU`,
114    /// `NATO`, `RSMA`, …), and the longer registered codes (`EU`,
115    /// `AUSTRALIA_GROUP`). Tetragraph membership expansion (FVEY →
116    /// {AUS, CAN, GBR, NZL, USA}) happens at banner-roll-up time in
117    /// [`PageContext::expected_rel_to`], not at parse time, so this
118    /// list preserves the source vocabulary as written.
119    ///
120    /// Structurally part of the dissem block (comma-delimited), but
121    /// kept as a typed field for E002 and REL TO validation rules.
122    pub rel_to: Box<[CountryCode]>,
123
124    /// Declassification date from CAB (ISM precision-tier union).
125    ///
126    /// Typed as [`IsmDate`] to preserve the precision tier from the original
127    /// source. In CAPCO text markings the parser accepts:
128    /// - `YYYY` (4-digit year → [`IsmDate::Year`])
129    /// - `YYYYMMDD` (8-digit no-hyphen → [`IsmDate::Date`])
130    /// - ISO 8601 with hyphens (`YYYY-MM-DD`, etc.) for XML-sourced markings.
131    ///
132    /// `Year(y)` represents the entire calendar year — its end-of-span is
133    /// December 31 of year `y`, which is later than any date in that year.
134    /// Use [`IsmDate::end_cmp`] when determining the most-conservative
135    /// (furthest-out) date across portions.
136    pub declassify_on: Option<IsmDate>,
137
138    /// Free-text "Classified By" identifier from CAB.
139    pub classified_by: Option<Box<str>>,
140
141    /// Free-text "Derived From" source from CAB.
142    pub derived_from: Option<Box<str>>,
143
144    /// Declassification exemption code from CAB (e.g., 25X1, 50X1-HUM).
145    pub declass_exemption: Option<DeclassExemption>,
146
147    /// Per-token byte spans into the *original source buffer*, recorded by
148    /// the parser as it walks the marking string. Phase 3 added this so
149    /// rules can point at the exact offending byte range instead of the
150    /// whole marking. Empty for CAB markings (CAB parsing is line-structured
151    /// and doesn't go through the token-walking path).
152    ///
153    /// Indexing convention: `token_spans` is in document order. To find the
154    /// span for the Nth `DissemControl`, walk the slice and pick the Nth
155    /// entry whose `kind == TokenKind::DissemControl`.
156    pub token_spans: Box<[TokenSpan]>,
157}
158
159impl IsmAttributes {
160    /// Convenience accessor: returns the US classification level if this
161    /// marking uses the US or Conflict classification system.
162    ///
163    /// Returns `None` for pure FGI, NATO, or JOINT markings (use
164    /// `self.classification` directly for those).
165    pub fn us_classification(&self) -> Option<Classification> {
166        match self.classification {
167            Some(MarkingClassification::Us(c)) => Some(c),
168            Some(MarkingClassification::Conflict { us, .. }) => Some(us),
169            _ => None,
170        }
171    }
172}
173
174/// One parser-recognized token plus its byte span in the original source.
175///
176/// Used by Phase 3 rules to surface byte-precise diagnostic spans without
177/// re-parsing the source. The `text` field carries the literal token bytes
178/// so rules that need the source content (E006, E007, E008 against migration
179/// keys) can look up entries without threading `&[u8] source` through every
180/// `Rule::check` signature.
181#[derive(Debug, Clone, PartialEq, Eq)]
182pub struct TokenSpan {
183    pub kind: TokenKind,
184    pub span: Span,
185    pub text: Box<str>,
186}
187
/// Category tag carried by every [`TokenSpan`]. Phase 3 rules filter
/// token-span lookups on this value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// A classification level token (S, SECRET, TS, TOP SECRET, ...).
    Classification,
    /// An SCI control token (SI, TK, HCS, ...).
    ///
    /// Produced by the existing CVE exact-match path. The structural
    /// parser from spec 003-sci-compartments emits
    /// [`TokenKind::SciSystem`], [`TokenKind::SciCompartment`], and
    /// [`TokenKind::SciSubCompartment`] instead.
    SciControl,
    /// Structural SCI control-system anchor (e.g., `SI`, `TK`, `123`).
    ///
    /// Produced by the structural SCI parser introduced in spec
    /// 003-sci-compartments, alongside the existing
    /// [`TokenKind::SciControl`] token for exact-CVE matches.
    SciSystem,
    /// Structural SCI compartment identifier (e.g., `G` in `SI-G`).
    SciCompartment,
    /// Structural SCI sub-compartment identifier (e.g., `ABCD` in `SI-G ABCD`).
    SciSubCompartment,
    /// Legacy SAR identifier token. The structural SAR model replaced it
    /// with `SarIndicator` + `SarProgram` + `SarCompartment` +
    /// `SarSubCompartment`; the parser no longer emits it.
    #[deprecated(note = "use SarIndicator/SarProgram/SarCompartment/SarSubCompartment")]
    SarIdentifier,
    /// SAR category indicator: `SAR-` or `SPECIAL ACCESS REQUIRED-`.
    /// There is one per SAR block, and it anchors block-ordering rules.
    SarIndicator,
    /// SAR program identifier (e.g., `BP`, `BUTTER POPCORN`).
    SarProgram,
    /// SAR compartment identifier (e.g., `J12`).
    SarCompartment,
    /// SAR sub-compartment identifier (e.g., `J54`).
    SarSubCompartment,
    /// Atomic Energy Act marking token (RD, FRD, CNWDI, TFNI, SIGMA ##, etc.).
    AeaMarking,
    /// FGI marker token (`FGI`, `FGI DEU`, `FGI DEU GBR`).
    FgiMarker,
    /// Dissemination control token (NOFORN, NF, ORCON, OC, RELIDO, ...).
    DissemControl,
    /// Non-IC dissemination control token (LIMDIS, DS, SBU, LES, SSI, ...).
    NonIcDissem,
    /// A single REL TO country trigraph (USA, GBR, AUS, ...). Emitted once
    /// per code, not once for the whole REL TO list.
    RelToTrigraph,
    /// The complete `REL TO ...` block text, kept so E013 can inspect the
    /// raw source for delimiter errors (spaces instead of commas).
    RelToBlock,
    /// Declassification exemption code in CAB or banner (25X1, 50X1-HUM).
    DeclassExemption,
    /// Declassification date in CAB or banner (YYYYMMDD or YYYY).
    DeclassDate,
    /// The `//` separator between blocks, kept so E004 can detect extra or
    /// missing separator runs.
    Separator,
    /// A non-empty block that matched no known token kind. E008 reports
    /// one diagnostic per `Unknown` entry.
    Unknown,
}
249
250// ===========================================================================
251// SAR (Special Access Required) structural types
252// ===========================================================================
253//
254// See CAPCO Register §H.5 (pp 99–102) and §A.6 (pp 15–17) for the source
255// grammar. SAR identifiers are agency-assigned codewords and cannot be
256// enumerated — this type hierarchy validates shape and roll-up rather than
257// membership.
258
259/// Complete SAR category block parsed from a marking.
260///
261/// Produced by `marque-core::parser::parse_sar_category` (P2) and stored on
262/// [`IsmAttributes::sar_markings`]. Only one SAR block is permitted per
263/// marking per §A.6; multiple `//SAR-…//` blocks in the same marking yield
264/// an `E030 sar-indicator-repeat` diagnostic.
265#[non_exhaustive]
266#[derive(Debug, Clone, PartialEq, Eq)]
267pub struct SarMarking {
268    /// The form of SAR indicator used in the source marking.
269    pub indicator: SarIndicator,
270    /// Programs in the order they appeared. Sort-order validation is
271    /// performed by rule E028, not at parse time.
272    pub programs: Box<[SarProgram]>,
273}
274
/// The SAR indicator form used by a marking. Banner lines may use either
/// form; portion marks are restricted to `Abbrev` (enforced by rule E026).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SarIndicator {
    /// The abbreviated `SAR-` form (portion and banner).
    Abbrev,
    /// The spelled-out `SPECIAL ACCESS REQUIRED-` form (banner only).
    Full,
}
284
285/// A single Special Access Program with optional compartments.
286///
287/// Identifier forms (§A.6 grammar):
288/// - Abbreviated: 2–3 alphanumeric characters (`BP`, `CD`, `XR`).
289/// - Full (nickname): uppercase letters with optional spaces
290///   (`BUTTER POPCORN`).
291#[non_exhaustive]
292#[derive(Debug, Clone, PartialEq, Eq)]
293pub struct SarProgram {
294    /// Program identifier as it appeared in the source.
295    pub identifier: Box<str>,
296    /// Compartments in source order. May be empty.
297    pub compartments: Box<[SarCompartment]>,
298}
299
/// One compartment of a SAR program, together with any sub-compartments.
///
/// The hierarchy stops here: §H.5 p100 explicitly forbids depicting
/// anything below the sub-compartment level.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SarCompartment {
    /// Alphanumeric compartment identifier.
    pub identifier: Box<str>,
    /// Sub-compartments in source order; empty when there are none.
    pub sub_compartments: Box<[Box<str>]>,
}
312
313impl SarMarking {
314    /// Construct a [`SarMarking`] from an indicator form and a list of
315    /// programs. `programs` SHOULD be in source order — sort validation is
316    /// performed by rule E028, not here.
317    pub fn new(indicator: SarIndicator, programs: Box<[SarProgram]>) -> Self {
318        Self {
319            indicator,
320            programs,
321        }
322    }
323}
324
325impl SarProgram {
326    /// Construct a [`SarProgram`] with an optional compartment list.
327    pub fn new(identifier: Box<str>, compartments: Box<[SarCompartment]>) -> Self {
328        Self {
329            identifier,
330            compartments,
331        }
332    }
333}
334
335impl SarCompartment {
336    /// Construct a [`SarCompartment`] with an optional sub-compartment list.
337    pub fn new(identifier: Box<str>, sub_compartments: Box<[Box<str>]>) -> Self {
338        Self {
339            identifier,
340            sub_compartments,
341        }
342    }
343}
344
345// ===========================================================================
346// Classification types
347// ===========================================================================
348
349/// The classification system and level for a marking.
350///
351/// A marking has exactly one classification system. When the parser finds
352/// two (e.g., `SECRET//NATO SECRET//...`), it resolves to [`Conflict`](Self::Conflict).
353#[derive(Debug, Clone, PartialEq, Eq)]
354pub enum MarkingClassification {
355    /// US IC classification.
356    Us(Classification),
357    /// Non-US (FGI) classification: `//GBR S//...`
358    Fgi(FgiClassification),
359    /// NATO classification: `//NS//...`
360    Nato(NatoClassification),
361    /// JOINT classification (US co-owned): `//JOINT S USA GBR//...`
362    Joint(JointClassification),
363    /// Parser found two classification systems in one marking.
364    ///
365    /// US wins, upgraded to the greater of the two levels.
366    /// The foreign part is preserved so rules can suggest the FGI fix.
367    ///
368    /// Example: `SECRET//COSMIC TOP SECRET//REL TO USA, NATO`
369    /// → `us: TopSecret`, `foreign: Nato(CosmicTopSecret)`
370    /// → fix: `TOP SECRET//FGI NATO//REL TO USA, NATO`
371    Conflict {
372        /// Resolved US classification (max of both levels).
373        us: Classification,
374        /// The foreign classification that should become an FGI marker.
375        foreign: Box<ForeignClassification>,
376    },
377}
378
379impl MarkingClassification {
380    /// The effective classification level for ordering purposes, regardless of
381    /// classification system.
382    ///
383    /// NATO levels are mapped to their US equivalents via
384    /// [`NatoClassification::us_equivalent`]. All systems use the
385    /// [`Classification`] ladder for comparison so that `Iterator::max()` on
386    /// a mixed set of portions returns the most restrictive level overall.
387    pub fn effective_level(&self) -> Classification {
388        match self {
389            Self::Us(c) => *c,
390            Self::Fgi(f) => f.level,
391            Self::Nato(n) => n.us_equivalent(),
392            Self::Joint(j) => j.level,
393            Self::Conflict { us, .. } => *us,
394        }
395    }
396}
397
398impl Default for MarkingClassification {
399    fn default() -> Self {
400        Self::Us(Classification::Unclassified)
401    }
402}
403
404/// The non-US classification in a [`MarkingClassification::Conflict`].
405///
406/// Preserves enough information for rules to generate the FGI fix:
407/// the foreign system, its level, and any associated countries.
408#[derive(Debug, Clone, PartialEq, Eq)]
409pub enum ForeignClassification {
410    Fgi(FgiClassification),
411    Nato(NatoClassification),
412    Joint(JointClassification),
413}
414
415// ---------------------------------------------------------------------------
416// Classification level (US ladder + RESTRICTED for foreign interop)
417// ---------------------------------------------------------------------------
418
/// Classification level, ordered by restrictiveness: U < R < C < S < TS.
///
/// `Restricted` is included for foreign-origin markings: many non-US
/// classification systems (and NATO) place a RESTRICTED level between
/// UNCLASSIFIED and CONFIDENTIAL.
///
/// The derived `Ord` follows declaration order, which is restrictiveness
/// order, so `Iterator::max()` returns the most restrictive level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Classification {
    Unclassified,
    Restricted,
    Confidential,
    Secret,
    TopSecret,
}

impl Classification {
    /// Banner form (full words, no abbreviations).
    pub fn banner_str(self) -> &'static str {
        self.banner_and_portion().0
    }

    /// Portion form (abbreviation used in portion markings).
    pub fn portion_str(self) -> &'static str {
        self.banner_and_portion().1
    }

    /// `(banner, portion)` spellings of a level, listed side by side so
    /// the two forms stay in step.
    fn banner_and_portion(self) -> (&'static str, &'static str) {
        match self {
            Self::Unclassified => ("UNCLASSIFIED", "U"),
            Self::Restricted => ("RESTRICTED", "R"),
            Self::Confidential => ("CONFIDENTIAL", "C"),
            Self::Secret => ("SECRET", "S"),
            Self::TopSecret => ("TOP SECRET", "TS"),
        }
    }
}
459
460// ---------------------------------------------------------------------------
461// FGI classification (non-US, country-prefixed)
462// ---------------------------------------------------------------------------
463
464/// Non-US (FGI) classification.
465///
466/// Two forms exist:
467///
468/// - **Source-acknowledged**: country trigraph(s) identify the originator.
469///   `//GBR S//REL TO USA, GBR`
470/// - **Source-concealed**: `FGI` replaces the country trigraph(s) when
471///   the originating country is sensitive. `//FGI S//REL TO USA, GBR`
472///   An empty `countries` list indicates source-concealed FGI.
473///
474/// Countries are space-delimited in the source marking.
475///
476/// # Banner aggregation
477///
478/// If a document contains **any** source-concealed FGI portions alongside
479/// source-acknowledged FGI portions, the banner must use `FGI` without
480/// country codes — revealing the country list would compromise the
481/// concealed source. This rule is enforced at the `PageContext` level
482/// during banner validation.
483#[derive(Debug, Clone, PartialEq, Eq)]
484pub struct FgiClassification {
485    /// Originating countries (space-delimited in source).
486    /// Empty for source-concealed FGI (`//FGI S//...`).
487    pub countries: Box<[CountryCode]>,
488    /// Classification level (includes RESTRICTED).
489    pub level: Classification,
490}
491
492// ---------------------------------------------------------------------------
493// NATO classification
494// ---------------------------------------------------------------------------
495
/// The NATO classification ladder, including optional SAP designations.
///
/// NATO runs its own treaty-governed classification system. A US
/// clearance does not imply clearance for NATO information, and many US
/// systems are not approved to hold it.
///
/// # NATO SAP markings
///
/// There are three NATO SAP programs, each with its own constraints:
///
/// - **ATOMAL**: Applies to CTS, NS, and NC levels. Space-separated in
///   banner (`COSMIC TOP SECRET ATOMAL`). Portion marks: CTSA, NSAT, NCA.
///   Alternative portion forms CTS-A, NS-A, NC-A also appear in practice.
/// - **BOHEMIA**: CTS-only. Hyphenated (`COSMIC TOP SECRET-BOHEMIA` → `CTS-B`).
/// - **BALK**: CTS-only, exercise replacement for BOHEMIA.
///   Hyphenated (`COSMIC TOP SECRET-BALK` → `CTS-BALK`).
///
/// Per the CAPCO Register, bare `COSMIC TOP SECRET` requires either
/// BOHEMIA or BALK; standalone CTS without a SAP suffix is an error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NatoClassification {
    /// Portion mark `NU`.
    NatoUnclassified,
    /// Portion mark `NR`.
    NatoRestricted,
    /// Portion mark `NC`.
    NatoConfidential,
    /// Portion mark `NCA` (alt: `NC-A`).
    NatoConfidentialAtomal,
    /// Portion mark `NS`.
    NatoSecret,
    /// Portion mark `NSAT` (alt: `NS-A`).
    NatoSecretAtomal,
    /// Portion mark `CTS` (requires BOHEMIA or BALK).
    CosmicTopSecret,
    /// Portion mark `CTSA` (alt: `CTS-A`).
    CosmicTopSecretAtomal,
    /// Portion mark `CTS-B`.
    CosmicTopSecretBohemia,
    /// Portion mark `CTS-BALK`.
    CosmicTopSecretBalk,
}
528
529impl NatoClassification {
530    /// Banner form (full words, as used in banner marking lines).
531    pub fn banner_str(self) -> &'static str {
532        match self {
533            Self::NatoUnclassified => "NATO UNCLASSIFIED",
534            Self::NatoRestricted => "NATO RESTRICTED",
535            Self::NatoConfidential => "NATO CONFIDENTIAL",
536            Self::NatoConfidentialAtomal => "NATO CONFIDENTIAL ATOMAL",
537            Self::NatoSecret => "NATO SECRET",
538            Self::NatoSecretAtomal => "NATO SECRET ATOMAL",
539            Self::CosmicTopSecret => "COSMIC TOP SECRET",
540            Self::CosmicTopSecretAtomal => "COSMIC TOP SECRET ATOMAL",
541            Self::CosmicTopSecretBohemia => "COSMIC TOP SECRET-BOHEMIA",
542            Self::CosmicTopSecretBalk => "COSMIC TOP SECRET-BALK",
543        }
544    }
545
546    /// Portion form (primary abbreviation from the CAPCO Register).
547    pub fn portion_str(self) -> &'static str {
548        match self {
549            Self::NatoUnclassified => "NU",
550            Self::NatoRestricted => "NR",
551            Self::NatoConfidential => "NC",
552            Self::NatoConfidentialAtomal => "NCA",
553            Self::NatoSecret => "NS",
554            Self::NatoSecretAtomal => "NSAT",
555            Self::CosmicTopSecret => "CTS",
556            Self::CosmicTopSecretAtomal => "CTSA",
557            Self::CosmicTopSecretBohemia => "CTS-B",
558            Self::CosmicTopSecretBalk => "CTS-BALK",
559        }
560    }
561
562    /// The base classification level (without SAP), for ordering comparisons.
563    pub fn base_level(self) -> NatoLevel {
564        match self {
565            Self::NatoUnclassified => NatoLevel::NatoUnclassified,
566            Self::NatoRestricted => NatoLevel::NatoRestricted,
567            Self::NatoConfidential | Self::NatoConfidentialAtomal => NatoLevel::NatoConfidential,
568            Self::NatoSecret | Self::NatoSecretAtomal => NatoLevel::NatoSecret,
569            Self::CosmicTopSecret
570            | Self::CosmicTopSecretAtomal
571            | Self::CosmicTopSecretBohemia
572            | Self::CosmicTopSecretBalk => NatoLevel::CosmicTopSecret,
573        }
574    }
575
576    /// Map the NATO level to the equivalent US classification for conflict
577    /// resolution (US wins at the greater of the two).
578    pub fn us_equivalent(self) -> Classification {
579        match self.base_level() {
580            NatoLevel::NatoUnclassified => Classification::Unclassified,
581            NatoLevel::NatoRestricted => Classification::Restricted,
582            NatoLevel::NatoConfidential => Classification::Confidential,
583            NatoLevel::NatoSecret => Classification::Secret,
584            NatoLevel::CosmicTopSecret => Classification::TopSecret,
585        }
586    }
587}
588
/// A NATO classification level with the SAP designation stripped, used
/// for ordering comparisons.
///
/// The derived `Ord` follows declaration order, lowest level first.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NatoLevel {
    NatoUnclassified,
    NatoRestricted,
    NatoConfidential,
    NatoSecret,
    CosmicTopSecret,
}
598
599// ---------------------------------------------------------------------------
600// JOINT classification
601// ---------------------------------------------------------------------------
602
603/// JOINT classification: US is co-owner with other nations.
604///
605/// `//JOINT S USA GBR//REL TO USA, GBR`
606///
607/// Country list is space-delimited (NOT comma-delimited like REL TO).
608/// Must include USA. All JOINT participants must also appear in REL TO.
609#[derive(Debug, Clone, PartialEq, Eq)]
610pub struct JointClassification {
611    /// Classification level (US ladder, includes RESTRICTED).
612    pub level: Classification,
613    /// Co-owning countries (space-delimited in source). Must include USA.
614    pub countries: Box<[CountryCode]>,
615}
616
617// ---------------------------------------------------------------------------
618// Atomic Energy Act markings
619// ---------------------------------------------------------------------------
620
621/// Atomic Energy Act information markings (CAPCO Register §6).
622///
623/// AEA markings appear as a single `//`-delimited block in the marking string,
624/// using hyphen separators for compound forms:
625/// - `SECRET//RD//NOFORN` — RD alone
626/// - `SECRET//RD-CNWDI//NOFORN` — RD with CNWDI modifier
627/// - `SECRET//RD-SIGMA 20//NOFORN` — RD with SIGMA compartment
628/// - `SECRET//RD-SIGMA 18 20//NOFORN` — RD with multiple SIGMAs
629/// - `SECRET//FRD//NOFORN` — FRD alone
630/// - `SECRET//FRD-SIGMA 14//NOFORN` — FRD with SIGMA
631///
632/// Standalone (non-compound) markings:
633/// - `UNCLASSIFIED//DOD UCNI` / `(U//DCNI)`
634/// - `UNCLASSIFIED//DOE UCNI` / `(U//UCNI)`
635/// - `SECRET//TFNI//NOFORN` / `(S//TFNI//NF)`
636///
637/// # Key rules (CAPCO-2016)
638///
639/// - RD and FRD always require NOFORN unless a sharing agreement exists
640///   (default severity: Error, configurable to Warn via `.marque.toml`)
641/// - CNWDI may only be used with TS or S RD (not standalone, not with FRD)
642/// - SIGMA 14, 15, 18, 20 may only be used with TS or S RD or FRD
643/// - RD takes precedence over FRD and TFNI in both banners and portions
644/// - SIGMA numbers must be in numerical order, space-separated
645#[derive(Debug, Clone, PartialEq, Eq, Hash)]
646#[non_exhaustive]
647pub enum AeaMarking {
648    /// Compound RD block: `RD`, `RD-CNWDI`, `RD-SIGMA 20`, `RD-CNWDI-SIGMA 18 20`
649    Rd(RdBlock),
650    /// Compound FRD block: `FRD`, `FRD-SIGMA 14`
651    Frd(FrdBlock),
652    /// DOD UCNI / DCNI — standalone, unclassified only
653    DodUcni,
654    /// DOE UCNI / UCNI — standalone, unclassified only
655    DoeUcni,
656    /// TFNI — standalone
657    Tfni,
658}
659
/// Restricted Data block with optional modifiers.
///
/// Rendered as `RD`, `RD-CNWDI`, `RD-SIGMA 20`, or `RD-CNWDI-SIGMA 18 20`.
///
/// The derived `Default` is a bare `RD` block: no CNWDI and no SIGMA
/// numbers.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct RdBlock {
    /// Whether CNWDI is present. Only valid with TS or S classification.
    pub cnwdi: bool,
    /// SIGMA compartment numbers (14, 15, 18, 20). Must be in numerical order.
    /// Empty if no SIGMA designation.
    pub sigma: Box<[u8]>,
}
680
/// Formerly Restricted Data block with optional SIGMA modifier.
///
/// Rendered as `FRD` or `FRD-SIGMA 14`.
///
/// The derived `Default` is a bare `FRD` block with no SIGMA numbers.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct FrdBlock {
    /// SIGMA compartment numbers. Must be in numerical order.
    /// Empty if no SIGMA designation.
    pub sigma: Box<[u8]>,
}
698
699impl AeaMarking {
700    /// Banner-line form.
701    pub fn banner_str(&self) -> String {
702        match self {
703            Self::Rd(rd) => {
704                let mut s = "RD".to_owned();
705                if rd.cnwdi {
706                    s.push_str("-CNWDI");
707                }
708                if !rd.sigma.is_empty() {
709                    s.push_str("-SIGMA ");
710                    let nums: Vec<String> = rd.sigma.iter().map(|n| n.to_string()).collect();
711                    s.push_str(&nums.join(" "));
712                }
713                s
714            }
715            Self::Frd(frd) => {
716                let mut s = "FRD".to_owned();
717                if !frd.sigma.is_empty() {
718                    s.push_str("-SIGMA ");
719                    let nums: Vec<String> = frd.sigma.iter().map(|n| n.to_string()).collect();
720                    s.push_str(&nums.join(" "));
721                }
722                s
723            }
724            Self::DodUcni => "DOD UCNI".to_owned(),
725            Self::DoeUcni => "DOE UCNI".to_owned(),
726            Self::Tfni => "TFNI".to_owned(),
727        }
728    }
729
730    /// Portion mark form.
731    pub fn portion_str(&self) -> String {
732        match self {
733            Self::Rd(rd) => {
734                let mut s = "RD".to_owned();
735                if rd.cnwdi {
736                    s.push_str("-CNWDI");
737                }
738                if !rd.sigma.is_empty() {
739                    s.push_str("-SG ");
740                    let nums: Vec<String> = rd.sigma.iter().map(|n| n.to_string()).collect();
741                    s.push_str(&nums.join(" "));
742                }
743                s
744            }
745            Self::Frd(frd) => {
746                let mut s = "FRD".to_owned();
747                if !frd.sigma.is_empty() {
748                    s.push_str("-SG ");
749                    let nums: Vec<String> = frd.sigma.iter().map(|n| n.to_string()).collect();
750                    s.push_str(&nums.join(" "));
751                }
752                s
753            }
754            Self::DodUcni => "DCNI".to_owned(),
755            Self::DoeUcni => "UCNI".to_owned(),
756            Self::Tfni => "TFNI".to_owned(),
757        }
758    }
759
760    /// Parse a `//`-delimited AEA block from either banner or portion form.
761    ///
762    /// Handles compound tokens: `RD`, `RD-CNWDI`, `RD-SIGMA 20`,
763    /// `RD-CNWDI-SIGMA 18 20`, `FRD`, `FRD-SIGMA 14`, etc.
764    pub fn parse(s: &str) -> Option<Self> {
765        // Standalone non-compound markings.
766        match s {
767            "DOD UCNI" | "DCNI" => return Some(Self::DodUcni),
768            "DOE UCNI" | "UCNI" => return Some(Self::DoeUcni),
769            "TFNI" | "TRANSCLASSIFIED FOREIGN NUCLEAR INFORMATION" => return Some(Self::Tfni),
770            _ => {}
771        }
772
773        // RD compound block: RD, RD-CNWDI, RD-SIGMA ##, RD-CNWDI-SIGMA ##,
774        // RESTRICTED DATA, RESTRICTED DATA-CNWDI, etc.
775        if s == "RD" || s == "RESTRICTED DATA" {
776            return Some(Self::Rd(RdBlock::default()));
777        }
778        if let Some(rest) = s
779            .strip_prefix("RD-")
780            .or_else(|| s.strip_prefix("RESTRICTED DATA-"))
781        {
782            return Self::parse_rd_modifiers(rest);
783        }
784
785        // FRD compound block: FRD, FRD-SIGMA ##,
786        // FORMERLY RESTRICTED DATA, etc.
787        if s == "FRD" || s == "FORMERLY RESTRICTED DATA" {
788            return Some(Self::Frd(FrdBlock::default()));
789        }
790        if let Some(rest) = s
791            .strip_prefix("FRD-")
792            .or_else(|| s.strip_prefix("FORMERLY RESTRICTED DATA-"))
793        {
794            return Self::parse_frd_modifiers(rest);
795        }
796
797        None
798    }
799
800    /// Parse RD modifiers after the `RD-` prefix.
801    /// Handles: `CNWDI`, `SIGMA ##`, `CNWDI-SIGMA ##`, `SG ##`, `CNWDI-SG ##`.
802    fn parse_rd_modifiers(s: &str) -> Option<Self> {
803        let mut cnwdi = false;
804        let mut rest = s;
805
806        // Check for CNWDI prefix.
807        if let Some(after) = rest.strip_prefix("CNWDI") {
808            cnwdi = true;
809            rest = after.strip_prefix('-').unwrap_or(after);
810        } else if rest == "N" {
811            // DoD shorthand: RD-N means RD-CNWDI (per CAPCO-2016 §6)
812            return Some(Self::Rd(RdBlock {
813                cnwdi: true,
814                sigma: Box::new([]),
815            }));
816        }
817
818        // Check for SIGMA/SG.
819        let sigma = parse_sigma_numbers(rest);
820
821        if rest.is_empty() || !sigma.is_empty() {
822            Some(Self::Rd(RdBlock {
823                cnwdi,
824                sigma: sigma.into(),
825            }))
826        } else {
827            None
828        }
829    }
830
831    /// Parse FRD modifiers after the `FRD-` prefix.
832    /// Handles: `SIGMA ##`, `SG ##`.
833    fn parse_frd_modifiers(s: &str) -> Option<Self> {
834        let sigma = parse_sigma_numbers(s);
835        if !sigma.is_empty() {
836            Some(Self::Frd(FrdBlock {
837                sigma: sigma.into(),
838            }))
839        } else {
840            None
841        }
842    }
843}
844
/// Parse SIGMA/SG numbers from a string like `SIGMA 18 20` or `SG 14`.
///
/// The input must start with `SIGMA ` or `SG ` followed by
/// whitespace-separated decimal numbers that each fit in a `u8`.
///
/// Returns an empty vector when the prefix is missing, when no numbers
/// follow it, or when **any** token is not a plain decimal `u8`. Parsing
/// is all-or-nothing: a malformed token (e.g. `SIGMA 18 2O`) invalidates
/// the whole list instead of being silently dropped, so callers never
/// receive a truncated SIGMA set.
fn parse_sigma_numbers(s: &str) -> Vec<u8> {
    let list = match s.strip_prefix("SIGMA ").or_else(|| s.strip_prefix("SG ")) {
        Some(list) => list,
        None => return Vec::new(),
    };

    list.split_whitespace()
        .map(|token| {
            // `u8::from_str` accepts a leading `+`; require pure digits so
            // `+18` is not treated as a valid SIGMA number.
            if token.bytes().all(|b| b.is_ascii_digit()) {
                token.parse::<u8>().ok()
            } else {
                None
            }
        })
        // Collecting into `Option<Vec<_>>` short-circuits on the first bad
        // token, turning the whole list into `None`.
        .collect::<Option<Vec<u8>>>()
        .unwrap_or_default()
}
858
859impl std::fmt::Display for AeaMarking {
860    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
861        f.write_str(&self.portion_str())
862    }
863}
864
865// ---------------------------------------------------------------------------
866// FGI marker (in US-classified markings)
867// ---------------------------------------------------------------------------
868
869/// FGI marker in a US-classified marking: `FGI` or `FGI [LIST]`.
870///
871/// Appears in the FGI block (after SAR, before dissem controls) when a
872/// US-classified document references foreign government information.
873///
874/// This is NOT the same as [`FgiClassification`] — that represents a
875/// marking where the classification itself IS foreign. This marker says
876/// "this US-classified marking contains foreign government information."
877///
878/// An empty `countries` list represents source-concealed FGI (no country
879/// attribution). If a document mixes source-concealed and source-acknowledged
880/// FGI portions, the banner must use the bare `FGI` form without countries
881/// to avoid compromising the concealed source.
882#[derive(Debug, Clone, PartialEq, Eq)]
883pub struct FgiMarker {
884    /// Countries (space-delimited in source).
885    /// Empty for source-concealed FGI.
886    pub countries: Box<[CountryCode]>,
887}
888
889// ===========================================================================
890// Non-IC dissemination controls
891// ===========================================================================
892
/// Non-Intelligence Community dissemination control markings (CAPCO Register §9).
///
/// These operate under a separate authority framework from IC dissem controls.
/// In classified documents, some non-IC dissem controls appear **only in
/// portion markings** and are stripped from the banner (LIMDIS, SBU, SBU-NF),
/// while the others propagate to the classified banner (EXDIS, NODIS, LES,
/// LES-NF, SSI). See [`NonIcDissem::propagates_to_classified_banner`] for the
/// authoritative list and per-marking citations.
/// When the page is **unclassified**, all non-IC dissem controls propagate to
/// the banner.
///
/// LES-NF and SBU-NF carry NOFORN treatment even when stripped from the banner.
///
/// # CUI note
///
/// CUI (Controlled Unclassified Information) is recognized but not validated.
/// Full CUI rule support is planned for a dedicated crate. The IC equivalent
/// (FOUO) remains in active use in the `DissemControl` enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum NonIcDissem {
    /// LIMITED DISTRIBUTION / LIMDIS / DS
    Limdis,
    /// EXCLUSIVE DISTRIBUTION / EXDIS / XD
    Exdis,
    /// NO DISTRIBUTION / NODIS / ND
    Nodis,
    /// SENSITIVE BUT UNCLASSIFIED / SBU / SBU
    Sbu,
    /// SENSITIVE BUT UNCLASSIFIED NOFORN / SBU NOFORN / SBU-NF
    /// Carries NOFORN treatment even when stripped from banner.
    SbuNf,
    /// LAW ENFORCEMENT SENSITIVE / LES / LES
    Les,
    /// LAW ENFORCEMENT SENSITIVE NOFORN / LES NOFORN / LES-NF
    /// Carries NOFORN treatment even when stripped from banner.
    LesNf,
    /// SENSITIVE SECURITY INFORMATION / SSI / SSI
    Ssi,
}

impl NonIcDissem {
    /// Banner-line abbreviation form.
    pub fn banner_str(self) -> &'static str {
        match self {
            Self::Limdis => "LIMDIS",
            Self::Exdis => "EXDIS",
            Self::Nodis => "NODIS",
            Self::Sbu => "SBU",
            Self::SbuNf => "SBU NOFORN",
            Self::Les => "LES",
            Self::LesNf => "LES NOFORN",
            Self::Ssi => "SSI",
        }
    }

    /// Portion mark abbreviation.
    pub fn portion_str(self) -> &'static str {
        match self {
            Self::Limdis => "DS",
            Self::Exdis => "XD",
            Self::Nodis => "ND",
            Self::Sbu => "SBU",
            Self::SbuNf => "SBU-NF",
            Self::Les => "LES",
            Self::LesNf => "LES-NF",
            Self::Ssi => "SSI",
        }
    }

    /// Parse from either banner or portion abbreviation form.
    ///
    /// Matching is exact and case-sensitive; only the abbreviations
    /// returned by [`NonIcDissem::banner_str`] and
    /// [`NonIcDissem::portion_str`] are recognized (spelled-out titles
    /// such as `LIMITED DISTRIBUTION` are not). Returns `None` otherwise.
    pub fn parse(s: &str) -> Option<Self> {
        match s {
            "LIMDIS" | "DS" => Some(Self::Limdis),
            "EXDIS" | "XD" => Some(Self::Exdis),
            "NODIS" | "ND" => Some(Self::Nodis),
            "SBU" => Some(Self::Sbu),
            "SBU NOFORN" | "SBU-NF" => Some(Self::SbuNf),
            "LES" => Some(Self::Les),
            "LES NOFORN" | "LES-NF" => Some(Self::LesNf),
            "SSI" => Some(Self::Ssi),
            _ => None,
        }
    }

    /// Returns true if this control carries NOFORN treatment.
    pub fn carries_noforn(self) -> bool {
        matches!(self, Self::SbuNf | Self::LesNf)
    }

    /// Returns true if this control propagates to classified banners.
    ///
    /// Authoritative source: `crates/capco/docs/CAPCO-2016.md` §H.9
    /// "Precedence Rules for Banner Line Guidance" for each marking.
    /// The per-marking rows below cite the specific line of the vendored
    /// manual.
    ///
    /// | Marking  | Propagates | Source (CAPCO-2016 §H.9)                                                                                            |
    /// |----------|------------|----------------------------------------------------------------------------------------------------------------------|
    /// | LIMDIS   | no         | line 4180: "When a document contains LIMDIS and classified portions, LIMDIS is not used in the banner line."         |
    /// | EXDIS    | yes        | line 4240: "If EXDIS is contained in any portion … EXDIS must appear in the banner line." Example banner: `SECRET//NOFORN//EXDIS` |
    /// | NODIS    | yes        | line 4300: "If NODIS is contained in any portion of a document, it must appear in the banner line." Example banner: `SECRET//NOFORN//NODIS` |
    /// | SBU      | no         | line 4358: "When a document contains SBU and classified portions, SBU is not used in the banner line."               |
    /// | SBU-NF   | no (†)     | line 4408: SBU NOFORN "Applicable only to unclassified information." (The §H.9 notional example on p179 shows a `SECRET//NOFORN` banner with a `(U//SBU-NF)` portion — SBU-NF absent from banner.) |
    /// | LES      | yes        | line 4479: "The LES marking always appears in the banner line if contained in any portion, regardless of classification level." |
    /// | LES-NF   | yes (*)    | line 4557: "The LES marking always appears in the banner line if LES information (either LES or LES NOFORN) is contained in the document, regardless of the document's classification level." |
    /// | SSI      | yes        | line 4651: "If the SSI marking is contained in any portion of a document it must appear in the banner line, regardless of the document's overall classification level." |
    ///
    /// (*) LES-NF carries a §H.9 canonicalization that is **not modeled
    ///     here**: in classified docs, `LES NOFORN` → `LES` at the banner
    ///     with NOFORN split into the dissem block (line 4558: "the 'LES'
    ///     marking is used in the banner line and the NOFORN marking is
    ///     applied as a Dissemination Control Marking. For example:
    ///     `SECRET//NOFORN//LES`."). The split itself is handled by
    ///     [`crate::PageContext::expected_non_ic_dissem`]; this predicate
    ///     only answers the binary "does the marking appear in the
    ///     banner at all?" question, which is what W003 consumes.
    ///     Treating `SECRET//LES NOFORN` as non-canonical (so that the
    ///     canonicalization becomes fixable) is a separate page-rewrite
    ///     concern, not a W003 concern.
    ///
    /// (†) "Does not propagate" for SBU-NF refers to the **SBU** half of
    ///     the marking — the literal `SBU NOFORN` banner form is
    ///     non-canonical in a classified document per §H.9 line 4408
    ///     ("applicable only to unclassified information"). The **NOFORN
    ///     half does propagate** via
    ///     [`crate::PageContext::expected_non_ic_dissem`], which splits a
    ///     portion-level `SBU-NF` into `SBU + NF-flag` and emits the
    ///     resulting NOFORN into the classified banner's dissem block.
    ///     So a document with a `(U//SBU-NF)` portion rolls up to a
    ///     `SECRET//NOFORN` banner (NOFORN present, SBU dropped), not
    ///     `SECRET//SBU NOFORN`. W003 therefore fires on the literal
    ///     `SECRET//SBU NOFORN` banner input because that surface form
    ///     is the non-canonical one, not because NOFORN is disallowed.
    pub fn propagates_to_classified_banner(self) -> bool {
        match self {
            // Do NOT propagate — banner-absent in classified documents.
            Self::Limdis | Self::Sbu | Self::SbuNf => false,
            // DO propagate — "must appear in the banner line" per §H.9.
            Self::Exdis | Self::Nodis | Self::Les | Self::LesNf | Self::Ssi => true,
        }
    }

    /// All valid values, in declaration order.
    ///
    /// The `'static` lifetime is written out explicitly: eliding it in an
    /// associated constant is linted on some toolchains.
    pub const ALL: &'static [NonIcDissem] = &[
        Self::Limdis,
        Self::Exdis,
        Self::Nodis,
        Self::Sbu,
        Self::SbuNf,
        Self::Les,
        Self::LesNf,
        Self::Ssi,
    ];
}

impl std::fmt::Display for NonIcDissem {
    /// Writes the portion-mark abbreviation (see [`NonIcDissem::portion_str`]).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.portion_str())
    }
}
1053
1054// ===========================================================================
1055// CountryCode
1056// ===========================================================================
1057
/// Upper bound, in bytes, on the length of a CAPCO country code.
///
/// `AUSTRALIA_GROUP` (15 bytes) is the longest entry in
/// `CVEnumISMCATRelTo.xsd`; a capacity of 16 keeps one spare byte so a
/// future addition does not force a struct-layout change.
const COUNTRY_CODE_CAPACITY: usize = 16;

/// A CAPCO country / country-group code, 2–16 ASCII bytes.
///
/// Every entry in the CVE country code list fits:
/// - 1× 2-char (`EU`)
/// - 280× 3-char trigraphs (`USA`, `GBR`, `AUS`, …)
/// - 58× 4-char tetragraphs / country-group codes (`FVEY`, `ACGU`,
///   `NATO`, `RSMA`, …)
/// - 1× 15-char (`AUSTRALIA_GROUP`)
///
/// The fields are private, so the only way to build a value is
/// [`CountryCode::try_new`], which enforces the CAPCO byte-set invariant
/// (ASCII uppercase letters, ASCII digits, underscore — enough for
/// `AX2`, `AX3`, `AUSTRALIA_GROUP`, and the ordinary alpha trigraphs /
/// tetragraphs). That invariant is what lets [`CountryCode::as_str`]
/// hand back a `&str` without a fallible UTF-8 check.
///
/// The type stays `Copy` so it composes in iterator chains and
/// `BTreeSet`-based intersection without explicit `.clone()` calls, and
/// the fixed-array storage keeps each entry inline in
/// `IsmAttributes::rel_to` (`Box<[CountryCode]>`) — no per-code heap
/// allocation on the parsing hot path.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CountryCode {
    /// Code bytes, zero-padded past `len`. The derived `Ord` compares
    /// these padded bytes lexicographically; because the padding byte
    /// (0) sorts below every valid code byte, a shorter code sorts
    /// before a longer one sharing its prefix, matching `&str` ordering
    /// on ASCII. Must remain the first field so it dominates the
    /// derived comparison.
    bytes: [u8; COUNTRY_CODE_CAPACITY],
    /// Number of active bytes, `2..=COUNTRY_CODE_CAPACITY`.
    len: u8,
}

impl CountryCode {
    /// The always-valid `USA` country code constant.
    ///
    /// Built through [`CountryCode::try_new`] in `const` context. The
    /// `None` arm cannot be reached for `b"USA"` (3 bytes, all ASCII
    /// uppercase); it is present only because the `Option` has to be
    /// unpacked with a `match` here.
    pub const USA: Self = match Self::try_new(b"USA") {
        Some(code) => code,
        None => panic!("CountryCode::USA literal must satisfy try_new invariants"),
    };

    /// Whether `b` belongs to the CAPCO country-code byte set: ASCII
    /// uppercase letter, ASCII digit, or underscore. Digits are needed
    /// for `AX2`/`AX3`, the underscore for `AUSTRALIA_GROUP`.
    #[inline]
    const fn is_valid_byte(b: u8) -> bool {
        matches!(b, b'A'..=b'Z' | b'0'..=b'9' | b'_')
    }

    /// Attempt to construct a country code from a byte slice.
    ///
    /// Returns `None` if `bytes`:
    /// - is shorter than 2 bytes (`EU` is the shortest CVE entry) or
    ///   longer than [`COUNTRY_CODE_CAPACITY`] bytes
    /// - contains any byte outside the CAPCO country-code byte set
    ///   (ASCII uppercase letter, ASCII digit, underscore)
    ///
    /// This checks shape only. Membership in the CVE recognition set is
    /// a separate check — see [`crate::CapcoTokenSet::is_trigraph`] (the
    /// trait method covers any known country code, not only 3-char
    /// trigraphs).
    #[inline]
    pub const fn try_new(bytes: &[u8]) -> Option<Self> {
        let count = bytes.len();
        if count < 2 || count > COUNTRY_CODE_CAPACITY {
            return None;
        }

        // Manual `while` loop: iterator adaptors are not usable in a
        // `const fn`.
        let mut storage = [0u8; COUNTRY_CODE_CAPACITY];
        let mut pos = 0;
        while pos < count {
            let byte = bytes[pos];
            if !Self::is_valid_byte(byte) {
                return None;
            }
            storage[pos] = byte;
            pos += 1;
        }

        Some(Self {
            bytes: storage,
            // Lossless: `count` was bounded by COUNTRY_CODE_CAPACITY above.
            len: count as u8,
        })
    }

    /// Return the country code as a string slice.
    ///
    /// Infallible: every value comes from [`CountryCode::try_new`]
    /// (directly, or via [`CountryCode::USA`]), which guarantees each
    /// active byte is in the CAPCO byte set — a subset of ASCII and
    /// therefore valid UTF-8.
    #[inline]
    pub fn as_str(&self) -> &str {
        // SAFETY: the fields are private and the only constructors are
        // `try_new` and constants (e.g. `CountryCode::USA`) that route
        // through `try_new` in const context. Both paths require every
        // active byte to be ASCII uppercase, ASCII digit, or
        // underscore. ASCII is a subset of valid UTF-8.
        #[allow(unsafe_code)]
        unsafe {
            std::str::from_utf8_unchecked(self.as_bytes())
        }
    }

    /// Active byte slice (excludes the zero padding).
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        let active = self.len as usize;
        &self.bytes[..active]
    }

    /// Number of active bytes, `2..=COUNTRY_CODE_CAPACITY`.
    #[inline]
    pub const fn len(&self) -> usize {
        self.len as usize
    }

    /// Always `false` — `CountryCode` invariants forbid empty codes.
    /// Exists for clippy-`len_without_is_empty` compliance.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        false
    }
}

impl std::fmt::Display for CountryCode {
    /// Writes the active bytes of the code; the zero padding is never emitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
1192
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod country_code_tests {
    use super::CountryCode;

    /// Build a code that the calling test expects to be valid.
    fn code(raw: &[u8]) -> CountryCode {
        CountryCode::try_new(raw).unwrap()
    }

    #[test]
    fn try_new_accepts_two_byte_eu() {
        let eu = code(b"EU");
        assert_eq!(eu.as_str(), "EU");
        assert_eq!(eu.len(), 2);
    }

    #[test]
    fn try_new_accepts_three_byte_trigraph() {
        let usa = code(b"USA");
        assert_eq!(usa, CountryCode::USA);
        assert_eq!(usa.as_str(), "USA");
    }

    #[test]
    fn try_new_accepts_four_byte_tetragraph() {
        let fvey = code(b"FVEY");
        assert_eq!(fvey.as_str(), "FVEY");
        assert_eq!(fvey.len(), 4);
    }

    #[test]
    fn try_new_accepts_australia_group_with_underscore() {
        let ag = code(b"AUSTRALIA_GROUP");
        assert_eq!(ag.as_str(), "AUSTRALIA_GROUP");
        assert_eq!(ag.len(), 15);
    }

    #[test]
    fn try_new_accepts_digits_in_ax2_ax3() {
        for text in ["AX2", "AX3"] {
            assert_eq!(code(text.as_bytes()).as_str(), text);
        }
    }

    #[test]
    fn try_new_rejects_too_short() {
        for text in ["", "X"] {
            assert!(CountryCode::try_new(text.as_bytes()).is_none());
        }
    }

    #[test]
    fn try_new_rejects_too_long() {
        // 17 bytes — one over capacity.
        let oversized = b"ABCDEFGHIJKLMNOPQ";
        assert!(CountryCode::try_new(oversized).is_none());
    }

    #[test]
    fn try_new_rejects_lowercase() {
        for text in ["usa", "Fvey"] {
            assert!(CountryCode::try_new(text.as_bytes()).is_none());
        }
    }

    #[test]
    fn try_new_rejects_non_ascii() {
        // 'É' encodes as two UTF-8 bytes (0xC3 0x89); the first one
        // already fails the is_valid_byte check.
        let raw = "ÉU".as_bytes();
        assert!(CountryCode::try_new(raw).is_none());
    }

    #[test]
    fn ord_matches_str_lex_for_mixed_lengths() {
        let mut all = [code(b"EU"), code(b"AUS"), CountryCode::USA, code(b"USAB")];
        all.sort();
        let sorted: Vec<&str> = all.iter().map(CountryCode::as_str).collect();
        assert_eq!(sorted, ["AUS", "EU", "USA", "USAB"]);
    }

    #[test]
    fn copy_semantics_preserved() {
        let original = CountryCode::USA;
        let duplicate = original;
        // `original` is still usable after the assignment — `Copy`, not move.
        assert_eq!(original, duplicate);
        assert_eq!(original.as_str(), duplicate.as_str());
    }

    #[test]
    fn display_renders_active_bytes_only() {
        // The Display impl must emit the active bytes and never the
        // zero padding past `len`.
        let cases = [
            (CountryCode::USA, "USA"),
            (code(b"FVEY"), "FVEY"),
            (code(b"AUSTRALIA_GROUP"), "AUSTRALIA_GROUP"),
        ];
        for (value, expected) in cases {
            assert_eq!(format!("{value}"), expected);
        }
    }

    #[test]
    fn as_bytes_excludes_zero_padding() {
        assert_eq!(CountryCode::USA.as_bytes(), b"USA");
        assert_eq!(code(b"FVEY").as_bytes(), b"FVEY");
    }

    #[test]
    fn is_empty_invariant_always_false() {
        // `try_new` rejects `len < 2`, so no constructed `CountryCode`
        // is empty. `is_empty` exists only for clippy's
        // `len_without_is_empty`; pinning it here forces a future
        // refactor that loosens `try_new` to revisit it.
        let samples = [CountryCode::USA, code(b"EU"), code(b"AUSTRALIA_GROUP")];
        for sample in samples {
            assert!(!sample.is_empty());
        }
    }

    #[test]
    fn usa_constant_matches_try_new() {
        // `pub const USA` is built via `try_new` in const context. Pin
        // the equivalence so a change to one path but not the other
        // (e.g. a normalization step added only to `try_new`) breaks
        // loudly.
        let runtime = code(b"USA");
        assert_eq!(CountryCode::USA, runtime);
        assert_eq!(CountryCode::USA.as_bytes(), runtime.as_bytes());
        assert_eq!(CountryCode::USA.len(), runtime.len());
    }
}
1324
1325// ===========================================================================
1326// SCI structural types (spec 003-sci-compartments)
1327// ===========================================================================
1328
1329/// A fully-parsed SCI category-block entry.
1330///
1331/// A banner or portion may carry multiple `SciMarking` entries separated by
1332/// `/` within one SCI category block (e.g., `//SI-G/TK-BLFH//`).
1333///
1334/// Construction is restricted to [`SciMarking::new`] (the struct is
1335/// `#[non_exhaustive]`) so new fields can be added without breaking the
1336/// parser.
1337#[non_exhaustive]
1338#[derive(Debug, Clone, PartialEq, Eq)]
1339pub struct SciMarking {
1340    /// The control-system anchor. One of the published bare control
1341    /// systems (see [`SciControlBare`]) or a structurally-parsed custom
1342    /// value.
1343    pub system: SciControlSystem,
1344
1345    /// Compartments in source order. Sort-order validation is the concern
1346    /// of CAPCO rule E033 (not the parser).
1347    pub compartments: Box<[SciCompartment]>,
1348
1349    /// If the `{system}-{first_compartment}` composite exactly matches an
1350    /// ODNI CVE value (e.g., `SI-G`, `HCS-P`, `TK-BLFH`), this records the
1351    /// matching [`SciControl`] variant. Only populated when the matching
1352    /// compartment has NO sub-compartments — sub-compartments imply the
1353    /// compound is a structural anchor rather than a CVE atom. `None`
1354    /// otherwise.
1355    pub canonical_enum: Option<SciControl>,
1356}
1357
1358impl SciMarking {
1359    /// Construct a new `SciMarking`. Used by the parser (`marque-core`) to
1360    /// populate [`IsmAttributes::sci_markings`].
1361    pub fn new(
1362        system: SciControlSystem,
1363        compartments: Box<[SciCompartment]>,
1364        canonical_enum: Option<SciControl>,
1365    ) -> Self {
1366        Self {
1367            system,
1368            compartments,
1369            canonical_enum,
1370        }
1371    }
1372}
1373
1374/// Which kind of SCI control system a [`SciMarking`] anchors on.
1375///
1376/// This is a closed set of two variants: either a published bare system
1377/// drawn from the live ODNI CVE, or an agency-allocated custom identifier
1378/// (per CAPCO-2016 §A.6 p15).
1379#[derive(Debug, Clone, PartialEq, Eq)]
1380pub enum SciControlSystem {
1381    /// One of the published bare control systems.
1382    Published(SciControlBare),
1383    /// An agency-allocated system matching `[A-Z0-9]{2,5}` (per CAPCO-2016
1384    /// §A.6 p15 `123` example). Stores the raw text exactly as it appeared
1385    /// in the source.
1386    Custom(Box<str>),
1387}
1388
/// One compartment beneath an SCI control system.
///
/// A compartment is an identifier followed by zero or more
/// sub-compartments, kept in source order. The struct is
/// `#[non_exhaustive]`, so construction goes through
/// [`SciCompartment::new`].
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct SciCompartment {
    /// Compartment identifier (alphanumeric). Example: `G` in `SI-G`.
    pub identifier: Box<str>,
    /// Sub-compartments, in source order. Example: `ABCD`, `DEFG` in
    /// `SI-G ABCD DEFG`.
    pub sub_compartments: Box<[Box<str>]>,
}

impl SciCompartment {
    /// Assemble a `SciCompartment` from an identifier and its
    /// sub-compartments. The parser uses this to fill
    /// [`SciMarking::compartments`]. Both arguments are stored as given;
    /// no validation is performed here.
    pub fn new(identifier: Box<str>, sub_compartments: Box<[Box<str>]>) -> Self {
        let compartment = Self {
            identifier,
            sub_compartments,
        };
        compartment
    }
}
1414
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    #[test]
    fn classification_ord_is_restrictiveness() {
        // Listed from least to most restrictive; each level must compare
        // strictly below its successor.
        let ascending = [
            Classification::Unclassified,
            Classification::Restricted,
            Classification::Confidential,
            Classification::Secret,
            Classification::TopSecret,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    #[test]
    fn classification_banner_portion_round_trip() {
        // Checks only that both renderings are non-empty for every level.
        let levels = [
            Classification::Unclassified,
            Classification::Restricted,
            Classification::Confidential,
            Classification::Secret,
            Classification::TopSecret,
        ];
        for level in levels {
            assert!(!level.banner_str().is_empty());
            assert!(!level.portion_str().is_empty());
        }
    }

    #[test]
    fn nato_us_equivalent_mapping() {
        let expectations = [
            (
                NatoClassification::CosmicTopSecret,
                Classification::TopSecret,
            ),
            (NatoClassification::NatoSecret, Classification::Secret),
            (
                NatoClassification::NatoRestricted,
                Classification::Restricted,
            ),
        ];
        for (nato, us) in expectations {
            assert_eq!(nato.us_equivalent(), us);
        }
    }

    #[test]
    fn nato_banner_portion_round_trip() {
        // Checks only that both renderings are non-empty for every level.
        let levels = [
            NatoClassification::NatoUnclassified,
            NatoClassification::NatoRestricted,
            NatoClassification::NatoConfidential,
            NatoClassification::NatoConfidentialAtomal,
            NatoClassification::NatoSecret,
            NatoClassification::NatoSecretAtomal,
            NatoClassification::CosmicTopSecret,
            NatoClassification::CosmicTopSecretAtomal,
            NatoClassification::CosmicTopSecretBohemia,
            NatoClassification::CosmicTopSecretBalk,
        ];
        for level in levels {
            assert!(!level.banner_str().is_empty());
            assert!(!level.portion_str().is_empty());
        }
    }

    #[test]
    fn us_classification_convenience_returns_us() {
        let marking = MarkingClassification::Us(Classification::Secret);
        let attrs = IsmAttributes {
            classification: Some(marking),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), Some(Classification::Secret));
    }

    #[test]
    fn us_classification_convenience_returns_none_for_nato() {
        let marking = MarkingClassification::Nato(NatoClassification::NatoSecret);
        let attrs = IsmAttributes {
            classification: Some(marking),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), None);
    }

    #[test]
    fn us_classification_convenience_returns_resolved_for_conflict() {
        // A conflict keeps the foreign part but reports the US level.
        let foreign = ForeignClassification::Nato(NatoClassification::CosmicTopSecret);
        let marking = MarkingClassification::Conflict {
            us: Classification::TopSecret,
            foreign: Box::new(foreign),
        };
        let attrs = IsmAttributes {
            classification: Some(marking),
            ..Default::default()
        };
        assert_eq!(attrs.us_classification(), Some(Classification::TopSecret));
    }
}