marque_ism/attrs.rs
1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
5//! `IsmAttributes` — the canonical in-memory representation of a classification marking.
6//!
7//! Mirrors the IC ISM XML attribute model. Every source format (free text, XML, web forms)
8//! normalizes into this struct before rule validation.
9//!
10//! # Type design
11//! Multi-value fields use `Box<[T]>` rather than `Vec<T>` to avoid over-allocation
12//! after parsing. Most markings have 0–4 values per field.
13//!
14//! # Classification systems
15//!
16//! A marking carries exactly one classification system: US, FGI (non-US),
17//! NATO, or JOINT. This is represented by [`MarkingClassification`]. Non-US
18//! classifications start with `//` (the US classification slot is empty).
19//!
20//! When the parser encounters two classification systems in one marking
21//! (e.g., `SECRET//NATO SECRET//NOFORN`), it resolves to
22//! [`MarkingClassification::Conflict`] — US wins at the greater of the two
23//! levels, and the foreign part is preserved for rule-generated fixes.
24//!
25//! # Code generation
26//! CVE enum variants (`SciControl`, `DissemControl`, `DeclassExemption`) are
27//! generated by `build.rs` from ODNI CVE XML files and re-exported from
28//! `crate::generated::values`.
29//!
30//! SAR is NOT code-generated — SAR program identifiers are agency-assigned
31//! codewords, not a closed vocabulary. SAR is modeled structurally via
32//! [`SarMarking`] / [`SarProgram`] / [`SarCompartment`].
33
34use crate::date::IsmDate;
35use crate::generated::values;
36use crate::span::Span;
37
38// Re-export generated enum types for convenience.
39pub use values::{DeclassExemption, DissemControl, SciControl, SciControlBare};
40
41/// Canonical in-memory representation of a classification marking.
42///
43/// Produced by `marque-core::parser` from scanner candidates.
44/// Consumed by `marque-rules::Rule` implementations for validation.
45///
46/// # Block ordering (CAPCO)
47///
48/// Fields are ordered per CAPCO block sequence:
49/// Classification → SCI → SAR → FGI marker → Dissem (incl. REL TO)
50#[non_exhaustive]
51#[derive(Debug, Clone, Default, PartialEq, Eq)]
52pub struct IsmAttributes {
53 /// The marking's classification system and level.
54 /// `None` means parsing failed to identify a classification.
55 pub classification: Option<MarkingClassification>,
56
57 /// SCI controls (e.g., SI, TK, HCS-P). Ordered per CAPCO block ordering.
58 ///
59 /// This is the *enum projection* populated by the parser's CVE exact-match
60 /// path. Retained for back-compat with existing rules (E010, E011). New
61 /// rules that need compartment / sub-compartment structure should read
62 /// [`IsmAttributes::sci_markings`] instead.
63 pub sci_controls: Box<[SciControl]>,
64
65 /// Structural view of SCI category-block entries.
66 ///
67 /// Each entry corresponds to one `/`-separated marking within an SCI
68 /// category block (e.g., `//SI-G/TK-BLFH//` yields two `SciMarking`
69 /// entries). Populated alongside `sci_controls`; `sci_markings` is the
70 /// authoritative source for rules that inspect compartments or
71 /// sub-compartments. See spec 003-sci-compartments.
72 pub sci_markings: Box<[SciMarking]>,
73
74 /// Special Access Required block, if present. Only one SAR block is
75 /// permitted per marking per §A.6; cardinality is `Option`, not `Vec`.
76 /// See [`SarMarking`] for the structural representation.
77 pub sar_markings: Option<SarMarking>,
78
79 /// Atomic Energy Act markings (CAPCO Register §6).
80 ///
81 /// Includes RD, FRD, CNWDI, TFNI, SIGMA, and UCNI variants.
82 /// Positioned between SAR and FGI in CAPCO block ordering.
83 pub aea_markings: Box<[AeaMarking]>,
84
85 /// FGI block in US-classified markings: `FGI` or `FGI [LIST]`.
86 ///
87 /// Present when a US-classified document references foreign government
88 /// information. This is the *marker* in the banner/portion — distinct
89 /// from [`MarkingClassification::Fgi`], which means the marking IS
90 /// foreign-classified.
91 ///
92 /// `None` when no FGI marker is present.
93 pub fgi_marker: Option<FgiMarker>,
94
95 /// Dissemination controls (e.g., NOFORN, RELIDO, ORCON, FISA).
96 pub dissem_controls: Box<[DissemControl]>,
97
98 /// Non-IC dissemination controls (e.g., LIMDIS, SBU, LES, SSI).
99 ///
100 /// Separate authority framework (CAPCO Register §9), distinct from IC
101 /// dissem controls. In classified documents these are generally portion-
102 /// only and stripped from banners, but some values propagate to the
103 /// classified banner; see [`NonIcDissem::propagates_to_classified_banner`]
104 /// for the authoritative rule. On unclassified pages they propagate to
105 /// the banner. LES-NF and SBU-NF carry NOFORN treatment even when
106 /// stripped.
107 pub non_ic_dissem: Box<[NonIcDissem]>,
108
109 /// REL TO country / country-group codes. USA must be present and
110 /// first when the marking targets a US release.
111 ///
112 /// Holds the full CAPCO country-code surface — trigraphs (`USA`,
113 /// `GBR`), tetragraphs / country-group codes (`FVEY`, `ACGU`,
114 /// `NATO`, `RSMA`, …), and the longer registered codes (`EU`,
115 /// `AUSTRALIA_GROUP`). Tetragraph membership expansion (FVEY →
116 /// {AUS, CAN, GBR, NZL, USA}) happens at banner-roll-up time in
117 /// [`PageContext::expected_rel_to`], not at parse time, so this
118 /// list preserves the source vocabulary as written.
119 ///
120 /// Structurally part of the dissem block (comma-delimited), but
121 /// kept as a typed field for E002 and REL TO validation rules.
122 pub rel_to: Box<[CountryCode]>,
123
124 /// Declassification date from CAB (ISM precision-tier union).
125 ///
126 /// Typed as [`IsmDate`] to preserve the precision tier from the original
127 /// source. In CAPCO text markings the parser accepts:
128 /// - `YYYY` (4-digit year → [`IsmDate::Year`])
129 /// - `YYYYMMDD` (8-digit no-hyphen → [`IsmDate::Date`])
130 /// - ISO 8601 with hyphens (`YYYY-MM-DD`, etc.) for XML-sourced markings.
131 ///
132 /// `Year(y)` represents the entire calendar year — its end-of-span is
133 /// December 31 of year `y`, which is later than any date in that year.
134 /// Use [`IsmDate::end_cmp`] when determining the most-conservative
135 /// (furthest-out) date across portions.
136 pub declassify_on: Option<IsmDate>,
137
138 /// Free-text "Classified By" identifier from CAB.
139 pub classified_by: Option<Box<str>>,
140
141 /// Free-text "Derived From" source from CAB.
142 pub derived_from: Option<Box<str>>,
143
144 /// Declassification exemption code from CAB (e.g., 25X1, 50X1-HUM).
145 pub declass_exemption: Option<DeclassExemption>,
146
147 /// Per-token byte spans into the *original source buffer*, recorded by
148 /// the parser as it walks the marking string. Phase 3 added this so
149 /// rules can point at the exact offending byte range instead of the
150 /// whole marking. Empty for CAB markings (CAB parsing is line-structured
151 /// and doesn't go through the token-walking path).
152 ///
153 /// Indexing convention: `token_spans` is in document order. To find the
154 /// span for the Nth `DissemControl`, walk the slice and pick the Nth
155 /// entry whose `kind == TokenKind::DissemControl`.
156 pub token_spans: Box<[TokenSpan]>,
157}
158
159impl IsmAttributes {
160 /// Convenience accessor: returns the US classification level if this
161 /// marking uses the US or Conflict classification system.
162 ///
163 /// Returns `None` for pure FGI, NATO, or JOINT markings (use
164 /// `self.classification` directly for those).
165 pub fn us_classification(&self) -> Option<Classification> {
166 match self.classification {
167 Some(MarkingClassification::Us(c)) => Some(c),
168 Some(MarkingClassification::Conflict { us, .. }) => Some(us),
169 _ => None,
170 }
171 }
172}
173
174/// One parser-recognized token plus its byte span in the original source.
175///
176/// Used by Phase 3 rules to surface byte-precise diagnostic spans without
177/// re-parsing the source. The `text` field carries the literal token bytes
178/// so rules that need the source content (E006, E007, E008 against migration
179/// keys) can look up entries without threading `&[u8] source` through every
180/// `Rule::check` signature.
181#[derive(Debug, Clone, PartialEq, Eq)]
182pub struct TokenSpan {
183 pub kind: TokenKind,
184 pub span: Span,
185 pub text: Box<str>,
186}
187
/// Category tag carried by every `TokenSpan`. Phase 3 rules filter
/// token-span lookups on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// A classification level token (S, SECRET, TS, TOP SECRET, ...).
    Classification,
    /// An SCI control token (SI, TK, HCS, ...).
    ///
    /// Produced by the existing CVE exact-match path. The structural
    /// parsing added by spec 003-sci-compartments uses
    /// [`TokenKind::SciSystem`], [`TokenKind::SciCompartment`], and
    /// [`TokenKind::SciSubCompartment`] instead.
    SciControl,
    /// Structural SCI control-system anchor (e.g., `SI`, `TK`, `123`).
    ///
    /// Produced by the structural SCI parser from spec
    /// 003-sci-compartments, alongside the existing
    /// [`TokenKind::SciControl`] token for exact-CVE matches.
    SciSystem,
    /// Structural SCI compartment identifier (e.g., `G` in `SI-G`).
    SciCompartment,
    /// Structural SCI sub-compartment identifier (e.g., `ABCD` in `SI-G ABCD`).
    SciSubCompartment,
    /// Legacy SAR identifier token. Replaced by `SarIndicator` +
    /// `SarProgram` + `SarCompartment` + `SarSubCompartment` once the
    /// structural SAR model landed. The parser no longer emits it.
    #[deprecated(note = "use SarIndicator/SarProgram/SarCompartment/SarSubCompartment")]
    SarIdentifier,
    /// SAR category indicator — `SAR-` or `SPECIAL ACCESS REQUIRED-`.
    /// There is one per SAR block; block-ordering rules anchor on it.
    SarIndicator,
    /// SAR program identifier (e.g., `BP`, `BUTTER POPCORN`).
    SarProgram,
    /// SAR compartment identifier (e.g., `J12`).
    SarCompartment,
    /// SAR sub-compartment identifier (e.g., `J54`).
    SarSubCompartment,
    /// Atomic Energy Act marking token (RD, FRD, CNWDI, TFNI, SIGMA ##, etc.).
    AeaMarking,
    /// FGI marker token (`FGI`, `FGI DEU`, `FGI DEU GBR`).
    FgiMarker,
    /// Dissemination control token (NOFORN, NF, ORCON, OC, RELIDO, ...).
    DissemControl,
    /// Non-IC dissemination control token (LIMDIS, DS, SBU, LES, SSI, ...).
    NonIcDissem,
    /// A single REL TO country trigraph (USA, GBR, AUS, ...) — one token
    /// per entry, never the whole REL TO list.
    RelToTrigraph,
    /// The complete `REL TO ...` block text, kept so E013 can inspect the
    /// raw source for delimiter errors (spaces instead of commas).
    RelToBlock,
    /// Declassification exemption code in CAB or banner (25X1, 50X1-HUM).
    DeclassExemption,
    /// Declassification date in CAB or banner (YYYYMMDD or YYYY).
    DeclassDate,
    /// The `//` separator between blocks, kept so E004 can detect extra or
    /// missing separator runs.
    Separator,
    /// A non-empty block that matched no known token kind. E008 raises one
    /// diagnostic per `Unknown` entry.
    Unknown,
}
249
250// ===========================================================================
251// SAR (Special Access Required) structural types
252// ===========================================================================
253//
254// See CAPCO Register §H.5 (pp 99–102) and §A.6 (pp 15–17) for the source
255// grammar. SAR identifiers are agency-assigned codewords and cannot be
256// enumerated — this type hierarchy validates shape and roll-up rather than
257// membership.
258
/// The full SAR category block parsed out of one marking.
///
/// Built by `marque-core::parser::parse_sar_category` (P2) and stored on
/// [`IsmAttributes::sar_markings`]. §A.6 allows only one SAR block per
/// marking; several `//SAR-…//` blocks in the same marking produce an
/// `E030 sar-indicator-repeat` diagnostic.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SarMarking {
    /// Which SAR indicator form the source marking used.
    pub indicator: SarIndicator,
    /// Programs, in the order they appeared. Sort order is checked by
    /// rule E028, not at parse time.
    pub programs: Box<[SarProgram]>,
}

impl SarMarking {
    /// Build a [`SarMarking`] from an indicator form and its programs.
    /// `programs` SHOULD be in source order — rule E028 validates sorting,
    /// this constructor does not.
    pub fn new(indicator: SarIndicator, programs: Box<[SarProgram]>) -> Self {
        SarMarking { indicator, programs }
    }
}

/// The SAR indicator form used by a marking. Banner lines may use either
/// form; portion marks may only use `Abbrev` (enforced by rule E026).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SarIndicator {
    /// `SAR-` (portion and banner).
    Abbrev,
    /// `SPECIAL ACCESS REQUIRED-` (banner only).
    Full,
}

/// One Special Access Program, with zero or more compartments.
///
/// Identifier forms (§A.6 grammar):
/// - Abbreviated: 2–3 alphanumeric characters (`BP`, `CD`, `XR`).
/// - Full (nickname): uppercase letters with optional spaces
///   (`BUTTER POPCORN`).
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SarProgram {
    /// Program identifier exactly as written in the source.
    pub identifier: Box<str>,
    /// Compartments in source order; possibly empty.
    pub compartments: Box<[SarCompartment]>,
}

impl SarProgram {
    /// Build a [`SarProgram`]; pass an empty slice when the program has no
    /// compartments.
    pub fn new(identifier: Box<str>, compartments: Box<[SarCompartment]>) -> Self {
        SarProgram { identifier, compartments }
    }
}

/// A compartment inside a SAR program, optionally with sub-compartments.
///
/// §H.5 p100 explicitly forbids depicting hierarchy below the
/// sub-compartment level.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SarCompartment {
    /// Compartment identifier (alphanumeric).
    pub identifier: Box<str>,
    /// Sub-compartments in source order; possibly empty.
    pub sub_compartments: Box<[Box<str>]>,
}

impl SarCompartment {
    /// Build a [`SarCompartment`]; pass an empty slice when there are no
    /// sub-compartments.
    pub fn new(identifier: Box<str>, sub_compartments: Box<[Box<str>]>) -> Self {
        SarCompartment { identifier, sub_compartments }
    }
}
344
345// ===========================================================================
346// Classification types
347// ===========================================================================
348
349/// The classification system and level for a marking.
350///
351/// A marking has exactly one classification system. When the parser finds
352/// two (e.g., `SECRET//NATO SECRET//...`), it resolves to [`Conflict`](Self::Conflict).
353#[derive(Debug, Clone, PartialEq, Eq)]
354pub enum MarkingClassification {
355 /// US IC classification.
356 Us(Classification),
357 /// Non-US (FGI) classification: `//GBR S//...`
358 Fgi(FgiClassification),
359 /// NATO classification: `//NS//...`
360 Nato(NatoClassification),
361 /// JOINT classification (US co-owned): `//JOINT S USA GBR//...`
362 Joint(JointClassification),
363 /// Parser found two classification systems in one marking.
364 ///
365 /// US wins, upgraded to the greater of the two levels.
366 /// The foreign part is preserved so rules can suggest the FGI fix.
367 ///
368 /// Example: `SECRET//COSMIC TOP SECRET//REL TO USA, NATO`
369 /// → `us: TopSecret`, `foreign: Nato(CosmicTopSecret)`
370 /// → fix: `TOP SECRET//FGI NATO//REL TO USA, NATO`
371 Conflict {
372 /// Resolved US classification (max of both levels).
373 us: Classification,
374 /// The foreign classification that should become an FGI marker.
375 foreign: Box<ForeignClassification>,
376 },
377}
378
379impl MarkingClassification {
380 /// The effective classification level for ordering purposes, regardless of
381 /// classification system.
382 ///
383 /// NATO levels are mapped to their US equivalents via
384 /// [`NatoClassification::us_equivalent`]. All systems use the
385 /// [`Classification`] ladder for comparison so that `Iterator::max()` on
386 /// a mixed set of portions returns the most restrictive level overall.
387 pub fn effective_level(&self) -> Classification {
388 match self {
389 Self::Us(c) => *c,
390 Self::Fgi(f) => f.level,
391 Self::Nato(n) => n.us_equivalent(),
392 Self::Joint(j) => j.level,
393 Self::Conflict { us, .. } => *us,
394 }
395 }
396}
397
398impl Default for MarkingClassification {
399 fn default() -> Self {
400 Self::Us(Classification::Unclassified)
401 }
402}
403
404/// The non-US classification in a [`MarkingClassification::Conflict`].
405///
406/// Preserves enough information for rules to generate the FGI fix:
407/// the foreign system, its level, and any associated countries.
408#[derive(Debug, Clone, PartialEq, Eq)]
409pub enum ForeignClassification {
410 Fgi(FgiClassification),
411 Nato(NatoClassification),
412 Joint(JointClassification),
413}
414
415// ---------------------------------------------------------------------------
416// Classification level (US ladder + RESTRICTED for foreign interop)
417// ---------------------------------------------------------------------------
418
/// Classification level, ordered from least to most restrictive:
/// U < R < C < S < TS.
///
/// `Restricted` exists for foreign-origin markings — many non-US
/// classification systems (and NATO) place a RESTRICTED level between
/// UNCLASSIFIED and CONFIDENTIAL.
///
/// `Ord` is derived from the variant order, so `Iterator::max()` returns
/// the most restrictive level. Do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Classification {
    Unclassified,
    Restricted,
    Confidential,
    Secret,
    TopSecret,
}

impl Classification {
    /// Full-word form used on banner lines (never abbreviated).
    pub fn banner_str(self) -> &'static str {
        match self {
            Classification::TopSecret => "TOP SECRET",
            Classification::Secret => "SECRET",
            Classification::Confidential => "CONFIDENTIAL",
            Classification::Restricted => "RESTRICTED",
            Classification::Unclassified => "UNCLASSIFIED",
        }
    }

    /// Abbreviated form used in portion markings.
    pub fn portion_str(self) -> &'static str {
        match self {
            Classification::TopSecret => "TS",
            Classification::Secret => "S",
            Classification::Confidential => "C",
            Classification::Restricted => "R",
            Classification::Unclassified => "U",
        }
    }
}
459
460// ---------------------------------------------------------------------------
461// FGI classification (non-US, country-prefixed)
462// ---------------------------------------------------------------------------
463
464/// Non-US (FGI) classification.
465///
466/// Two forms exist:
467///
468/// - **Source-acknowledged**: country trigraph(s) identify the originator.
469/// `//GBR S//REL TO USA, GBR`
470/// - **Source-concealed**: `FGI` replaces the country trigraph(s) when
471/// the originating country is sensitive. `//FGI S//REL TO USA, GBR`
472/// An empty `countries` list indicates source-concealed FGI.
473///
474/// Countries are space-delimited in the source marking.
475///
476/// # Banner aggregation
477///
478/// If a document contains **any** source-concealed FGI portions alongside
479/// source-acknowledged FGI portions, the banner must use `FGI` without
480/// country codes — revealing the country list would compromise the
481/// concealed source. This rule is enforced at the `PageContext` level
482/// during banner validation.
483#[derive(Debug, Clone, PartialEq, Eq)]
484pub struct FgiClassification {
485 /// Originating countries (space-delimited in source).
486 /// Empty for source-concealed FGI (`//FGI S//...`).
487 pub countries: Box<[CountryCode]>,
488 /// Classification level (includes RESTRICTED).
489 pub level: Classification,
490}
491
492// ---------------------------------------------------------------------------
493// NATO classification
494// ---------------------------------------------------------------------------
495
496/// NATO classification ladder with optional SAP designation.
497///
498/// NATO uses a separate classification system governed by treaty.
499/// Not everyone with a US clearance is cleared for NATO; many US systems
500/// are not approved for NATO information.
501///
502/// # NATO SAP markings
503///
504/// Three NATO SAP programs exist, each with specific constraints:
505///
506/// - **ATOMAL**: Applies to CTS, NS, and NC levels. Space-separated in
507/// banner (`COSMIC TOP SECRET ATOMAL`). Portion marks: CTSA, NSAT, NCA.
508/// Alternative portion forms CTS-A, NS-A, NC-A also appear in practice.
509/// - **BOHEMIA**: CTS-only. Hyphenated (`COSMIC TOP SECRET-BOHEMIA` → `CTS-B`).
510/// - **BALK**: CTS-only, exercise replacement for BOHEMIA.
511/// Hyphenated (`COSMIC TOP SECRET-BALK` → `CTS-BALK`).
512///
513/// Per the CAPCO Register, bare `COSMIC TOP SECRET` requires either
514/// BOHEMIA or BALK — standalone CTS without a SAP suffix is an error.
515#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
516pub enum NatoClassification {
517 NatoUnclassified, // NU
518 NatoRestricted, // NR
519 NatoConfidential, // NC
520 NatoConfidentialAtomal, // NCA (alt: NC-A)
521 NatoSecret, // NS
522 NatoSecretAtomal, // NSAT (alt: NS-A)
523 CosmicTopSecret, // CTS (requires BOHEMIA or BALK)
524 CosmicTopSecretAtomal, // CTSA (alt: CTS-A)
525 CosmicTopSecretBohemia, // CTS-B
526 CosmicTopSecretBalk, // CTS-BALK
527}
528
529impl NatoClassification {
530 /// Banner form (full words, as used in banner marking lines).
531 pub fn banner_str(self) -> &'static str {
532 match self {
533 Self::NatoUnclassified => "NATO UNCLASSIFIED",
534 Self::NatoRestricted => "NATO RESTRICTED",
535 Self::NatoConfidential => "NATO CONFIDENTIAL",
536 Self::NatoConfidentialAtomal => "NATO CONFIDENTIAL ATOMAL",
537 Self::NatoSecret => "NATO SECRET",
538 Self::NatoSecretAtomal => "NATO SECRET ATOMAL",
539 Self::CosmicTopSecret => "COSMIC TOP SECRET",
540 Self::CosmicTopSecretAtomal => "COSMIC TOP SECRET ATOMAL",
541 Self::CosmicTopSecretBohemia => "COSMIC TOP SECRET-BOHEMIA",
542 Self::CosmicTopSecretBalk => "COSMIC TOP SECRET-BALK",
543 }
544 }
545
546 /// Portion form (primary abbreviation from the CAPCO Register).
547 pub fn portion_str(self) -> &'static str {
548 match self {
549 Self::NatoUnclassified => "NU",
550 Self::NatoRestricted => "NR",
551 Self::NatoConfidential => "NC",
552 Self::NatoConfidentialAtomal => "NCA",
553 Self::NatoSecret => "NS",
554 Self::NatoSecretAtomal => "NSAT",
555 Self::CosmicTopSecret => "CTS",
556 Self::CosmicTopSecretAtomal => "CTSA",
557 Self::CosmicTopSecretBohemia => "CTS-B",
558 Self::CosmicTopSecretBalk => "CTS-BALK",
559 }
560 }
561
562 /// The base classification level (without SAP), for ordering comparisons.
563 pub fn base_level(self) -> NatoLevel {
564 match self {
565 Self::NatoUnclassified => NatoLevel::NatoUnclassified,
566 Self::NatoRestricted => NatoLevel::NatoRestricted,
567 Self::NatoConfidential | Self::NatoConfidentialAtomal => NatoLevel::NatoConfidential,
568 Self::NatoSecret | Self::NatoSecretAtomal => NatoLevel::NatoSecret,
569 Self::CosmicTopSecret
570 | Self::CosmicTopSecretAtomal
571 | Self::CosmicTopSecretBohemia
572 | Self::CosmicTopSecretBalk => NatoLevel::CosmicTopSecret,
573 }
574 }
575
576 /// Map the NATO level to the equivalent US classification for conflict
577 /// resolution (US wins at the greater of the two).
578 pub fn us_equivalent(self) -> Classification {
579 match self.base_level() {
580 NatoLevel::NatoUnclassified => Classification::Unclassified,
581 NatoLevel::NatoRestricted => Classification::Restricted,
582 NatoLevel::NatoConfidential => Classification::Confidential,
583 NatoLevel::NatoSecret => Classification::Secret,
584 NatoLevel::CosmicTopSecret => Classification::TopSecret,
585 }
586 }
587}
588
/// NATO classification level with the SAP designation removed, used for
/// ordering comparisons.
///
/// `Ord` is derived from the variant order (lowest level first), so the
/// variants must stay in this sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NatoLevel {
    NatoUnclassified,
    NatoRestricted,
    NatoConfidential,
    NatoSecret,
    CosmicTopSecret,
}
598
599// ---------------------------------------------------------------------------
600// JOINT classification
601// ---------------------------------------------------------------------------
602
603/// JOINT classification: US is co-owner with other nations.
604///
605/// `//JOINT S USA GBR//REL TO USA, GBR`
606///
607/// Country list is space-delimited (NOT comma-delimited like REL TO).
608/// Must include USA. All JOINT participants must also appear in REL TO.
609#[derive(Debug, Clone, PartialEq, Eq)]
610pub struct JointClassification {
611 /// Classification level (US ladder, includes RESTRICTED).
612 pub level: Classification,
613 /// Co-owning countries (space-delimited in source). Must include USA.
614 pub countries: Box<[CountryCode]>,
615}
616
617// ---------------------------------------------------------------------------
618// Atomic Energy Act markings
619// ---------------------------------------------------------------------------
620
621/// Atomic Energy Act information markings (CAPCO Register §6).
622///
623/// AEA markings appear as a single `//`-delimited block in the marking string,
624/// using hyphen separators for compound forms:
625/// - `SECRET//RD//NOFORN` — RD alone
626/// - `SECRET//RD-CNWDI//NOFORN` — RD with CNWDI modifier
627/// - `SECRET//RD-SIGMA 20//NOFORN` — RD with SIGMA compartment
628/// - `SECRET//RD-SIGMA 18 20//NOFORN` — RD with multiple SIGMAs
629/// - `SECRET//FRD//NOFORN` — FRD alone
630/// - `SECRET//FRD-SIGMA 14//NOFORN` — FRD with SIGMA
631///
632/// Standalone (non-compound) markings:
633/// - `UNCLASSIFIED//DOD UCNI` / `(U//DCNI)`
634/// - `UNCLASSIFIED//DOE UCNI` / `(U//UCNI)`
635/// - `SECRET//TFNI//NOFORN` / `(S//TFNI//NF)`
636///
637/// # Key rules (CAPCO-2016)
638///
639/// - RD and FRD always require NOFORN unless a sharing agreement exists
640/// (default severity: Error, configurable to Warn via `.marque.toml`)
641/// - CNWDI may only be used with TS or S RD (not standalone, not with FRD)
642/// - SIGMA 14, 15, 18, 20 may only be used with TS or S RD or FRD
643/// - RD takes precedence over FRD and TFNI in both banners and portions
644/// - SIGMA numbers must be in numerical order, space-separated
645#[derive(Debug, Clone, PartialEq, Eq, Hash)]
646#[non_exhaustive]
647pub enum AeaMarking {
648 /// Compound RD block: `RD`, `RD-CNWDI`, `RD-SIGMA 20`, `RD-CNWDI-SIGMA 18 20`
649 Rd(RdBlock),
650 /// Compound FRD block: `FRD`, `FRD-SIGMA 14`
651 Frd(FrdBlock),
652 /// DOD UCNI / DCNI — standalone, unclassified only
653 DodUcni,
654 /// DOE UCNI / UCNI — standalone, unclassified only
655 DoeUcni,
656 /// TFNI — standalone
657 Tfni,
658}
659
660/// Restricted Data block with optional modifiers.
661///
662/// Rendered as `RD`, `RD-CNWDI`, `RD-SIGMA 20`, or `RD-CNWDI-SIGMA 18 20`.
663#[derive(Debug, Clone, PartialEq, Eq, Hash)]
664pub struct RdBlock {
665 /// Whether CNWDI is present. Only valid with TS or S classification.
666 pub cnwdi: bool,
667 /// SIGMA compartment numbers (14, 15, 18, 20). Must be in numerical order.
668 /// Empty if no SIGMA designation.
669 pub sigma: Box<[u8]>,
670}
671
672impl Default for RdBlock {
673 fn default() -> Self {
674 Self {
675 cnwdi: false,
676 sigma: Box::new([]),
677 }
678 }
679}
680
681/// Formerly Restricted Data block with optional SIGMA modifier.
682///
683/// Rendered as `FRD` or `FRD-SIGMA 14`.
684#[derive(Debug, Clone, PartialEq, Eq, Hash)]
685pub struct FrdBlock {
686 /// SIGMA compartment numbers. Must be in numerical order.
687 /// Empty if no SIGMA designation.
688 pub sigma: Box<[u8]>,
689}
690
691impl Default for FrdBlock {
692 fn default() -> Self {
693 Self {
694 sigma: Box::new([]),
695 }
696 }
697}
698
699impl AeaMarking {
700 /// Banner-line form.
701 pub fn banner_str(&self) -> String {
702 match self {
703 Self::Rd(rd) => {
704 let mut s = "RD".to_owned();
705 if rd.cnwdi {
706 s.push_str("-CNWDI");
707 }
708 if !rd.sigma.is_empty() {
709 s.push_str("-SIGMA ");
710 let nums: Vec<String> = rd.sigma.iter().map(|n| n.to_string()).collect();
711 s.push_str(&nums.join(" "));
712 }
713 s
714 }
715 Self::Frd(frd) => {
716 let mut s = "FRD".to_owned();
717 if !frd.sigma.is_empty() {
718 s.push_str("-SIGMA ");
719 let nums: Vec<String> = frd.sigma.iter().map(|n| n.to_string()).collect();
720 s.push_str(&nums.join(" "));
721 }
722 s
723 }
724 Self::DodUcni => "DOD UCNI".to_owned(),
725 Self::DoeUcni => "DOE UCNI".to_owned(),
726 Self::Tfni => "TFNI".to_owned(),
727 }
728 }
729
730 /// Portion mark form.
731 pub fn portion_str(&self) -> String {
732 match self {
733 Self::Rd(rd) => {
734 let mut s = "RD".to_owned();
735 if rd.cnwdi {
736 s.push_str("-CNWDI");
737 }
738 if !rd.sigma.is_empty() {
739 s.push_str("-SG ");
740 let nums: Vec<String> = rd.sigma.iter().map(|n| n.to_string()).collect();
741 s.push_str(&nums.join(" "));
742 }
743 s
744 }
745 Self::Frd(frd) => {
746 let mut s = "FRD".to_owned();
747 if !frd.sigma.is_empty() {
748 s.push_str("-SG ");
749 let nums: Vec<String> = frd.sigma.iter().map(|n| n.to_string()).collect();
750 s.push_str(&nums.join(" "));
751 }
752 s
753 }
754 Self::DodUcni => "DCNI".to_owned(),
755 Self::DoeUcni => "UCNI".to_owned(),
756 Self::Tfni => "TFNI".to_owned(),
757 }
758 }
759
760 /// Parse a `//`-delimited AEA block from either banner or portion form.
761 ///
762 /// Handles compound tokens: `RD`, `RD-CNWDI`, `RD-SIGMA 20`,
763 /// `RD-CNWDI-SIGMA 18 20`, `FRD`, `FRD-SIGMA 14`, etc.
764 pub fn parse(s: &str) -> Option<Self> {
765 // Standalone non-compound markings.
766 match s {
767 "DOD UCNI" | "DCNI" => return Some(Self::DodUcni),
768 "DOE UCNI" | "UCNI" => return Some(Self::DoeUcni),
769 "TFNI" | "TRANSCLASSIFIED FOREIGN NUCLEAR INFORMATION" => return Some(Self::Tfni),
770 _ => {}
771 }
772
773 // RD compound block: RD, RD-CNWDI, RD-SIGMA ##, RD-CNWDI-SIGMA ##,
774 // RESTRICTED DATA, RESTRICTED DATA-CNWDI, etc.
775 if s == "RD" || s == "RESTRICTED DATA" {
776 return Some(Self::Rd(RdBlock::default()));
777 }
778 if let Some(rest) = s
779 .strip_prefix("RD-")
780 .or_else(|| s.strip_prefix("RESTRICTED DATA-"))
781 {
782 return Self::parse_rd_modifiers(rest);
783 }
784
785 // FRD compound block: FRD, FRD-SIGMA ##,
786 // FORMERLY RESTRICTED DATA, etc.
787 if s == "FRD" || s == "FORMERLY RESTRICTED DATA" {
788 return Some(Self::Frd(FrdBlock::default()));
789 }
790 if let Some(rest) = s
791 .strip_prefix("FRD-")
792 .or_else(|| s.strip_prefix("FORMERLY RESTRICTED DATA-"))
793 {
794 return Self::parse_frd_modifiers(rest);
795 }
796
797 None
798 }
799
800 /// Parse RD modifiers after the `RD-` prefix.
801 /// Handles: `CNWDI`, `SIGMA ##`, `CNWDI-SIGMA ##`, `SG ##`, `CNWDI-SG ##`.
802 fn parse_rd_modifiers(s: &str) -> Option<Self> {
803 let mut cnwdi = false;
804 let mut rest = s;
805
806 // Check for CNWDI prefix.
807 if let Some(after) = rest.strip_prefix("CNWDI") {
808 cnwdi = true;
809 rest = after.strip_prefix('-').unwrap_or(after);
810 } else if rest == "N" {
811 // DoD shorthand: RD-N means RD-CNWDI (per CAPCO-2016 §6)
812 return Some(Self::Rd(RdBlock {
813 cnwdi: true,
814 sigma: Box::new([]),
815 }));
816 }
817
818 // Check for SIGMA/SG.
819 let sigma = parse_sigma_numbers(rest);
820
821 if rest.is_empty() || !sigma.is_empty() {
822 Some(Self::Rd(RdBlock {
823 cnwdi,
824 sigma: sigma.into(),
825 }))
826 } else {
827 None
828 }
829 }
830
831 /// Parse FRD modifiers after the `FRD-` prefix.
832 /// Handles: `SIGMA ##`, `SG ##`.
833 fn parse_frd_modifiers(s: &str) -> Option<Self> {
834 let sigma = parse_sigma_numbers(s);
835 if !sigma.is_empty() {
836 Some(Self::Frd(FrdBlock {
837 sigma: sigma.into(),
838 }))
839 } else {
840 None
841 }
842 }
843}
844
/// Parse SIGMA/SG numbers from a string like `SIGMA 18 20` or `SG 14`.
///
/// `s` must start with `SIGMA ` or `SG ` (keyword plus one space). The
/// remainder is split on whitespace, and every token must consist solely
/// of ASCII digits and fit in a `u8`.
///
/// Returns the numbers in source order. Returns an empty vector when the
/// keyword is missing, when no numbers follow it, or when *any* token is
/// not a valid number. The list is rejected as a whole in that case so a
/// malformed marking such as `SIGMA 18 XX` is never silently read as
/// `SIGMA 18`.
fn parse_sigma_numbers(s: &str) -> Vec<u8> {
    let rest = s
        .strip_prefix("SIGMA ")
        .or_else(|| s.strip_prefix("SG "))
        .unwrap_or("");

    rest.split_whitespace()
        .map(|tok| {
            // `str::parse::<u8>` tolerates a leading `+`; insist on plain
            // digits so `+18` is not treated as a SIGMA number.
            if tok.bytes().all(|b| b.is_ascii_digit()) {
                tok.parse::<u8>().ok()
            } else {
                None
            }
        })
        // Collecting into `Option<Vec<_>>` short-circuits to `None` on the
        // first bad token, which then becomes the empty vector.
        .collect::<Option<Vec<u8>>>()
        .unwrap_or_default()
}
858
859impl std::fmt::Display for AeaMarking {
860 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
861 f.write_str(&self.portion_str())
862 }
863}
864
865// ---------------------------------------------------------------------------
866// FGI marker (in US-classified markings)
867// ---------------------------------------------------------------------------
868
869/// FGI marker in a US-classified marking: `FGI` or `FGI [LIST]`.
870///
871/// Appears in the FGI block (after SAR, before dissem controls) when a
872/// US-classified document references foreign government information.
873///
874/// This is NOT the same as [`FgiClassification`] — that represents a
875/// marking where the classification itself IS foreign. This marker says
876/// "this US-classified marking contains foreign government information."
877///
878/// An empty `countries` list represents source-concealed FGI (no country
879/// attribution). If a document mixes source-concealed and source-acknowledged
880/// FGI portions, the banner must use the bare `FGI` form without countries
881/// to avoid compromising the concealed source.
882#[derive(Debug, Clone, PartialEq, Eq)]
883pub struct FgiMarker {
884 /// Countries (space-delimited in source).
885 /// Empty for source-concealed FGI.
886 pub countries: Box<[CountryCode]>,
887}
888
889// ===========================================================================
890// Non-IC dissemination controls
891// ===========================================================================
892
/// Non-Intelligence Community dissemination control markings (CAPCO Register §9).
///
/// These operate under a separate authority framework from IC dissem controls.
/// In classified documents, LIMDIS, SBU, and SBU-NF appear **only in portion
/// markings** — they are stripped from banners. EXDIS, NODIS, LES, LES-NF,
/// and SSI propagate to classified banners. See
/// [`NonIcDissem::propagates_to_classified_banner`] for the authoritative list
/// and the per-marking citations.
/// When the page is **unclassified**, all non-IC dissem controls propagate to
/// the banner.
///
/// LES-NF and SBU-NF carry NOFORN treatment even when stripped from the banner
/// (see [`NonIcDissem::carries_noforn`]).
///
/// # CUI note
///
/// CUI (Controlled Unclassified Information) is recognized but not validated.
/// Full CUI rule support is planned for a dedicated crate. The IC equivalent
/// (FOUO) remains in active use in the `DissemControl` enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub enum NonIcDissem {
    /// LIMITED DISTRIBUTION / LIMDIS / DS
    Limdis,
    /// EXCLUSIVE DISTRIBUTION / EXDIS / XD
    Exdis,
    /// NO DISTRIBUTION / NODIS / ND
    Nodis,
    /// SENSITIVE BUT UNCLASSIFIED / SBU / SBU
    Sbu,
    /// SENSITIVE BUT UNCLASSIFIED NOFORN / SBU NOFORN / SBU-NF
    /// Carries NOFORN treatment even when stripped from banner.
    SbuNf,
    /// LAW ENFORCEMENT SENSITIVE / LES / LES
    Les,
    /// LAW ENFORCEMENT SENSITIVE NOFORN / LES NOFORN / LES-NF
    /// Carries NOFORN treatment even when stripped from banner.
    LesNf,
    /// SENSITIVE SECURITY INFORMATION / SSI / SSI
    Ssi,
}

impl NonIcDissem {
    /// Banner-line abbreviation form.
    pub fn banner_str(self) -> &'static str {
        match self {
            Self::Limdis => "LIMDIS",
            Self::Exdis => "EXDIS",
            Self::Nodis => "NODIS",
            Self::Sbu => "SBU",
            Self::SbuNf => "SBU NOFORN",
            Self::Les => "LES",
            Self::LesNf => "LES NOFORN",
            Self::Ssi => "SSI",
        }
    }

    /// Portion mark abbreviation.
    pub fn portion_str(self) -> &'static str {
        match self {
            Self::Limdis => "DS",
            Self::Exdis => "XD",
            Self::Nodis => "ND",
            Self::Sbu => "SBU",
            Self::SbuNf => "SBU-NF",
            Self::Les => "LES",
            Self::LesNf => "LES-NF",
            Self::Ssi => "SSI",
        }
    }

    /// Parse from either banner or portion form.
    ///
    /// Matching is exact and case-sensitive: only the strings produced by
    /// [`NonIcDissem::banner_str`] and [`NonIcDissem::portion_str`] are
    /// recognised. Returns `None` for anything else.
    pub fn parse(s: &str) -> Option<Self> {
        match s {
            "LIMDIS" | "DS" => Some(Self::Limdis),
            "EXDIS" | "XD" => Some(Self::Exdis),
            "NODIS" | "ND" => Some(Self::Nodis),
            "SBU" => Some(Self::Sbu),
            "SBU NOFORN" | "SBU-NF" => Some(Self::SbuNf),
            "LES" => Some(Self::Les),
            "LES NOFORN" | "LES-NF" => Some(Self::LesNf),
            "SSI" => Some(Self::Ssi),
            _ => None,
        }
    }

    /// Returns true if this control carries NOFORN treatment.
    pub fn carries_noforn(self) -> bool {
        matches!(self, Self::SbuNf | Self::LesNf)
    }

    /// Returns true if this control propagates to classified banners.
    ///
    /// Authoritative source: `crates/capco/docs/CAPCO-2016.md` §H.9
    /// "Precedence Rules for Banner Line Guidance" for each marking.
    /// The per-marking rows below cite the specific line of the vendored
    /// manual.
    ///
    /// | Marking | Propagates | Source (CAPCO-2016 §H.9) |
    /// |----------|------------|----------------------------------------------------------------------------------------------------------------------|
    /// | LIMDIS | no | line 4180: "When a document contains LIMDIS and classified portions, LIMDIS is not used in the banner line." |
    /// | EXDIS | yes | line 4240: "If EXDIS is contained in any portion … EXDIS must appear in the banner line." Example banner: `SECRET//NOFORN//EXDIS` |
    /// | NODIS | yes | line 4300: "If NODIS is contained in any portion of a document, it must appear in the banner line." Example banner: `SECRET//NOFORN//NODIS` |
    /// | SBU | no | line 4358: "When a document contains SBU and classified portions, SBU is not used in the banner line." |
    /// | SBU-NF | no (†) | line 4408: SBU NOFORN "Applicable only to unclassified information." (The §H.9 notional example on p179 shows a `SECRET//NOFORN` banner with a `(U//SBU-NF)` portion — SBU-NF absent from banner.) |
    /// | LES | yes | line 4479: "The LES marking always appears in the banner line if contained in any portion, regardless of classification level." |
    /// | LES-NF | yes (*) | line 4557: "The LES marking always appears in the banner line if LES information (either LES or LES NOFORN) is contained in the document, regardless of the document's classification level." |
    /// | SSI | yes | line 4651: "If the SSI marking is contained in any portion of a document it must appear in the banner line, regardless of the document's overall classification level." |
    ///
    /// (*) LES-NF carries a §H.9 canonicalization that is **not modeled
    /// here**: in classified docs, `LES NOFORN` → `LES` at the banner
    /// with NOFORN split into the dissem block (line 4558: "the 'LES'
    /// marking is used in the banner line and the NOFORN marking is
    /// applied as a Dissemination Control Marking. For example:
    /// `SECRET//NOFORN//LES`."). The split itself is handled by
    /// [`crate::PageContext::expected_non_ic_dissem`]; this predicate
    /// only answers the binary "does the marking appear in the
    /// banner at all?" question, which is what W003 consumes.
    /// Treating `SECRET//LES NOFORN` as non-canonical (so that the
    /// canonicalization becomes fixable) is a separate page-rewrite
    /// concern, not a W003 concern.
    ///
    /// (†) "Does not propagate" for SBU-NF refers to the **SBU** half of
    /// the marking — the literal `SBU NOFORN` banner form is
    /// non-canonical in a classified document per §H.9 line 4408
    /// ("applicable only to unclassified information"). The **NOFORN
    /// half does propagate** via
    /// [`crate::PageContext::expected_non_ic_dissem`], which splits a
    /// portion-level `SBU-NF` into `SBU + NF-flag` and emits the
    /// resulting NOFORN into the classified banner's dissem block.
    /// So a document with a `(U//SBU-NF)` portion rolls up to a
    /// `SECRET//NOFORN` banner (NOFORN present, SBU dropped), not
    /// `SECRET//SBU NOFORN`. W003 therefore fires on the literal
    /// `SECRET//SBU NOFORN` banner input because that surface form
    /// is the non-canonical one, not because NOFORN is disallowed.
    pub fn propagates_to_classified_banner(self) -> bool {
        match self {
            // Do NOT propagate — banner-absent in classified documents.
            Self::Limdis | Self::Sbu | Self::SbuNf => false,
            // DO propagate — "must appear in the banner line" per §H.9.
            Self::Exdis | Self::Nodis | Self::Les | Self::LesNf | Self::Ssi => true,
        }
    }

    /// All valid values, in declaration order.
    // The lifetime is spelled out so the constant does not rely on lifetime
    // elision in an associated const; the type is unchanged.
    pub const ALL: &'static [NonIcDissem] = &[
        Self::Limdis,
        Self::Exdis,
        Self::Nodis,
        Self::Sbu,
        Self::SbuNf,
        Self::Les,
        Self::LesNf,
        Self::Ssi,
    ];
}

/// Displays the control using its portion-mark abbreviation
/// ([`NonIcDissem::portion_str`]).
impl std::fmt::Display for NonIcDissem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.portion_str())
    }
}
1053
1054// ===========================================================================
1055// CountryCode
1056// ===========================================================================
1057
/// Maximum byte length of a CAPCO country code.
///
/// `AUSTRALIA_GROUP` (15 bytes) is the longest entry in
/// `CVEnumISMCATRelTo.xsd`; 16 leaves one byte of headroom so a future
/// addition does not force a struct-layout change.
const COUNTRY_CODE_CAPACITY: usize = 16;

/// A CAPCO country / country-group code, 2–16 ASCII bytes.
///
/// Sized to hold every entry in the CVE country code list:
/// - 1× 2-char (`EU`)
/// - 280× 3-char trigraphs (`USA`, `GBR`, `AUS`, …)
/// - 58× 4-char tetragraphs / country-group codes (`FVEY`, `ACGU`,
///   `NATO`, `RSMA`, …)
/// - 1× 15-char (`AUSTRALIA_GROUP`)
///
/// Both fields are private. Every value is built by
/// [`CountryCode::try_new`], which only admits bytes from the CAPCO byte
/// set (ASCII uppercase letters, ASCII digits, underscore — enough for
/// `AX2`, `AX3`, `AUSTRALIA_GROUP`, and the plain alpha trigraphs /
/// tetragraphs). That invariant is what lets [`CountryCode::as_str`]
/// hand back a `&str` infallibly, without a runtime panic path.
///
/// The type is `Copy`, so it composes in iterator chains and
/// `BTreeSet`-based intersection without manual `.clone()` calls, and the
/// fixed-array storage keeps each code inline in
/// `IsmAttributes::rel_to` (`Box<[CountryCode]>`) — no per-code heap
/// allocation on the parsing hot path.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CountryCode {
    /// Code bytes followed by zero padding. The derived `Ord` compares
    /// these padded bytes lexicographically; because padding is zero, a
    /// shorter code sorts before a longer one sharing its prefix, which
    /// matches `&str` ordering on ASCII.
    bytes: [u8; COUNTRY_CODE_CAPACITY],
    /// Number of meaningful bytes, `2..=COUNTRY_CODE_CAPACITY`.
    len: u8,
}

impl CountryCode {
    /// The always-valid `USA` country code constant.
    ///
    /// Built through [`CountryCode::try_new`] in `const` context. The
    /// `None` arm's `panic!` is statically unreachable for `b"USA"`
    /// (3 bytes, all ASCII uppercase); it exists only because the
    /// `Option` has to be unwrapped in a `const` initializer.
    pub const USA: Self = match Self::try_new(b"USA") {
        Some(code) => code,
        None => panic!("CountryCode::USA literal must satisfy try_new invariants"),
    };

    /// Returns `true` if `b` belongs to the CAPCO country-code byte set:
    /// ASCII uppercase letter, ASCII digit, or underscore. Digits are
    /// needed for `AX2`/`AX3`; the underscore for `AUSTRALIA_GROUP`.
    #[inline]
    const fn is_valid_byte(b: u8) -> bool {
        matches!(b, b'A'..=b'Z' | b'0'..=b'9' | b'_')
    }

    /// Attempt to construct a country code from a byte slice.
    ///
    /// Returns `None` if `bytes`:
    /// - is shorter than 2 bytes (`EU` is the shortest CVE entry) or
    ///   longer than [`COUNTRY_CODE_CAPACITY`] bytes
    /// - contains any byte outside the CAPCO country-code byte set
    ///   (ASCII uppercase letter, ASCII digit, underscore)
    ///
    /// Membership in the CVE recognition set is a separate check —
    /// see [`crate::CapcoTokenSet::is_trigraph`] (the trait method
    /// covers any known country code, not only 3-char trigraphs).
    #[inline]
    pub const fn try_new(bytes: &[u8]) -> Option<Self> {
        let count = bytes.len();
        if count < 2 {
            return None;
        }
        if count > COUNTRY_CODE_CAPACITY {
            return None;
        }

        // `const fn` cannot use iterators, hence the manual index loop.
        let mut storage = [0u8; COUNTRY_CODE_CAPACITY];
        let mut idx = 0;
        while idx < count {
            let byte = bytes[idx];
            if !Self::is_valid_byte(byte) {
                return None;
            }
            storage[idx] = byte;
            idx += 1;
        }

        // `count` is at most 16 here, so the narrowing cast is lossless.
        Some(Self {
            bytes: storage,
            len: count as u8,
        })
    }

    /// Return the country code as a string slice.
    ///
    /// Infallible: values only come from [`CountryCode::try_new`] (or
    /// [`CountryCode::USA`], which goes through it), so every active byte
    /// is in the CAPCO byte set, a subset of ASCII and therefore valid
    /// UTF-8.
    #[inline]
    pub fn as_str(&self) -> &str {
        // SAFETY: the fields are private and the only constructors are
        // `try_new` and constants such as `CountryCode::USA` that call
        // `try_new` in const context. `try_new` rejects any byte that is
        // not an ASCII uppercase letter, ASCII digit, or underscore, so
        // the active slice is pure ASCII and thus valid UTF-8.
        #[allow(unsafe_code)]
        unsafe {
            std::str::from_utf8_unchecked(self.as_bytes())
        }
    }

    /// Active byte slice (excludes the zero padding).
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        let active = self.len();
        &self.bytes[..active]
    }

    /// Number of active bytes, `2..=COUNTRY_CODE_CAPACITY`.
    #[inline]
    pub const fn len(&self) -> usize {
        self.len as usize
    }

    /// Always `false` — `CountryCode` invariants forbid empty codes.
    /// Provided for clippy-`len_without_is_empty` compliance.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        false
    }
}

/// Writes exactly the active bytes of the code (see
/// [`CountryCode::as_str`]).
impl std::fmt::Display for CountryCode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.as_str();
        f.write_str(text)
    }
}
1192
/// Unit tests pinning the `CountryCode` construction invariants, ordering,
/// and accessor behaviour.
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod country_code_tests {
    use super::CountryCode;

    #[test]
    fn try_new_accepts_two_byte_eu() {
        let eu = CountryCode::try_new(b"EU").unwrap();
        assert_eq!(eu.as_str(), "EU");
        assert_eq!(eu.len(), 2);
    }

    #[test]
    fn try_new_accepts_three_byte_trigraph() {
        let usa = CountryCode::try_new(b"USA").unwrap();
        assert_eq!(usa, CountryCode::USA);
        assert_eq!(usa.as_str(), "USA");
    }

    #[test]
    fn try_new_accepts_four_byte_tetragraph() {
        let fvey = CountryCode::try_new(b"FVEY").unwrap();
        assert_eq!(fvey.as_str(), "FVEY");
        assert_eq!(fvey.len(), 4);
    }

    #[test]
    fn try_new_accepts_australia_group_with_underscore() {
        let ag = CountryCode::try_new(b"AUSTRALIA_GROUP").unwrap();
        assert_eq!(ag.as_str(), "AUSTRALIA_GROUP");
        assert_eq!(ag.len(), 15);
    }

    #[test]
    fn try_new_accepts_digits_in_ax2_ax3() {
        assert_eq!(CountryCode::try_new(b"AX2").unwrap().as_str(), "AX2");
        assert_eq!(CountryCode::try_new(b"AX3").unwrap().as_str(), "AX3");
    }

    #[test]
    fn try_new_accepts_exactly_capacity_bytes() {
        // 16 bytes — the upper boundary itself. No byte of zero padding
        // remains, so this also exercises `as_bytes` / `as_str` slicing
        // the full backing array.
        let full = CountryCode::try_new(b"ABCDEFGHIJKLMNOP").unwrap();
        assert_eq!(full.len(), 16);
        assert_eq!(full.as_bytes(), b"ABCDEFGHIJKLMNOP");
        assert_eq!(full.as_str(), "ABCDEFGHIJKLMNOP");
    }

    #[test]
    fn try_new_rejects_too_short() {
        assert!(CountryCode::try_new(b"").is_none());
        assert!(CountryCode::try_new(b"X").is_none());
    }

    #[test]
    fn try_new_rejects_too_long() {
        // 17 bytes — one over capacity.
        assert!(CountryCode::try_new(b"ABCDEFGHIJKLMNOPQ").is_none());
    }

    #[test]
    fn try_new_rejects_lowercase() {
        assert!(CountryCode::try_new(b"usa").is_none());
        assert!(CountryCode::try_new(b"Fvey").is_none());
    }

    #[test]
    fn try_new_rejects_non_ascii() {
        // 'É' is two UTF-8 bytes (0xC3 0x89); first byte fails the
        // is_valid_byte check.
        let bytes = "ÉU".as_bytes();
        assert!(CountryCode::try_new(bytes).is_none());
    }

    #[test]
    fn try_new_rejects_nul_space_and_punctuation() {
        // A NUL byte must never be accepted: zero is the padding value,
        // and the `Ord`-matches-`&str` property relies on active bytes
        // being non-zero.
        assert!(CountryCode::try_new(b"US\0").is_none());
        assert!(CountryCode::try_new(b"US A").is_none());
        assert!(CountryCode::try_new(b"US-A").is_none());
        assert!(CountryCode::try_new(b"US/A").is_none());
    }

    #[test]
    fn ord_matches_str_lex_for_mixed_lengths() {
        let eu = CountryCode::try_new(b"EU").unwrap();
        let aus = CountryCode::try_new(b"AUS").unwrap();
        let usa = CountryCode::USA;
        let usab = CountryCode::try_new(b"USAB").unwrap();
        let mut all = [eu, aus, usa, usab];
        all.sort();
        assert_eq!(all[0].as_str(), "AUS");
        assert_eq!(all[1].as_str(), "EU");
        assert_eq!(all[2].as_str(), "USA");
        assert_eq!(all[3].as_str(), "USAB");
    }

    #[test]
    fn copy_semantics_preserved() {
        let original = CountryCode::USA;
        let copy = original;
        // Both still usable — `Copy` not `Move`.
        assert_eq!(original, copy);
        assert_eq!(original.as_str(), copy.as_str());
    }

    #[test]
    fn display_renders_active_bytes_only() {
        // Display impl writes the active byte slice; the zero
        // padding past `len` must never reach the formatter.
        let usa = CountryCode::USA;
        let fvey = CountryCode::try_new(b"FVEY").unwrap();
        let ag = CountryCode::try_new(b"AUSTRALIA_GROUP").unwrap();
        assert_eq!(format!("{usa}"), "USA");
        assert_eq!(format!("{fvey}"), "FVEY");
        assert_eq!(format!("{ag}"), "AUSTRALIA_GROUP");
    }

    #[test]
    fn as_bytes_excludes_zero_padding() {
        let usa = CountryCode::USA;
        assert_eq!(usa.as_bytes(), b"USA");
        let fvey = CountryCode::try_new(b"FVEY").unwrap();
        assert_eq!(fvey.as_bytes(), b"FVEY");
    }

    #[test]
    fn is_empty_invariant_always_false() {
        // `try_new` rejects `len < 2`, so a constructed `CountryCode`
        // is never empty. `is_empty` exists only to satisfy clippy's
        // `len_without_is_empty`; pin the invariant so a future
        // refactor that loosens `try_new` is forced to revisit it.
        assert!(!CountryCode::USA.is_empty());
        assert!(!CountryCode::try_new(b"EU").unwrap().is_empty());
        assert!(!CountryCode::try_new(b"AUSTRALIA_GROUP").unwrap().is_empty());
    }

    #[test]
    fn usa_constant_matches_try_new() {
        // `pub const USA` constructs via `try_new` in const context.
        // Pin the equivalence so a future change to either path
        // (e.g., adding a normalization step to `try_new` but not
        // the const constructor) breaks loudly.
        let runtime = CountryCode::try_new(b"USA").unwrap();
        assert_eq!(CountryCode::USA, runtime);
        assert_eq!(CountryCode::USA.as_bytes(), runtime.as_bytes());
        assert_eq!(CountryCode::USA.len(), runtime.len());
    }
}
1324
1325// ===========================================================================
1326// SCI structural types (spec 003-sci-compartments)
1327// ===========================================================================
1328
1329/// A fully-parsed SCI category-block entry.
1330///
1331/// A banner or portion may carry multiple `SciMarking` entries separated by
1332/// `/` within one SCI category block (e.g., `//SI-G/TK-BLFH//`).
1333///
1334/// Construction is restricted to [`SciMarking::new`] (the struct is
1335/// `#[non_exhaustive]`) so new fields can be added without breaking the
1336/// parser.
1337#[non_exhaustive]
1338#[derive(Debug, Clone, PartialEq, Eq)]
1339pub struct SciMarking {
1340 /// The control-system anchor. One of the published bare control
1341 /// systems (see [`SciControlBare`]) or a structurally-parsed custom
1342 /// value.
1343 pub system: SciControlSystem,
1344
1345 /// Compartments in source order. Sort-order validation is the concern
1346 /// of CAPCO rule E033 (not the parser).
1347 pub compartments: Box<[SciCompartment]>,
1348
1349 /// If the `{system}-{first_compartment}` composite exactly matches an
1350 /// ODNI CVE value (e.g., `SI-G`, `HCS-P`, `TK-BLFH`), this records the
1351 /// matching [`SciControl`] variant. Only populated when the matching
1352 /// compartment has NO sub-compartments — sub-compartments imply the
1353 /// compound is a structural anchor rather than a CVE atom. `None`
1354 /// otherwise.
1355 pub canonical_enum: Option<SciControl>,
1356}
1357
1358impl SciMarking {
1359 /// Construct a new `SciMarking`. Used by the parser (`marque-core`) to
1360 /// populate [`IsmAttributes::sci_markings`].
1361 pub fn new(
1362 system: SciControlSystem,
1363 compartments: Box<[SciCompartment]>,
1364 canonical_enum: Option<SciControl>,
1365 ) -> Self {
1366 Self {
1367 system,
1368 compartments,
1369 canonical_enum,
1370 }
1371 }
1372}
1373
1374/// Which kind of SCI control system a [`SciMarking`] anchors on.
1375///
1376/// This is a closed set of two variants: either a published bare system
1377/// drawn from the live ODNI CVE, or an agency-allocated custom identifier
1378/// (per CAPCO-2016 §A.6 p15).
1379#[derive(Debug, Clone, PartialEq, Eq)]
1380pub enum SciControlSystem {
1381 /// One of the published bare control systems.
1382 Published(SciControlBare),
1383 /// An agency-allocated system matching `[A-Z0-9]{2,5}` (per CAPCO-2016
1384 /// §A.6 p15 `123` example). Stores the raw text exactly as it appeared
1385 /// in the source.
1386 Custom(Box<str>),
1387}
1388
/// A single compartment under an SCI control system.
///
/// A compartment is an identifier plus zero or more sub-compartments,
/// kept in source order. The struct is `#[non_exhaustive]`, so
/// construction goes through [`SciCompartment::new`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct SciCompartment {
    /// Compartment identifier (alphanumeric). Example: `G` in `SI-G`.
    pub identifier: Box<str>,
    /// Sub-compartments in source order. Example: `ABCD`, `DEFG` in
    /// `SI-G ABCD DEFG`.
    pub sub_compartments: Box<[Box<str>]>,
}

impl SciCompartment {
    /// Construct a new `SciCompartment` from its identifier and its
    /// sub-compartments, both stored as given.
    ///
    /// Used by the parser to populate [`SciMarking::compartments`].
    pub fn new(identifier: Box<str>, sub_compartments: Box<[Box<str>]>) -> Self {
        Self {
            sub_compartments,
            identifier,
        }
    }
}
1414
/// Unit tests for classification ordering, banner/portion rendering, the
/// NATO → US equivalence mapping, and `IsmAttributes::us_classification`.
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// Builds an `IsmAttributes` whose only explicitly set field is
    /// `classification`; everything else comes from `Default`.
    fn attrs_with(classification: MarkingClassification) -> IsmAttributes {
        IsmAttributes {
            classification: Some(classification),
            ..Default::default()
        }
    }

    #[test]
    fn classification_ord_is_restrictiveness() {
        // Listed least → most restrictive; each neighbour pair must be
        // strictly increasing.
        let ascending = [
            Classification::Unclassified,
            Classification::Restricted,
            Classification::Confidential,
            Classification::Secret,
            Classification::TopSecret,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    #[test]
    fn classification_banner_portion_round_trip() {
        let levels = [
            Classification::Unclassified,
            Classification::Restricted,
            Classification::Confidential,
            Classification::Secret,
            Classification::TopSecret,
        ];
        for level in levels {
            assert!(!level.banner_str().is_empty());
            assert!(!level.portion_str().is_empty());
        }
    }

    #[test]
    fn nato_us_equivalent_mapping() {
        let expected = [
            (
                NatoClassification::CosmicTopSecret,
                Classification::TopSecret,
            ),
            (NatoClassification::NatoSecret, Classification::Secret),
            (
                NatoClassification::NatoRestricted,
                Classification::Restricted,
            ),
        ];
        for (nato, us) in expected {
            assert_eq!(nato.us_equivalent(), us);
        }
    }

    #[test]
    fn nato_banner_portion_round_trip() {
        let levels = [
            NatoClassification::NatoUnclassified,
            NatoClassification::NatoRestricted,
            NatoClassification::NatoConfidential,
            NatoClassification::NatoConfidentialAtomal,
            NatoClassification::NatoSecret,
            NatoClassification::NatoSecretAtomal,
            NatoClassification::CosmicTopSecret,
            NatoClassification::CosmicTopSecretAtomal,
            NatoClassification::CosmicTopSecretBohemia,
            NatoClassification::CosmicTopSecretBalk,
        ];
        for level in levels {
            assert!(!level.banner_str().is_empty());
            assert!(!level.portion_str().is_empty());
        }
    }

    #[test]
    fn us_classification_convenience_returns_us() {
        let attrs = attrs_with(MarkingClassification::Us(Classification::Secret));
        assert_eq!(attrs.us_classification(), Some(Classification::Secret));
    }

    #[test]
    fn us_classification_convenience_returns_none_for_nato() {
        let attrs = attrs_with(MarkingClassification::Nato(NatoClassification::NatoSecret));
        assert_eq!(attrs.us_classification(), None);
    }

    #[test]
    fn us_classification_convenience_returns_resolved_for_conflict() {
        let foreign = ForeignClassification::Nato(NatoClassification::CosmicTopSecret);
        let attrs = attrs_with(MarkingClassification::Conflict {
            us: Classification::TopSecret,
            foreign: Box::new(foreign),
        });
        assert_eq!(attrs.us_classification(), Some(Classification::TopSecret));
    }
}