Skip to main content

omni_dev/atlassian/adf_schema/
mod.rs

1//! ADF content-model schema and structural validator.
2//!
3//! Encodes the per-parent content expressions from the upstream
4//! `@atlaskit/adf-schema` npm package as a static lookup table, and exposes a
5//! [`validate_document`] walker that reports nesting **and** arity violations
6//! against that model.
7//!
8//! # Source of truth
9//!
10//! The lookup table is a manual transcription of the per-node `content:`
11//! expressions defined in the upstream schema. The pinned version is recorded
12//! in [`SCHEMA_VERSION`] and the upstream tarball SHA-256 in
13//! [`UPSTREAM_TARBALL_SHA256`]. When refreshing the snapshot, bump both
14//! constants and re-verify each entry against the upstream source files
15//! (`packages/adf-schema/src/schema/nodes/<node>.ts`).
16//!
17//! # Forward compatibility
18//!
19//! The walker is **permissive on unknown parents**: a node whose `node_type` is
20//! not in the table is treated as opaque and its children are not validated.
21//! This preserves the round-trip guarantee of ADR-0020's `adf-unsupported`
22//! escape hatch — the validator never rejects a document just because it
23//! contains a node type the snapshot doesn't know about.
24//!
25//! `unsupportedBlock` and `unsupportedInline` are accepted under any known
26//! parent and **count toward arity** for the parent's current content term, so
27//! a round-tripped document carrying a preservation wrapper still satisfies
28//! the parent's `+` / `Exactly(n)` requirements.
29//!
30//! # Coverage in this slice
31//!
32//! - Allowed-children sets for every container node type (PR #717 / ADR-0023).
33//! - Per-term quantifiers (`?`, `*`, `+`, exact, range) and per-parent term
34//!   sequences (PR #733). Empty `bulletList`, two-`media` `mediaSingle`,
35//!   `layoutSection` with one column, etc. are all flagged via
36//!   [`AdfSchemaViolation::Arity`].
37//!
38//! Mark whitelists and attribute schemas are still out of scope; they are
39//! addressed in the follow-up sub-PRs of #733.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::sync::LazyLock;
43
44use crate::atlassian::adf::{AdfDocument, AdfNode};
45
46pub mod drift;
47pub mod generated;
48
49/// Crate-internal view of the schema as a `BTreeMap`, used by the drift
50/// detector to diff against an upstream-derived map of the same shape.
51///
52/// Built by flattening every parent's [`CONTENT_ENTRIES`] terms into the
53/// union of their atoms. Quantifier and order information is intentionally
54/// stripped because the drift detector compares against the upstream JSON
55/// schema's `anyOf` of `$ref` items, which has the same flat-set shape.
56#[must_use]
57pub(crate) fn local_schema_map() -> BTreeMap<&'static str, BTreeSet<&'static str>> {
58    let mut m = BTreeMap::new();
59    for (parent, terms) in CONTENT_ENTRIES {
60        let children: BTreeSet<&'static str> =
61            terms.iter().flat_map(|t| t.atoms.iter().copied()).collect();
62        m.insert(*parent, children);
63    }
64    m
65}
66
/// Pinned upstream schema version.
///
/// Format: `<npm-package-version>-<transcription-date>`. Bumped manually when
/// the lookup table is refreshed against a new upstream release, together
/// with [`UPSTREAM_TARBALL_SHA256`], which identifies the tarball the
/// transcription was made from.
pub const SCHEMA_VERSION: &str = "52.9.5-2026-05-10";
72
/// SHA-256 of the upstream `@atlaskit/adf-schema` tarball used as the source
/// for the current transcription.
///
/// Recorded for reproducibility. Kept here (not in the ADR) so the binary
/// itself carries the provenance and so refreshing the snapshot is a single
/// file change. The package version in the URL below is the same one that
/// prefixes [`SCHEMA_VERSION`]; update both together.
///
/// To verify locally:
/// ```text
/// curl -sL https://registry.npmjs.org/@atlaskit/adf-schema/-/adf-schema-52.9.5.tgz \
///   | shasum -a 256
/// ```
pub const UPSTREAM_TARBALL_SHA256: &str =
    "90b9b26f5cdf6f0850cebe5cf2df7662601b249322d6bcbeead712ca018e0b56";
87
88// -----------------------------------------------------------------------------
89// Quantifier and content-term types
90// -----------------------------------------------------------------------------
91
/// A quantifier applied to a single term in a parent's content expression.
///
/// Mirrors the ProseMirror content-expression grammar used by
/// `@atlaskit/adf-schema`. `Range` is used for the only known range case
/// (`layoutSection` permits 2–3 `layoutColumn` children); `Exactly` is used
/// for `mediaSingle`'s required `media` child.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Quantifier {
    /// `?` — zero or one (optional).
    ZeroOrOne,
    /// `*` — zero or more.
    ZeroOrMore,
    /// `+` — one or more.
    OneOrMore,
    /// `{n}` — exactly `n`.
    Exactly(usize),
    /// `{min,max}` — between `min` and `max` inclusive.
    Range(usize, usize),
}

impl Quantifier {
    /// True when a count of `n` is acceptable for this quantifier.
    ///
    /// `Range(lo, hi)` is inclusive at both ends. A range whose `lo` exceeds
    /// its `hi` is empty and therefore accepts no count at all.
    #[must_use]
    pub fn satisfied_by(&self, n: usize) -> bool {
        match *self {
            Self::ZeroOrOne => n <= 1,
            Self::ZeroOrMore => true,
            Self::OneOrMore => n >= 1,
            Self::Exactly(k) => n == k,
            // Inclusive-range membership instead of a hand-written pair of
            // comparisons (Clippy `manual_range_contains`).
            Self::Range(lo, hi) => (lo..=hi).contains(&n),
        }
    }

    /// Human-readable phrasing used in [`AdfSchemaViolation`] messages.
    ///
    /// The text is meant to be followed by the atom list, e.g.
    /// "at least one" + `'listItem'`. `Exactly(1)` is spelled out as
    /// "exactly one"; other exact counts use the number.
    fn phrasing(&self) -> String {
        match *self {
            Self::ZeroOrOne => "at most one".to_string(),
            Self::ZeroOrMore => "any number of".to_string(),
            Self::OneOrMore => "at least one".to_string(),
            Self::Exactly(1) => "exactly one".to_string(),
            Self::Exactly(n) => format!("exactly {n}"),
            Self::Range(lo, hi) => format!("between {lo} and {hi}"),
        }
    }
}
137
/// One term in a parent's content expression: an atom (or alternation of
/// atoms), with a quantifier.
///
/// A parent's full content model is a slice of these terms; see
/// [`content_model`]. For example `mediaSingle` is modelled as two terms,
/// `media` with `Exactly(1)` followed by `caption` with `ZeroOrOne`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentTerm {
    /// One or more allowed node types. A list of length 1 is a single atom; a
    /// list of length >1 is an alternation.
    pub atoms: &'static [&'static str],
    /// Quantifier applied to this term, i.e. how many children matching
    /// `atoms` the parent may hold for this term.
    pub quant: Quantifier,
}
148
149// -----------------------------------------------------------------------------
150// Violation enum
151// -----------------------------------------------------------------------------
152
/// A structural violation reported by the validator.
///
/// Each variant corresponds to a distinct class of issue so callers can opt in
/// to strictness (e.g. surface only [`Self::DisallowedChild`] today, then layer
/// in arity checks once their pipeline is ready). Besides the nesting and
/// arity variants, the enum already carries attribute and mark variants
/// ([`Self::MissingAttr`], [`Self::InvalidAttr`], [`Self::DisallowedMark`],
/// [`Self::InvalidMarkAttr`]).
///
/// Further variants may still be added. The enum is not marked
/// `#[non_exhaustive]`, so callers that want to keep compiling across such
/// additions should include a wildcard arm in their pattern matches.
#[derive(Debug, Clone, PartialEq)]
pub enum AdfSchemaViolation {
    /// A child node type appears under a parent that does not permit it.
    DisallowedChild {
        /// The `node_type` of the offending child.
        child_type: String,
        /// The `node_type` of the parent that does not permit the child.
        parent_type: String,
        /// Index path from the document root to the offending child.
        ///
        /// Each element is the position of the node in its parent's `content`
        /// array. The last element identifies the child within its parent.
        path: Vec<usize>,
    },

    /// A parent has the wrong number of children matching one of its content
    /// terms.
    ///
    /// Examples:
    /// - `mediaSingle` with two `media` children: `expected = Exactly(1)`,
    ///   `actual = 2`, `atoms = ["media"]`.
    /// - Empty `bulletList`: `expected = OneOrMore`, `actual = 0`,
    ///   `atoms = ["listItem"]`.
    /// - `layoutSection` with one column: `expected = Range(2, 3)`,
    ///   `actual = 1`, `atoms = ["layoutColumn"]`.
    Arity {
        /// The `node_type` of the parent whose content count is wrong.
        parent_type: String,
        /// The term's atoms (alternation list). Length 1 for a single atom,
        /// >1 for an alternation like `["tableCell", "tableHeader"]`.
        atoms: Vec<&'static str>,
        /// The quantifier the term expects.
        expected: Quantifier,
        /// The actual number of children matching the term's atoms.
        actual: usize,
        /// Index path from the document root to the **parent** node.
        path: Vec<usize>,
    },

    /// A node's `attrs` value is missing a required field.
    ///
    /// Example: `panel` without `panelType`, `heading` without `level`.
    MissingAttr {
        /// The `node_type` whose attrs are incomplete.
        node_type: String,
        /// The name of the missing attribute.
        attr_name: String,
        /// Index path from the document root to the offending node.
        path: Vec<usize>,
    },

    /// A node's `attrs` value has the wrong shape for a declared field.
    ///
    /// Examples: `panel.panelType: "purple"` (not in the enum),
    /// `heading.level: 7` (out of range), `heading.level: "two"` (wrong
    /// type), `embedCard.url: "not a url"` (bad format).
    InvalidAttr {
        /// The `node_type` whose attrs are malformed.
        node_type: String,
        /// The name of the offending attribute.
        attr_name: String,
        /// What is wrong with the value (enum / range / type / format).
        problem: crate::atlassian::adf_attr_schema::AttrProblem,
        /// Index path from the document root to the offending node.
        path: Vec<usize>,
    },

    /// A mark appears in a context that does not permit it.
    ///
    /// Examples: `code` mark on text inside a `heading`, `border` mark on a
    /// `paragraph` (block marks like `border` are tableCell-only).
    DisallowedMark {
        /// The `mark_type` of the offending mark.
        mark_type: String,
        /// The context that rejects this mark — for inline marks, the
        /// inline-content parent (e.g. `"heading"`); for block marks, the
        /// node whose own `marks` array contains the mark (e.g.
        /// `"paragraph"`).
        parent_type: String,
        /// For inline-mark violations, the position of the inline node
        /// within its parent. `None` for block-mark violations.
        inline_index: Option<usize>,
        /// Index path from the document root to the node whose marks were
        /// being validated.
        path: Vec<usize>,
    },

    /// A mark's `attrs` value has the wrong shape for a declared field, or
    /// is missing a required field.
    ///
    /// Examples: `link.href: "not a url"` (bad format),
    /// `subsup.type: "side"` (not in enum), `border.size: 5` (out of range
    /// 1..=3), `link` without `href` (required field absent).
    InvalidMarkAttr {
        /// The `mark_type` whose attrs are malformed.
        mark_type: String,
        /// The name of the offending attribute.
        attr_name: String,
        /// What is wrong with the value.
        problem: crate::atlassian::adf_attr_schema::AttrProblem,
        /// The position of the mark within the node's `marks` array (for
        /// disambiguation when a node carries multiple marks).
        ///
        /// NOTE(review): the same field name on [`Self::DisallowedMark`] is
        /// documented as the inline node's position within its parent, not a
        /// position in `marks` — confirm which meaning the producer uses.
        inline_index: Option<usize>,
        /// Index path from the document root to the node whose mark is
        /// malformed.
        path: Vec<usize>,
    },
}
268
269impl AdfSchemaViolation {
270    /// Path from the document root to the violation site.
271    ///
272    /// For [`Self::DisallowedChild`] this is the child; for [`Self::Arity`]
273    /// this is the parent whose count is wrong; for [`Self::MissingAttr`]
274    /// and [`Self::InvalidAttr`] this is the node whose attrs are wrong.
275    #[must_use]
276    pub fn path(&self) -> &[usize] {
277        match self {
278            Self::DisallowedChild { path, .. }
279            | Self::Arity { path, .. }
280            | Self::MissingAttr { path, .. }
281            | Self::InvalidAttr { path, .. }
282            | Self::DisallowedMark { path, .. }
283            | Self::InvalidMarkAttr { path, .. } => path,
284        }
285    }
286}
287
288impl std::fmt::Display for AdfSchemaViolation {
289    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
290        let path_str = self
291            .path()
292            .iter()
293            .map(usize::to_string)
294            .collect::<Vec<_>>()
295            .join("/");
296        match self {
297            Self::DisallowedChild {
298                child_type,
299                parent_type,
300                ..
301            } => write!(
302                f,
303                "ADF schema violation at /{path_str}: '{child_type}' is not permitted inside '{parent_type}'",
304            ),
305            Self::Arity {
306                parent_type,
307                atoms,
308                expected,
309                actual,
310                ..
311            } => write!(
312                f,
313                "ADF schema violation at /{path_str}: '{parent_type}' must contain {phrasing} {atoms_str} (found {actual})",
314                phrasing = expected.phrasing(),
315                atoms_str = format_atoms(atoms),
316            ),
317            Self::MissingAttr {
318                node_type,
319                attr_name,
320                ..
321            } => write!(
322                f,
323                "ADF schema violation at /{path_str}: '{node_type}' is missing required attribute '{attr_name}'",
324            ),
325            Self::InvalidAttr {
326                node_type,
327                attr_name,
328                problem,
329                ..
330            } => write!(
331                f,
332                "ADF schema violation at /{path_str}: '{node_type}.{attr_name}' is invalid — {problem}",
333            ),
334            Self::DisallowedMark {
335                mark_type,
336                parent_type,
337                ..
338            } => write!(
339                f,
340                "ADF schema violation at /{path_str}: '{mark_type}' mark is not permitted on '{parent_type}'",
341            ),
342            Self::InvalidMarkAttr {
343                mark_type,
344                attr_name,
345                problem,
346                ..
347            } => write!(
348                f,
349                "ADF schema violation at /{path_str}: '{mark_type}' mark's '{attr_name}' is invalid — {problem}",
350            ),
351        }
352    }
353}
354
/// Formats a term's atom list for use in a violation message.
///
/// A single atom is rendered as `'atom'`. Any other length (including an
/// empty list) is rendered as a brace-wrapped, comma-separated list of quoted
/// atoms, e.g. `{'tableCell', 'tableHeader'}`.
fn format_atoms(atoms: &[&str]) -> String {
    match atoms {
        [only] => format!("'{only}'"),
        _ => {
            let quoted: Vec<String> = atoms.iter().map(|atom| format!("'{atom}'")).collect();
            let inner = quoted.join(", ");
            format!("{{{inner}}}")
        }
    }
}
367
368// -----------------------------------------------------------------------------
369// Common atom slices
370// -----------------------------------------------------------------------------
371
/// Inline content shared by `paragraph`, `heading`, `taskItem`, `decisionItem`.
///
/// `caption`'s inline list is a strict subset (no `inlineExtension`, no
/// `mediaInline`) and is spelled out separately in [`CAPTION_INLINE_ATOMS`]
/// to keep the per-parent diff against upstream exact.
const FULL_INLINE_ATOMS: &[&str] = &[
    "date",
    "emoji",
    "hardBreak",
    "inlineCard",
    "inlineExtension",
    "mediaInline",
    "mention",
    "placeholder",
    "status",
    "text",
];

/// Inline content permitted inside `caption`: [`FULL_INLINE_ATOMS`] minus
/// `inlineExtension` and `mediaInline`.
const CAPTION_INLINE_ATOMS: &[&str] = &[
    "date",
    "emoji",
    "hardBreak",
    "inlineCard",
    "mention",
    "placeholder",
    "status",
    "text",
];
400
/// Block content permitted inside `listItem` (see the `listItem` entry in
/// `CONTENT_ENTRIES` for how the upstream expression is simplified).
const LISTITEM_BLOCK_ATOMS: &[&str] = &[
    "bulletList",
    "codeBlock",
    "extension",
    "mediaSingle",
    "orderedList",
    "paragraph",
    "taskList",
];

/// Block content permitted inside `panel`.
const PANEL_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "bulletList",
    "codeBlock",
    "decisionList",
    "extension",
    "heading",
    "mediaGroup",
    "mediaSingle",
    "orderedList",
    "paragraph",
    "rule",
    "taskList",
];

/// Block content permitted inside `nestedExpand`. Includes `panel` and
/// `blockquote`; does not include `table`, `blockCard`, `embedCard`,
/// `expand`, or `nestedExpand` itself.
const NESTED_EXPAND_BLOCK_ATOMS: &[&str] = &[
    "blockquote",
    "bulletList",
    "codeBlock",
    "decisionList",
    "extension",
    "heading",
    "mediaGroup",
    "mediaSingle",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "taskList",
];

/// Block content permitted inside `expand`. Includes `nestedExpand` but not
/// `expand` itself.
const EXPAND_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "blockquote",
    "bulletList",
    "codeBlock",
    "decisionList",
    "embedCard",
    "extension",
    "heading",
    "mediaGroup",
    "mediaSingle",
    "nestedExpand",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "table",
    "taskList",
];

/// Block content permitted inside `bodiedExtension`.
const BODIED_EXTENSION_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "blockquote",
    "bulletList",
    "codeBlock",
    "decisionList",
    "embedCard",
    "extension",
    "heading",
    "mediaGroup",
    "mediaSingle",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "table",
    "taskList",
];

/// Block content permitted inside `bodiedSyncBlock`.
const BODIED_SYNC_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "blockquote",
    "bulletList",
    "codeBlock",
    "decisionList",
    "embedCard",
    "expand",
    "heading",
    "layoutSection",
    "mediaGroup",
    "mediaSingle",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "table",
    "taskList",
];

/// Block content permitted inside `layoutColumn`. Includes `expand` and
/// `bodiedExtension`, but not `nestedExpand`.
const LAYOUT_COLUMN_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "blockquote",
    "bodiedExtension",
    "bulletList",
    "codeBlock",
    "decisionList",
    "embedCard",
    "expand",
    "extension",
    "heading",
    "mediaGroup",
    "mediaSingle",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "table",
    "taskList",
];

/// Block content shared by `tableCell` and `tableHeader`. Includes
/// `nestedExpand`; does not include `table`.
const TABLE_CELL_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "blockquote",
    "bulletList",
    "codeBlock",
    "decisionList",
    "embedCard",
    "extension",
    "heading",
    "mediaGroup",
    "mediaSingle",
    "nestedExpand",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "taskList",
];

/// Top-level block content permitted directly under `doc`.
const DOC_BLOCK_ATOMS: &[&str] = &[
    "blockCard",
    "blockquote",
    "bodiedExtension",
    "bodiedSyncBlock",
    "bulletList",
    "codeBlock",
    "decisionList",
    "embedCard",
    "expand",
    "extension",
    "heading",
    "layoutSection",
    "mediaGroup",
    "mediaSingle",
    "orderedList",
    "panel",
    "paragraph",
    "rule",
    "syncBlock",
    "table",
    "taskList",
];
564
565// -----------------------------------------------------------------------------
566// Schema entries
567// -----------------------------------------------------------------------------
568//
569// One entry per container node, transcribed faithfully from the upstream JSON
570// schema (`json-schema/v1/full.json` in the package tarball pinned by
571// `SCHEMA_VERSION` / `UPSTREAM_TARBALL_SHA256`). Leaf nodes (text, hardBreak,
572// mention, emoji, date, status, inlineCard, mediaInline, placeholder,
573// inlineExtension, media, rule, blockCard, embedCard, extension, syncBlock,
574// unsupportedBlock, unsupportedInline) have no `content` upstream and are
575// intentionally absent.
576//
577// `unsupportedBlock` and `unsupportedInline` are NOT listed in any parent's
578// allowed-children atoms — they are runtime-only preservation wrappers. The
579// walker accepts them under any known parent and counts them toward the
580// parent's current term arity; see [`is_unsupported`] and [`walk_children`].
581//
582// Lenient deviations from upstream (where strict would break common
583// real-world inputs) are commented inline:
584//
585// - `doc`: upstream is `block+`; we use `block*` so `AdfDocument::new()` (the
586//   canonical empty document, returned for missing JIRA descriptions) does
587//   not produce an arity violation.
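// - `tableCell` / `tableHeader`: upstream is `block+`; we use `block*` so
//   visibly-empty cells in real Confluence tables do not produce arity
//   violations.
// - `listItem`: upstream requires a leading `paragraph` followed by further
//   blocks; we collapse this to one-or-more of the union of both terms so
//   list items that start with a nested list are not flagged.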
591
/// One schema entry: a parent `node_type` paired with the sequence of
/// content terms that parent accepts.
///
/// [`CONTENT_ENTRIES`] is a table of these, sorted alphabetically by parent.
/// Crate-visible so the drift detector ([`drift`]) can flatten the entries
/// into a `BTreeMap` without re-deriving from the runtime `ALLOWED_CHILDREN`
/// cache.
pub(crate) type ModelEntry = (&'static str, &'static [ContentTerm]);
596
/// The content-model table: one `(parent, terms)` entry per container node
/// type, ordered by parent name.
///
/// Each entry's terms are the parent's content expression as a sequence of
/// quantified atoms/alternations. The runtime lookups `CONTENT_MODELS` and
/// `ALLOWED_CHILDREN` are both derived from this table.
pub(crate) const CONTENT_ENTRIES: &[ModelEntry] = &[
    // blockTaskItem — definitions/blockTaskItem_node
    // upstream: (extension | paragraph)+
    (
        "blockTaskItem",
        &[ContentTerm {
            atoms: &["extension", "paragraph"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // blockquote — definitions/blockquote_node
    // upstream: (paragraph | bulletList | orderedList | mediaGroup |
    //            mediaSingle | codeBlock | extension)+
    (
        "blockquote",
        &[ContentTerm {
            atoms: &[
                "bulletList",
                "codeBlock",
                "extension",
                "mediaGroup",
                "mediaSingle",
                "orderedList",
                "paragraph",
            ],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // bodiedExtension — definitions/bodiedExtension_node
    (
        "bodiedExtension",
        &[ContentTerm {
            atoms: BODIED_EXTENSION_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // bodiedSyncBlock — definitions/bodiedSyncBlock_node
    (
        "bodiedSyncBlock",
        &[ContentTerm {
            atoms: BODIED_SYNC_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // bulletList — definitions/bulletList_node
    // upstream: listItem+
    (
        "bulletList",
        &[ContentTerm {
            atoms: &["listItem"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // caption — definitions/caption_node
    // upstream: inline* (subset of FULL_INLINE: no inlineExtension, no
    // mediaInline)
    (
        "caption",
        &[ContentTerm {
            atoms: CAPTION_INLINE_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // codeBlock — definitions/codeBlock_node
    // upstream: text* (hardBreak NOT permitted by the JSON schema even
    // though some renderers handle it)
    (
        "codeBlock",
        &[ContentTerm {
            atoms: &["text"],
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // decisionItem — definitions/decisionItem_node
    // upstream: inline*
    (
        "decisionItem",
        &[ContentTerm {
            atoms: FULL_INLINE_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // decisionList — definitions/decisionList_node
    // upstream: decisionItem+
    (
        "decisionList",
        &[ContentTerm {
            atoms: &["decisionItem"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // doc — definitions/doc_node
    // upstream: block+; LENIENT: block* — empty docs are the canonical
    // value for missing JIRA descriptions (`AdfDocument::new()`).
    (
        "doc",
        &[ContentTerm {
            atoms: DOC_BLOCK_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // expand — definitions/expand_node
    // upstream: block+ (DOES permit nestedExpand, NOT another expand)
    (
        "expand",
        &[ContentTerm {
            atoms: EXPAND_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // heading — definitions/heading_node
    // upstream: inline*
    (
        "heading",
        &[ContentTerm {
            atoms: FULL_INLINE_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // layoutColumn — definitions/layoutColumn_node
    // upstream: block+ (permits expand and bodiedExtension, NOT
    // nestedExpand)
    (
        "layoutColumn",
        &[ContentTerm {
            atoms: LAYOUT_COLUMN_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // layoutSection — definitions/layoutSection_node
    // upstream: layoutColumn{2,3}
    (
        "layoutSection",
        &[ContentTerm {
            atoms: &["layoutColumn"],
            quant: Quantifier::Range(2, 3),
        }],
    ),
    // listItem — definitions/listItem_node
    // upstream: paragraph (paragraph | bulletList | orderedList |
    //                       mediaSingle | codeBlock | taskList)*
    // LENIENT: simplified to (one-or-more of the union) — most listItems
    // start with a paragraph in practice; flagging pure-list-of-list items
    // would be noisy.
    // NOTE(review): LISTITEM_BLOCK_ATOMS also contains `extension`, which
    // the upstream expression quoted above does not list — confirm against
    // the pinned upstream schema and update whichever side is stale.
    (
        "listItem",
        &[ContentTerm {
            atoms: LISTITEM_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // mediaGroup — definitions/mediaGroup_node
    // upstream: media+
    (
        "mediaGroup",
        &[ContentTerm {
            atoms: &["media"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // mediaSingle — definitions/mediaSingle_caption_node /
    //               mediaSingle_full_node
    // upstream: media (caption)?  (in this order)
    (
        "mediaSingle",
        &[
            ContentTerm {
                atoms: &["media"],
                quant: Quantifier::Exactly(1),
            },
            ContentTerm {
                atoms: &["caption"],
                quant: Quantifier::ZeroOrOne,
            },
        ],
    ),
    // nestedExpand — definitions/nestedExpand_node
    // upstream: block+ (permits panel and blockquote; NOT table, blockCard,
    // embedCard, expand, or nestedExpand itself)
    (
        "nestedExpand",
        &[ContentTerm {
            atoms: NESTED_EXPAND_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // orderedList — definitions/orderedList_node
    // upstream: listItem+
    (
        "orderedList",
        &[ContentTerm {
            atoms: &["listItem"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // panel — definitions/panel_node
    // upstream: block+ (subset)
    (
        "panel",
        &[ContentTerm {
            atoms: PANEL_BLOCK_ATOMS,
            quant: Quantifier::OneOrMore,
        }],
    ),
    // paragraph — definitions/paragraph_node
    // upstream: inline*
    (
        "paragraph",
        &[ContentTerm {
            atoms: FULL_INLINE_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // table — definitions/table_node
    // upstream: tableRow+
    (
        "table",
        &[ContentTerm {
            atoms: &["tableRow"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // tableCell — definitions/table_cell_content
    // upstream: block+; LENIENT: block* — visibly-empty cells in real
    // Confluence tables are common and accepted by the renderer.
    (
        "tableCell",
        &[ContentTerm {
            atoms: TABLE_CELL_BLOCK_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // tableHeader — definitions/table_header_node (uses table_cell_content)
    // upstream: block+; LENIENT: block* — same reason as tableCell.
    (
        "tableHeader",
        &[ContentTerm {
            atoms: TABLE_CELL_BLOCK_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // tableRow — definitions/table_row_node
    // upstream: (tableCell | tableHeader)+
    (
        "tableRow",
        &[ContentTerm {
            atoms: &["tableCell", "tableHeader"],
            quant: Quantifier::OneOrMore,
        }],
    ),
    // taskItem — definitions/taskItem_node
    // upstream: inline*
    (
        "taskItem",
        &[ContentTerm {
            atoms: FULL_INLINE_ATOMS,
            quant: Quantifier::ZeroOrMore,
        }],
    ),
    // taskList — definitions/taskList_node
    // upstream: (taskItem | taskList | blockTaskItem)+
    (
        "taskList",
        &[ContentTerm {
            atoms: &["blockTaskItem", "taskItem", "taskList"],
            quant: Quantifier::OneOrMore,
        }],
    ),
];
866
/// Node types that act as forward-compat preservation wrappers.
///
/// The walker accepts these under any known parent, whether or not the
/// parent's allowed-children set lists them. Atlassian's renderer uses them
/// to wrap content the schema doesn't know how to validate; reporting them
/// as violations would be noisy and would break the round-trip guarantee of
/// ADR-0020's `adf-unsupported` fenced block.
const UNSUPPORTED_NODES: &[&str] = &["unsupportedBlock", "unsupportedInline"];

/// Returns `true` when `node_type` is one of the [`UNSUPPORTED_NODES`]
/// preservation wrappers. The comparison is exact and case-sensitive.
fn is_unsupported(node_type: &str) -> bool {
    UNSUPPORTED_NODES
        .iter()
        .any(|wrapper| *wrapper == node_type)
}
878
879static CONTENT_MODELS: LazyLock<HashMap<&'static str, &'static [ContentTerm]>> =
880    LazyLock::new(|| CONTENT_ENTRIES.iter().copied().collect());
881
882/// Per-parent flattened allowed-children atoms, computed once from
883/// [`CONTENT_ENTRIES`] and used by the back-compat [`allowed_children`] /
884/// [`permits_child`] helpers. Sorted and deduplicated within each entry.
885static ALLOWED_CHILDREN: LazyLock<HashMap<&'static str, Vec<&'static str>>> = LazyLock::new(|| {
886    CONTENT_ENTRIES
887        .iter()
888        .map(|(parent, terms)| {
889            let mut atoms: Vec<&'static str> =
890                terms.iter().flat_map(|t| t.atoms.iter().copied()).collect();
891            atoms.sort_unstable();
892            atoms.dedup();
893            (*parent, atoms)
894        })
895        .collect()
896});
897
898/// Returns the allowed direct children for a parent node type.
899///
900/// `None` means the node has no entry in the schema (either a leaf type or a
901/// type unknown to this snapshot). Unknown parents are treated permissively
902/// by [`permits_child`] and the walker.
903///
904/// The returned slice is the union of all atoms across the parent's content
905/// terms (sorted, deduplicated). Quantifier and order information is not
906/// surfaced through this helper — use [`content_model`] for that.
907#[must_use]
908pub fn allowed_children(parent: &str) -> Option<&'static [&'static str]> {
909    ALLOWED_CHILDREN.get(parent).map(Vec::as_slice)
910}
911
912/// Returns the full content model (sequence of quantified terms) for a parent
913/// node type, or `None` if the parent has no entry.
914#[must_use]
915pub fn content_model(parent: &str) -> Option<&'static [ContentTerm]> {
916    CONTENT_MODELS.get(parent).copied()
917}
918
919/// Returns `true` if `child` is permitted as a direct child of `parent`.
920///
921/// Returns `true` (permissive) when `parent` has no schema entry — see the
922/// module-level docs for rationale. Also returns `true` when `child` is
923/// `unsupportedBlock` or `unsupportedInline`, regardless of `parent`, because
924/// those are forward-compat preservation wrappers, not first-class content.
925#[must_use]
926pub fn permits_child(parent: &str, child: &str) -> bool {
927    if is_unsupported(child) {
928        return true;
929    }
930    match allowed_children(parent) {
931        Some(children) => children.contains(&child),
932        None => true,
933    }
934}
935
936/// Validates an entire ADF document and returns all violations found.
937///
938/// An empty `Vec` means the document is structurally valid against the
939/// snapshot. The walker is depth-first: violations under a child are reported
940/// after the child's own violations, so the overall ordering is "each parent's
941/// own checks, then descend into each child in turn." Arity violations on a
942/// parent appear at the position the parent is visited, before any of its
943/// descendants' violations.
944#[must_use]
945pub fn validate_document(doc: &AdfDocument) -> Vec<AdfSchemaViolation> {
946    let mut violations = Vec::new();
947    let mut path = Vec::new();
948    if let Some(model) = content_model(&doc.doc_type) {
949        walk_children(
950            &doc.content,
951            &doc.doc_type,
952            model,
953            &mut path,
954            &mut violations,
955        );
956    }
957    violations
958}
959
960/// Walks `children` against `model`, reporting `DisallowedChild` and `Arity`
961/// violations into `out`. Recurses into each child's subtree if its
962/// `node_type` has a schema entry.
963///
964/// `path` is the index path from the document root to the **parent** of
965/// `children` (i.e. the index of the current child is pushed/popped inside the
966/// loop). On entry, `path` identifies the parent; on exit it is unchanged.
967fn walk_children(
968    children: &[AdfNode],
969    parent_type: &str,
970    model: &[ContentTerm],
971    path: &mut Vec<usize>,
972    out: &mut Vec<AdfSchemaViolation>,
973) {
974    // Per-term match counts. Index aligned with `model`.
975    let mut term_counts: Vec<usize> = vec![0; model.len()];
976    // Index of the term we are currently consuming children into. Advances
977    // monotonically — children that don't match the current term try later
978    // terms, but we never go backwards (this matches ProseMirror's greedy
979    // sequence-matching semantics).
980    let mut current_term: usize = 0;
981
982    for (idx, child) in children.iter().enumerate() {
983        path.push(idx);
984
985        let child_type = child.node_type.as_str();
986
987        // Validate this child's attrs (per PR #733-attrs slice). Permissive
988        // on unknown node types; emits `MissingAttr` / `InvalidAttr` for
989        // declared fields. Always runs — independent of disallowed-child
990        // / arity bookkeeping.
991        crate::atlassian::adf_attr_schema::validate_attrs(
992            child_type,
993            child.attrs.as_ref(),
994            path,
995            out,
996        );
997
998        // Validate this child's marks (per PR #733-marks slice). The
999        // `parent_type` is the node enclosing `child` — it determines the
1000        // inline-mark allow-list when `child` is an inline node like text.
1001        // Permissive on unknown contexts.
1002        crate::atlassian::adf_mark_schema::validate_marks(parent_type, child, path, out);
1003
1004        if is_unsupported(child_type) {
1005            // Round-trip escape hatch: count toward the current term's arity
1006            // (so a panel containing only an `unsupportedBlock` still
1007            // satisfies panel's `+`). Never emits a DisallowedChild.
1008            if current_term < model.len() {
1009                term_counts[current_term] += 1;
1010            }
1011        } else {
1012            // Find a term (at or after current_term) whose atoms accept this
1013            // child. Greedy: first match wins; subsequent children continue
1014            // from the matched term.
1015            let mut matched: Option<usize> = None;
1016            let mut try_idx = current_term;
1017            while try_idx < model.len() {
1018                if model[try_idx].atoms.contains(&child_type) {
1019                    matched = Some(try_idx);
1020                    break;
1021                }
1022                try_idx += 1;
1023            }
1024
1025            match matched {
1026                Some(t) => {
1027                    term_counts[t] += 1;
1028                    current_term = t;
1029                }
1030                None => {
1031                    out.push(AdfSchemaViolation::DisallowedChild {
1032                        child_type: child_type.to_string(),
1033                        parent_type: parent_type.to_string(),
1034                        path: path.clone(),
1035                    });
1036                    // Don't count toward any term — see the doc on
1037                    // `Arity` for why disallowed children should not satisfy
1038                    // arity for the parent (the user clearly tried to put a
1039                    // child here, but it's the wrong type — the right thing
1040                    // is to flag both DisallowedChild and any missing Arity).
1041                }
1042            }
1043        }
1044
1045        // Recurse into the child's content if it has a known schema. Treat
1046        // a missing `content` field as an empty content array so that arity
1047        // checks still fire for empty containers (`AdfNode::content: None`
1048        // is how the converter encodes "no children").
1049        if let Some(grand_model) = content_model(child_type) {
1050            let grand = child.content.as_deref().unwrap_or(&[]);
1051            walk_children(grand, child_type, grand_model, path, out);
1052        }
1053
1054        path.pop();
1055    }
1056
1057    // After consuming children, emit one arity violation per term whose count
1058    // doesn't satisfy its quantifier. Path here points at the parent (one
1059    // level up from the children we just walked).
1060    for (i, term) in model.iter().enumerate() {
1061        let count = term_counts[i];
1062        if !term.quant.satisfied_by(count) {
1063            out.push(AdfSchemaViolation::Arity {
1064                parent_type: parent_type.to_string(),
1065                atoms: term.atoms.to_vec(),
1066                expected: term.quant.clone(),
1067                actual: count,
1068                path: path.clone(),
1069            });
1070        }
1071    }
1072}
1073
1074#[cfg(test)]
1075#[allow(clippy::unwrap_used, clippy::expect_used)]
1076mod tests {
1077    use super::*;
1078    use crate::atlassian::adf::{AdfDocument, AdfNode};
1079
1080    fn node(node_type: &str, content: Vec<AdfNode>) -> AdfNode {
1081        AdfNode {
1082            node_type: node_type.to_string(),
1083            attrs: None,
1084            content: if content.is_empty() {
1085                None
1086            } else {
1087                Some(content)
1088            },
1089            text: None,
1090            marks: None,
1091            local_id: None,
1092            parameters: None,
1093        }
1094    }
1095
1096    fn leaf(node_type: &str) -> AdfNode {
1097        node(node_type, vec![])
1098    }
1099
1100    fn with_attrs(mut n: AdfNode, attrs: serde_json::Value) -> AdfNode {
1101        n.attrs = Some(attrs);
1102        n
1103    }
1104
1105    /// `panel` with a valid `panelType` so attribute validation does not
1106    /// add noise to tests focused on content-model behaviour.
1107    fn panel(content: Vec<AdfNode>) -> AdfNode {
1108        with_attrs(
1109            node("panel", content),
1110            serde_json::json!({"panelType": "info"}),
1111        )
1112    }
1113
1114    /// `media` with a valid `type`.
1115    fn media() -> AdfNode {
1116        with_attrs(
1117            leaf("media"),
1118            serde_json::json!({"type": "file", "id": "x"}),
1119        )
1120    }
1121
1122    /// `layoutColumn` with a valid `width`.
1123    fn layout_column(content: Vec<AdfNode>) -> AdfNode {
1124        with_attrs(
1125            node("layoutColumn", content),
1126            serde_json::json!({"width": 33.3}),
1127        )
1128    }
1129
1130    fn doc(content: Vec<AdfNode>) -> AdfDocument {
1131        AdfDocument {
1132            version: 1,
1133            doc_type: "doc".to_string(),
1134            content,
1135        }
1136    }
1137
1138    fn unwrap_disallowed(v: &AdfSchemaViolation) -> (&str, &str, &[usize]) {
1139        match v {
1140            AdfSchemaViolation::DisallowedChild {
1141                child_type,
1142                parent_type,
1143                path,
1144            } => (child_type.as_str(), parent_type.as_str(), path.as_slice()),
1145            other => panic!("expected DisallowedChild, got {other:?}"),
1146        }
1147    }
1148
1149    fn unwrap_arity(
1150        v: &AdfSchemaViolation,
1151    ) -> (&str, &[&'static str], &Quantifier, usize, &[usize]) {
1152        match v {
1153            AdfSchemaViolation::Arity {
1154                parent_type,
1155                atoms,
1156                expected,
1157                actual,
1158                path,
1159            } => (
1160                parent_type.as_str(),
1161                atoms.as_slice(),
1162                expected,
1163                *actual,
1164                path.as_slice(),
1165            ),
1166            other => panic!("expected Arity, got {other:?}"),
1167        }
1168    }
1169
1170    #[test]
1171    fn schema_has_entry_for_every_advertised_container() {
1172        let known_leaves = [
1173            "blockCard",
1174            "date",
1175            "embedCard",
1176            "emoji",
1177            "extension",
1178            "hardBreak",
1179            "inlineCard",
1180            "inlineExtension",
1181            "media",
1182            "mediaInline",
1183            "mention",
1184            "placeholder",
1185            "rule",
1186            "status",
1187            "syncBlock",
1188            "text",
1189            "unsupportedBlock",
1190            "unsupportedInline",
1191        ];
1192        for (_parent, terms) in CONTENT_ENTRIES {
1193            for term in *terms {
1194                for child in term.atoms {
1195                    let known = CONTENT_MODELS.contains_key(child) || known_leaves.contains(child);
1196                    assert!(
1197                        known,
1198                        "child '{child}' has no schema entry and is not in the leaf list"
1199                    );
1200                }
1201            }
1202        }
1203    }
1204
1205    #[test]
1206    fn child_lists_are_sorted_for_diffability() {
1207        for (parent, terms) in CONTENT_ENTRIES {
1208            for term in *terms {
1209                let mut sorted = term.atoms.to_vec();
1210                sorted.sort_unstable();
1211                assert_eq!(
1212                    term.atoms.to_vec(),
1213                    sorted,
1214                    "atom list for '{parent}' is not sorted"
1215                );
1216            }
1217        }
1218    }
1219
1220    // ---- Issue #717 examples ---------------------------------------------
1221
1222    #[test]
1223    fn panel_allows_examples_from_issue_717() {
1224        for child in [
1225            "paragraph",
1226            "heading",
1227            "bulletList",
1228            "orderedList",
1229            "blockCard",
1230            "mediaGroup",
1231            "mediaSingle",
1232            "codeBlock",
1233            "taskList",
1234            "rule",
1235            "decisionList",
1236            "unsupportedBlock",
1237            "extension",
1238        ] {
1239            assert!(
1240                permits_child("panel", child),
1241                "panel should permit '{child}'"
1242            );
1243        }
1244    }
1245
    /// `permits_child` must reject both `expand` and `nestedExpand` as
    /// direct children of `panel`.
    #[test]
    fn panel_rejects_expand_and_nested_expand() {
        assert!(!permits_child("panel", "expand"));
        assert!(!permits_child("panel", "nestedExpand"));
    }
1251
    /// `expand` accepts `panel`, `table` and `nestedExpand` as direct
    /// children, but not another `expand`.
    #[test]
    fn expand_allows_nested_block_types_and_nested_expand_but_not_self() {
        assert!(permits_child("expand", "panel"));
        assert!(permits_child("expand", "table"));
        assert!(permits_child("expand", "nestedExpand"));
        // Self-nesting is the one case that must be refused.
        assert!(!permits_child("expand", "expand"));
    }
1259
    /// Inside a `tableCell`, `nestedExpand` is permitted while `expand` is
    /// not.
    #[test]
    fn table_cell_allows_nested_expand_but_not_expand() {
        assert!(permits_child("tableCell", "nestedExpand"));
        assert!(!permits_child("tableCell", "expand"));
    }
1265
1266    #[test]
1267    fn blockquote_allowed_children_match_upstream_json_schema() {
1268        let expected = [
1269            "bulletList",
1270            "codeBlock",
1271            "extension",
1272            "mediaGroup",
1273            "mediaSingle",
1274            "orderedList",
1275            "paragraph",
1276        ];
1277        let got: Vec<&str> = allowed_children("blockquote")
1278            .expect("blockquote has an entry")
1279            .to_vec();
1280        assert_eq!(got, expected);
1281    }
1282
1283    // ---- Permissiveness invariants ---------------------------------------
1284
    /// A parent type without a schema entry permits any child.
    #[test]
    fn unknown_parent_is_permissive() {
        assert!(permits_child("madeUpNode", "anything"));
        assert!(permits_child("madeUpNode", "alsoFake"));
    }
1290
    /// Permissiveness applies to unknown *parents* only: an unknown child
    /// type under a known parent (`paragraph`) is rejected.
    #[test]
    fn unknown_child_inside_known_parent_is_a_violation() {
        assert!(!permits_child("paragraph", "madeUpInline"));
    }
1295
    /// `nestedExpand` has its own child set: it accepts `panel` and
    /// `blockquote`, and rejects `table`, the card nodes, itself and
    /// `expand`.
    #[test]
    fn nested_expand_distinguished_from_expand() {
        // Accepted.
        assert!(permits_child("nestedExpand", "panel"));
        assert!(permits_child("nestedExpand", "blockquote"));
        // Rejected.
        assert!(!permits_child("nestedExpand", "table"));
        assert!(!permits_child("nestedExpand", "blockCard"));
        assert!(!permits_child("nestedExpand", "embedCard"));
        assert!(!permits_child("nestedExpand", "nestedExpand"));
        assert!(!permits_child("nestedExpand", "expand"));
    }
1306
1307    // ---- Walker behaviour: existing v1 cases -----------------------------
1308
1309    #[test]
1310    fn validate_succeeds_on_known_good_doc() {
1311        let document = doc(vec![
1312            AdfNode::paragraph(vec![AdfNode::text("hello")]),
1313            AdfNode::heading(2, vec![AdfNode::text("world")]),
1314        ]);
1315        assert_eq!(validate_document(&document), vec![]);
1316    }
1317
1318    #[test]
1319    fn validate_finds_expand_inside_panel() {
1320        // panel with [expand]: emits DisallowedChild for the expand AND an
1321        // Arity violation for the panel (panel needs 1+ valid children;
1322        // disallowed children do not satisfy arity).
1323        let bad_panel = panel(vec![with_attrs(
1324            node("expand", vec![AdfNode::paragraph(vec![])]),
1325            serde_json::json!({"title": "x"}),
1326        )]);
1327        let document = doc(vec![bad_panel]);
1328
1329        let violations = validate_document(&document);
1330        let disallowed: Vec<_> = violations
1331            .iter()
1332            .filter(|v| matches!(v, AdfSchemaViolation::DisallowedChild { .. }))
1333            .collect();
1334        let arity: Vec<_> = violations
1335            .iter()
1336            .filter(|v| matches!(v, AdfSchemaViolation::Arity { .. }))
1337            .collect();
1338
1339        assert_eq!(disallowed.len(), 1, "got: {violations:?}");
1340        let (child, parent, path) = unwrap_disallowed(disallowed[0]);
1341        assert_eq!(child, "expand");
1342        assert_eq!(parent, "panel");
1343        assert_eq!(path, [0, 0]);
1344
1345        assert_eq!(arity.len(), 1, "got: {violations:?}");
1346        let (parent, _, _, actual, path) = unwrap_arity(arity[0]);
1347        assert_eq!(parent, "panel");
1348        assert_eq!(actual, 0);
1349        assert_eq!(path, [0]);
1350    }
1351
1352    #[test]
1353    fn validate_finds_expand_inside_table_cell() {
1354        let bad_cell = node(
1355            "tableCell",
1356            vec![with_attrs(
1357                node("expand", vec![AdfNode::paragraph(vec![])]),
1358                serde_json::json!({"title": "x"}),
1359            )],
1360        );
1361        let row = node("tableRow", vec![bad_cell]);
1362        let table = node("table", vec![row]);
1363        let document = doc(vec![table]);
1364
1365        let violations = validate_document(&document);
1366        let disallowed: Vec<_> = violations
1367            .iter()
1368            .filter(|v| matches!(v, AdfSchemaViolation::DisallowedChild { .. }))
1369            .collect();
1370        assert_eq!(disallowed.len(), 1, "got: {violations:?}");
1371        let (child, parent, path) = unwrap_disallowed(disallowed[0]);
1372        assert_eq!(child, "expand");
1373        assert_eq!(parent, "tableCell");
1374        assert_eq!(path, [0, 0, 0, 0]);
1375    }
1376
1377    #[test]
1378    fn validate_walks_into_nested_violations_in_document_order() {
1379        let document = doc(vec![
1380            AdfNode::paragraph(vec![leaf("rule")]),
1381            panel(vec![with_attrs(
1382                node("expand", vec![AdfNode::paragraph(vec![])]),
1383                serde_json::json!({"title": "x"}),
1384            )]),
1385        ]);
1386
1387        let violations = validate_document(&document);
1388        // First violation: rule inside paragraph (DisallowedChild).
1389        // Then: panel's DisallowedChild for expand, panel's Arity (0 valid).
1390        // (Inline-content of paragraph #0 has no further descent because rule
1391        // is a leaf.)
1392        let first = violations.first().expect("at least one");
1393        let (child, parent, _) = unwrap_disallowed(first);
1394        assert_eq!(child, "rule");
1395        assert_eq!(parent, "paragraph");
1396    }
1397
1398    #[test]
1399    fn validate_is_permissive_under_unknown_parents() {
1400        let document = doc(vec![node("futureBlock", vec![node("expand", vec![])])]);
1401        let violations = validate_document(&document);
1402        // futureBlock is not in `doc`'s allowed atoms → DisallowedChild.
1403        // Since `doc` is `*` (lenient), no Arity violation.
1404        // futureBlock's subtree is not walked (unknown parent).
1405        assert_eq!(violations.len(), 1);
1406        let (child, parent, _) = unwrap_disallowed(&violations[0]);
1407        assert_eq!(child, "futureBlock");
1408        assert_eq!(parent, "doc");
1409    }
1410
1411    #[test]
1412    fn unsupported_block_is_universally_accepted_via_walker_escape_hatch() {
1413        for parent in [
1414            "doc",
1415            "panel",
1416            "expand",
1417            "tableCell",
1418            "blockquote",
1419            "listItem",
1420        ] {
1421            assert!(
1422                permits_child(parent, "unsupportedBlock"),
1423                "{parent} should permit unsupportedBlock via the escape hatch"
1424            );
1425            assert!(
1426                !allowed_children(parent).is_some_and(|c| c.contains(&"unsupportedBlock")),
1427                "{parent}'s allowed-children list must not list unsupportedBlock — \
1428                 acceptance comes from the walker escape hatch only"
1429            );
1430        }
1431    }
1432
1433    #[test]
1434    fn unsupported_inline_is_universally_accepted_via_walker_escape_hatch() {
1435        for parent in [
1436            "paragraph",
1437            "heading",
1438            "taskItem",
1439            "decisionItem",
1440            "caption",
1441        ] {
1442            assert!(
1443                permits_child(parent, "unsupportedInline"),
1444                "{parent} should permit unsupportedInline via the escape hatch"
1445            );
1446            assert!(
1447                !allowed_children(parent).is_some_and(|c| c.contains(&"unsupportedInline")),
1448                "{parent}'s allowed-children list must not list unsupportedInline"
1449            );
1450        }
1451    }
1452
1453    #[test]
1454    fn validate_returns_empty_when_doc_type_is_unknown() {
1455        let document = AdfDocument {
1456            version: 1,
1457            doc_type: "futureRoot".to_string(),
1458            content: vec![node("expand", vec![])],
1459        };
1460        assert_eq!(validate_document(&document), vec![]);
1461    }
1462
1463    #[test]
1464    fn walker_does_not_flag_unsupported_block_inside_panel() {
1465        // Panel contains only an unsupportedBlock: counts toward panel's
1466        // arity (so no Arity violation), and the wrapper is universally
1467        // accepted. Should validate cleanly.
1468        let document = doc(vec![panel(vec![leaf("unsupportedBlock")])]);
1469        assert_eq!(validate_document(&document), vec![]);
1470    }
1471
1472    // ---- Walker behaviour: arity (PR #733) -------------------------------
1473
1474    #[test]
1475    fn empty_bullet_list_flagged_as_arity_violation() {
1476        let document = doc(vec![node("bulletList", vec![])]);
1477        let violations = validate_document(&document);
1478        assert_eq!(violations.len(), 1, "got: {violations:?}");
1479        let (parent, atoms, expected, actual, path) = unwrap_arity(&violations[0]);
1480        assert_eq!(parent, "bulletList");
1481        assert_eq!(atoms, &["listItem"]);
1482        assert_eq!(expected, &Quantifier::OneOrMore);
1483        assert_eq!(actual, 0);
1484        assert_eq!(path, [0]);
1485    }
1486
1487    #[test]
1488    fn media_single_with_two_media_flagged_as_arity_violation() {
1489        // mediaSingle requires exactly one media; two media → Arity (too many).
1490        let media_single = node("mediaSingle", vec![media(), media()]);
1491        let document = doc(vec![media_single]);
1492        let violations = validate_document(&document);
1493
1494        let arity: Vec<_> = violations
1495            .iter()
1496            .filter(|v| matches!(v, AdfSchemaViolation::Arity { .. }))
1497            .collect();
1498        assert_eq!(arity.len(), 1, "got: {violations:?}");
1499        let (parent, atoms, expected, actual, _) = unwrap_arity(arity[0]);
1500        assert_eq!(parent, "mediaSingle");
1501        assert_eq!(atoms, &["media"]);
1502        assert_eq!(expected, &Quantifier::Exactly(1));
1503        assert_eq!(actual, 2);
1504    }
1505
1506    #[test]
1507    fn media_single_with_only_caption_flagged_missing_media() {
1508        // mediaSingle: media (caption)? — with [caption] alone, media is
1509        // missing AND caption is out-of-position. We currently emit only the
1510        // missing-media Arity (caption matches term 1 successfully).
1511        let document = doc(vec![node("mediaSingle", vec![leaf("caption")])]);
1512        let violations = validate_document(&document);
1513        let arity: Vec<_> = violations
1514            .iter()
1515            .filter(|v| matches!(v, AdfSchemaViolation::Arity { .. }))
1516            .collect();
1517        assert_eq!(arity.len(), 1, "got: {violations:?}");
1518        let (parent, atoms, expected, actual, _) = unwrap_arity(arity[0]);
1519        assert_eq!(parent, "mediaSingle");
1520        assert_eq!(atoms, &["media"]);
1521        assert_eq!(expected, &Quantifier::Exactly(1));
1522        assert_eq!(actual, 0);
1523    }
1524
1525    #[test]
1526    fn media_single_with_media_then_caption_validates() {
1527        let document = doc(vec![node(
1528            "mediaSingle",
1529            vec![media(), node("caption", vec![AdfNode::text("c")])],
1530        )]);
1531        assert_eq!(validate_document(&document), vec![]);
1532    }
1533
1534    #[test]
1535    fn media_single_with_just_one_media_validates() {
1536        let document = doc(vec![node("mediaSingle", vec![media()])]);
1537        assert_eq!(validate_document(&document), vec![]);
1538    }
1539
1540    #[test]
1541    fn empty_table_row_flagged_arity() {
1542        let document = doc(vec![node("table", vec![node("tableRow", vec![])])]);
1543        let violations = validate_document(&document);
1544        let arity: Vec<_> = violations
1545            .iter()
1546            .filter(|v| matches!(v, AdfSchemaViolation::Arity { .. }))
1547            .collect();
1548        assert_eq!(arity.len(), 1, "got: {violations:?}");
1549        let (parent, atoms, expected, actual, _) = unwrap_arity(arity[0]);
1550        assert_eq!(parent, "tableRow");
1551        assert_eq!(atoms, &["tableCell", "tableHeader"]);
1552        assert_eq!(expected, &Quantifier::OneOrMore);
1553        assert_eq!(actual, 0);
1554    }
1555
1556    #[test]
1557    fn empty_media_group_flagged_arity() {
1558        let document = doc(vec![node("mediaGroup", vec![])]);
1559        let violations = validate_document(&document);
1560        assert_eq!(violations.len(), 1);
1561        let (parent, atoms, expected, actual, _) = unwrap_arity(&violations[0]);
1562        assert_eq!(parent, "mediaGroup");
1563        assert_eq!(atoms, &["media"]);
1564        assert_eq!(expected, &Quantifier::OneOrMore);
1565        assert_eq!(actual, 0);
1566    }
1567
1568    #[test]
1569    fn layout_section_with_one_column_flagged_arity_range() {
1570        let document = doc(vec![node(
1571            "layoutSection",
1572            vec![node(
1573                "layoutColumn",
1574                vec![AdfNode::paragraph(vec![AdfNode::text("a")])],
1575            )],
1576        )]);
1577        let violations = validate_document(&document);
1578        let arity: Vec<_> = violations
1579            .iter()
1580            .filter(|v| matches!(v, AdfSchemaViolation::Arity { .. }))
1581            .collect();
1582        assert_eq!(arity.len(), 1, "got: {violations:?}");
1583        let (parent, atoms, expected, actual, _) = unwrap_arity(arity[0]);
1584        assert_eq!(parent, "layoutSection");
1585        assert_eq!(atoms, &["layoutColumn"]);
1586        assert_eq!(expected, &Quantifier::Range(2, 3));
1587        assert_eq!(actual, 1);
1588    }
1589
1590    #[test]
1591    fn layout_section_with_three_columns_validates() {
1592        let column = || layout_column(vec![AdfNode::paragraph(vec![AdfNode::text("x")])]);
1593        let document = doc(vec![node(
1594            "layoutSection",
1595            vec![column(), column(), column()],
1596        )]);
1597        assert_eq!(validate_document(&document), vec![]);
1598    }
1599
1600    #[test]
1601    fn layout_section_with_four_columns_flagged_too_many() {
1602        let column = || layout_column(vec![AdfNode::paragraph(vec![AdfNode::text("x")])]);
1603        let document = doc(vec![node(
1604            "layoutSection",
1605            vec![column(), column(), column(), column()],
1606        )]);
1607        let violations = validate_document(&document);
1608        let arity: Vec<_> = violations
1609            .iter()
1610            .filter(|v| matches!(v, AdfSchemaViolation::Arity { .. }))
1611            .collect();
1612        assert_eq!(arity.len(), 1, "got: {violations:?}");
1613        let (_, _, expected, actual, _) = unwrap_arity(arity[0]);
1614        assert_eq!(expected, &Quantifier::Range(2, 3));
1615        assert_eq!(actual, 4);
1616    }
1617
1618    #[test]
1619    fn empty_paragraph_validates_under_lenient_inline_star() {
1620        let document = doc(vec![AdfNode::paragraph(vec![])]);
1621        assert_eq!(validate_document(&document), vec![]);
1622    }
1623
1624    #[test]
1625    fn empty_doc_validates_under_lenient_block_star() {
1626        let document = doc(vec![]);
1627        assert_eq!(validate_document(&document), vec![]);
1628    }
1629
1630    #[test]
1631    fn empty_table_cell_validates_under_lenient_block_star() {
1632        let document = doc(vec![node(
1633            "table",
1634            vec![node("tableRow", vec![node("tableCell", vec![])])],
1635        )]);
1636        assert_eq!(validate_document(&document), vec![]);
1637    }
1638
1639    #[test]
1640    fn empty_panel_flagged_arity() {
1641        let document = doc(vec![panel(vec![])]);
1642        let violations = validate_document(&document);
1643        assert_eq!(violations.len(), 1, "got: {violations:?}");
1644        let (parent, _, expected, actual, _) = unwrap_arity(&violations[0]);
1645        assert_eq!(parent, "panel");
1646        assert_eq!(expected, &Quantifier::OneOrMore);
1647        assert_eq!(actual, 0);
1648    }
1649
1650    #[test]
1651    fn unsupported_block_satisfies_parent_arity() {
1652        // panel + with [unsupportedBlock] → no violation (round-trip
1653        // preservation: the wrapper counts toward panel's arity).
1654        let document = doc(vec![panel(vec![leaf("unsupportedBlock")])]);
1655        assert_eq!(validate_document(&document), vec![]);
1656    }
1657
1658    #[test]
1659    fn unsupported_inline_satisfies_inline_parent_arity() {
1660        // taskItem is `inline*` (lenient), so this is trivially OK; the
1661        // assertion is that we don't reject the unsupportedInline. Both
1662        // taskList and taskItem need a localId; taskItem also needs a
1663        // state.
1664        let task_item = with_attrs(
1665            node("taskItem", vec![leaf("unsupportedInline")]),
1666            serde_json::json!({"localId": "ti1", "state": "TODO"}),
1667        );
1668        let task_list = with_attrs(
1669            node("taskList", vec![task_item]),
1670            serde_json::json!({"localId": "tl1"}),
1671        );
1672        let document = doc(vec![task_list]);
1673        assert_eq!(validate_document(&document), vec![]);
1674    }
1675
1676    // ---- Display formatting ----------------------------------------------
1677
1678    #[test]
1679    fn display_format_for_disallowed_child_is_back_compat() {
1680        let v = AdfSchemaViolation::DisallowedChild {
1681            child_type: "expand".into(),
1682            parent_type: "panel".into(),
1683            path: vec![0, 1, 0],
1684        };
1685        assert_eq!(
1686            v.to_string(),
1687            "ADF schema violation at /0/1/0: 'expand' is not permitted inside 'panel'"
1688        );
1689    }
1690
1691    #[test]
1692    fn display_format_for_arity_one_or_more() {
1693        let v = AdfSchemaViolation::Arity {
1694            parent_type: "bulletList".into(),
1695            atoms: vec!["listItem"],
1696            expected: Quantifier::OneOrMore,
1697            actual: 0,
1698            path: vec![1],
1699        };
1700        assert_eq!(
1701            v.to_string(),
1702            "ADF schema violation at /1: 'bulletList' must contain at least one 'listItem' (found 0)"
1703        );
1704    }
1705
1706    #[test]
1707    fn display_format_for_arity_exactly_one() {
1708        let v = AdfSchemaViolation::Arity {
1709            parent_type: "mediaSingle".into(),
1710            atoms: vec!["media"],
1711            expected: Quantifier::Exactly(1),
1712            actual: 2,
1713            path: vec![0],
1714        };
1715        assert_eq!(
1716            v.to_string(),
1717            "ADF schema violation at /0: 'mediaSingle' must contain exactly one 'media' (found 2)"
1718        );
1719    }
1720
1721    #[test]
1722    fn display_format_for_arity_range() {
1723        let v = AdfSchemaViolation::Arity {
1724            parent_type: "layoutSection".into(),
1725            atoms: vec!["layoutColumn"],
1726            expected: Quantifier::Range(2, 3),
1727            actual: 1,
1728            path: vec![0],
1729        };
1730        assert_eq!(
1731            v.to_string(),
1732            "ADF schema violation at /0: 'layoutSection' must contain between 2 and 3 'layoutColumn' (found 1)"
1733        );
1734    }
1735
1736    #[test]
1737    fn display_format_for_arity_alternation() {
1738        let v = AdfSchemaViolation::Arity {
1739            parent_type: "tableRow".into(),
1740            atoms: vec!["tableCell", "tableHeader"],
1741            expected: Quantifier::OneOrMore,
1742            actual: 0,
1743            path: vec![0, 0],
1744        };
1745        assert_eq!(
1746            v.to_string(),
1747            "ADF schema violation at /0/0: 'tableRow' must contain at least one {'tableCell', 'tableHeader'} (found 0)"
1748        );
1749    }
1750
1751    #[test]
1752    fn display_format_for_missing_attr() {
1753        let v = AdfSchemaViolation::MissingAttr {
1754            node_type: "panel".into(),
1755            attr_name: "panelType".into(),
1756            path: vec![0],
1757        };
1758        assert_eq!(
1759            v.to_string(),
1760            "ADF schema violation at /0: 'panel' is missing required attribute 'panelType'"
1761        );
1762    }
1763
1764    #[test]
1765    fn display_format_for_invalid_attr() {
1766        let v = AdfSchemaViolation::InvalidAttr {
1767            node_type: "heading".into(),
1768            attr_name: "level".into(),
1769            problem: crate::atlassian::adf_attr_schema::AttrProblem::OutOfRange {
1770                lo: 1,
1771                hi: 6,
1772                actual: 7,
1773            },
1774            path: vec![0],
1775        };
1776        let s = v.to_string();
1777        assert!(s.contains("'heading.level'"), "got: {s}");
1778        assert!(s.contains("invalid"), "got: {s}");
1779        assert!(s.contains("[1, 6]"), "got: {s}");
1780    }
1781
1782    #[test]
1783    fn display_format_for_disallowed_mark() {
1784        let v = AdfSchemaViolation::DisallowedMark {
1785            mark_type: "code".into(),
1786            parent_type: "heading".into(),
1787            inline_index: Some(0),
1788            path: vec![0, 1],
1789        };
1790        assert_eq!(
1791            v.to_string(),
1792            "ADF schema violation at /0/1: 'code' mark is not permitted on 'heading'"
1793        );
1794    }
1795
1796    #[test]
1797    fn display_format_for_invalid_mark_attr() {
1798        let v = AdfSchemaViolation::InvalidMarkAttr {
1799            mark_type: "link".into(),
1800            attr_name: "href".into(),
1801            problem: crate::atlassian::adf_attr_schema::AttrProblem::BadFormat {
1802                reason: "not a valid URL",
1803            },
1804            inline_index: Some(0),
1805            path: vec![0, 1],
1806        };
1807        let s = v.to_string();
1808        assert!(s.contains("'link' mark"), "got: {s}");
1809        assert!(s.contains("'href'"), "got: {s}");
1810        assert!(s.contains("not a valid URL"), "got: {s}");
1811    }
1812
1813    // ---- Quantifier behaviour --------------------------------------------
1814
1815    #[test]
1816    fn quantifier_satisfied_by() {
1817        assert!(Quantifier::ZeroOrOne.satisfied_by(0));
1818        assert!(Quantifier::ZeroOrOne.satisfied_by(1));
1819        assert!(!Quantifier::ZeroOrOne.satisfied_by(2));
1820
1821        assert!(Quantifier::ZeroOrMore.satisfied_by(0));
1822        assert!(Quantifier::ZeroOrMore.satisfied_by(99));
1823
1824        assert!(!Quantifier::OneOrMore.satisfied_by(0));
1825        assert!(Quantifier::OneOrMore.satisfied_by(1));
1826
1827        assert!(!Quantifier::Exactly(2).satisfied_by(1));
1828        assert!(Quantifier::Exactly(2).satisfied_by(2));
1829        assert!(!Quantifier::Exactly(2).satisfied_by(3));
1830
1831        assert!(!Quantifier::Range(2, 3).satisfied_by(1));
1832        assert!(Quantifier::Range(2, 3).satisfied_by(2));
1833        assert!(Quantifier::Range(2, 3).satisfied_by(3));
1834        assert!(!Quantifier::Range(2, 3).satisfied_by(4));
1835    }
1836
1837    // ── Quantifier::phrasing arm coverage ─────────────────────────────
1838    //
    // Each variant has its own phrasing fragment used in
    // `AdfSchemaViolation::Arity`'s Display. The fixture-driven Display
    // tests above only exercise OneOrMore, Exactly(1), and Range; cover
    // the remaining arms (ZeroOrOne, ZeroOrMore, Exactly(n>1)) here so
    // any future rewording of the Display output is caught.
1844
1845    #[test]
1846    fn display_format_for_arity_zero_or_one() {
1847        let v = AdfSchemaViolation::Arity {
1848            parent_type: "mediaSingle".into(),
1849            atoms: vec!["caption"],
1850            expected: Quantifier::ZeroOrOne,
1851            actual: 2,
1852            path: vec![0],
1853        };
1854        assert_eq!(
1855            v.to_string(),
1856            "ADF schema violation at /0: 'mediaSingle' must contain at most one 'caption' (found 2)"
1857        );
1858    }
1859
1860    #[test]
1861    fn display_format_for_arity_zero_or_more() {
1862        // ZeroOrMore is never violated (any count is OK), so the Arity
1863        // variant with ZeroOrMore is not produced by the walker. Construct
1864        // directly to exercise the Display arm.
1865        let v = AdfSchemaViolation::Arity {
1866            parent_type: "paragraph".into(),
1867            atoms: vec!["text"],
1868            expected: Quantifier::ZeroOrMore,
1869            actual: 0,
1870            path: vec![0],
1871        };
1872        assert_eq!(
1873            v.to_string(),
1874            "ADF schema violation at /0: 'paragraph' must contain any number of 'text' (found 0)"
1875        );
1876    }
1877
1878    #[test]
1879    fn display_format_for_arity_exactly_n_greater_than_one() {
1880        let v = AdfSchemaViolation::Arity {
1881            parent_type: "futureNode".into(),
1882            atoms: vec!["child"],
1883            expected: Quantifier::Exactly(3),
1884            actual: 2,
1885            path: vec![0],
1886        };
1887        assert_eq!(
1888            v.to_string(),
1889            "ADF schema violation at /0: 'futureNode' must contain exactly 3 'child' (found 2)"
1890        );
1891    }
1892
1893    // ── path() accessor: every variant returns its path ─────────────────
1894    //
1895    // The match in `path()` uses an or-pattern `A | B | C => path`, so
1896    // each arm needs to be exercised separately to count as covered.
1897
1898    #[test]
1899    fn path_accessor_returns_path_for_each_variant() {
1900        let v1 = AdfSchemaViolation::DisallowedChild {
1901            child_type: "x".into(),
1902            parent_type: "y".into(),
1903            path: vec![1],
1904        };
1905        assert_eq!(v1.path(), &[1]);
1906
1907        let v2 = AdfSchemaViolation::Arity {
1908            parent_type: "y".into(),
1909            atoms: vec!["x"],
1910            expected: Quantifier::OneOrMore,
1911            actual: 0,
1912            path: vec![2],
1913        };
1914        assert_eq!(v2.path(), &[2]);
1915
1916        let v3 = AdfSchemaViolation::MissingAttr {
1917            node_type: "y".into(),
1918            attr_name: "a".into(),
1919            path: vec![3],
1920        };
1921        assert_eq!(v3.path(), &[3]);
1922
1923        let v4 = AdfSchemaViolation::InvalidAttr {
1924            node_type: "y".into(),
1925            attr_name: "a".into(),
1926            problem: crate::atlassian::adf_attr_schema::AttrProblem::WrongType {
1927                expected: "string",
1928            },
1929            path: vec![4],
1930        };
1931        assert_eq!(v4.path(), &[4]);
1932
1933        let v5 = AdfSchemaViolation::DisallowedMark {
1934            mark_type: "code".into(),
1935            parent_type: "heading".into(),
1936            inline_index: Some(0),
1937            path: vec![5],
1938        };
1939        assert_eq!(v5.path(), &[5]);
1940
1941        let v6 = AdfSchemaViolation::InvalidMarkAttr {
1942            mark_type: "link".into(),
1943            attr_name: "href".into(),
1944            problem: crate::atlassian::adf_attr_schema::AttrProblem::BadFormat {
1945                reason: "not a valid URL",
1946            },
1947            inline_index: Some(0),
1948            path: vec![6],
1949        };
1950        assert_eq!(v6.path(), &[6]);
1951    }
1952
    /// Allowlist entry: `(parent, upstream_extra_atoms, local_extra_atoms,
    /// justification)`. See [`LENIENCY_ALLOWLIST`] below.
    ///
    /// Field by field, as consumed by `diff_atom_sets`:
    ///
    /// 0. the parent node type the entry applies to (matched by equality);
    /// 1. atoms tolerated when upstream lists them but the local table does not;
    /// 2. atoms tolerated when the local table lists them but upstream does not;
    /// 3. a free-text justification, which `diff_atom_sets` does not read.
    type LenientEntry = (
        // parent node type
        &'static str,
        // tolerated upstream-only atoms
        &'static [&'static str],
        // tolerated local-only atoms
        &'static [&'static str],
        // justification
        &'static str,
    );
1961
    /// Result of comparing the local and upstream atom maps.
    ///
    /// Produced by `diff_atom_sets`. The comparison found no drift exactly
    /// when all three lists are empty (see `is_clean`).
    #[derive(Debug, Default)]
    struct SchemaAtomDiff {
        /// Parents in `CONTENT_ENTRIES` but not in `UPSTREAM_ENTRIES`.
        local_only_parents: Vec<&'static str>,
        /// Parents in `UPSTREAM_ENTRIES` but not in `CONTENT_ENTRIES`.
        upstream_only_parents: Vec<&'static str>,
        /// Human-readable per-parent atom-set mismatches that are not
        /// covered by the leniency allowlist. Each entry has the form
        /// `"<parent>: upstream_only=[..], local_only=[..]"`.
        per_parent_unexpected: Vec<String>,
    }
1973
1974    impl SchemaAtomDiff {
1975        fn is_clean(&self) -> bool {
1976            self.local_only_parents.is_empty()
1977                && self.upstream_only_parents.is_empty()
1978                && self.per_parent_unexpected.is_empty()
1979        }
1980    }
1981
1982    /// Pure helper: diff two `BTreeMap<&str, BTreeSet<&str>>` views of the
1983    /// schema, accounting for an allowlist of intentional leniencies.
1984    ///
1985    /// Extracted from `generated_upstream_atoms_match_local_snapshot` so the
1986    /// failure-detection branches can be exercised by tests with synthetic
1987    /// inputs (the production maps are intentionally in sync).
1988    fn diff_atom_sets(
1989        local: &std::collections::BTreeMap<&'static str, std::collections::BTreeSet<&'static str>>,
1990        upstream: &std::collections::BTreeMap<
1991            &'static str,
1992            std::collections::BTreeSet<&'static str>,
1993        >,
1994        leniency: &[LenientEntry],
1995    ) -> SchemaAtomDiff {
1996        let local_parents: std::collections::BTreeSet<&'static str> =
1997            local.keys().copied().collect();
1998        let upstream_parents: std::collections::BTreeSet<&'static str> =
1999            upstream.keys().copied().collect();
2000
2001        let mut diff = SchemaAtomDiff {
2002            local_only_parents: local_parents
2003                .difference(&upstream_parents)
2004                .copied()
2005                .collect(),
2006            upstream_only_parents: upstream_parents
2007                .difference(&local_parents)
2008                .copied()
2009                .collect(),
2010            per_parent_unexpected: Vec::new(),
2011        };
2012
2013        for parent in local_parents.intersection(&upstream_parents) {
2014            let l = &local[parent];
2015            let u = &upstream[parent];
2016
2017            let allowed_upstream_extra: std::collections::BTreeSet<&str> = leniency
2018                .iter()
2019                .filter(|(p, _, _, _)| p == parent)
2020                .flat_map(|(_, ue, _, _)| ue.iter().copied())
2021                .collect();
2022            let allowed_local_extra: std::collections::BTreeSet<&str> = leniency
2023                .iter()
2024                .filter(|(p, _, _, _)| p == parent)
2025                .flat_map(|(_, _, le, _)| le.iter().copied())
2026                .collect();
2027
2028            let upstream_extra: Vec<&str> = u
2029                .iter()
2030                .filter(|c| !l.contains(**c) && !allowed_upstream_extra.contains(**c))
2031                .copied()
2032                .collect();
2033            let local_extra: Vec<&str> = l
2034                .iter()
2035                .filter(|c| !u.contains(**c) && !allowed_local_extra.contains(**c))
2036                .copied()
2037                .collect();
2038
2039            if !upstream_extra.is_empty() || !local_extra.is_empty() {
2040                diff.per_parent_unexpected.push(format!(
2041                    "{parent}: upstream_only={upstream_extra:?}, local_only={local_extra:?}"
2042                ));
2043            }
2044        }
2045
2046        diff
2047    }
2048
    /// Intentional atom-set leniencies. Each entry is `(parent,
    /// upstream_extra_atoms, local_extra_atoms, justification)`. Keep
    /// synchronised with the "LENIENT" comments in [`CONTENT_ENTRIES`].
    ///
    /// All currently-documented leniencies are *quantifier-only* (e.g.
    /// `block+` → `block*`), so the atom sets remain identical and this
    /// table is empty. If a future leniency adds or drops atoms (e.g.
    /// narrowing `listItem` to forbid `taskList`), record it here.
    ///
    /// Passed to `diff_atom_sets` by
    /// `generated_upstream_atoms_match_local_snapshot`.
    const LENIENCY_ALLOWLIST: &[LenientEntry] = &[];
2058
2059    /// Build the upstream `BTreeMap` view from `generated::UPSTREAM_ENTRIES`.
2060    fn upstream_atom_map(
2061    ) -> std::collections::BTreeMap<&'static str, std::collections::BTreeSet<&'static str>> {
2062        generated::UPSTREAM_ENTRIES
2063            .iter()
2064            .map(|(p, children)| (*p, children.iter().copied().collect()))
2065            .collect()
2066    }
2067
2068    /// Issue #732 — the code-generated upstream-atom snapshot must agree with
2069    /// the hand-maintained [`CONTENT_ENTRIES`] table (modulo a small allowlist
2070    /// of intentional leniency deviations that are quantifier-only and
2071    /// therefore preserve atom-set equality).
2072    ///
2073    /// If this test fails, either:
2074    ///
2075    /// - Upstream `@atlaskit/adf-schema` shipped a content-model change.
2076    ///   Refresh `assets/adf-schema/full.json`, re-run
2077    ///   `cargo run --bin adf-schema-codegen`, update [`CONTENT_ENTRIES`] to
2078    ///   match, and bump [`SCHEMA_VERSION`] / [`UPSTREAM_TARBALL_SHA256`].
2079    /// - You edited [`CONTENT_ENTRIES`] in a way that desynchronises it from
2080    ///   the upstream atoms. Fix the entry, or document a new entry in
2081    ///   `LENIENCY_ALLOWLIST` if the deviation is intentional.
2082    #[test]
2083    fn generated_upstream_atoms_match_local_snapshot() {
2084        let local = local_schema_map();
2085        let upstream = upstream_atom_map();
2086        let diff = diff_atom_sets(&local, &upstream, LENIENCY_ALLOWLIST);
2087        assert!(
2088            diff.is_clean(),
2089            "atom-set drift between CONTENT_ENTRIES and generated::UPSTREAM_ENTRIES:\n\
2090             local_only_parents={:?}\n\
2091             upstream_only_parents={:?}\n\
2092             per_parent_unexpected={:?}",
2093            diff.local_only_parents,
2094            diff.upstream_only_parents,
2095            diff.per_parent_unexpected,
2096        );
2097    }
2098
2099    #[test]
2100    fn diff_atom_sets_reports_clean_when_maps_agree() {
2101        let mut m: std::collections::BTreeMap<
2102            &'static str,
2103            std::collections::BTreeSet<&'static str>,
2104        > = std::collections::BTreeMap::new();
2105        m.insert("panel", ["paragraph", "heading"].into_iter().collect());
2106        let diff = diff_atom_sets(&m, &m.clone(), &[]);
2107        assert!(diff.is_clean());
2108        assert!(diff.local_only_parents.is_empty());
2109        assert!(diff.upstream_only_parents.is_empty());
2110        assert!(diff.per_parent_unexpected.is_empty());
2111    }
2112
2113    #[test]
2114    fn diff_atom_sets_reports_local_only_parents() {
2115        let mut local: std::collections::BTreeMap<
2116            &'static str,
2117            std::collections::BTreeSet<&'static str>,
2118        > = std::collections::BTreeMap::new();
2119        local.insert("legacyNode", std::iter::once("paragraph").collect());
2120        let upstream: std::collections::BTreeMap<
2121            &'static str,
2122            std::collections::BTreeSet<&'static str>,
2123        > = std::collections::BTreeMap::new();
2124        let diff = diff_atom_sets(&local, &upstream, &[]);
2125        assert!(!diff.is_clean());
2126        assert_eq!(diff.local_only_parents, vec!["legacyNode"]);
2127        assert!(diff.upstream_only_parents.is_empty());
2128    }
2129
2130    #[test]
2131    fn diff_atom_sets_reports_upstream_only_parents() {
2132        let local: std::collections::BTreeMap<
2133            &'static str,
2134            std::collections::BTreeSet<&'static str>,
2135        > = std::collections::BTreeMap::new();
2136        let mut upstream: std::collections::BTreeMap<
2137            &'static str,
2138            std::collections::BTreeSet<&'static str>,
2139        > = std::collections::BTreeMap::new();
2140        upstream.insert("newNode", std::iter::once("paragraph").collect());
2141        let diff = diff_atom_sets(&local, &upstream, &[]);
2142        assert!(!diff.is_clean());
2143        assert_eq!(diff.upstream_only_parents, vec!["newNode"]);
2144        assert!(diff.local_only_parents.is_empty());
2145    }
2146
2147    #[test]
2148    fn diff_atom_sets_reports_unexpected_per_parent_diffs() {
2149        let mut local: std::collections::BTreeMap<
2150            &'static str,
2151            std::collections::BTreeSet<&'static str>,
2152        > = std::collections::BTreeMap::new();
2153        local.insert(
2154            "panel",
2155            ["paragraph", "heading"]
2156                .into_iter()
2157                .collect::<std::collections::BTreeSet<_>>(),
2158        );
2159        let mut upstream = local.clone();
2160        upstream.insert("panel", ["paragraph", "blockCard"].into_iter().collect());
2161        let diff = diff_atom_sets(&local, &upstream, &[]);
2162        assert!(!diff.is_clean());
2163        let msg = diff.per_parent_unexpected.join("\n");
2164        assert!(msg.contains("panel"));
2165        assert!(
2166            msg.contains("blockCard"),
2167            "upstream_only should mention blockCard: {msg}"
2168        );
2169        assert!(
2170            msg.contains("heading"),
2171            "local_only should mention heading: {msg}"
2172        );
2173    }
2174
2175    #[test]
2176    fn diff_atom_sets_honours_leniency_allowlist() {
2177        let mut local: std::collections::BTreeMap<
2178            &'static str,
2179            std::collections::BTreeSet<&'static str>,
2180        > = std::collections::BTreeMap::new();
2181        local.insert("panel", ["paragraph", "heading"].into_iter().collect());
2182        let mut upstream: std::collections::BTreeMap<
2183            &'static str,
2184            std::collections::BTreeSet<&'static str>,
2185        > = std::collections::BTreeMap::new();
2186        upstream.insert("panel", ["paragraph", "blockCard"].into_iter().collect());
2187        // Allowlist the exact deviation we just constructed.
2188        let lenient: &[LenientEntry] = &[(
2189            "panel",
2190            &["blockCard"], // upstream-only
2191            &["heading"],   // local-only
2192            "synthetic test deviation",
2193        )];
2194        let diff = diff_atom_sets(&local, &upstream, lenient);
2195        assert!(diff.is_clean(), "allowlist should mask the diff: {diff:?}");
2196    }
2197
2198    #[test]
2199    fn generated_provenance_matches_local_constants() {
2200        assert_eq!(
2201            generated::UPSTREAM_TARBALL_SHA256,
2202            UPSTREAM_TARBALL_SHA256,
2203            "the vendored JSON's provenance SHA must match the runtime constant; \
2204             both are bumped together when the snapshot is refreshed",
2205        );
2206        // SCHEMA_VERSION is `<npm-version>-YYYY-MM-DD`. Strip the trailing
2207        // 11-char date suffix to recover the npm version, which must match
2208        // the version baked into the generated file.
2209        let date_len = "-YYYY-MM-DD".len();
2210        let local_npm_prefix = SCHEMA_VERSION
2211            .get(..SCHEMA_VERSION.len().saturating_sub(date_len))
2212            .unwrap_or(SCHEMA_VERSION);
2213        assert_eq!(
2214            generated::UPSTREAM_VERSION,
2215            local_npm_prefix,
2216            "generated UPSTREAM_VERSION must match the npm-version prefix of SCHEMA_VERSION",
2217        );
2218    }
2219}