Skip to main content

omni_dev/atlassian/adf_schema/
drift.rs

1//! Drift detection between the local `CONTENT_ENTRIES` snapshot and
2//! upstream `@atlaskit/adf-schema`.
3//!
4//! Issue [#731]: a scheduled CI job downloads the latest upstream tarball,
5//! parses `dist/json-schema/v1/full.json` into a per-parent allowed-children
6//! map, and diffs it against the locally-encoded snapshot. The output is
7//! consumed by `bin/adf-schema-drift` and the `.github/workflows/
8//! adf-schema-drift.yml` workflow.
9//!
10//! The parser is intentionally narrow: it understands only the subset of
11//! JSON-schema patterns the upstream artefact actually uses (`anyOf` of
12//! `$ref` items, with optional alias definitions whose own subtree contains
13//! more refs). Any layout change upstream surfaces as a structured error or
14//! visible drift, not as a silent empty diff.
15//!
16//! [#731]: https://github.com/rust-works/omni-dev/issues/731
17
18use std::collections::{BTreeMap, BTreeSet, HashSet};
19use std::io::Read;
20
21use anyhow::{anyhow, Context, Result};
22use serde::Serialize;
23use serde_json::{Map, Value};
24use sha2::{Digest, Sha256};
25
26use super::{local_schema_map, SCHEMA_VERSION, UPSTREAM_TARBALL_SHA256};
27
/// npm registry endpoint that resolves the `latest` dist-tag for the package.
///
/// This is the default registry URL; [`fetch_latest_drift_report`] falls back
/// to it when the [`NPM_LATEST_URL_ENV`] override cannot be read.
const NPM_LATEST_URL: &str = "https://registry.npmjs.org/@atlaskit/adf-schema/latest";

/// Name of the optional environment variable that overrides `NPM_LATEST_URL`.
///
/// Honoured by [`fetch_latest_drift_report`]. Used by integration tests to
/// point the binary at a `wiremock` server, and available as an operational
/// knob for teams running an npm mirror.
pub const NPM_LATEST_URL_ENV: &str = "OMNI_DEV_ADF_SCHEMA_LATEST_URL";
37
/// Content-model drift for a single parent node type.
///
/// Holds the children that upstream now lists but the local snapshot does
/// not (`added_children`), and the children the local snapshot lists but
/// upstream no longer does (`removed_children`). Both sets are ordered
/// (`BTreeSet`), so iteration and serialisation order are deterministic.
#[derive(Debug, Clone, Default, Serialize, PartialEq, Eq)]
pub struct ParentDrift {
    /// Children listed by upstream that the local snapshot does not list.
    pub added_children: BTreeSet<String>,
    /// Children listed by the local snapshot that upstream no longer lists.
    pub removed_children: BTreeSet<String>,
}
48
/// Result of a drift comparison between upstream and the locally-encoded
/// schema.
///
/// `version_changed` is true if the upstream npm version differs from the
/// version embedded in [`SCHEMA_VERSION`] (after stripping the
/// `-YYYY-MM-DD` transcription-date suffix). `per_parent` lists only parents
/// that have content-model drift; parents in sync are omitted to keep the
/// rendered report tight.
///
/// The struct derives `Serialize`; `render_json` relies on that to produce
/// the machine-readable form of the report.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct DriftReport {
    /// `version` field from the latest upstream `package.json` (npm).
    pub upstream_version: String,
    /// SHA-256 of the upstream tarball bytes we downloaded.
    pub upstream_tarball_sha256: String,
    /// Local `SCHEMA_VERSION` (npm version + transcription date).
    pub local_version: String,
    /// Local `UPSTREAM_TARBALL_SHA256`.
    pub local_tarball_sha256: String,
    /// True iff `upstream_version` differs from the npm-version prefix of
    /// `local_version`.
    pub version_changed: bool,
    /// Parents present on both sides whose allowed-children sets differ.
    /// Parents fully in sync are omitted.
    pub per_parent: BTreeMap<String, ParentDrift>,
    /// Parents listed by upstream that the local snapshot does not have.
    pub added_parents: BTreeSet<String>,
    /// Parents listed by the local snapshot that upstream no longer has.
    pub removed_parents: BTreeSet<String>,
}
78
79impl DriftReport {
80    /// True iff any per-parent diff or any added/removed parent was found.
81    #[must_use]
82    pub fn has_content_drift(&self) -> bool {
83        !self.per_parent.is_empty()
84            || !self.added_parents.is_empty()
85            || !self.removed_parents.is_empty()
86    }
87
88    /// True iff the upstream version differs OR any content-model drift was
89    /// found. The CI workflow uses this to decide whether to open or update
90    /// a tracking issue.
91    #[must_use]
92    pub fn has_any_drift(&self) -> bool {
93        self.version_changed || self.has_content_drift()
94    }
95
96    /// Render a markdown body suitable for `gh issue create --body-file`.
97    #[must_use]
98    pub fn render_markdown(&self) -> String {
99        let mut out = String::new();
100        out.push_str("# ADF schema drift report\n\n");
101
102        out.push_str("## Version\n\n");
103        out.push_str(&format!(
104            "- Upstream `@atlaskit/adf-schema`: `{}`\n",
105            self.upstream_version
106        ));
107        out.push_str(&format!(
108            "- Upstream tarball SHA-256: `{}`\n",
109            self.upstream_tarball_sha256
110        ));
111        out.push_str(&format!(
112            "- Local `SCHEMA_VERSION`: `{}`\n",
113            self.local_version
114        ));
115        out.push_str(&format!(
116            "- Local `UPSTREAM_TARBALL_SHA256`: `{}`\n",
117            self.local_tarball_sha256
118        ));
119        out.push_str(&format!(
120            "- Version changed: **{}**\n\n",
121            self.version_changed
122        ));
123
124        out.push_str("## Content-model drift\n\n");
125        if !self.has_content_drift() {
126            out.push_str("No content-model changes — version bump only.\n\n");
127        } else {
128            if !self.added_parents.is_empty() {
129                out.push_str("### New parents (upstream only)\n\n");
130                for p in &self.added_parents {
131                    out.push_str(&format!("- `{p}`\n"));
132                }
133                out.push('\n');
134            }
135            if !self.removed_parents.is_empty() {
136                out.push_str("### Removed parents (local only)\n\n");
137                for p in &self.removed_parents {
138                    out.push_str(&format!("- `{p}`\n"));
139                }
140                out.push('\n');
141            }
142            if !self.per_parent.is_empty() {
143                out.push_str("### Per-parent diffs\n\n");
144                for (parent, drift) in &self.per_parent {
145                    out.push_str(&format!("#### `{parent}`\n\n"));
146                    if !drift.added_children.is_empty() {
147                        out.push_str("Added children (upstream only):\n");
148                        for c in &drift.added_children {
149                            out.push_str(&format!("- `{c}`\n"));
150                        }
151                        out.push('\n');
152                    }
153                    if !drift.removed_children.is_empty() {
154                        out.push_str("Removed children (local only):\n");
155                        for c in &drift.removed_children {
156                            out.push_str(&format!("- `{c}`\n"));
157                        }
158                        out.push('\n');
159                    }
160                }
161            }
162        }
163
164        out.push_str("---\n");
165        out.push_str(
166            "_Generated by the `adf-schema-drift` job. To refresh the snapshot, \
167             update `CONTENT_ENTRIES`, `SCHEMA_VERSION`, and `UPSTREAM_TARBALL_SHA256` in \
168             `src/atlassian/adf_schema/mod.rs`._\n",
169        );
170        out
171    }
172
173    /// Render the same report as a JSON object, for machine-readable CI use.
174    #[must_use]
175    pub fn render_json(&self) -> Value {
176        serde_json::to_value(self).unwrap_or(Value::Null)
177    }
178}
179
180/// Fetch the latest upstream tarball, parse its `full.json`, and compute a
181/// drift report against the locally-encoded schema.
182///
183/// Honours the [`NPM_LATEST_URL_ENV`] environment variable as an override
184/// for the registry URL — useful for integration tests and for teams behind
185/// an npm mirror.
186pub async fn fetch_latest_drift_report() -> Result<DriftReport> {
187    let url = std::env::var(NPM_LATEST_URL_ENV).unwrap_or_else(|_| NPM_LATEST_URL.to_string());
188    fetch_drift_report_from_url(&url).await
189}
190
191/// Variant of [`fetch_latest_drift_report`] that takes a configurable
192/// `latest`-dist-tag URL. Tests use this with a `wiremock` server; production
193/// always uses [`NPM_LATEST_URL`].
194async fn fetch_drift_report_from_url(latest_url: &str) -> Result<DriftReport> {
195    let client = reqwest::Client::builder()
196        .user_agent(concat!(
197            "omni-dev-adf-schema-drift/",
198            env!("CARGO_PKG_VERSION")
199        ))
200        .build()
201        .context("building HTTP client")?;
202
203    let meta: Value = client
204        .get(latest_url)
205        .send()
206        .await
207        .context("fetching npm registry latest dist-tag")?
208        .error_for_status()
209        .context("npm registry returned a non-2xx status for latest dist-tag")?
210        .json()
211        .await
212        .context("parsing npm latest dist-tag JSON")?;
213
214    let upstream_version = meta
215        .get("version")
216        .and_then(Value::as_str)
217        .ok_or_else(|| anyhow!("npm latest dist-tag JSON has no `version` field"))?
218        .to_string();
219    let tarball_url = meta
220        .get("dist")
221        .and_then(|d| d.get("tarball"))
222        .and_then(Value::as_str)
223        .ok_or_else(|| anyhow!("npm latest dist-tag JSON has no `dist.tarball` field"))?
224        .to_string();
225
226    let tarball_bytes = client
227        .get(&tarball_url)
228        .send()
229        .await
230        .with_context(|| format!("fetching tarball {tarball_url}"))?
231        .error_for_status()
232        .with_context(|| format!("npm tarball {tarball_url} returned a non-2xx status"))?
233        .bytes()
234        .await
235        .context("reading tarball bytes")?;
236
237    let upstream_sha = hex_encode(&Sha256::digest(&tarball_bytes));
238    let full_json = extract_full_json_from_tarball(&tarball_bytes)
239        .context("extracting dist/json-schema/v1/full.json from tarball")?;
240
241    diff_against_upstream_json_schema(&full_json, &upstream_version, &upstream_sha)
242}
243
244/// Parse the upstream `full.json` and diff against the local snapshot.
245pub fn diff_against_upstream_json_schema(
246    full: &Value,
247    upstream_version: &str,
248    upstream_sha256: &str,
249) -> Result<DriftReport> {
250    let upstream = parse_upstream_full_json(full)?;
251    let local = local_schema_map();
252
253    let local_version_npm = strip_transcription_date(SCHEMA_VERSION);
254    let version_changed = upstream_version != local_version_npm;
255
256    let upstream_parents: BTreeSet<&str> = upstream.keys().map(String::as_str).collect();
257    let local_parents: BTreeSet<&str> = local.keys().copied().collect();
258
259    let added_parents: BTreeSet<String> = upstream_parents
260        .difference(&local_parents)
261        .map(|s| (*s).to_string())
262        .collect();
263    let removed_parents: BTreeSet<String> = local_parents
264        .difference(&upstream_parents)
265        .map(|s| (*s).to_string())
266        .collect();
267
268    let mut per_parent: BTreeMap<String, ParentDrift> = BTreeMap::new();
269    for parent in upstream_parents.intersection(&local_parents).copied() {
270        let upstream_children: &BTreeSet<String> = upstream
271            .get(parent)
272            .ok_or_else(|| anyhow!("internal: parent `{parent}` missing from upstream map"))?;
273        let local_children: &BTreeSet<&'static str> = local
274            .get(parent)
275            .ok_or_else(|| anyhow!("internal: parent `{parent}` missing from local map"))?;
276        let added_children: BTreeSet<String> = upstream_children
277            .iter()
278            .filter(|c| !local_children.contains(c.as_str()))
279            .cloned()
280            .collect();
281        let removed_children: BTreeSet<String> = local_children
282            .iter()
283            .filter(|c| !upstream_children.contains(**c))
284            .map(|s| (*s).to_string())
285            .collect();
286        if !added_children.is_empty() || !removed_children.is_empty() {
287            per_parent.insert(
288                parent.to_string(),
289                ParentDrift {
290                    added_children,
291                    removed_children,
292                },
293            );
294        }
295    }
296
297    Ok(DriftReport {
298        upstream_version: upstream_version.to_string(),
299        upstream_tarball_sha256: upstream_sha256.to_string(),
300        local_version: SCHEMA_VERSION.to_string(),
301        local_tarball_sha256: UPSTREAM_TARBALL_SHA256.to_string(),
302        version_changed,
303        per_parent,
304        added_parents,
305        removed_parents,
306    })
307}
308
309// -----------------------------------------------------------------------------
310// JSON-schema parsing
311// -----------------------------------------------------------------------------
312
313/// Parse `full.json`'s `definitions` into a per-parent allowed-children map.
314///
315/// The shape we accept:
316///
317/// - A "bare-type" definition has `properties.type.const` (or `enum`)
318///   directly readable, e.g. `paragraph_node` → bare type `paragraph`.
319/// - A "marks-overlay" definition uses `allOf [base_ref, marks_extension]`
320///   to add a marks shape to an existing node (e.g.
321///   `formatted_text_inline_node` overlays marks on `text_node`). It
322///   inherits its base's bare type via a fixed-point pass.
323/// - Allowed children are the bare types reachable from any
324///   `properties.content` subtree (including ones nested under `allOf` to
325///   handle defs like `mediaSingle_caption_node`), with alias definitions
326///   flattened transitively.
327///
328/// Exposed `pub` so the `adf-schema-codegen` binary (issue #732) can parse
329/// the vendored `assets/adf-schema/full.json` without copy-pasting this
330/// logic.
331pub fn parse_upstream_full_json(full: &Value) -> Result<BTreeMap<String, BTreeSet<String>>> {
332    let definitions = full
333        .get("definitions")
334        .and_then(Value::as_object)
335        .ok_or_else(|| anyhow!("upstream JSON has no `definitions` object"))?;
336
337    // Pass 1: direct bare types (anything with `properties.type.const|enum`
338    // readable without crossing a `$ref`).
339    let mut def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
340    for (name, def) in definitions {
341        def_to_bare.insert(name.clone(), find_bare_type(def));
342    }
343
344    // Pass 2: fixed-point inheritance. `allOf [{$ref: X}, ...]` inherits
345    // X's bare type, when X has one. Repeat until no change so chains of
346    // length >1 (`A inherits from B inherits from C`) converge.
347    loop {
348        let mut changed = false;
349        for (name, def) in definitions {
350            if def_to_bare
351                .get(name)
352                .is_some_and(std::option::Option::is_some)
353            {
354                continue;
355            }
356            if let Some(inherited) = inherited_bare_type_via_allof(def, &def_to_bare) {
357                def_to_bare.insert(name.clone(), Some(inherited));
358                changed = true;
359            }
360        }
361        if !changed {
362            break;
363        }
364    }
365
366    let mut result: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
367    for (name, def) in definitions {
368        let Some(Some(bare)) = def_to_bare.get(name) else {
369            continue;
370        };
371        let children = definition_content_children(name, def, definitions, &def_to_bare);
372        if !children.is_empty() {
373            result.entry(bare.clone()).or_default().extend(children);
374        }
375    }
376
377    Ok(result)
378}
379
380/// Look for an inherited bare type via an `allOf` whose items include a
381/// `$ref` to a definition whose own bare type is known.
382///
383/// Only `allOf` is followed: it represents "I am all of these"
384/// (`base + extension`), so the base's identity is the natural inheritance
385/// path. `anyOf` / `oneOf` are unions of options, NOT inheritance — collapsing
386/// them to a single bare type would silently drop the other options' types.
387fn inherited_bare_type_via_allof(
388    def: &Value,
389    def_to_bare: &BTreeMap<String, Option<String>>,
390) -> Option<String> {
391    let Value::Object(obj) = def else { return None };
392    let Some(Value::Array(arr)) = obj.get("allOf") else {
393        return None;
394    };
395    for item in arr {
396        let Some(s) = item.get("$ref").and_then(Value::as_str) else {
397            continue;
398        };
399        let Some(target) = s.strip_prefix("#/definitions/") else {
400            continue;
401        };
402        if let Some(Some(bare)) = def_to_bare.get(target) {
403            return Some(bare.clone());
404        }
405    }
406    None
407}
408
409/// Find the bare node type for a definition, if it has one.
410///
411/// Looks at `properties.type.const` and `properties.type.enum[0]`, recursing
412/// through `allOf` / `anyOf` / `oneOf` arrays.
413fn find_bare_type(def: &Value) -> Option<String> {
414    fn walk(v: &Value) -> Option<String> {
415        let Value::Object(obj) = v else { return None };
416        if let Some(Value::Object(props)) = obj.get("properties") {
417            if let Some(Value::Object(t)) = props.get("type") {
418                if let Some(Value::String(s)) = t.get("const") {
419                    return Some(s.clone());
420                }
421                if let Some(Value::Array(arr)) = t.get("enum") {
422                    if let Some(Value::String(s)) = arr.first() {
423                        return Some(s.clone());
424                    }
425                }
426            }
427        }
428        for key in ["allOf", "anyOf", "oneOf"] {
429            if let Some(Value::Array(arr)) = obj.get(key) {
430                for x in arr {
431                    if let Some(s) = walk(x) {
432                        return Some(s);
433                    }
434                }
435            }
436        }
437        None
438    }
439    walk(def)
440}
441
442/// Collect bare-type children reachable from any `properties.content`
443/// subtree of `def`, including ones nested under `allOf` / `oneOf` / `anyOf`.
444fn definition_content_children(
445    def_name: &str,
446    def: &Value,
447    definitions: &Map<String, Value>,
448    def_to_bare: &BTreeMap<String, Option<String>>,
449) -> BTreeSet<String> {
450    let mut subtrees: Vec<&Value> = Vec::new();
451    find_content_subtrees(def, &mut subtrees);
452
453    let mut refs = Vec::new();
454    for subtree in subtrees {
455        collect_refs(subtree, &mut refs);
456    }
457
458    let mut out = BTreeSet::new();
459    for r in refs {
460        let mut visited = HashSet::new();
461        visited.insert(def_name.to_string());
462        resolve_ref_to_bare_types(&r, definitions, def_to_bare, &mut visited, &mut out);
463    }
464    out
465}
466
467/// Walk a definition tree gathering every `properties.content` subtree.
468///
469/// Descends through `allOf` / `oneOf` / `anyOf` siblings (so a marks-overlay
470/// def whose content lives inside an `allOf` extension is still picked up)
471/// but does NOT descend into the values of `properties.*` itself, which
472/// keeps marks/attrs subtrees out of the content-ref search.
473fn find_content_subtrees<'a>(value: &'a Value, out: &mut Vec<&'a Value>) {
474    let Value::Object(obj) = value else { return };
475    if let Some(Value::Object(props)) = obj.get("properties") {
476        if let Some(content) = props.get("content") {
477            out.push(content);
478        }
479    }
480    for key in ["allOf", "oneOf", "anyOf"] {
481        if let Some(Value::Array(arr)) = obj.get(key) {
482            for item in arr {
483                find_content_subtrees(item, out);
484            }
485        }
486    }
487}
488
489/// Resolve a `$ref` target to its bare type(s), flattening alias chains.
490///
491/// Bare-type targets always emit their bare type, including for legitimate
492/// self-references (e.g. `taskList` listing `taskList` as an allowed child) —
493/// reaching the same bare-type def twice is not recursion, just convergence.
494/// Only alias-only chains need the `visited` set to prevent infinite loops.
495fn resolve_ref_to_bare_types(
496    target_def_name: &str,
497    definitions: &Map<String, Value>,
498    def_to_bare: &BTreeMap<String, Option<String>>,
499    visited: &mut HashSet<String>,
500    out: &mut BTreeSet<String>,
501) {
502    if let Some(Some(bare)) = def_to_bare.get(target_def_name) {
503        out.insert(bare.clone());
504        return;
505    }
506    if !visited.insert(target_def_name.to_string()) {
507        return;
508    }
509    let Some(target_def) = definitions.get(target_def_name) else {
510        return;
511    };
512    let mut refs = Vec::new();
513    collect_refs(target_def, &mut refs);
514    for r in refs {
515        resolve_ref_to_bare_types(&r, definitions, def_to_bare, visited, out);
516    }
517}
518
519/// Recursively collect every `#/definitions/<name>` reference under `node`.
520///
521/// Skips subtrees keyed `marks` or `attrs`: those describe mark/attribute
522/// schemas, not content, and their refs (e.g. to `link_mark` whose bare type
523/// would resolve to `"link"`) would otherwise leak in as false-positive
524/// content children when walking an alias definition.
525fn collect_refs(node: &Value, out: &mut Vec<String>) {
526    match node {
527        Value::Object(obj) => {
528            if let Some(Value::String(r)) = obj.get("$ref") {
529                if let Some(name) = r.strip_prefix("#/definitions/") {
530                    out.push(name.to_string());
531                }
532            }
533            for (key, v) in obj {
534                if key == "marks" || key == "attrs" {
535                    continue;
536                }
537                collect_refs(v, out);
538            }
539        }
540        Value::Array(arr) => {
541            for v in arr {
542                collect_refs(v, out);
543            }
544        }
545        _ => {}
546    }
547}
548
549// -----------------------------------------------------------------------------
550// Tarball extraction
551// -----------------------------------------------------------------------------
552
553fn extract_full_json_from_tarball(bytes: &[u8]) -> Result<Value> {
554    let gz = flate2::read::GzDecoder::new(bytes);
555    let mut archive = tar::Archive::new(gz);
556    for entry in archive.entries().context("opening tarball entries")? {
557        let mut entry = entry.context("reading tarball entry header")?;
558        let path_buf = entry
559            .path()
560            .context("decoding tarball entry path")?
561            .into_owned();
562        if path_buf == std::path::Path::new("package/dist/json-schema/v1/full.json") {
563            let mut buf = String::new();
564            entry
565                .read_to_string(&mut buf)
566                .context("reading full.json")?;
567            return serde_json::from_str(&buf).context("parsing full.json");
568        }
569    }
570    Err(anyhow!(
571        "tarball does not contain package/dist/json-schema/v1/full.json"
572    ))
573}
574
575// -----------------------------------------------------------------------------
576// Version-string handling
577// -----------------------------------------------------------------------------
578
/// Strip the trailing `-YYYY-MM-DD` transcription-date suffix from a
/// `SCHEMA_VERSION`-style string, leaving the npm version.
///
/// The suffix is recognised purely by shape: the last 11 bytes must be a
/// `-`, four ASCII digits, `-`, two ASCII digits, `-`, two ASCII digits.
/// No calendar validation is performed. Any input that does not end in that
/// shape (including inputs shorter than 11 bytes) is returned unchanged.
///
/// The check runs on bytes, so inputs containing multi-byte UTF-8
/// characters never panic, even when the 11-byte cut would land inside a
/// character.
fn strip_transcription_date(s: &str) -> &str {
    const SUFFIX_LEN: usize = "-YYYY-MM-DD".len();

    let Some(split) = s.len().checked_sub(SUFFIX_LEN) else {
        return s;
    };
    let looks_like_date = s.as_bytes()[split..]
        .iter()
        .enumerate()
        .all(|(offset, byte)| match offset {
            0 | 5 | 8 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        });
    if looks_like_date {
        // The byte at `split` is an ASCII `-`, so `split` is guaranteed to be
        // a char boundary and this slice cannot panic.
        &s[..split]
    } else {
        s
    }
}
601
/// Lower-case hex encoding of a byte slice.
///
/// Each input byte becomes exactly two characters from `0-9a-f`, high nibble
/// first; an empty slice yields an empty string.
///
/// Replaces the `format!("{:x}", Sha256::digest(...))` idiom, which broke when
/// `sha2` 0.11 changed the digest output type to `hybrid_array::Array`, which
/// does not implement `LowerHex`.
#[must_use]
pub fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}
617
618#[cfg(test)]
619#[allow(clippy::unwrap_used, clippy::expect_used)]
620mod tests {
621    use super::*;
622    use serde_json::json;
623
624    fn synthesise_full_json_from_local() -> Value {
625        let local = local_schema_map();
626        let parents: BTreeSet<&str> = local.keys().copied().collect();
627        let mut all_types: BTreeSet<&str> = parents.clone();
628        for children in local.values() {
629            for c in children {
630                all_types.insert(*c);
631            }
632        }
633        let leaves: BTreeSet<&str> = all_types.difference(&parents).copied().collect();
634
635        let mut definitions = serde_json::Map::new();
636        for (parent, children) in &local {
637            let any_of: Vec<Value> = children
638                .iter()
639                .map(|c| json!({"$ref": format!("#/definitions/{c}_node")}))
640                .collect();
641            definitions.insert(
642                format!("{parent}_node"),
643                json!({
644                    "properties": {
645                        "type": {"const": parent},
646                        "content": {
647                            "type": "array",
648                            "items": {"anyOf": any_of}
649                        }
650                    }
651                }),
652            );
653        }
654        for leaf in &leaves {
655            definitions.insert(
656                format!("{leaf}_node"),
657                json!({
658                    "properties": {
659                        "type": {"const": leaf}
660                    }
661                }),
662            );
663        }
664        json!({"definitions": Value::Object(definitions)})
665    }
666
667    #[test]
668    fn parses_anyof_refs_into_allowed_children_set() {
669        let full = json!({
670            "definitions": {
671                "blockquote_node": {
672                    "properties": {
673                        "type": {"const": "blockquote"},
674                        "content": {
675                            "type": "array",
676                            "items": {
677                                "anyOf": [
678                                    {"$ref": "#/definitions/paragraph_node"},
679                                    {"$ref": "#/definitions/codeBlock_node"}
680                                ]
681                            }
682                        }
683                    }
684                },
685                "paragraph_node": {"properties": {"type": {"const": "paragraph"}}},
686                "codeBlock_node": {"properties": {"type": {"const": "codeBlock"}}}
687            }
688        });
689        let parsed = parse_upstream_full_json(&full).unwrap();
690        let bq = parsed.get("blockquote").expect("blockquote parsed");
691        let expected: BTreeSet<String> = ["codeBlock", "paragraph"]
692            .into_iter()
693            .map(String::from)
694            .collect();
695        assert_eq!(*bq, expected);
696    }
697
698    #[test]
699    fn alias_definition_is_flattened_transitively() {
700        // tableCell-style: bare def points at an alias; alias resolves to bare types.
701        let full = json!({
702            "definitions": {
703                "tableCell_node": {
704                    "properties": {
705                        "type": {"const": "tableCell"},
706                        "content": {
707                            "type": "array",
708                            "items": {"$ref": "#/definitions/table_cell_content"}
709                        }
710                    }
711                },
712                "table_cell_content": {
713                    "anyOf": [
714                        {"$ref": "#/definitions/paragraph_node"},
715                        {"$ref": "#/definitions/heading_node"}
716                    ]
717                },
718                "paragraph_node": {"properties": {"type": {"const": "paragraph"}}},
719                "heading_node": {"properties": {"type": {"const": "heading"}}}
720            }
721        });
722        let parsed = parse_upstream_full_json(&full).unwrap();
723        let cell = parsed.get("tableCell").expect("tableCell parsed");
724        let expected: BTreeSet<String> = ["heading", "paragraph"]
725            .into_iter()
726            .map(String::from)
727            .collect();
728        assert_eq!(*cell, expected);
729    }
730
731    #[test]
732    fn report_is_empty_when_input_matches_local_entries() {
733        // Round-trip: synthesise a JSON-schema fixture from the local table,
734        // feed it through the parser, and assert no drift. This is a stronger
735        // version of the existing `blockquote_allowed_children_match_upstream_
736        // json_schema` test: it covers every parent in the table at once.
737        let full = synthesise_full_json_from_local();
738        let report = diff_against_upstream_json_schema(
739            &full,
740            strip_transcription_date(SCHEMA_VERSION),
741            "fixture",
742        )
743        .unwrap();
744        assert!(
745            !report.has_content_drift(),
746            "synthesised-from-local fixture should produce no content drift, got: {report:#?}"
747        );
748        assert!(report.added_parents.is_empty());
749        assert!(report.removed_parents.is_empty());
750        assert!(report.per_parent.is_empty());
751        assert!(!report.version_changed);
752    }
753
754    #[test]
755    fn report_flags_added_and_removed_children() {
756        let mut full = synthesise_full_json_from_local();
757        // Add a child to blockquote that the local table doesn't list.
758        let bq_items = full
759            .pointer_mut("/definitions/blockquote_node/properties/content/items/anyOf")
760            .unwrap()
761            .as_array_mut()
762            .unwrap();
763        bq_items.push(json!({"$ref": "#/definitions/madeUpBlock_node"}));
764        full.pointer_mut("/definitions")
765            .unwrap()
766            .as_object_mut()
767            .unwrap()
768            .insert(
769                "madeUpBlock_node".to_string(),
770                json!({"properties": {"type": {"const": "madeUpBlock"}}}),
771            );
772        // Remove a child from panel.
773        let panel_items = full
774            .pointer_mut("/definitions/panel_node/properties/content/items/anyOf")
775            .unwrap()
776            .as_array_mut()
777            .unwrap();
778        panel_items.retain(|v| {
779            v.get("$ref").and_then(Value::as_str) != Some("#/definitions/paragraph_node")
780        });
781
782        let report =
783            diff_against_upstream_json_schema(&full, "fixture-version", "fixture").unwrap();
784        assert!(report.has_content_drift());
785
786        let bq = report
787            .per_parent
788            .get("blockquote")
789            .expect("blockquote drift present");
790        assert_eq!(
791            bq.added_children,
792            std::iter::once("madeUpBlock").map(String::from).collect()
793        );
794        assert!(bq.removed_children.is_empty());
795
796        let panel = report.per_parent.get("panel").expect("panel drift present");
797        assert!(panel.added_children.is_empty());
798        assert_eq!(
799            panel.removed_children,
800            std::iter::once("paragraph").map(String::from).collect()
801        );
802    }
803
804    #[test]
805    fn report_flags_added_and_removed_parents() {
806        let mut full = synthesise_full_json_from_local();
807        // Remove a parent definition (`expand`) from upstream entirely.
808        full.pointer_mut("/definitions")
809            .unwrap()
810            .as_object_mut()
811            .unwrap()
812            .remove("expand_node");
813        // Add a parent definition that the local table doesn't have.
814        full.pointer_mut("/definitions")
815            .unwrap()
816            .as_object_mut()
817            .unwrap()
818            .insert(
819                "futureBlock_node".to_string(),
820                json!({
821                    "properties": {
822                        "type": {"const": "futureBlock"},
823                        "content": {
824                            "type": "array",
825                            "items": {
826                                "anyOf": [
827                                    {"$ref": "#/definitions/paragraph_node"}
828                                ]
829                            }
830                        }
831                    }
832                }),
833            );
834
835        let report = diff_against_upstream_json_schema(&full, "fixture", "fixture").unwrap();
836        assert!(report.added_parents.contains("futureBlock"));
837        assert!(report.removed_parents.contains("expand"));
838    }
839
840    #[test]
841    fn version_changed_distinguishes_npm_version_from_transcription_date() {
842        let full = synthesise_full_json_from_local();
843        // Same as local: not changed.
844        let r = diff_against_upstream_json_schema(
845            &full,
846            strip_transcription_date(SCHEMA_VERSION),
847            "fixture",
848        )
849        .unwrap();
850        assert!(!r.version_changed);
851
852        // Different: changed.
853        let r = diff_against_upstream_json_schema(&full, "999.0.0", "fixture").unwrap();
854        assert!(r.version_changed);
855    }
856
857    #[test]
858    fn strip_transcription_date_handles_yyyy_mm_dd_suffix() {
859        assert_eq!(strip_transcription_date("52.9.5-2026-05-10"), "52.9.5");
860        assert_eq!(strip_transcription_date("52.9.5"), "52.9.5");
861        assert_eq!(strip_transcription_date("52.9.5-rc1"), "52.9.5-rc1");
862        assert_eq!(
863            strip_transcription_date("52.9.5-rc1-2026-05-10"),
864            "52.9.5-rc1"
865        );
866        assert_eq!(strip_transcription_date(""), "");
867    }
868
869    #[test]
870    fn strip_transcription_date_returns_input_when_suffix_lacks_leading_dash() {
871        // 12+ chars but no `-` at the suffix-start position: returns input.
872        assert_eq!(strip_transcription_date("abcdefghijkl"), "abcdefghijkl");
873    }
874
875    #[test]
876    fn strip_transcription_date_returns_input_when_suffix_is_not_a_date() {
877        // 14 chars; tail = "-1234-XX-YY", parts non-numeric → returns input.
878        assert_eq!(strip_transcription_date("abc-1234-XX-YY"), "abc-1234-XX-YY");
879    }
880
881    #[test]
882    fn render_markdown_is_terse_when_no_drift() {
883        let full = synthesise_full_json_from_local();
884        let report = diff_against_upstream_json_schema(
885            &full,
886            strip_transcription_date(SCHEMA_VERSION),
887            "fixture",
888        )
889        .unwrap();
890        let md = report.render_markdown();
891        assert!(md.contains("No content-model changes"));
892        assert!(!md.contains("Per-parent diffs"));
893    }
894
895    #[test]
896    fn render_markdown_includes_per_parent_diffs() {
897        let mut full = synthesise_full_json_from_local();
898        let bq_items = full
899            .pointer_mut("/definitions/blockquote_node/properties/content/items/anyOf")
900            .unwrap()
901            .as_array_mut()
902            .unwrap();
903        bq_items.push(json!({"$ref": "#/definitions/text_node"}));
904        let report = diff_against_upstream_json_schema(&full, "fixture", "fixture").unwrap();
905        let md = report.render_markdown();
906        assert!(md.contains("Per-parent diffs"));
907        assert!(md.contains("`blockquote`"));
908        assert!(md.contains("text"));
909    }
910
911    #[test]
912    fn render_json_is_serializable() {
913        let full = synthesise_full_json_from_local();
914        let report = diff_against_upstream_json_schema(&full, "fixture", "fixture").unwrap();
915        let v = report.render_json();
916        assert!(v.is_object());
917        assert!(v.get("upstream_version").is_some());
918        assert!(v.get("per_parent").is_some());
919    }
920
921    // ---- find_bare_type variants -----------------------------------------
922
923    #[test]
924    fn find_bare_type_recognises_enum_array() {
925        // Some upstream defs use `"enum": ["nodeName"]` instead of
926        // `"const": "nodeName"` — both must resolve to the bare type.
927        let def = json!({"properties": {"type": {"enum": ["paragraph"]}}});
928        assert_eq!(find_bare_type(&def).as_deref(), Some("paragraph"));
929    }
930
931    #[test]
932    fn find_bare_type_walks_into_oneof_for_nested_const() {
933        let def = json!({
934            "oneOf": [
935                {"properties": {"type": {"const": "nestedExpand"}}},
936                {"properties": {"type": {"const": "ignoredVariant"}}}
937            ]
938        });
939        // First match wins.
940        assert_eq!(find_bare_type(&def).as_deref(), Some("nestedExpand"));
941    }
942
943    #[test]
944    fn find_bare_type_returns_none_for_nodes_with_no_type_const() {
945        let def = json!({"anyOf": [{"$ref": "#/definitions/x"}]});
946        assert_eq!(find_bare_type(&def), None);
947    }
948
949    #[test]
950    fn find_bare_type_returns_none_when_enum_is_empty_or_non_string() {
951        // Empty enum array: the `first()` Option is None.
952        let empty = json!({"properties": {"type": {"enum": []}}});
953        assert_eq!(find_bare_type(&empty), None);
954        // Enum with a non-string head element: the Value::String pattern fails.
955        let non_string = json!({"properties": {"type": {"enum": [42]}}});
956        assert_eq!(find_bare_type(&non_string), None);
957    }
958
959    #[test]
960    fn find_bare_type_returns_none_on_non_object() {
961        assert_eq!(find_bare_type(&json!(null)), None);
962        assert_eq!(find_bare_type(&json!([])), None);
963        assert_eq!(find_bare_type(&json!("string")), None);
964    }
965
966    // ---- inherited_bare_type_via_allof -----------------------------------
967
968    #[test]
969    fn inheritance_via_allof_finds_known_base() {
970        let def = json!({
971            "allOf": [
972                {"$ref": "#/definitions/paragraph_node"},
973                {"properties": {"marks": {}}}
974            ]
975        });
976        let mut def_to_bare = BTreeMap::new();
977        def_to_bare.insert("paragraph_node".to_string(), Some("paragraph".to_string()));
978        assert_eq!(
979            inherited_bare_type_via_allof(&def, &def_to_bare).as_deref(),
980            Some("paragraph")
981        );
982    }
983
984    #[test]
985    fn inheritance_via_allof_returns_none_when_no_allof() {
986        let def = json!({"anyOf": [{"$ref": "#/definitions/x_node"}]});
987        let mut def_to_bare = BTreeMap::new();
988        def_to_bare.insert("x_node".to_string(), Some("x".to_string()));
989        // anyOf is intentionally NOT followed for inheritance.
990        assert_eq!(inherited_bare_type_via_allof(&def, &def_to_bare), None);
991    }
992
993    #[test]
994    fn inheritance_via_allof_returns_none_when_target_unknown() {
995        let def = json!({"allOf": [{"$ref": "#/definitions/unknown_node"}]});
996        let def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
997        assert_eq!(inherited_bare_type_via_allof(&def, &def_to_bare), None);
998    }
999
1000    #[test]
1001    fn inheritance_via_allof_skips_items_without_ref_or_with_external_ref() {
1002        // Items inside `allOf` may be plain objects (no `$ref`) — skip.
1003        // Items with `$ref` not pointing into `#/definitions/` — also skip.
1004        let def = json!({
1005            "allOf": [
1006                {"properties": {"marks": {}}},                  // no $ref
1007                {"$ref": "https://example.com/external.json"},   // external ref
1008                {"$ref": "#/definitions/paragraph_node"}         // valid; should be picked
1009            ]
1010        });
1011        let mut def_to_bare = BTreeMap::new();
1012        def_to_bare.insert("paragraph_node".to_string(), Some("paragraph".to_string()));
1013        assert_eq!(
1014            inherited_bare_type_via_allof(&def, &def_to_bare).as_deref(),
1015            Some("paragraph")
1016        );
1017    }
1018
1019    #[test]
1020    fn inheritance_via_allof_returns_none_when_only_non_ref_items() {
1021        // No item carries a usable `$ref` — function returns None.
1022        let def = json!({
1023            "allOf": [
1024                {"properties": {"marks": {}}},
1025                {"$ref": "https://example.com/external.json"}
1026            ]
1027        });
1028        let def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
1029        assert_eq!(inherited_bare_type_via_allof(&def, &def_to_bare), None);
1030    }
1031
1032    #[test]
1033    fn inheritance_via_allof_returns_none_for_non_object_input() {
1034        // Defensive `let Value::Object(obj) = def else { return None }`.
1035        let def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
1036        assert_eq!(
1037            inherited_bare_type_via_allof(&json!(null), &def_to_bare),
1038            None
1039        );
1040        assert_eq!(
1041            inherited_bare_type_via_allof(&json!([]), &def_to_bare),
1042            None
1043        );
1044        assert_eq!(
1045            inherited_bare_type_via_allof(&json!("string"), &def_to_bare),
1046            None
1047        );
1048    }
1049
1050    #[test]
1051    fn inheritance_via_allof_handles_marks_overlay_pattern() {
1052        // Real-world pattern: formatted_text_inline_node.
1053        let full = json!({
1054            "definitions": {
1055                "paragraph_node": {
1056                    "properties": {
1057                        "type": {"const": "paragraph"},
1058                        "content": {
1059                            "type": "array",
1060                            "items": {"$ref": "#/definitions/formatted_text_inline_node"}
1061                        }
1062                    }
1063                },
1064                "text_node": {"properties": {"type": {"const": "text"}}},
1065                "formatted_text_inline_node": {
1066                    "allOf": [
1067                        {"$ref": "#/definitions/text_node"},
1068                        {
1069                            "properties": {
1070                                "marks": {
1071                                    "type": "array",
1072                                    "items": {
1073                                        "anyOf": [
1074                                            {"$ref": "#/definitions/link_mark"}
1075                                        ]
1076                                    }
1077                                }
1078                            }
1079                        }
1080                    ]
1081                },
1082                "link_mark": {"properties": {"type": {"const": "link"}}}
1083            }
1084        });
1085        let parsed = parse_upstream_full_json(&full).unwrap();
1086        // paragraph's children must be exactly {text}; the link mark must
1087        // NOT leak in via the marks subtree.
1088        let p = parsed.get("paragraph").expect("paragraph present");
1089        let expected: BTreeSet<String> = std::iter::once("text").map(String::from).collect();
1090        assert_eq!(*p, expected);
1091    }
1092
1093    // ---- parse error paths -----------------------------------------------
1094
1095    #[test]
1096    fn parse_returns_error_when_definitions_missing() {
1097        let full = json!({"foo": "bar"});
1098        let err = parse_upstream_full_json(&full).unwrap_err();
1099        assert!(err.to_string().contains("definitions"));
1100    }
1101
1102    #[test]
1103    fn diff_propagates_parse_error_when_definitions_missing() {
1104        let err = diff_against_upstream_json_schema(&json!({}), "1.0.0", "sha").unwrap_err();
1105        assert!(err.to_string().contains("definitions"));
1106    }
1107
1108    // ---- collect_refs / find_content_subtrees corner cases ---------------
1109
1110    #[test]
1111    fn collect_refs_skips_marks_and_attrs_subtrees() {
1112        let v = json!({
1113            "properties": {
1114                "content": {"$ref": "#/definitions/keep_me"},
1115                "marks": {"$ref": "#/definitions/skip_me_mark"},
1116                "attrs": {"$ref": "#/definitions/skip_me_attrs"}
1117            }
1118        });
1119        let mut refs = Vec::new();
1120        collect_refs(&v, &mut refs);
1121        assert!(refs.contains(&"keep_me".to_string()));
1122        assert!(!refs.contains(&"skip_me_mark".to_string()));
1123        assert!(!refs.contains(&"skip_me_attrs".to_string()));
1124    }
1125
1126    #[test]
1127    fn collect_refs_handles_arrays_of_refs() {
1128        let v = json!([
1129            {"$ref": "#/definitions/a"},
1130            {"$ref": "#/definitions/b"},
1131            {"$ref": "https://example.com/schema"}, // non-#/definitions, ignored
1132        ]);
1133        let mut refs = Vec::new();
1134        collect_refs(&v, &mut refs);
1135        assert_eq!(refs, vec!["a".to_string(), "b".to_string()]);
1136    }
1137
1138    #[test]
1139    fn find_content_subtrees_returns_nothing_for_non_object_input() {
1140        // Defensive `let Value::Object(obj) = value else { return }`.
1141        let null = json!(null);
1142        let array = json!([]);
1143        let string = json!("string");
1144        let mut subtrees = Vec::new();
1145        find_content_subtrees(&null, &mut subtrees);
1146        find_content_subtrees(&array, &mut subtrees);
1147        find_content_subtrees(&string, &mut subtrees);
1148        assert!(subtrees.is_empty());
1149    }
1150
1151    #[test]
1152    fn find_content_subtrees_picks_up_content_nested_in_allof() {
1153        // mediaSingle_caption_node-style: bare type comes via $ref, content
1154        // sits inside an `allOf` extension.
1155        let def = json!({
1156            "allOf": [
1157                {"$ref": "#/definitions/mediaSingle_node"},
1158                {
1159                    "properties": {
1160                        "content": {
1161                            "type": "array",
1162                            "items": [
1163                                {"$ref": "#/definitions/media_node"},
1164                                {"$ref": "#/definitions/caption_node"}
1165                            ]
1166                        }
1167                    }
1168                }
1169            ]
1170        });
1171        let mut subtrees = Vec::new();
1172        find_content_subtrees(&def, &mut subtrees);
1173        assert_eq!(subtrees.len(), 1);
1174    }
1175
1176    // ---- resolve cycle protection ----------------------------------------
1177
1178    #[test]
1179    fn resolve_handles_alias_cycles_without_infinite_loop() {
1180        let mut definitions = serde_json::Map::new();
1181        definitions.insert(
1182            "alias_a".to_string(),
1183            json!({"anyOf": [{"$ref": "#/definitions/alias_b"}]}),
1184        );
1185        definitions.insert(
1186            "alias_b".to_string(),
1187            json!({"anyOf": [{"$ref": "#/definitions/alias_a"}]}),
1188        );
1189        let mut def_to_bare = BTreeMap::new();
1190        def_to_bare.insert("alias_a".to_string(), None);
1191        def_to_bare.insert("alias_b".to_string(), None);
1192
1193        let mut visited = HashSet::new();
1194        let mut out = BTreeSet::new();
1195        // Should terminate (no infinite recursion).
1196        resolve_ref_to_bare_types(
1197            "alias_a",
1198            &definitions,
1199            &def_to_bare,
1200            &mut visited,
1201            &mut out,
1202        );
1203        assert!(out.is_empty());
1204    }
1205
1206    #[test]
1207    fn resolve_returns_silently_when_target_not_in_definitions() {
1208        let definitions = serde_json::Map::new();
1209        let mut def_to_bare = BTreeMap::new();
1210        def_to_bare.insert("ghost".to_string(), None);
1211        let mut visited = HashSet::new();
1212        let mut out = BTreeSet::new();
1213        resolve_ref_to_bare_types("ghost", &definitions, &def_to_bare, &mut visited, &mut out);
1214        assert!(out.is_empty());
1215    }
1216
1217    // ---- markdown rendering branches -------------------------------------
1218
1219    fn report_with(
1220        added_parents: BTreeSet<String>,
1221        removed_parents: BTreeSet<String>,
1222        per_parent: BTreeMap<String, ParentDrift>,
1223        version_changed: bool,
1224    ) -> DriftReport {
1225        DriftReport {
1226            upstream_version: "9.9.9".to_string(),
1227            upstream_tarball_sha256: "up-sha".to_string(),
1228            local_version: "1.0.0-2026-01-01".to_string(),
1229            local_tarball_sha256: "local-sha".to_string(),
1230            version_changed,
1231            per_parent,
1232            added_parents,
1233            removed_parents,
1234        }
1235    }
1236
1237    #[test]
1238    fn render_markdown_renders_added_parents_section() {
1239        let added: BTreeSet<String> = std::iter::once("futureNode").map(String::from).collect();
1240        let report = report_with(added, BTreeSet::new(), BTreeMap::new(), true);
1241        let md = report.render_markdown();
1242        assert!(md.contains("New parents (upstream only)"));
1243        assert!(md.contains("`futureNode`"));
1244        assert!(!md.contains("Removed parents"));
1245    }
1246
1247    #[test]
1248    fn render_markdown_renders_removed_parents_section() {
1249        let removed: BTreeSet<String> = std::iter::once("oldNode").map(String::from).collect();
1250        let report = report_with(BTreeSet::new(), removed, BTreeMap::new(), false);
1251        let md = report.render_markdown();
1252        assert!(md.contains("Removed parents (local only)"));
1253        assert!(md.contains("`oldNode`"));
1254        assert!(!md.contains("New parents"));
1255    }
1256
1257    #[test]
1258    fn render_markdown_renders_only_added_children_when_no_removed() {
1259        let mut per = BTreeMap::new();
1260        per.insert(
1261            "blockquote".to_string(),
1262            ParentDrift {
1263                added_children: std::iter::once("newChild").map(String::from).collect(),
1264                removed_children: BTreeSet::new(),
1265            },
1266        );
1267        let report = report_with(BTreeSet::new(), BTreeSet::new(), per, false);
1268        let md = report.render_markdown();
1269        assert!(md.contains("Added children"));
1270        assert!(!md.contains("Removed children"));
1271        assert!(md.contains("`newChild`"));
1272    }
1273
1274    #[test]
1275    fn render_markdown_renders_only_removed_children_when_no_added() {
1276        let mut per = BTreeMap::new();
1277        per.insert(
1278            "panel".to_string(),
1279            ParentDrift {
1280                added_children: BTreeSet::new(),
1281                removed_children: std::iter::once("goneChild").map(String::from).collect(),
1282            },
1283        );
1284        let report = report_with(BTreeSet::new(), BTreeSet::new(), per, false);
1285        let md = report.render_markdown();
1286        assert!(!md.contains("Added children"));
1287        assert!(md.contains("Removed children"));
1288        assert!(md.contains("`goneChild`"));
1289    }
1290
1291    // ---- has_any_drift / has_content_drift -------------------------------
1292
1293    #[test]
1294    fn has_any_drift_true_when_only_version_changed() {
1295        let report = report_with(BTreeSet::new(), BTreeSet::new(), BTreeMap::new(), true);
1296        assert!(!report.has_content_drift());
1297        assert!(report.has_any_drift());
1298    }
1299
1300    #[test]
1301    fn has_any_drift_true_when_only_added_parents() {
1302        let added: BTreeSet<String> = std::iter::once("x").map(String::from).collect();
1303        let report = report_with(added, BTreeSet::new(), BTreeMap::new(), false);
1304        assert!(report.has_content_drift());
1305        assert!(report.has_any_drift());
1306    }
1307
1308    #[test]
1309    fn has_any_drift_true_when_only_removed_parents() {
1310        let removed: BTreeSet<String> = std::iter::once("x").map(String::from).collect();
1311        let report = report_with(BTreeSet::new(), removed, BTreeMap::new(), false);
1312        assert!(report.has_content_drift());
1313        assert!(report.has_any_drift());
1314    }
1315
1316    // ---- tarball extraction ----------------------------------------------
1317
1318    fn build_synthetic_tarball(entries: &[(&str, &[u8])]) -> Vec<u8> {
1319        let mut gz = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
1320        {
1321            let mut builder = tar::Builder::new(&mut gz);
1322            for (path, body) in entries {
1323                let mut header = tar::Header::new_gnu();
1324                header.set_path(path).unwrap();
1325                header.set_size(body.len() as u64);
1326                header.set_mode(0o644);
1327                header.set_cksum();
1328                builder.append(&header, *body).unwrap();
1329            }
1330            builder.finish().unwrap();
1331        }
1332        gz.finish().unwrap()
1333    }
1334
1335    #[test]
1336    fn extract_full_json_succeeds_when_path_present() {
1337        let body = serde_json::to_vec(&json!({"definitions": {}})).unwrap();
1338        let bytes =
1339            build_synthetic_tarball(&[("package/dist/json-schema/v1/full.json", body.as_slice())]);
1340        let parsed = extract_full_json_from_tarball(&bytes).unwrap();
1341        assert!(parsed.get("definitions").is_some());
1342    }
1343
1344    #[test]
1345    fn extract_full_json_errors_when_path_missing() {
1346        let bytes = build_synthetic_tarball(&[("package/README.md", b"hello")]);
1347        let err = extract_full_json_from_tarball(&bytes).unwrap_err();
1348        assert!(err.to_string().contains("does not contain"));
1349    }
1350
1351    #[test]
1352    fn extract_full_json_errors_on_invalid_gzip() {
1353        let bytes = b"not a gzip stream";
1354        let err = extract_full_json_from_tarball(bytes).unwrap_err();
1355        // Some error from flate2/tar surfaces as the cause; we only assert
1356        // an error is returned.
1357        let _ = err;
1358    }
1359
1360    #[test]
1361    fn extract_full_json_errors_when_payload_is_not_json() {
1362        let bytes =
1363            build_synthetic_tarball(&[("package/dist/json-schema/v1/full.json", b"not json{")]);
1364        let err = extract_full_json_from_tarball(&bytes).unwrap_err();
1365        assert!(err.to_string().contains("parsing full.json"));
1366    }
1367
1368    // ---- end-to-end fetch via wiremock -----------------------------------
1369
1370    #[tokio::test]
1371    async fn fetch_drift_report_from_url_handles_clean_upstream() {
1372        let server = wiremock::MockServer::start().await;
1373        let full = synthesise_full_json_from_local();
1374        let tarball = build_synthetic_tarball(&[(
1375            "package/dist/json-schema/v1/full.json",
1376            serde_json::to_vec(&full).unwrap().as_slice(),
1377        )]);
1378        let tarball_url = format!("{}/-/adf-schema-fixture.tgz", server.uri());
1379
1380        wiremock::Mock::given(wiremock::matchers::path("/latest"))
1381            .respond_with(wiremock::ResponseTemplate::new(200).set_body_json(json!({
1382                "version": strip_transcription_date(SCHEMA_VERSION),
1383                "dist": {"tarball": tarball_url}
1384            })))
1385            .mount(&server)
1386            .await;
1387        wiremock::Mock::given(wiremock::matchers::path("/-/adf-schema-fixture.tgz"))
1388            .respond_with(wiremock::ResponseTemplate::new(200).set_body_bytes(tarball))
1389            .mount(&server)
1390            .await;
1391
1392        let report = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1393            .await
1394            .unwrap();
1395        assert!(!report.version_changed);
1396        assert!(!report.has_content_drift());
1397    }
1398
1399    #[tokio::test]
1400    async fn fetch_drift_report_from_url_errors_when_metadata_lacks_version() {
1401        let server = wiremock::MockServer::start().await;
1402        wiremock::Mock::given(wiremock::matchers::path("/latest"))
1403            .respond_with(
1404                wiremock::ResponseTemplate::new(200)
1405                    .set_body_json(json!({"dist": {"tarball": "x"}})),
1406            )
1407            .mount(&server)
1408            .await;
1409        let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1410            .await
1411            .unwrap_err();
1412        assert!(err.to_string().contains("`version` field"));
1413    }
1414
1415    #[tokio::test]
1416    async fn fetch_drift_report_from_url_errors_when_metadata_lacks_tarball() {
1417        let server = wiremock::MockServer::start().await;
1418        wiremock::Mock::given(wiremock::matchers::path("/latest"))
1419            .respond_with(
1420                wiremock::ResponseTemplate::new(200).set_body_json(json!({"version": "1.0.0"})),
1421            )
1422            .mount(&server)
1423            .await;
1424        let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1425            .await
1426            .unwrap_err();
1427        assert!(err.to_string().contains("`dist.tarball` field"));
1428    }
1429
1430    #[tokio::test]
1431    async fn fetch_drift_report_from_url_errors_when_metadata_is_not_json() {
1432        let server = wiremock::MockServer::start().await;
1433        wiremock::Mock::given(wiremock::matchers::path("/latest"))
1434            .respond_with(
1435                wiremock::ResponseTemplate::new(200)
1436                    .insert_header("content-type", "application/json")
1437                    .set_body_string("not json{"),
1438            )
1439            .mount(&server)
1440            .await;
1441        let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1442            .await
1443            .unwrap_err();
1444        assert!(err.to_string().contains("parsing npm latest dist-tag JSON"));
1445    }
1446
1447    #[tokio::test]
1448    async fn fetch_drift_report_from_url_errors_on_connection_refused() {
1449        // Bind to find a free local port, then drop the listener so the
1450        // port is no longer accepting connections. reqwest's `send().await`
1451        // hits a connection-refused error, exercising the underlying error
1452        // context wrapper that wiremock 200/4xx/5xx tests can't reach.
1453        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1454        let port = listener.local_addr().unwrap().port();
1455        drop(listener);
1456        let url = format!("http://127.0.0.1:{port}/latest");
1457        let err = fetch_drift_report_from_url(&url).await.unwrap_err();
1458        assert!(
1459            err.to_string().contains("fetching npm registry"),
1460            "unexpected error: {err}"
1461        );
1462    }
1463
1464    #[tokio::test]
1465    async fn fetch_drift_report_from_url_errors_on_npm_5xx() {
1466        let server = wiremock::MockServer::start().await;
1467        wiremock::Mock::given(wiremock::matchers::path("/latest"))
1468            .respond_with(wiremock::ResponseTemplate::new(503))
1469            .mount(&server)
1470            .await;
1471        let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1472            .await
1473            .unwrap_err();
1474        assert!(err
1475            .to_string()
1476            .contains("non-2xx status for latest dist-tag"));
1477    }
1478
1479    #[tokio::test]
1480    async fn fetch_drift_report_from_url_errors_on_tarball_5xx() {
1481        let server = wiremock::MockServer::start().await;
1482        let tarball_url = format!("{}/-/x.tgz", server.uri());
1483        wiremock::Mock::given(wiremock::matchers::path("/latest"))
1484            .respond_with(wiremock::ResponseTemplate::new(200).set_body_json(json!({
1485                "version": "1.0.0",
1486                "dist": {"tarball": tarball_url}
1487            })))
1488            .mount(&server)
1489            .await;
1490        wiremock::Mock::given(wiremock::matchers::path("/-/x.tgz"))
1491            .respond_with(wiremock::ResponseTemplate::new(503))
1492            .mount(&server)
1493            .await;
1494        let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1495            .await
1496            .unwrap_err();
1497        assert!(err.to_string().contains("non-2xx status"));
1498    }
1499}