1use std::collections::{BTreeMap, BTreeSet, HashSet};
19use std::io::Read;
20
21use anyhow::{anyhow, Context, Result};
22use serde::Serialize;
23use serde_json::{Map, Value};
24use sha2::{Digest, Sha256};
25
26use super::{local_schema_map, SCHEMA_VERSION, UPSTREAM_TARBALL_SHA256};
27
/// Default npm registry endpoint queried for the `latest` dist-tag metadata of
/// `@atlaskit/adf-schema`. Used when [`NPM_LATEST_URL_ENV`] is not set.
const NPM_LATEST_URL: &str = "https://registry.npmjs.org/@atlaskit/adf-schema/latest";

/// Name of the environment variable that, when set, replaces
/// the default registry URL in [`fetch_latest_drift_report`].
pub const NPM_LATEST_URL_ENV: &str = "OMNI_DEV_ADF_SCHEMA_LATEST_URL";
37
/// Content-model difference for a single parent node type that exists in both
/// the upstream schema and the local schema map.
#[derive(Debug, Clone, Default, Serialize, PartialEq, Eq)]
pub struct ParentDrift {
    /// Child types allowed by upstream but missing from the local map.
    pub added_children: BTreeSet<String>,
    /// Child types present in the local map but not allowed by upstream.
    pub removed_children: BTreeSet<String>,
}
48
/// Result of comparing an upstream `@atlaskit/adf-schema` release against the
/// locally transcribed schema snapshot.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct DriftReport {
    /// Version string reported for the upstream package.
    pub upstream_version: String,
    /// SHA-256 (as supplied by the caller) of the upstream tarball.
    pub upstream_tarball_sha256: String,
    /// The local `SCHEMA_VERSION` constant, copied verbatim.
    pub local_version: String,
    /// The local `UPSTREAM_TARBALL_SHA256` constant, copied verbatim.
    pub local_tarball_sha256: String,
    /// `true` when the upstream version differs from `SCHEMA_VERSION` with its
    /// trailing `-YYYY-MM-DD` suffix (if any) removed.
    pub version_changed: bool,
    /// Per-parent child differences, keyed by parent type. Only parents that
    /// exist on both sides and have at least one difference are listed.
    pub per_parent: BTreeMap<String, ParentDrift>,
    /// Parent types found upstream but not in the local map.
    pub added_parents: BTreeSet<String>,
    /// Parent types found in the local map but not upstream.
    pub removed_parents: BTreeSet<String>,
}
78
79impl DriftReport {
80 #[must_use]
82 pub fn has_content_drift(&self) -> bool {
83 !self.per_parent.is_empty()
84 || !self.added_parents.is_empty()
85 || !self.removed_parents.is_empty()
86 }
87
88 #[must_use]
92 pub fn has_any_drift(&self) -> bool {
93 self.version_changed || self.has_content_drift()
94 }
95
96 #[must_use]
98 pub fn render_markdown(&self) -> String {
99 let mut out = String::new();
100 out.push_str("# ADF schema drift report\n\n");
101
102 out.push_str("## Version\n\n");
103 out.push_str(&format!(
104 "- Upstream `@atlaskit/adf-schema`: `{}`\n",
105 self.upstream_version
106 ));
107 out.push_str(&format!(
108 "- Upstream tarball SHA-256: `{}`\n",
109 self.upstream_tarball_sha256
110 ));
111 out.push_str(&format!(
112 "- Local `SCHEMA_VERSION`: `{}`\n",
113 self.local_version
114 ));
115 out.push_str(&format!(
116 "- Local `UPSTREAM_TARBALL_SHA256`: `{}`\n",
117 self.local_tarball_sha256
118 ));
119 out.push_str(&format!(
120 "- Version changed: **{}**\n\n",
121 self.version_changed
122 ));
123
124 out.push_str("## Content-model drift\n\n");
125 if !self.has_content_drift() {
126 out.push_str("No content-model changes — version bump only.\n\n");
127 } else {
128 if !self.added_parents.is_empty() {
129 out.push_str("### New parents (upstream only)\n\n");
130 for p in &self.added_parents {
131 out.push_str(&format!("- `{p}`\n"));
132 }
133 out.push('\n');
134 }
135 if !self.removed_parents.is_empty() {
136 out.push_str("### Removed parents (local only)\n\n");
137 for p in &self.removed_parents {
138 out.push_str(&format!("- `{p}`\n"));
139 }
140 out.push('\n');
141 }
142 if !self.per_parent.is_empty() {
143 out.push_str("### Per-parent diffs\n\n");
144 for (parent, drift) in &self.per_parent {
145 out.push_str(&format!("#### `{parent}`\n\n"));
146 if !drift.added_children.is_empty() {
147 out.push_str("Added children (upstream only):\n");
148 for c in &drift.added_children {
149 out.push_str(&format!("- `{c}`\n"));
150 }
151 out.push('\n');
152 }
153 if !drift.removed_children.is_empty() {
154 out.push_str("Removed children (local only):\n");
155 for c in &drift.removed_children {
156 out.push_str(&format!("- `{c}`\n"));
157 }
158 out.push('\n');
159 }
160 }
161 }
162 }
163
164 out.push_str("---\n");
165 out.push_str(
166 "_Generated by the `adf-schema-drift` job. To refresh the snapshot, \
167 update `CONTENT_ENTRIES`, `SCHEMA_VERSION`, and `UPSTREAM_TARBALL_SHA256` in \
168 `src/atlassian/adf_schema/mod.rs`._\n",
169 );
170 out
171 }
172
173 #[must_use]
175 pub fn render_json(&self) -> Value {
176 serde_json::to_value(self).unwrap_or(Value::Null)
177 }
178}
179
180pub async fn fetch_latest_drift_report() -> Result<DriftReport> {
187 let url = std::env::var(NPM_LATEST_URL_ENV).unwrap_or_else(|_| NPM_LATEST_URL.to_string());
188 fetch_drift_report_from_url(&url).await
189}
190
191async fn fetch_drift_report_from_url(latest_url: &str) -> Result<DriftReport> {
195 let client = reqwest::Client::builder()
196 .user_agent(concat!(
197 "omni-dev-adf-schema-drift/",
198 env!("CARGO_PKG_VERSION")
199 ))
200 .build()
201 .context("building HTTP client")?;
202
203 let meta: Value = client
204 .get(latest_url)
205 .send()
206 .await
207 .context("fetching npm registry latest dist-tag")?
208 .error_for_status()
209 .context("npm registry returned a non-2xx status for latest dist-tag")?
210 .json()
211 .await
212 .context("parsing npm latest dist-tag JSON")?;
213
214 let upstream_version = meta
215 .get("version")
216 .and_then(Value::as_str)
217 .ok_or_else(|| anyhow!("npm latest dist-tag JSON has no `version` field"))?
218 .to_string();
219 let tarball_url = meta
220 .get("dist")
221 .and_then(|d| d.get("tarball"))
222 .and_then(Value::as_str)
223 .ok_or_else(|| anyhow!("npm latest dist-tag JSON has no `dist.tarball` field"))?
224 .to_string();
225
226 let tarball_bytes = client
227 .get(&tarball_url)
228 .send()
229 .await
230 .with_context(|| format!("fetching tarball {tarball_url}"))?
231 .error_for_status()
232 .with_context(|| format!("npm tarball {tarball_url} returned a non-2xx status"))?
233 .bytes()
234 .await
235 .context("reading tarball bytes")?;
236
237 let upstream_sha = hex_encode(&Sha256::digest(&tarball_bytes));
238 let full_json = extract_full_json_from_tarball(&tarball_bytes)
239 .context("extracting dist/json-schema/v1/full.json from tarball")?;
240
241 diff_against_upstream_json_schema(&full_json, &upstream_version, &upstream_sha)
242}
243
244pub fn diff_against_upstream_json_schema(
246 full: &Value,
247 upstream_version: &str,
248 upstream_sha256: &str,
249) -> Result<DriftReport> {
250 let upstream = parse_upstream_full_json(full)?;
251 let local = local_schema_map();
252
253 let local_version_npm = strip_transcription_date(SCHEMA_VERSION);
254 let version_changed = upstream_version != local_version_npm;
255
256 let upstream_parents: BTreeSet<&str> = upstream.keys().map(String::as_str).collect();
257 let local_parents: BTreeSet<&str> = local.keys().copied().collect();
258
259 let added_parents: BTreeSet<String> = upstream_parents
260 .difference(&local_parents)
261 .map(|s| (*s).to_string())
262 .collect();
263 let removed_parents: BTreeSet<String> = local_parents
264 .difference(&upstream_parents)
265 .map(|s| (*s).to_string())
266 .collect();
267
268 let mut per_parent: BTreeMap<String, ParentDrift> = BTreeMap::new();
269 for parent in upstream_parents.intersection(&local_parents).copied() {
270 let upstream_children: &BTreeSet<String> = upstream
271 .get(parent)
272 .ok_or_else(|| anyhow!("internal: parent `{parent}` missing from upstream map"))?;
273 let local_children: &BTreeSet<&'static str> = local
274 .get(parent)
275 .ok_or_else(|| anyhow!("internal: parent `{parent}` missing from local map"))?;
276 let added_children: BTreeSet<String> = upstream_children
277 .iter()
278 .filter(|c| !local_children.contains(c.as_str()))
279 .cloned()
280 .collect();
281 let removed_children: BTreeSet<String> = local_children
282 .iter()
283 .filter(|c| !upstream_children.contains(**c))
284 .map(|s| (*s).to_string())
285 .collect();
286 if !added_children.is_empty() || !removed_children.is_empty() {
287 per_parent.insert(
288 parent.to_string(),
289 ParentDrift {
290 added_children,
291 removed_children,
292 },
293 );
294 }
295 }
296
297 Ok(DriftReport {
298 upstream_version: upstream_version.to_string(),
299 upstream_tarball_sha256: upstream_sha256.to_string(),
300 local_version: SCHEMA_VERSION.to_string(),
301 local_tarball_sha256: UPSTREAM_TARBALL_SHA256.to_string(),
302 version_changed,
303 per_parent,
304 added_parents,
305 removed_parents,
306 })
307}
308
309pub fn parse_upstream_full_json(full: &Value) -> Result<BTreeMap<String, BTreeSet<String>>> {
332 let definitions = full
333 .get("definitions")
334 .and_then(Value::as_object)
335 .ok_or_else(|| anyhow!("upstream JSON has no `definitions` object"))?;
336
337 let mut def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
340 for (name, def) in definitions {
341 def_to_bare.insert(name.clone(), find_bare_type(def));
342 }
343
344 loop {
348 let mut changed = false;
349 for (name, def) in definitions {
350 if def_to_bare
351 .get(name)
352 .is_some_and(std::option::Option::is_some)
353 {
354 continue;
355 }
356 if let Some(inherited) = inherited_bare_type_via_allof(def, &def_to_bare) {
357 def_to_bare.insert(name.clone(), Some(inherited));
358 changed = true;
359 }
360 }
361 if !changed {
362 break;
363 }
364 }
365
366 let mut result: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
367 for (name, def) in definitions {
368 let Some(Some(bare)) = def_to_bare.get(name) else {
369 continue;
370 };
371 let children = definition_content_children(name, def, definitions, &def_to_bare);
372 if !children.is_empty() {
373 result.entry(bare.clone()).or_default().extend(children);
374 }
375 }
376
377 Ok(result)
378}
379
380fn inherited_bare_type_via_allof(
388 def: &Value,
389 def_to_bare: &BTreeMap<String, Option<String>>,
390) -> Option<String> {
391 let Value::Object(obj) = def else { return None };
392 let Some(Value::Array(arr)) = obj.get("allOf") else {
393 return None;
394 };
395 for item in arr {
396 let Some(s) = item.get("$ref").and_then(Value::as_str) else {
397 continue;
398 };
399 let Some(target) = s.strip_prefix("#/definitions/") else {
400 continue;
401 };
402 if let Some(Some(bare)) = def_to_bare.get(target) {
403 return Some(bare.clone());
404 }
405 }
406 None
407}
408
409fn find_bare_type(def: &Value) -> Option<String> {
414 fn walk(v: &Value) -> Option<String> {
415 let Value::Object(obj) = v else { return None };
416 if let Some(Value::Object(props)) = obj.get("properties") {
417 if let Some(Value::Object(t)) = props.get("type") {
418 if let Some(Value::String(s)) = t.get("const") {
419 return Some(s.clone());
420 }
421 if let Some(Value::Array(arr)) = t.get("enum") {
422 if let Some(Value::String(s)) = arr.first() {
423 return Some(s.clone());
424 }
425 }
426 }
427 }
428 for key in ["allOf", "anyOf", "oneOf"] {
429 if let Some(Value::Array(arr)) = obj.get(key) {
430 for x in arr {
431 if let Some(s) = walk(x) {
432 return Some(s);
433 }
434 }
435 }
436 }
437 None
438 }
439 walk(def)
440}
441
442fn definition_content_children(
445 def_name: &str,
446 def: &Value,
447 definitions: &Map<String, Value>,
448 def_to_bare: &BTreeMap<String, Option<String>>,
449) -> BTreeSet<String> {
450 let mut subtrees: Vec<&Value> = Vec::new();
451 find_content_subtrees(def, &mut subtrees);
452
453 let mut refs = Vec::new();
454 for subtree in subtrees {
455 collect_refs(subtree, &mut refs);
456 }
457
458 let mut out = BTreeSet::new();
459 for r in refs {
460 let mut visited = HashSet::new();
461 visited.insert(def_name.to_string());
462 resolve_ref_to_bare_types(&r, definitions, def_to_bare, &mut visited, &mut out);
463 }
464 out
465}
466
467fn find_content_subtrees<'a>(value: &'a Value, out: &mut Vec<&'a Value>) {
474 let Value::Object(obj) = value else { return };
475 if let Some(Value::Object(props)) = obj.get("properties") {
476 if let Some(content) = props.get("content") {
477 out.push(content);
478 }
479 }
480 for key in ["allOf", "oneOf", "anyOf"] {
481 if let Some(Value::Array(arr)) = obj.get(key) {
482 for item in arr {
483 find_content_subtrees(item, out);
484 }
485 }
486 }
487}
488
489fn resolve_ref_to_bare_types(
496 target_def_name: &str,
497 definitions: &Map<String, Value>,
498 def_to_bare: &BTreeMap<String, Option<String>>,
499 visited: &mut HashSet<String>,
500 out: &mut BTreeSet<String>,
501) {
502 if let Some(Some(bare)) = def_to_bare.get(target_def_name) {
503 out.insert(bare.clone());
504 return;
505 }
506 if !visited.insert(target_def_name.to_string()) {
507 return;
508 }
509 let Some(target_def) = definitions.get(target_def_name) else {
510 return;
511 };
512 let mut refs = Vec::new();
513 collect_refs(target_def, &mut refs);
514 for r in refs {
515 resolve_ref_to_bare_types(&r, definitions, def_to_bare, visited, out);
516 }
517}
518
519fn collect_refs(node: &Value, out: &mut Vec<String>) {
526 match node {
527 Value::Object(obj) => {
528 if let Some(Value::String(r)) = obj.get("$ref") {
529 if let Some(name) = r.strip_prefix("#/definitions/") {
530 out.push(name.to_string());
531 }
532 }
533 for (key, v) in obj {
534 if key == "marks" || key == "attrs" {
535 continue;
536 }
537 collect_refs(v, out);
538 }
539 }
540 Value::Array(arr) => {
541 for v in arr {
542 collect_refs(v, out);
543 }
544 }
545 _ => {}
546 }
547}
548
549fn extract_full_json_from_tarball(bytes: &[u8]) -> Result<Value> {
554 let gz = flate2::read::GzDecoder::new(bytes);
555 let mut archive = tar::Archive::new(gz);
556 for entry in archive.entries().context("opening tarball entries")? {
557 let mut entry = entry.context("reading tarball entry header")?;
558 let path_buf = entry
559 .path()
560 .context("decoding tarball entry path")?
561 .into_owned();
562 if path_buf == std::path::Path::new("package/dist/json-schema/v1/full.json") {
563 let mut buf = String::new();
564 entry
565 .read_to_string(&mut buf)
566 .context("reading full.json")?;
567 return serde_json::from_str(&buf).context("parsing full.json");
568 }
569 }
570 Err(anyhow!(
571 "tarball does not contain package/dist/json-schema/v1/full.json"
572 ))
573}
574
/// Removes a trailing `-YYYY-MM-DD` suffix from `s`, if present.
///
/// The suffix must be exactly a dash followed by four, two and two ASCII
/// digits separated by dashes. Any other input is returned unchanged,
/// including strings shorter than the suffix and strings whose final 11 bytes
/// do not start on a UTF-8 character boundary.
fn strip_transcription_date(s: &str) -> &str {
    const SUFFIX_LEN: usize = "-YYYY-MM-DD".len();

    let Some(split) = s.len().checked_sub(SUFFIX_LEN) else {
        return s;
    };
    // `get` yields `None` instead of panicking when `split` falls inside a
    // multi-byte character; such a tail cannot be an ASCII date anyway.
    let Some(suffix) = s.get(split..) else {
        return s;
    };

    let looks_like_date = suffix.bytes().enumerate().all(|(idx, byte)| match idx {
        // Separator positions within `-YYYY-MM-DD`.
        0 | 5 | 8 => byte == b'-',
        _ => byte.is_ascii_digit(),
    });

    if looks_like_date {
        &s[..split]
    } else {
        s
    }
}
601
/// Encodes `bytes` as a lowercase hexadecimal string, two digits per byte.
#[must_use]
pub fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
617
618#[cfg(test)]
619#[allow(clippy::unwrap_used, clippy::expect_used)]
620mod tests {
621 use super::*;
622 use serde_json::json;
623
624 fn synthesise_full_json_from_local() -> Value {
625 let local = local_schema_map();
626 let parents: BTreeSet<&str> = local.keys().copied().collect();
627 let mut all_types: BTreeSet<&str> = parents.clone();
628 for children in local.values() {
629 for c in children {
630 all_types.insert(*c);
631 }
632 }
633 let leaves: BTreeSet<&str> = all_types.difference(&parents).copied().collect();
634
635 let mut definitions = serde_json::Map::new();
636 for (parent, children) in &local {
637 let any_of: Vec<Value> = children
638 .iter()
639 .map(|c| json!({"$ref": format!("#/definitions/{c}_node")}))
640 .collect();
641 definitions.insert(
642 format!("{parent}_node"),
643 json!({
644 "properties": {
645 "type": {"const": parent},
646 "content": {
647 "type": "array",
648 "items": {"anyOf": any_of}
649 }
650 }
651 }),
652 );
653 }
654 for leaf in &leaves {
655 definitions.insert(
656 format!("{leaf}_node"),
657 json!({
658 "properties": {
659 "type": {"const": leaf}
660 }
661 }),
662 );
663 }
664 json!({"definitions": Value::Object(definitions)})
665 }
666
667 #[test]
668 fn parses_anyof_refs_into_allowed_children_set() {
669 let full = json!({
670 "definitions": {
671 "blockquote_node": {
672 "properties": {
673 "type": {"const": "blockquote"},
674 "content": {
675 "type": "array",
676 "items": {
677 "anyOf": [
678 {"$ref": "#/definitions/paragraph_node"},
679 {"$ref": "#/definitions/codeBlock_node"}
680 ]
681 }
682 }
683 }
684 },
685 "paragraph_node": {"properties": {"type": {"const": "paragraph"}}},
686 "codeBlock_node": {"properties": {"type": {"const": "codeBlock"}}}
687 }
688 });
689 let parsed = parse_upstream_full_json(&full).unwrap();
690 let bq = parsed.get("blockquote").expect("blockquote parsed");
691 let expected: BTreeSet<String> = ["codeBlock", "paragraph"]
692 .into_iter()
693 .map(String::from)
694 .collect();
695 assert_eq!(*bq, expected);
696 }
697
698 #[test]
699 fn alias_definition_is_flattened_transitively() {
700 let full = json!({
702 "definitions": {
703 "tableCell_node": {
704 "properties": {
705 "type": {"const": "tableCell"},
706 "content": {
707 "type": "array",
708 "items": {"$ref": "#/definitions/table_cell_content"}
709 }
710 }
711 },
712 "table_cell_content": {
713 "anyOf": [
714 {"$ref": "#/definitions/paragraph_node"},
715 {"$ref": "#/definitions/heading_node"}
716 ]
717 },
718 "paragraph_node": {"properties": {"type": {"const": "paragraph"}}},
719 "heading_node": {"properties": {"type": {"const": "heading"}}}
720 }
721 });
722 let parsed = parse_upstream_full_json(&full).unwrap();
723 let cell = parsed.get("tableCell").expect("tableCell parsed");
724 let expected: BTreeSet<String> = ["heading", "paragraph"]
725 .into_iter()
726 .map(String::from)
727 .collect();
728 assert_eq!(*cell, expected);
729 }
730
731 #[test]
732 fn report_is_empty_when_input_matches_local_entries() {
733 let full = synthesise_full_json_from_local();
738 let report = diff_against_upstream_json_schema(
739 &full,
740 strip_transcription_date(SCHEMA_VERSION),
741 "fixture",
742 )
743 .unwrap();
744 assert!(
745 !report.has_content_drift(),
746 "synthesised-from-local fixture should produce no content drift, got: {report:#?}"
747 );
748 assert!(report.added_parents.is_empty());
749 assert!(report.removed_parents.is_empty());
750 assert!(report.per_parent.is_empty());
751 assert!(!report.version_changed);
752 }
753
754 #[test]
755 fn report_flags_added_and_removed_children() {
756 let mut full = synthesise_full_json_from_local();
757 let bq_items = full
759 .pointer_mut("/definitions/blockquote_node/properties/content/items/anyOf")
760 .unwrap()
761 .as_array_mut()
762 .unwrap();
763 bq_items.push(json!({"$ref": "#/definitions/madeUpBlock_node"}));
764 full.pointer_mut("/definitions")
765 .unwrap()
766 .as_object_mut()
767 .unwrap()
768 .insert(
769 "madeUpBlock_node".to_string(),
770 json!({"properties": {"type": {"const": "madeUpBlock"}}}),
771 );
772 let panel_items = full
774 .pointer_mut("/definitions/panel_node/properties/content/items/anyOf")
775 .unwrap()
776 .as_array_mut()
777 .unwrap();
778 panel_items.retain(|v| {
779 v.get("$ref").and_then(Value::as_str) != Some("#/definitions/paragraph_node")
780 });
781
782 let report =
783 diff_against_upstream_json_schema(&full, "fixture-version", "fixture").unwrap();
784 assert!(report.has_content_drift());
785
786 let bq = report
787 .per_parent
788 .get("blockquote")
789 .expect("blockquote drift present");
790 assert_eq!(
791 bq.added_children,
792 std::iter::once("madeUpBlock").map(String::from).collect()
793 );
794 assert!(bq.removed_children.is_empty());
795
796 let panel = report.per_parent.get("panel").expect("panel drift present");
797 assert!(panel.added_children.is_empty());
798 assert_eq!(
799 panel.removed_children,
800 std::iter::once("paragraph").map(String::from).collect()
801 );
802 }
803
804 #[test]
805 fn report_flags_added_and_removed_parents() {
806 let mut full = synthesise_full_json_from_local();
807 full.pointer_mut("/definitions")
809 .unwrap()
810 .as_object_mut()
811 .unwrap()
812 .remove("expand_node");
813 full.pointer_mut("/definitions")
815 .unwrap()
816 .as_object_mut()
817 .unwrap()
818 .insert(
819 "futureBlock_node".to_string(),
820 json!({
821 "properties": {
822 "type": {"const": "futureBlock"},
823 "content": {
824 "type": "array",
825 "items": {
826 "anyOf": [
827 {"$ref": "#/definitions/paragraph_node"}
828 ]
829 }
830 }
831 }
832 }),
833 );
834
835 let report = diff_against_upstream_json_schema(&full, "fixture", "fixture").unwrap();
836 assert!(report.added_parents.contains("futureBlock"));
837 assert!(report.removed_parents.contains("expand"));
838 }
839
840 #[test]
841 fn version_changed_distinguishes_npm_version_from_transcription_date() {
842 let full = synthesise_full_json_from_local();
843 let r = diff_against_upstream_json_schema(
845 &full,
846 strip_transcription_date(SCHEMA_VERSION),
847 "fixture",
848 )
849 .unwrap();
850 assert!(!r.version_changed);
851
852 let r = diff_against_upstream_json_schema(&full, "999.0.0", "fixture").unwrap();
854 assert!(r.version_changed);
855 }
856
857 #[test]
858 fn strip_transcription_date_handles_yyyy_mm_dd_suffix() {
859 assert_eq!(strip_transcription_date("52.9.5-2026-05-10"), "52.9.5");
860 assert_eq!(strip_transcription_date("52.9.5"), "52.9.5");
861 assert_eq!(strip_transcription_date("52.9.5-rc1"), "52.9.5-rc1");
862 assert_eq!(
863 strip_transcription_date("52.9.5-rc1-2026-05-10"),
864 "52.9.5-rc1"
865 );
866 assert_eq!(strip_transcription_date(""), "");
867 }
868
869 #[test]
870 fn strip_transcription_date_returns_input_when_suffix_lacks_leading_dash() {
871 assert_eq!(strip_transcription_date("abcdefghijkl"), "abcdefghijkl");
873 }
874
875 #[test]
876 fn strip_transcription_date_returns_input_when_suffix_is_not_a_date() {
877 assert_eq!(strip_transcription_date("abc-1234-XX-YY"), "abc-1234-XX-YY");
879 }
880
881 #[test]
882 fn render_markdown_is_terse_when_no_drift() {
883 let full = synthesise_full_json_from_local();
884 let report = diff_against_upstream_json_schema(
885 &full,
886 strip_transcription_date(SCHEMA_VERSION),
887 "fixture",
888 )
889 .unwrap();
890 let md = report.render_markdown();
891 assert!(md.contains("No content-model changes"));
892 assert!(!md.contains("Per-parent diffs"));
893 }
894
895 #[test]
896 fn render_markdown_includes_per_parent_diffs() {
897 let mut full = synthesise_full_json_from_local();
898 let bq_items = full
899 .pointer_mut("/definitions/blockquote_node/properties/content/items/anyOf")
900 .unwrap()
901 .as_array_mut()
902 .unwrap();
903 bq_items.push(json!({"$ref": "#/definitions/text_node"}));
904 let report = diff_against_upstream_json_schema(&full, "fixture", "fixture").unwrap();
905 let md = report.render_markdown();
906 assert!(md.contains("Per-parent diffs"));
907 assert!(md.contains("`blockquote`"));
908 assert!(md.contains("text"));
909 }
910
911 #[test]
912 fn render_json_is_serializable() {
913 let full = synthesise_full_json_from_local();
914 let report = diff_against_upstream_json_schema(&full, "fixture", "fixture").unwrap();
915 let v = report.render_json();
916 assert!(v.is_object());
917 assert!(v.get("upstream_version").is_some());
918 assert!(v.get("per_parent").is_some());
919 }
920
921 #[test]
924 fn find_bare_type_recognises_enum_array() {
925 let def = json!({"properties": {"type": {"enum": ["paragraph"]}}});
928 assert_eq!(find_bare_type(&def).as_deref(), Some("paragraph"));
929 }
930
931 #[test]
932 fn find_bare_type_walks_into_oneof_for_nested_const() {
933 let def = json!({
934 "oneOf": [
935 {"properties": {"type": {"const": "nestedExpand"}}},
936 {"properties": {"type": {"const": "ignoredVariant"}}}
937 ]
938 });
939 assert_eq!(find_bare_type(&def).as_deref(), Some("nestedExpand"));
941 }
942
943 #[test]
944 fn find_bare_type_returns_none_for_nodes_with_no_type_const() {
945 let def = json!({"anyOf": [{"$ref": "#/definitions/x"}]});
946 assert_eq!(find_bare_type(&def), None);
947 }
948
949 #[test]
950 fn find_bare_type_returns_none_when_enum_is_empty_or_non_string() {
951 let empty = json!({"properties": {"type": {"enum": []}}});
953 assert_eq!(find_bare_type(&empty), None);
954 let non_string = json!({"properties": {"type": {"enum": [42]}}});
956 assert_eq!(find_bare_type(&non_string), None);
957 }
958
959 #[test]
960 fn find_bare_type_returns_none_on_non_object() {
961 assert_eq!(find_bare_type(&json!(null)), None);
962 assert_eq!(find_bare_type(&json!([])), None);
963 assert_eq!(find_bare_type(&json!("string")), None);
964 }
965
966 #[test]
969 fn inheritance_via_allof_finds_known_base() {
970 let def = json!({
971 "allOf": [
972 {"$ref": "#/definitions/paragraph_node"},
973 {"properties": {"marks": {}}}
974 ]
975 });
976 let mut def_to_bare = BTreeMap::new();
977 def_to_bare.insert("paragraph_node".to_string(), Some("paragraph".to_string()));
978 assert_eq!(
979 inherited_bare_type_via_allof(&def, &def_to_bare).as_deref(),
980 Some("paragraph")
981 );
982 }
983
984 #[test]
985 fn inheritance_via_allof_returns_none_when_no_allof() {
986 let def = json!({"anyOf": [{"$ref": "#/definitions/x_node"}]});
987 let mut def_to_bare = BTreeMap::new();
988 def_to_bare.insert("x_node".to_string(), Some("x".to_string()));
989 assert_eq!(inherited_bare_type_via_allof(&def, &def_to_bare), None);
991 }
992
993 #[test]
994 fn inheritance_via_allof_returns_none_when_target_unknown() {
995 let def = json!({"allOf": [{"$ref": "#/definitions/unknown_node"}]});
996 let def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
997 assert_eq!(inherited_bare_type_via_allof(&def, &def_to_bare), None);
998 }
999
1000 #[test]
1001 fn inheritance_via_allof_skips_items_without_ref_or_with_external_ref() {
1002 let def = json!({
1005 "allOf": [
1006 {"properties": {"marks": {}}}, {"$ref": "https://example.com/external.json"}, {"$ref": "#/definitions/paragraph_node"} ]
1010 });
1011 let mut def_to_bare = BTreeMap::new();
1012 def_to_bare.insert("paragraph_node".to_string(), Some("paragraph".to_string()));
1013 assert_eq!(
1014 inherited_bare_type_via_allof(&def, &def_to_bare).as_deref(),
1015 Some("paragraph")
1016 );
1017 }
1018
1019 #[test]
1020 fn inheritance_via_allof_returns_none_when_only_non_ref_items() {
1021 let def = json!({
1023 "allOf": [
1024 {"properties": {"marks": {}}},
1025 {"$ref": "https://example.com/external.json"}
1026 ]
1027 });
1028 let def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
1029 assert_eq!(inherited_bare_type_via_allof(&def, &def_to_bare), None);
1030 }
1031
1032 #[test]
1033 fn inheritance_via_allof_returns_none_for_non_object_input() {
1034 let def_to_bare: BTreeMap<String, Option<String>> = BTreeMap::new();
1036 assert_eq!(
1037 inherited_bare_type_via_allof(&json!(null), &def_to_bare),
1038 None
1039 );
1040 assert_eq!(
1041 inherited_bare_type_via_allof(&json!([]), &def_to_bare),
1042 None
1043 );
1044 assert_eq!(
1045 inherited_bare_type_via_allof(&json!("string"), &def_to_bare),
1046 None
1047 );
1048 }
1049
1050 #[test]
1051 fn inheritance_via_allof_handles_marks_overlay_pattern() {
1052 let full = json!({
1054 "definitions": {
1055 "paragraph_node": {
1056 "properties": {
1057 "type": {"const": "paragraph"},
1058 "content": {
1059 "type": "array",
1060 "items": {"$ref": "#/definitions/formatted_text_inline_node"}
1061 }
1062 }
1063 },
1064 "text_node": {"properties": {"type": {"const": "text"}}},
1065 "formatted_text_inline_node": {
1066 "allOf": [
1067 {"$ref": "#/definitions/text_node"},
1068 {
1069 "properties": {
1070 "marks": {
1071 "type": "array",
1072 "items": {
1073 "anyOf": [
1074 {"$ref": "#/definitions/link_mark"}
1075 ]
1076 }
1077 }
1078 }
1079 }
1080 ]
1081 },
1082 "link_mark": {"properties": {"type": {"const": "link"}}}
1083 }
1084 });
1085 let parsed = parse_upstream_full_json(&full).unwrap();
1086 let p = parsed.get("paragraph").expect("paragraph present");
1089 let expected: BTreeSet<String> = std::iter::once("text").map(String::from).collect();
1090 assert_eq!(*p, expected);
1091 }
1092
1093 #[test]
1096 fn parse_returns_error_when_definitions_missing() {
1097 let full = json!({"foo": "bar"});
1098 let err = parse_upstream_full_json(&full).unwrap_err();
1099 assert!(err.to_string().contains("definitions"));
1100 }
1101
1102 #[test]
1103 fn diff_propagates_parse_error_when_definitions_missing() {
1104 let err = diff_against_upstream_json_schema(&json!({}), "1.0.0", "sha").unwrap_err();
1105 assert!(err.to_string().contains("definitions"));
1106 }
1107
1108 #[test]
1111 fn collect_refs_skips_marks_and_attrs_subtrees() {
1112 let v = json!({
1113 "properties": {
1114 "content": {"$ref": "#/definitions/keep_me"},
1115 "marks": {"$ref": "#/definitions/skip_me_mark"},
1116 "attrs": {"$ref": "#/definitions/skip_me_attrs"}
1117 }
1118 });
1119 let mut refs = Vec::new();
1120 collect_refs(&v, &mut refs);
1121 assert!(refs.contains(&"keep_me".to_string()));
1122 assert!(!refs.contains(&"skip_me_mark".to_string()));
1123 assert!(!refs.contains(&"skip_me_attrs".to_string()));
1124 }
1125
1126 #[test]
1127 fn collect_refs_handles_arrays_of_refs() {
1128 let v = json!([
1129 {"$ref": "#/definitions/a"},
1130 {"$ref": "#/definitions/b"},
1131 {"$ref": "https://example.com/schema"}, ]);
1133 let mut refs = Vec::new();
1134 collect_refs(&v, &mut refs);
1135 assert_eq!(refs, vec!["a".to_string(), "b".to_string()]);
1136 }
1137
1138 #[test]
1139 fn find_content_subtrees_returns_nothing_for_non_object_input() {
1140 let null = json!(null);
1142 let array = json!([]);
1143 let string = json!("string");
1144 let mut subtrees = Vec::new();
1145 find_content_subtrees(&null, &mut subtrees);
1146 find_content_subtrees(&array, &mut subtrees);
1147 find_content_subtrees(&string, &mut subtrees);
1148 assert!(subtrees.is_empty());
1149 }
1150
1151 #[test]
1152 fn find_content_subtrees_picks_up_content_nested_in_allof() {
1153 let def = json!({
1156 "allOf": [
1157 {"$ref": "#/definitions/mediaSingle_node"},
1158 {
1159 "properties": {
1160 "content": {
1161 "type": "array",
1162 "items": [
1163 {"$ref": "#/definitions/media_node"},
1164 {"$ref": "#/definitions/caption_node"}
1165 ]
1166 }
1167 }
1168 }
1169 ]
1170 });
1171 let mut subtrees = Vec::new();
1172 find_content_subtrees(&def, &mut subtrees);
1173 assert_eq!(subtrees.len(), 1);
1174 }
1175
1176 #[test]
1179 fn resolve_handles_alias_cycles_without_infinite_loop() {
1180 let mut definitions = serde_json::Map::new();
1181 definitions.insert(
1182 "alias_a".to_string(),
1183 json!({"anyOf": [{"$ref": "#/definitions/alias_b"}]}),
1184 );
1185 definitions.insert(
1186 "alias_b".to_string(),
1187 json!({"anyOf": [{"$ref": "#/definitions/alias_a"}]}),
1188 );
1189 let mut def_to_bare = BTreeMap::new();
1190 def_to_bare.insert("alias_a".to_string(), None);
1191 def_to_bare.insert("alias_b".to_string(), None);
1192
1193 let mut visited = HashSet::new();
1194 let mut out = BTreeSet::new();
1195 resolve_ref_to_bare_types(
1197 "alias_a",
1198 &definitions,
1199 &def_to_bare,
1200 &mut visited,
1201 &mut out,
1202 );
1203 assert!(out.is_empty());
1204 }
1205
1206 #[test]
1207 fn resolve_returns_silently_when_target_not_in_definitions() {
1208 let definitions = serde_json::Map::new();
1209 let mut def_to_bare = BTreeMap::new();
1210 def_to_bare.insert("ghost".to_string(), None);
1211 let mut visited = HashSet::new();
1212 let mut out = BTreeSet::new();
1213 resolve_ref_to_bare_types("ghost", &definitions, &def_to_bare, &mut visited, &mut out);
1214 assert!(out.is_empty());
1215 }
1216
1217 fn report_with(
1220 added_parents: BTreeSet<String>,
1221 removed_parents: BTreeSet<String>,
1222 per_parent: BTreeMap<String, ParentDrift>,
1223 version_changed: bool,
1224 ) -> DriftReport {
1225 DriftReport {
1226 upstream_version: "9.9.9".to_string(),
1227 upstream_tarball_sha256: "up-sha".to_string(),
1228 local_version: "1.0.0-2026-01-01".to_string(),
1229 local_tarball_sha256: "local-sha".to_string(),
1230 version_changed,
1231 per_parent,
1232 added_parents,
1233 removed_parents,
1234 }
1235 }
1236
1237 #[test]
1238 fn render_markdown_renders_added_parents_section() {
1239 let added: BTreeSet<String> = std::iter::once("futureNode").map(String::from).collect();
1240 let report = report_with(added, BTreeSet::new(), BTreeMap::new(), true);
1241 let md = report.render_markdown();
1242 assert!(md.contains("New parents (upstream only)"));
1243 assert!(md.contains("`futureNode`"));
1244 assert!(!md.contains("Removed parents"));
1245 }
1246
/// A report whose only drift is a local-only parent renders the
/// "Removed parents" section and leaves out the "New parents" one.
#[test]
fn render_markdown_renders_removed_parents_section() {
    let local_only = BTreeSet::from(["oldNode".to_string()]);
    let md = report_with(BTreeSet::new(), local_only, BTreeMap::new(), false).render_markdown();

    assert!(md.contains("Removed parents (local only)"));
    assert!(md.contains("`oldNode`"));
    assert!(!md.contains("New parents"));
}
1256
/// When a parent only gained children, the rendered report mentions
/// "Added children" (and the child itself) but never "Removed children".
#[test]
fn render_markdown_renders_only_added_children_when_no_removed() {
    // `removed_children` stays at its (empty) default.
    let blockquote_drift = ParentDrift {
        added_children: BTreeSet::from(["newChild".to_string()]),
        ..ParentDrift::default()
    };
    let per_parent = BTreeMap::from([("blockquote".to_string(), blockquote_drift)]);

    let md = report_with(BTreeSet::new(), BTreeSet::new(), per_parent, false).render_markdown();

    assert!(md.contains("Added children"));
    assert!(!md.contains("Removed children"));
    assert!(md.contains("`newChild`"));
}
1273
/// When a parent only lost children, the rendered report mentions
/// "Removed children" (and the child itself) but never "Added children".
#[test]
fn render_markdown_renders_only_removed_children_when_no_added() {
    // `added_children` stays at its (empty) default.
    let panel_drift = ParentDrift {
        removed_children: BTreeSet::from(["goneChild".to_string()]),
        ..ParentDrift::default()
    };
    let per_parent = BTreeMap::from([("panel".to_string(), panel_drift)]);

    let md = report_with(BTreeSet::new(), BTreeSet::new(), per_parent, false).render_markdown();

    assert!(!md.contains("Added children"));
    assert!(md.contains("Removed children"));
    assert!(md.contains("`goneChild`"));
}
1290
/// A version bump with no structural differences counts as drift overall,
/// but not as content drift.
#[test]
fn has_any_drift_true_when_only_version_changed() {
    let no_parents = BTreeSet::new;
    let version_only = report_with(no_parents(), no_parents(), BTreeMap::new(), true);

    assert!(!version_only.has_content_drift());
    assert!(version_only.has_any_drift());
}
1299
/// A single upstream-only parent is enough to flag both content drift and
/// drift overall, even when the version is unchanged.
#[test]
fn has_any_drift_true_when_only_added_parents() {
    let upstream_only = BTreeSet::from(["x".to_string()]);
    let added_only = report_with(upstream_only, BTreeSet::new(), BTreeMap::new(), false);

    assert!(added_only.has_content_drift());
    assert!(added_only.has_any_drift());
}
1307
/// A single local-only parent is enough to flag both content drift and
/// drift overall, even when the version is unchanged.
#[test]
fn has_any_drift_true_when_only_removed_parents() {
    let local_only = BTreeSet::from(["x".to_string()]);
    let removed_only = report_with(BTreeSet::new(), local_only, BTreeMap::new(), false);

    assert!(removed_only.has_content_drift());
    assert!(removed_only.has_any_drift());
}
1315
1316 fn build_synthetic_tarball(entries: &[(&str, &[u8])]) -> Vec<u8> {
1319 let mut gz = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
1320 {
1321 let mut builder = tar::Builder::new(&mut gz);
1322 for (path, body) in entries {
1323 let mut header = tar::Header::new_gnu();
1324 header.set_path(path).unwrap();
1325 header.set_size(body.len() as u64);
1326 header.set_mode(0o644);
1327 header.set_cksum();
1328 builder.append(&header, *body).unwrap();
1329 }
1330 builder.finish().unwrap();
1331 }
1332 gz.finish().unwrap()
1333 }
1334
/// A tarball that carries `package/dist/json-schema/v1/full.json` with valid
/// JSON is extracted and parsed successfully.
#[test]
fn extract_full_json_succeeds_when_path_present() {
    let schema_bytes = serde_json::to_vec(&json!({"definitions": {}})).unwrap();
    let entries = [(
        "package/dist/json-schema/v1/full.json",
        schema_bytes.as_slice(),
    )];
    let tarball = build_synthetic_tarball(&entries);

    let parsed = extract_full_json_from_tarball(&tarball).unwrap();

    assert!(parsed.get("definitions").is_some());
}
1343
/// A well-formed tarball without the `full.json` entry yields an error whose
/// message says the archive "does not contain" it.
#[test]
fn extract_full_json_errors_when_path_missing() {
    let tarball = build_synthetic_tarball(&[("package/README.md", b"hello")]);

    let message = extract_full_json_from_tarball(&tarball)
        .unwrap_err()
        .to_string();

    assert!(message.contains("does not contain"));
}
1350
/// Bytes that are not a gzip stream must be rejected.
#[test]
fn extract_full_json_errors_on_invalid_gzip() {
    let garbage = b"not a gzip stream";
    // Only the failure itself is checked; no particular error text is
    // asserted here.
    let _ = extract_full_json_from_tarball(garbage).unwrap_err();
}
1359
/// A tarball whose `full.json` entry is not valid JSON yields an error that
/// mentions "parsing full.json".
#[test]
fn extract_full_json_errors_when_payload_is_not_json() {
    let tarball =
        build_synthetic_tarball(&[("package/dist/json-schema/v1/full.json", b"not json{")]);

    let message = extract_full_json_from_tarball(&tarball)
        .unwrap_err()
        .to_string();

    assert!(message.contains("parsing full.json"));
}
1367
1368 #[tokio::test]
1371 async fn fetch_drift_report_from_url_handles_clean_upstream() {
1372 let server = wiremock::MockServer::start().await;
1373 let full = synthesise_full_json_from_local();
1374 let tarball = build_synthetic_tarball(&[(
1375 "package/dist/json-schema/v1/full.json",
1376 serde_json::to_vec(&full).unwrap().as_slice(),
1377 )]);
1378 let tarball_url = format!("{}/-/adf-schema-fixture.tgz", server.uri());
1379
1380 wiremock::Mock::given(wiremock::matchers::path("/latest"))
1381 .respond_with(wiremock::ResponseTemplate::new(200).set_body_json(json!({
1382 "version": strip_transcription_date(SCHEMA_VERSION),
1383 "dist": {"tarball": tarball_url}
1384 })))
1385 .mount(&server)
1386 .await;
1387 wiremock::Mock::given(wiremock::matchers::path("/-/adf-schema-fixture.tgz"))
1388 .respond_with(wiremock::ResponseTemplate::new(200).set_body_bytes(tarball))
1389 .mount(&server)
1390 .await;
1391
1392 let report = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1393 .await
1394 .unwrap();
1395 assert!(!report.version_changed);
1396 assert!(!report.has_content_drift());
1397 }
1398
1399 #[tokio::test]
1400 async fn fetch_drift_report_from_url_errors_when_metadata_lacks_version() {
1401 let server = wiremock::MockServer::start().await;
1402 wiremock::Mock::given(wiremock::matchers::path("/latest"))
1403 .respond_with(
1404 wiremock::ResponseTemplate::new(200)
1405 .set_body_json(json!({"dist": {"tarball": "x"}})),
1406 )
1407 .mount(&server)
1408 .await;
1409 let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1410 .await
1411 .unwrap_err();
1412 assert!(err.to_string().contains("`version` field"));
1413 }
1414
1415 #[tokio::test]
1416 async fn fetch_drift_report_from_url_errors_when_metadata_lacks_tarball() {
1417 let server = wiremock::MockServer::start().await;
1418 wiremock::Mock::given(wiremock::matchers::path("/latest"))
1419 .respond_with(
1420 wiremock::ResponseTemplate::new(200).set_body_json(json!({"version": "1.0.0"})),
1421 )
1422 .mount(&server)
1423 .await;
1424 let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1425 .await
1426 .unwrap_err();
1427 assert!(err.to_string().contains("`dist.tarball` field"));
1428 }
1429
1430 #[tokio::test]
1431 async fn fetch_drift_report_from_url_errors_when_metadata_is_not_json() {
1432 let server = wiremock::MockServer::start().await;
1433 wiremock::Mock::given(wiremock::matchers::path("/latest"))
1434 .respond_with(
1435 wiremock::ResponseTemplate::new(200)
1436 .insert_header("content-type", "application/json")
1437 .set_body_string("not json{"),
1438 )
1439 .mount(&server)
1440 .await;
1441 let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1442 .await
1443 .unwrap_err();
1444 assert!(err.to_string().contains("parsing npm latest dist-tag JSON"));
1445 }
1446
1447 #[tokio::test]
1448 async fn fetch_drift_report_from_url_errors_on_connection_refused() {
1449 let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
1454 let port = listener.local_addr().unwrap().port();
1455 drop(listener);
1456 let url = format!("http://127.0.0.1:{port}/latest");
1457 let err = fetch_drift_report_from_url(&url).await.unwrap_err();
1458 assert!(
1459 err.to_string().contains("fetching npm registry"),
1460 "unexpected error: {err}"
1461 );
1462 }
1463
1464 #[tokio::test]
1465 async fn fetch_drift_report_from_url_errors_on_npm_5xx() {
1466 let server = wiremock::MockServer::start().await;
1467 wiremock::Mock::given(wiremock::matchers::path("/latest"))
1468 .respond_with(wiremock::ResponseTemplate::new(503))
1469 .mount(&server)
1470 .await;
1471 let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1472 .await
1473 .unwrap_err();
1474 assert!(err
1475 .to_string()
1476 .contains("non-2xx status for latest dist-tag"));
1477 }
1478
1479 #[tokio::test]
1480 async fn fetch_drift_report_from_url_errors_on_tarball_5xx() {
1481 let server = wiremock::MockServer::start().await;
1482 let tarball_url = format!("{}/-/x.tgz", server.uri());
1483 wiremock::Mock::given(wiremock::matchers::path("/latest"))
1484 .respond_with(wiremock::ResponseTemplate::new(200).set_body_json(json!({
1485 "version": "1.0.0",
1486 "dist": {"tarball": tarball_url}
1487 })))
1488 .mount(&server)
1489 .await;
1490 wiremock::Mock::given(wiremock::matchers::path("/-/x.tgz"))
1491 .respond_with(wiremock::ResponseTemplate::new(503))
1492 .mount(&server)
1493 .await;
1494 let err = fetch_drift_report_from_url(&format!("{}/latest", server.uri()))
1495 .await
1496 .unwrap_err();
1497 assert!(err.to_string().contains("non-2xx status"));
1498 }
1499}