Skip to main content

cyrs_schema/
diff.rs

1//! Schema diff (spec 0002 §9, §12).
2//!
3//! A pure function producing a stable [`SchemaDiff`] between two
4//! [`InMemorySchema`] values. The output is designed to be serialised
5//! as JSON for CI "schema-compat" gates: consumer projects run
6//! `cypher schema diff old.toml new.toml` and compare the `breaking`
7//! list to decide whether the new schema is a drop-in replacement.
8//!
9//! # What counts as "breaking"
10//!
11//! At v0 the following changes are classed as breaking:
12//!
13//! - A label is removed (existing queries that reference the label
14//!   break).
15//! - A property on an existing label gains `required = true` (or is
16//!   added as required), changes type, or is removed entirely.
17//! - A relationship type is removed.
18//! - A rel type's `start_labels` or `end_labels` shrink (existing
19//!   patterns may now be rejected by the semantic pass).
20//! - A rel type changes or loses a property (same rules as labels).
21//! - A parameter is removed, or changes type, or loses its default
22//!   value.
23//!
24//! Non-breaking changes land in `adds`:
25//!
26//! - New labels, rel types, parameters.
27//! - New **optional** properties; existing properties relaxed to optional.
28//! - A rel type's endpoint lists expanding (more labels allowed).
29//! - A parameter gaining a default.
30//!
31//! The diff is **structural**: it does not know about the semantic
32//! content of queries that depend on the schema. A downstream CI gate
33//! that wants stronger compatibility should check breakages against
34//! its own corpus.
35
36use std::collections::{BTreeMap, BTreeSet};
37
38use smol_str::SmolStr;
39
40#[cfg(feature = "serde")]
41use serde::{Deserialize, Serialize};
42
43use crate::{InMemorySchema, ParamDecl, PropertyDecl, PropertyType, in_memory::RelDecl};
44
45// ============================================================
46// Public shape
47// ============================================================
48
49/// Structured diff between two schemas.
50///
51/// Iteration order of each field is deterministic (`BTreeMap`-backed
52/// internally), so the serialised JSON is suitable for snapshot tests
53/// and CI attestation.
54#[derive(Debug, Clone, Default, PartialEq, Eq)]
55#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
56pub struct SchemaDiff {
57    /// Additive-only changes: new labels, rels, parameters, and
58    /// optional properties.
59    pub adds: Vec<DiffEntry>,
60    /// Removals of previously-declared items.
61    pub removes: Vec<DiffEntry>,
62    /// Changes that are structurally backwards-incompatible.
63    pub breaking: Vec<DiffEntry>,
64}
65
66impl SchemaDiff {
67    /// `true` iff every bucket is empty — the two schemas are
68    /// semantically equal.
69    #[must_use]
70    pub fn is_empty(&self) -> bool {
71        self.adds.is_empty() && self.removes.is_empty() && self.breaking.is_empty()
72    }
73
74    /// `true` iff there is at least one breaking change.
75    #[must_use]
76    pub fn has_breaking(&self) -> bool {
77        !self.breaking.is_empty()
78    }
79}
80
/// A single change recorded in one bucket of a [`SchemaDiff`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DiffEntry {
    /// Which kind of change this is: `"add"`, `"remove"`, or
    /// `"breaking"`. It mirrors the bucket the entry lives in and is
    /// repeated here so a flattened stream of entries stays
    /// self-describing.
    pub kind: String,
    /// What sort of item changed: `"label"`, `"rel_type"`,
    /// `"parameter"`, `"label_property"`, `"rel_type_property"`,
    /// `"rel_type_endpoints"`, `"parameter_type"`, or
    /// `"parameter_default"`.
    pub category: String,
    /// Stable identifier of the affected item (`Person`, `Person.age`,
    /// `ACTED_IN.role`, `$since_year`). Part of the sort key used for
    /// deterministic output.
    pub path: String,
    /// Human-readable description of the change. The wording is kept
    /// stable for snapshot tests.
    pub detail: String,
}

impl DiffEntry {
    /// Shared constructor: tags the entry with `kind` and copies the
    /// static `category` into an owned string.
    fn with_kind(kind: &'static str, category: &'static str, path: String, detail: String) -> Self {
        Self {
            kind: kind.to_owned(),
            category: category.to_owned(),
            path,
            detail,
        }
    }

    /// Build an entry whose `kind` is `"add"`.
    fn add(category: &'static str, path: String, detail: String) -> Self {
        Self::with_kind("add", category, path, detail)
    }

    /// Build an entry whose `kind` is `"remove"`.
    fn remove(category: &'static str, path: String, detail: String) -> Self {
        Self::with_kind("remove", category, path, detail)
    }

    /// Build an entry whose `kind` is `"breaking"`.
    fn breaking(category: &'static str, path: String, detail: String) -> Self {
        Self::with_kind("breaking", category, path, detail)
    }
}
130
131// ============================================================
132// Entry point
133// ============================================================
134
135/// Compute the diff between `old` and `new`.
136///
137/// The function is deterministic and side-effect-free; running it twice
138/// on the same inputs always yields identical output (sorted by path).
139#[must_use]
140pub fn diff(old: &InMemorySchema, new: &InMemorySchema) -> SchemaDiff {
141    let mut out = SchemaDiff::default();
142    diff_labels(old, new, &mut out);
143    diff_rel_types(old, new, &mut out);
144    diff_parameters(old, new, &mut out);
145
146    // Sort each bucket by (category, path, detail) for snapshot stability.
147    for bucket in [&mut out.adds, &mut out.removes, &mut out.breaking] {
148        bucket.sort_by(|a, b| {
149            a.category
150                .cmp(&b.category)
151                .then(a.path.cmp(&b.path))
152                .then(a.detail.cmp(&b.detail))
153        });
154    }
155    out
156}
157
158// ============================================================
159// Labels
160// ============================================================
161
162fn diff_labels(old: &InMemorySchema, new: &InMemorySchema, out: &mut SchemaDiff) {
163    let old_labels: BTreeMap<SmolStr, Vec<PropertyDecl>> = old
164        .label_names()
165        .into_iter()
166        .map(|n| {
167            let props = old.node_properties_internal(&n).unwrap_or_default();
168            (n, props)
169        })
170        .collect();
171    let new_labels: BTreeMap<SmolStr, Vec<PropertyDecl>> = new
172        .label_names()
173        .into_iter()
174        .map(|n| {
175            let props = new.node_properties_internal(&n).unwrap_or_default();
176            (n, props)
177        })
178        .collect();
179
180    for (name, new_props) in &new_labels {
181        if !old_labels.contains_key(name) {
182            out.adds.push(DiffEntry::add(
183                "label",
184                name.to_string(),
185                format!("added label `{name}`"),
186            ));
187        } else if let Some(old_props) = old_labels.get(name) {
188            diff_property_sets(
189                &format!("{name}"),
190                "label_property",
191                old_props,
192                new_props,
193                out,
194            );
195        }
196    }
197    for name in old_labels.keys() {
198        if !new_labels.contains_key(name) {
199            out.breaking.push(DiffEntry::breaking(
200                "label",
201                name.to_string(),
202                format!("removed label `{name}`"),
203            ));
204            out.removes.push(DiffEntry::remove(
205                "label",
206                name.to_string(),
207                format!("removed label `{name}`"),
208            ));
209        }
210    }
211}
212
213// ============================================================
214// Rel types
215// ============================================================
216
217fn diff_rel_types(old: &InMemorySchema, new: &InMemorySchema, out: &mut SchemaDiff) {
218    let old_rels: BTreeMap<SmolStr, &RelDecl> =
219        old.rel_types().map(|r| (r.name.clone(), r)).collect();
220    let new_rels: BTreeMap<SmolStr, &RelDecl> =
221        new.rel_types().map(|r| (r.name.clone(), r)).collect();
222
223    for (name, new_rel) in &new_rels {
224        match old_rels.get(name) {
225            None => {
226                out.adds.push(DiffEntry::add(
227                    "rel_type",
228                    name.to_string(),
229                    format!("added relationship type `{name}`"),
230                ));
231            }
232            Some(old_rel) => {
233                diff_endpoint_list(
234                    name,
235                    "start_labels",
236                    &old_rel.start_labels,
237                    &new_rel.start_labels,
238                    out,
239                );
240                diff_endpoint_list(
241                    name,
242                    "end_labels",
243                    &old_rel.end_labels,
244                    &new_rel.end_labels,
245                    out,
246                );
247                diff_property_sets(
248                    &format!("{name}"),
249                    "rel_type_property",
250                    &old_rel.properties,
251                    &new_rel.properties,
252                    out,
253                );
254            }
255        }
256    }
257    for name in old_rels.keys() {
258        if !new_rels.contains_key(name) {
259            out.breaking.push(DiffEntry::breaking(
260                "rel_type",
261                name.to_string(),
262                format!("removed relationship type `{name}`"),
263            ));
264            out.removes.push(DiffEntry::remove(
265                "rel_type",
266                name.to_string(),
267                format!("removed relationship type `{name}`"),
268            ));
269        }
270    }
271}
272
273fn diff_endpoint_list(
274    rel: &SmolStr,
275    side: &'static str,
276    old: &[SmolStr],
277    new: &[SmolStr],
278    out: &mut SchemaDiff,
279) {
280    let old_set: BTreeSet<&SmolStr> = old.iter().collect();
281    let new_set: BTreeSet<&SmolStr> = new.iter().collect();
282    if old_set == new_set {
283        return;
284    }
285    let added: Vec<&&SmolStr> = new_set.difference(&old_set).collect();
286    let removed: Vec<&&SmolStr> = old_set.difference(&new_set).collect();
287    let path = format!("{rel}.{side}");
288    if !added.is_empty() {
289        let names: Vec<String> = added.iter().map(|s| format!("`{s}`")).collect();
290        out.adds.push(DiffEntry::add(
291            "rel_type_endpoints",
292            path.clone(),
293            format!(
294                "relationship `{rel}` {side} gained {names}",
295                names = names.join(", "),
296            ),
297        ));
298    }
299    if !removed.is_empty() {
300        let names: Vec<String> = removed.iter().map(|s| format!("`{s}`")).collect();
301        out.breaking.push(DiffEntry::breaking(
302            "rel_type_endpoints",
303            path,
304            format!(
305                "relationship `{rel}` {side} lost {names}",
306                names = names.join(", "),
307            ),
308        ));
309    }
310}
311
312// ============================================================
313// Property sets (labels + rel types share the same logic)
314// ============================================================
315
316fn diff_property_sets(
317    owner: &str,
318    category: &'static str,
319    old: &[PropertyDecl],
320    new: &[PropertyDecl],
321    out: &mut SchemaDiff,
322) {
323    let old_map: BTreeMap<&SmolStr, &PropertyDecl> = old.iter().map(|p| (&p.name, p)).collect();
324    let new_map: BTreeMap<&SmolStr, &PropertyDecl> = new.iter().map(|p| (&p.name, p)).collect();
325    for (name, new_prop) in &new_map {
326        match old_map.get(name) {
327            None => {
328                let path = format!("{owner}.{name}");
329                if new_prop.required {
330                    out.breaking.push(DiffEntry::breaking(
331                        category,
332                        path,
333                        format!(
334                            "property `{owner}.{name}` added as required \
335                             (existing instances may violate the invariant)",
336                        ),
337                    ));
338                } else {
339                    out.adds.push(DiffEntry::add(
340                        category,
341                        path,
342                        format!("property `{owner}.{name}` added as optional"),
343                    ));
344                }
345            }
346            Some(old_prop) => {
347                let path = format!("{owner}.{name}");
348                if old_prop.ty != new_prop.ty {
349                    out.breaking.push(DiffEntry::breaking(
350                        category,
351                        path.clone(),
352                        format!(
353                            "property `{owner}.{name}` type changed from `{}` to `{}`",
354                            render_type(&old_prop.ty),
355                            render_type(&new_prop.ty),
356                        ),
357                    ));
358                }
359                if old_prop.required && !new_prop.required {
360                    out.adds.push(DiffEntry::add(
361                        category,
362                        path.clone(),
363                        format!("property `{owner}.{name}` relaxed from required to optional"),
364                    ));
365                } else if !old_prop.required && new_prop.required {
366                    out.breaking.push(DiffEntry::breaking(
367                        category,
368                        path,
369                        format!("property `{owner}.{name}` tightened from optional to required",),
370                    ));
371                }
372            }
373        }
374    }
375    for name in old_map.keys() {
376        if !new_map.contains_key(name) {
377            let path = format!("{owner}.{name}");
378            out.breaking.push(DiffEntry::breaking(
379                category,
380                path.clone(),
381                format!("property `{owner}.{name}` removed"),
382            ));
383            out.removes.push(DiffEntry::remove(
384                category,
385                path,
386                format!("property `{owner}.{name}` removed"),
387            ));
388        }
389    }
390}
391
392// ============================================================
393// Parameters
394// ============================================================
395
396fn diff_parameters(old: &InMemorySchema, new: &InMemorySchema, out: &mut SchemaDiff) {
397    let old_params: BTreeMap<&SmolStr, &ParamDecl> =
398        old.parameters().map(|p| (&p.name, p)).collect();
399    let new_params: BTreeMap<&SmolStr, &ParamDecl> =
400        new.parameters().map(|p| (&p.name, p)).collect();
401    for (name, new_param) in &new_params {
402        match old_params.get(name) {
403            None => {
404                let path = format!("${name}");
405                if new_param.default.is_some() {
406                    out.adds.push(DiffEntry::add(
407                        "parameter",
408                        path,
409                        format!("parameter `${name}` added with a default"),
410                    ));
411                } else {
412                    // Adding a parameter without a default may break
413                    // consumers that don't yet pass it, but v0 leaves
414                    // that to the caller — the schema itself gains
415                    // expressive power; queries either supply the
416                    // parameter or the front-end reports a missing-param
417                    // diagnostic. Treat as additive.
418                    out.adds.push(DiffEntry::add(
419                        "parameter",
420                        path,
421                        format!("parameter `${name}` added"),
422                    ));
423                }
424            }
425            Some(old_param) => {
426                let path = format!("${name}");
427                if old_param.ty != new_param.ty {
428                    out.breaking.push(DiffEntry::breaking(
429                        "parameter_type",
430                        path.clone(),
431                        format!(
432                            "parameter `${name}` type changed from `{}` to `{}`",
433                            render_type(&old_param.ty),
434                            render_type(&new_param.ty),
435                        ),
436                    ));
437                }
438                match (&old_param.default, &new_param.default) {
439                    (Some(old_default), None) => {
440                        out.breaking.push(DiffEntry::breaking(
441                            "parameter_default",
442                            path.clone(),
443                            format!(
444                                "parameter `${name}` lost its default value (was `{old_default}`)",
445                            ),
446                        ));
447                    }
448                    (None, Some(new_default)) => {
449                        out.adds.push(DiffEntry::add(
450                            "parameter_default",
451                            path.clone(),
452                            format!(
453                                "parameter `${name}` gained a default value of `{new_default}`",
454                            ),
455                        ));
456                    }
457                    (Some(old_default), Some(new_default)) if old_default != new_default => {
458                        out.adds.push(DiffEntry::add(
459                            "parameter_default",
460                            path.clone(),
461                            format!(
462                                "parameter `${name}` default changed from `{old_default}` to `{new_default}`",
463                            ),
464                        ));
465                    }
466                    _ => {}
467                }
468            }
469        }
470    }
471    for (name, old_param) in &old_params {
472        if !new_params.contains_key(name) {
473            let path = format!("${name}");
474            out.breaking.push(DiffEntry::breaking(
475                "parameter",
476                path.clone(),
477                format!(
478                    "parameter `${name}` removed (was `{}`)",
479                    render_type(&old_param.ty),
480                ),
481            ));
482            out.removes.push(DiffEntry::remove(
483                "parameter",
484                path,
485                format!("parameter `${name}` removed"),
486            ));
487        }
488    }
489}
490
491// ============================================================
492// Internal helpers
493// ============================================================
494
495/// Render a [`PropertyType`] in the same spelling as the v0 file format
496/// (spec 0002 §4). Kept in sync with `file::render_type` — duplicated
497/// here so the `file` feature stays optional.
498fn render_type(ty: &PropertyType) -> String {
499    match ty {
500        PropertyType::String => "STRING".to_owned(),
501        PropertyType::Int => "INTEGER".to_owned(),
502        PropertyType::Float => "FLOAT".to_owned(),
503        PropertyType::Bool => "BOOLEAN".to_owned(),
504        PropertyType::Date => "DATE".to_owned(),
505        PropertyType::Datetime => "DATETIME".to_owned(),
506        PropertyType::List(inner) => format!("LIST<{}>", render_type(inner)),
507        PropertyType::Opaque(n) | PropertyType::Enum(n, _) => n.to_string(),
508        PropertyType::Any => "MAP".to_owned(),
509    }
510}
511
512// Helper accessor used by the diff without cloning twice. Avoids paying
513// the `SchemaProvider` trait's clone-on-return cost twice per label.
514impl InMemorySchema {
515    fn node_properties_internal(&self, label: &str) -> Option<Vec<PropertyDecl>> {
516        self.labels.get(label).cloned()
517    }
518}
519
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{InMemorySchema, ParamDecl, PropertyDecl, PropertyType, in_memory::RelDecl};

    /// Shorthand for a property declaration.
    fn prop(name: &str, ty: PropertyType, required: bool) -> PropertyDecl {
        PropertyDecl::new(SmolStr::new(name), ty, required)
    }

    /// The `ACTED_IN` relationship used throughout: ends at `Movie`,
    /// carries an optional `role` string, starts at `start_labels`.
    fn acted_in(start_labels: Vec<SmolStr>) -> RelDecl {
        RelDecl {
            name: SmolStr::new("ACTED_IN"),
            start_labels,
            end_labels: vec![SmolStr::new("Movie")],
            properties: vec![prop("role", PropertyType::String, false)],
        }
    }

    /// The `since_year` integer parameter with the given default.
    fn since_year(default: Option<SmolStr>) -> ParamDecl {
        ParamDecl {
            name: SmolStr::new("since_year"),
            ty: PropertyType::Int,
            default,
        }
    }

    /// Copy the properties of `label` out of `schema`; the label must
    /// exist.
    fn props_of(schema: &InMemorySchema, label: &str) -> Vec<PropertyDecl> {
        schema.node_properties_internal(label).unwrap()
    }

    /// Baseline schema: `Person`, `Movie`, `ACTED_IN`, `$since_year`.
    fn base_schema() -> InMemorySchema {
        InMemorySchema::builder()
            .add_label(
                SmolStr::new("Person"),
                vec![
                    prop("name", PropertyType::String, true),
                    prop("age", PropertyType::Int, false),
                ],
            )
            .add_label(
                SmolStr::new("Movie"),
                vec![prop("title", PropertyType::String, true)],
            )
            .add_rel_type(acted_in(vec![SmolStr::new("Person")]))
            .add_parameter(since_year(Some(SmolStr::new("1990"))))
            .build()
            .expect("builds")
    }

    #[test]
    fn identical_schemas_diff_empty() {
        let schema = base_schema();
        let result = diff(&schema, &schema);
        assert!(result.is_empty());
    }

    #[test]
    fn added_label_is_non_breaking() {
        let old = base_schema();
        let new = InMemorySchema::builder()
            .add_label(SmolStr::new("Person"), props_of(&old, "Person"))
            .add_label(SmolStr::new("Movie"), props_of(&old, "Movie"))
            .add_label(SmolStr::new("Director"), vec![])
            .add_rel_type(acted_in(vec![SmolStr::new("Person")]))
            .add_parameter(since_year(Some(SmolStr::new("1990"))))
            .build()
            .expect("builds");
        let result = diff(&old, &new);
        assert!(!result.has_breaking());
        assert!(result.adds.iter().any(|e| e.path == "Director"));
    }

    #[test]
    fn removed_label_is_breaking() {
        let old = base_schema();
        let new = InMemorySchema::builder()
            .add_label(SmolStr::new("Person"), props_of(&old, "Person"))
            .build()
            .expect("builds");
        let result = diff(&old, &new);
        assert!(result.has_breaking());
        assert!(result.breaking.iter().any(|e| e.path == "Movie"));
        assert!(result.removes.iter().any(|e| e.path == "Movie"));
    }

    #[test]
    fn property_type_change_is_breaking() {
        let old = base_schema();
        let new = InMemorySchema::builder()
            .add_label(
                SmolStr::new("Person"),
                vec![
                    prop("name", PropertyType::String, true),
                    prop("age", PropertyType::String, false),
                ],
            )
            .add_label(SmolStr::new("Movie"), props_of(&old, "Movie"))
            .build()
            .expect("builds");
        let result = diff(&old, &new);
        assert!(result.breaking.iter().any(|e| e.path == "Person.age"));
    }

    #[test]
    fn rel_type_endpoint_removal_is_breaking() {
        let old = base_schema();
        let new = InMemorySchema::builder()
            .add_label(SmolStr::new("Person"), props_of(&old, "Person"))
            .add_label(SmolStr::new("Movie"), props_of(&old, "Movie"))
            .add_rel_type(acted_in(vec![]))
            .build()
            .expect("builds");
        let result = diff(&old, &new);
        assert!(
            result
                .breaking
                .iter()
                .any(|e| e.category == "rel_type_endpoints" && e.path == "ACTED_IN.start_labels")
        );
    }

    #[test]
    fn parameter_losing_default_is_breaking() {
        let old = base_schema();
        let new = InMemorySchema::builder()
            .add_label(SmolStr::new("Person"), props_of(&old, "Person"))
            .add_label(SmolStr::new("Movie"), props_of(&old, "Movie"))
            .add_parameter(since_year(None))
            .build()
            .expect("builds");
        let result = diff(&old, &new);
        assert!(
            result
                .breaking
                .iter()
                .any(|e| e.category == "parameter_default")
        );
    }

    #[test]
    fn diff_is_deterministic() {
        let first = base_schema();
        let second = base_schema();
        assert_eq!(diff(&first, &second), diff(&first, &second));
    }
}