Skip to main content

panproto_vcs/
hash.rs

1//! Content-addressing via canonical serialization and blake3 hashing.
2//!
3//! Every object in the VCS is identified by a blake3 hash of its canonical
4//! `MessagePack` representation. Canonical forms sort all map entries by key
5//! (using [`BTreeMap`]) and exclude derived/precomputed fields.
6
7use panproto_gat::Name;
8use std::collections::BTreeMap;
9use std::fmt;
10use std::str::FromStr;
11
12use panproto_mig::Migration;
13use panproto_schema::{
14    Constraint, Edge, HyperEdge, RecursionPoint, Schema, Span, UsageMode, Variant, Vertex,
15};
16use serde::{Deserialize, Serialize};
17
18use crate::error::VcsError;
19use crate::object::{CommitObject, ComplementObject, DataSetObject, EditLogObject, TagObject};
20
21/// A content-addressed object identifier: a blake3 hash (32 bytes).
22#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
23pub struct ObjectId([u8; 32]);
24
25impl ObjectId {
26    /// The zero object ID (useful as a sentinel).
27    pub const ZERO: Self = Self([0u8; 32]);
28
29    /// Create an `ObjectId` from raw bytes.
30    #[must_use]
31    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
32        Self(bytes)
33    }
34
35    /// Return the raw bytes.
36    #[must_use]
37    pub const fn as_bytes(&self) -> &[u8; 32] {
38        &self.0
39    }
40
41    /// Return the first 7 hex characters (short form for display).
42    #[must_use]
43    pub fn short(&self) -> String {
44        let full = self.to_string();
45        full[..7].to_owned()
46    }
47}
48
49impl fmt::Display for ObjectId {
50    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
51        for byte in &self.0 {
52            write!(f, "{byte:02x}")?;
53        }
54        Ok(())
55    }
56}
57
58impl fmt::Debug for ObjectId {
59    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60        write!(f, "ObjectId({})", self.short())
61    }
62}
63
64/// Error parsing a hex string as an `ObjectId`.
65#[derive(Debug, thiserror::Error)]
66#[error("invalid object id: {reason}")]
67pub struct ParseObjectIdError {
68    reason: String,
69}
70
71impl FromStr for ObjectId {
72    type Err = ParseObjectIdError;
73
74    fn from_str(s: &str) -> Result<Self, Self::Err> {
75        if s.len() != 64 {
76            return Err(ParseObjectIdError {
77                reason: format!("expected 64 hex chars, got {}", s.len()),
78            });
79        }
80        let mut bytes = [0u8; 32];
81        for (i, byte) in bytes.iter_mut().enumerate() {
82            *byte =
83                u8::from_str_radix(&s[i * 2..i * 2 + 2], 16).map_err(|e| ParseObjectIdError {
84                    reason: e.to_string(),
85                })?;
86        }
87        Ok(Self(bytes))
88    }
89}
90
91// ---------------------------------------------------------------------------
92// Canonical forms: private structs with deterministic field ordering.
93// These exist only for hashing; they are never persisted directly.
94// ---------------------------------------------------------------------------
95
96#[derive(Serialize)]
97struct CanonicalVertex {
98    id: String,
99    kind: String,
100    nsid: Option<String>,
101}
102
103impl From<&Vertex> for CanonicalVertex {
104    fn from(v: &Vertex) -> Self {
105        Self {
106            id: v.id.to_string(),
107            kind: v.kind.to_string(),
108            nsid: v.nsid.as_ref().map(Name::to_string),
109        }
110    }
111}
112
113#[derive(Serialize)]
114struct CanonicalHyperEdge {
115    id: String,
116    kind: String,
117    signature: BTreeMap<String, String>,
118    parent_label: String,
119}
120
121impl From<&HyperEdge> for CanonicalHyperEdge {
122    fn from(he: &HyperEdge) -> Self {
123        Self {
124            id: he.id.to_string(),
125            kind: he.kind.to_string(),
126            signature: he
127                .signature
128                .iter()
129                .map(|(k, v)| (k.to_string(), v.to_string()))
130                .collect(),
131            parent_label: he.parent_label.to_string(),
132        }
133    }
134}
135
136#[derive(Serialize)]
137struct CanonicalCoercionSpec {
138    forward: panproto_expr::Expr,
139    inverse: Option<panproto_expr::Expr>,
140    class: panproto_gat::CoercionClass,
141}
142
143/// Canonical schema with `BTreeMap` fields, sorted `Vec`s, and no precomputed indices.
144#[derive(Serialize)]
145struct CanonicalSchema {
146    protocol: String,
147    vertices: BTreeMap<String, CanonicalVertex>,
148    edges: BTreeMap<Edge, String>,
149    hyper_edges: BTreeMap<String, CanonicalHyperEdge>,
150    constraints: BTreeMap<String, Vec<Constraint>>,
151    required: BTreeMap<String, Vec<Edge>>,
152    nsids: BTreeMap<String, String>,
153    variants: BTreeMap<String, Vec<Variant>>,
154    orderings: BTreeMap<Edge, u32>,
155    recursion_points: BTreeMap<String, RecursionPoint>,
156    spans: BTreeMap<String, Span>,
157    usage_modes: BTreeMap<Edge, UsageMode>,
158    nominal: BTreeMap<String, bool>,
159    coercions: BTreeMap<(String, String), CanonicalCoercionSpec>,
160    mergers: BTreeMap<String, panproto_expr::Expr>,
161    defaults: BTreeMap<String, panproto_expr::Expr>,
162    policies: BTreeMap<String, panproto_expr::Expr>,
163}
164
165impl From<&Schema> for CanonicalSchema {
166    fn from(s: &Schema) -> Self {
167        let mut constraints: BTreeMap<String, Vec<Constraint>> = s
168            .constraints
169            .iter()
170            .map(|(k, v)| {
171                let mut sorted = v.clone();
172                sorted.sort();
173                (k.to_string(), sorted)
174            })
175            .collect();
176        // Remove empty constraint lists.
177        constraints.retain(|_, v| !v.is_empty());
178
179        let mut required: BTreeMap<String, Vec<Edge>> = s
180            .required
181            .iter()
182            .map(|(k, v)| {
183                let mut sorted = v.clone();
184                sorted.sort();
185                (k.to_string(), sorted)
186            })
187            .collect();
188        required.retain(|_, v| !v.is_empty());
189
190        Self {
191            protocol: s.protocol.clone(),
192            vertices: s
193                .vertices
194                .iter()
195                .map(|(k, v)| (k.to_string(), CanonicalVertex::from(v)))
196                .collect(),
197            edges: s
198                .edges
199                .iter()
200                .map(|(k, v)| (k.clone(), v.to_string()))
201                .collect(),
202            hyper_edges: s
203                .hyper_edges
204                .iter()
205                .map(|(k, v)| (k.to_string(), CanonicalHyperEdge::from(v)))
206                .collect(),
207            constraints,
208            required,
209            nsids: s
210                .nsids
211                .iter()
212                .map(|(k, v)| (k.to_string(), v.to_string()))
213                .collect(),
214            variants: s
215                .variants
216                .iter()
217                .map(|(k, v)| (k.to_string(), v.clone()))
218                .collect(),
219            orderings: s.orderings.iter().map(|(k, v)| (k.clone(), *v)).collect(),
220            recursion_points: s
221                .recursion_points
222                .iter()
223                .map(|(k, v)| (k.to_string(), v.clone()))
224                .collect(),
225            spans: s
226                .spans
227                .iter()
228                .map(|(k, v)| (k.to_string(), v.clone()))
229                .collect(),
230            usage_modes: s
231                .usage_modes
232                .iter()
233                .map(|(k, v)| (k.clone(), v.clone()))
234                .collect(),
235            nominal: s.nominal.iter().map(|(k, v)| (k.to_string(), *v)).collect(),
236            coercions: s
237                .coercions
238                .iter()
239                .map(|((k1, k2), v)| {
240                    (
241                        (k1.to_string(), k2.to_string()),
242                        CanonicalCoercionSpec {
243                            forward: v.forward.clone(),
244                            inverse: v.inverse.clone(),
245                            class: v.class,
246                        },
247                    )
248                })
249                .collect(),
250            mergers: s
251                .mergers
252                .iter()
253                .map(|(k, v)| (k.to_string(), v.clone()))
254                .collect(),
255            defaults: s
256                .defaults
257                .iter()
258                .map(|(k, v)| (k.to_string(), v.clone()))
259                .collect(),
260            policies: s
261                .policies
262                .iter()
263                .map(|(k, v)| (k.to_string(), v.clone()))
264                .collect(),
265        }
266    }
267}
268
269/// Canonical migration where all `HashMap` fields become `BTreeMap`.
270#[derive(Serialize)]
271struct CanonicalMigration {
272    src: ObjectId,
273    tgt: ObjectId,
274    vertex_map: BTreeMap<String, String>,
275    edge_map: BTreeMap<Edge, Edge>,
276    hyper_edge_map: BTreeMap<String, String>,
277    label_map: BTreeMap<(String, String), String>,
278    resolver: BTreeMap<(String, String), Edge>,
279    hyper_resolver: BTreeMap<String, (String, BTreeMap<String, String>)>,
280    expr_resolvers: BTreeMap<(String, String), panproto_expr::Expr>,
281}
282
283// ---------------------------------------------------------------------------
284// Public hashing functions
285// ---------------------------------------------------------------------------
286
287/// Compute the content-addressed ID of a schema.
288///
289/// The hash excludes precomputed indices (`outgoing`, `incoming`, `between`)
290/// since those are derived data.
291///
292/// # Errors
293///
294/// Returns an error if canonical serialization fails.
295pub fn hash_schema(schema: &Schema) -> Result<ObjectId, VcsError> {
296    let canonical = CanonicalSchema::from(schema);
297    let bytes = rmp_serde::to_vec(&canonical)?;
298    Ok(ObjectId(blake3::hash(&bytes).into()))
299}
300
301/// Compute the content-addressed ID of a migration.
302///
303/// The hash includes the source and target schema object IDs so that the
304/// same morphism applied between different schema pairs produces distinct
305/// migration IDs.
306///
307/// # Errors
308///
309/// Returns an error if canonical serialization fails.
310pub fn hash_migration(
311    src: ObjectId,
312    tgt: ObjectId,
313    migration: &Migration,
314) -> Result<ObjectId, VcsError> {
315    // Flatten hyper_resolver to BTreeMap with sorted inner maps.
316    let hyper_resolver: BTreeMap<String, (String, BTreeMap<String, String>)> = migration
317        .hyper_resolver
318        .iter()
319        .map(|((he_id, _labels), (tgt_he, remap))| {
320            let sorted_remap: BTreeMap<String, String> = remap
321                .iter()
322                .map(|(k, v)| (k.to_string(), v.to_string()))
323                .collect();
324            (he_id.to_string(), (tgt_he.to_string(), sorted_remap))
325        })
326        .collect();
327
328    let canonical = CanonicalMigration {
329        src,
330        tgt,
331        vertex_map: migration
332            .vertex_map
333            .iter()
334            .map(|(k, v)| (k.to_string(), v.to_string()))
335            .collect(),
336        edge_map: migration
337            .edge_map
338            .iter()
339            .map(|(k, v)| (k.clone(), v.clone()))
340            .collect(),
341        hyper_edge_map: migration
342            .hyper_edge_map
343            .iter()
344            .map(|(k, v)| (k.to_string(), v.to_string()))
345            .collect(),
346        label_map: migration
347            .label_map
348            .iter()
349            .map(|((k1, k2), v)| ((k1.to_string(), k2.to_string()), v.to_string()))
350            .collect(),
351        resolver: migration
352            .resolver
353            .iter()
354            .map(|((k1, k2), v)| ((k1.to_string(), k2.to_string()), v.clone()))
355            .collect(),
356        hyper_resolver,
357        expr_resolvers: migration
358            .expr_resolvers
359            .iter()
360            .map(|((k1, k2), v)| ((k1.to_string(), k2.to_string()), v.clone()))
361            .collect(),
362    };
363    let bytes = rmp_serde::to_vec(&canonical)?;
364    Ok(ObjectId(blake3::hash(&bytes).into()))
365}
366
367/// Compute the content-addressed ID of a commit.
368///
369/// # Errors
370///
371/// Returns an error if serialization fails.
372pub fn hash_commit(commit: &CommitObject) -> Result<ObjectId, VcsError> {
373    let bytes = rmp_serde::to_vec(commit)?;
374    Ok(ObjectId(blake3::hash(&bytes).into()))
375}
376
377/// Hash an annotated tag object.
378///
379/// # Errors
380///
381/// Returns an error if serialization fails.
382pub fn hash_tag(tag: &TagObject) -> Result<ObjectId, VcsError> {
383    let bytes = rmp_serde::to_vec(tag)?;
384    Ok(ObjectId(blake3::hash(&bytes).into()))
385}
386
387/// Compute the content-addressed ID of a data set.
388///
389/// Uses a canonical `BTreeMap` form to ensure deterministic hashing
390/// regardless of field ordering.
391///
392/// # Errors
393///
394/// Returns an error if serialization fails.
395pub fn hash_dataset(dataset: &DataSetObject) -> Result<ObjectId, VcsError> {
396    let canonical: BTreeMap<&str, Vec<u8>> = BTreeMap::from([
397        ("schema_id", rmp_serde::to_vec(&dataset.schema_id)?),
398        ("data", rmp_serde::to_vec(&dataset.data)?),
399        ("record_count", rmp_serde::to_vec(&dataset.record_count)?),
400    ]);
401    let bytes = rmp_serde::to_vec(&canonical)?;
402    Ok(ObjectId(blake3::hash(&bytes).into()))
403}
404
405/// Compute the content-addressed ID of a complement.
406///
407/// Uses a canonical `BTreeMap` form to ensure deterministic hashing.
408///
409/// # Errors
410///
411/// Returns an error if serialization fails.
412pub fn hash_complement(complement: &ComplementObject) -> Result<ObjectId, VcsError> {
413    let canonical: BTreeMap<&str, Vec<u8>> = BTreeMap::from([
414        ("migration_id", rmp_serde::to_vec(&complement.migration_id)?),
415        ("data_id", rmp_serde::to_vec(&complement.data_id)?),
416        ("complement", rmp_serde::to_vec(&complement.complement)?),
417    ]);
418    let bytes = rmp_serde::to_vec(&canonical)?;
419    Ok(ObjectId(blake3::hash(&bytes).into()))
420}
421
422/// Compute the content-addressed ID of an expression.
423///
424/// The hash is computed from the canonical `MessagePack` serialization
425/// of the expression AST.
426///
427/// # Errors
428///
429/// Returns an error if serialization fails.
430pub fn hash_expr(expr: &panproto_expr::Expr) -> Result<ObjectId, VcsError> {
431    let bytes = rmp_serde::to_vec(expr)?;
432    Ok(ObjectId(blake3::hash(&bytes).into()))
433}
434
435/// Compute the content-addressed ID of a protocol definition.
436///
437/// The hash includes all protocol fields via direct serialization.
438///
439/// # Errors
440///
441/// Returns an error if serialization fails.
442pub fn hash_protocol(protocol: &panproto_schema::Protocol) -> Result<ObjectId, VcsError> {
443    let bytes = rmp_serde::to_vec(protocol)?;
444    Ok(ObjectId(blake3::hash(&bytes).into()))
445}
446
447/// Compute the content-addressed ID of a GAT theory.
448///
449/// Theory's custom `Serialize` implementation only emits Vec-based fields
450/// (sorts, ops, eqs, `directed_eqs`, policies) and scalar fields (name, extends),
451/// all of which have deterministic ordering. Direct serialization is safe.
452///
453/// # Errors
454///
455/// Returns an error if serialization fails.
456pub fn hash_theory(theory: &panproto_gat::Theory) -> Result<ObjectId, VcsError> {
457    let bytes = rmp_serde::to_vec(theory)?;
458    Ok(ObjectId(blake3::hash(&bytes).into()))
459}
460
461/// Canonical theory morphism where `HashMap` fields become `BTreeMap` for deterministic hashing.
462#[derive(Serialize)]
463struct CanonicalTheoryMorphism {
464    name: String,
465    domain: String,
466    codomain: String,
467    sort_map: BTreeMap<String, String>,
468    op_map: BTreeMap<String, String>,
469}
470
471/// Compute the content-addressed ID of a theory morphism.
472///
473/// Uses a canonical form with `BTreeMap` for the sort and operation maps
474/// since `TheoryMorphism` uses `HashMap` internally.
475///
476/// # Errors
477///
478/// Returns an error if serialization fails.
479pub fn hash_theory_morphism(morphism: &panproto_gat::TheoryMorphism) -> Result<ObjectId, VcsError> {
480    let canonical = CanonicalTheoryMorphism {
481        name: morphism.name.to_string(),
482        domain: morphism.domain.to_string(),
483        codomain: morphism.codomain.to_string(),
484        sort_map: morphism
485            .sort_map
486            .iter()
487            .map(|(k, v)| (k.to_string(), v.to_string()))
488            .collect(),
489        op_map: morphism
490            .op_map
491            .iter()
492            .map(|(k, v)| (k.to_string(), v.to_string()))
493            .collect(),
494    };
495    let bytes = rmp_serde::to_vec(&canonical)?;
496    Ok(ObjectId(blake3::hash(&bytes).into()))
497}
498
499/// Compute the content-addressed ID of a CST complement.
500///
501/// The CST complement is serialized as `MessagePack`, so hashing is
502/// straightforward: hash the payload bytes concatenated with the data ID.
503///
504/// # Errors
505///
506/// Returns an error if serialization fails.
507pub fn hash_cst_complement(
508    cst_comp: &crate::object::CstComplementObject,
509) -> Result<ObjectId, VcsError> {
510    let mut hasher = blake3::Hasher::new();
511    hasher.update(b"cst_complement:");
512    hasher.update(&cst_comp.data_id.0);
513    hasher.update(&cst_comp.cst_complement);
514    Ok(ObjectId(hasher.finalize().into()))
515}
516
517/// Compute the content-addressed ID of an edit log.
518///
519/// Uses a canonical `BTreeMap` form to ensure deterministic hashing.
520///
521/// # Errors
522///
523/// Returns an error if serialization fails.
524pub fn hash_edit_log(edit_log: &EditLogObject) -> Result<ObjectId, VcsError> {
525    let canonical: BTreeMap<&str, Vec<u8>> = BTreeMap::from([
526        ("schema_id", rmp_serde::to_vec(&edit_log.schema_id)?),
527        ("data_id", rmp_serde::to_vec(&edit_log.data_id)?),
528        ("edits", rmp_serde::to_vec(&edit_log.edits)?),
529        ("edit_count", rmp_serde::to_vec(&edit_log.edit_count)?),
530        (
531            "final_complement",
532            rmp_serde::to_vec(&edit_log.final_complement)?,
533        ),
534    ]);
535    let bytes = rmp_serde::to_vec(&canonical)?;
536    Ok(ObjectId(blake3::hash(&bytes).into()))
537}
538
539/// Compute the content-addressed ID of a per-file schema leaf.
540///
541/// Hashes the file path, protocol, and canonical form of the
542/// per-file `Schema`. Two files with the same path, protocol, and
543/// schema hash to the same [`ObjectId`].
544///
545/// # Errors
546///
547/// Returns an error if serialization fails.
548pub fn hash_file_schema(file: &crate::object::FileSchemaObject) -> Result<ObjectId, VcsError> {
549    let schema_id = hash_schema(&file.schema)?;
550    let mut sorted_cross = file.cross_file_edges.clone();
551    sorted_cross.sort();
552    let canonical: BTreeMap<&str, Vec<u8>> = BTreeMap::from([
553        ("path", rmp_serde::to_vec(&file.path)?),
554        ("protocol", rmp_serde::to_vec(&file.protocol)?),
555        ("schema_id", rmp_serde::to_vec(&schema_id)?),
556        ("cross_file_edges", rmp_serde::to_vec(&sorted_cross)?),
557    ]);
558    let bytes = rmp_serde::to_vec(&canonical)?;
559    Ok(ObjectId(blake3::hash(&bytes).into()))
560}
561
562/// Compute the content-addressed ID of a schema-tree inner node.
563///
564/// Serializes the sorted list of `(name, entry)` pairs canonically
565/// so the resulting [`ObjectId`] depends only on the tree's entry set
566/// and not on construction order.
567///
568/// # Errors
569///
570/// Returns an error if serialization fails.
571pub fn hash_schema_tree(tree: &crate::object::SchemaTreeObject) -> Result<ObjectId, VcsError> {
572    // A `SingleLeaf` has a distinct object shape with no name slot and
573    // hashes over its `file_schema_id` alone. A `Directory` hashes
574    // over its entries in canonical sorted order so the id is
575    // independent of wire order.
576    use crate::object::SchemaTreeObject;
577    let mut hasher = blake3::Hasher::new();
578    hasher.update(b"schema_tree:");
579    match tree {
580        SchemaTreeObject::SingleLeaf { file_schema_id } => {
581            hasher.update(b"single:");
582            hasher.update(file_schema_id.as_bytes());
583        }
584        SchemaTreeObject::Directory { .. } => {
585            let sorted: Vec<(String, crate::object::SchemaTreeEntry)> = tree
586                .sorted_entries()
587                .into_iter()
588                .map(|(n, e)| (n.to_owned(), e.clone()))
589                .collect();
590            let bytes = rmp_serde::to_vec(&sorted)?;
591            hasher.update(b"dir:");
592            hasher.update(&bytes);
593        }
594    }
595    Ok(ObjectId(hasher.finalize().into()))
596}
597
#[cfg(test)]
mod tests {
    use super::*;
    use panproto_schema::Vertex;
    use smallvec::SmallVec;
    use std::collections::HashMap;

    /// Shorthand for the fallible-test return type used throughout this module.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Build a `Schema` with protocol `"test"` from `(id, kind)` vertex pairs
    /// and a list of edges, filling the `outgoing`/`incoming`/`between`
    /// indices from the edges and leaving every other table empty.
    fn make_schema(vertices: &[(&str, &str)], edges: &[Edge]) -> Schema {
        let vert_map: HashMap<_, _> = vertices
            .iter()
            .map(|(id, kind)| {
                let vertex = Vertex {
                    id: Name::from(*id),
                    kind: Name::from(*kind),
                    nsid: None,
                };
                (Name::from(*id), vertex)
            })
            .collect();

        let mut edge_map = HashMap::new();
        let mut outgoing: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
        let mut incoming: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
        let mut between: HashMap<(Name, Name), SmallVec<Edge, 2>> = HashMap::new();
        for e in edges {
            edge_map.insert(e.clone(), e.kind.clone());
            outgoing.entry(e.src.clone()).or_default().push(e.clone());
            incoming.entry(e.tgt.clone()).or_default().push(e.clone());
            between
                .entry((e.src.clone(), e.tgt.clone()))
                .or_default()
                .push(e.clone());
        }

        Schema {
            protocol: "test".into(),
            vertices: vert_map,
            edges: edge_map,
            hyper_edges: HashMap::new(),
            constraints: HashMap::new(),
            required: HashMap::new(),
            nsids: HashMap::new(),
            entries: Vec::new(),
            variants: HashMap::new(),
            orderings: HashMap::new(),
            recursion_points: HashMap::new(),
            spans: HashMap::new(),
            usage_modes: HashMap::new(),
            nominal: HashMap::new(),
            coercions: HashMap::new(),
            mergers: HashMap::new(),
            defaults: HashMap::new(),
            policies: HashMap::new(),
            outgoing,
            incoming,
            between,
        }
    }

    #[test]
    fn hash_stability_same_schema() -> TestResult {
        let schema = make_schema(&[("a", "object"), ("b", "string")], &[]);
        let first = hash_schema(&schema)?;
        let second = hash_schema(&schema)?;
        assert_eq!(first, second);
        Ok(())
    }

    #[test]
    fn hash_differs_for_different_schemas() -> TestResult {
        let smaller = make_schema(&[("a", "object")], &[]);
        let larger = make_schema(&[("a", "object"), ("b", "string")], &[]);
        assert_ne!(hash_schema(&smaller)?, hash_schema(&larger)?);
        Ok(())
    }

    #[test]
    fn hash_ignores_precomputed_indices() -> TestResult {
        let edge = Edge {
            src: "a".into(),
            tgt: "b".into(),
            kind: "prop".into(),
            name: None,
        };
        let indexed = make_schema(&[("a", "object"), ("b", "string")], &[edge]);

        // Same schema, but with the derived indices wiped.
        let mut stripped = indexed.clone();
        stripped.outgoing.clear();
        stripped.incoming.clear();
        stripped.between.clear();

        let with_indices = hash_schema(&indexed)?;
        let without_indices = hash_schema(&stripped)?;
        assert_eq!(
            with_indices, without_indices,
            "hash should not depend on precomputed indices"
        );
        Ok(())
    }

    #[test]
    fn object_id_display_and_parse() -> TestResult {
        let zero = ObjectId::ZERO;
        let rendered = zero.to_string();
        assert_eq!(rendered.len(), 64);
        assert!(rendered.chars().all(|c| c == '0'));

        let reparsed: ObjectId = rendered.parse()?;
        assert_eq!(reparsed, zero);
        Ok(())
    }

    #[test]
    fn object_id_short() {
        let id = ObjectId::from_bytes([0xab; 32]);
        assert_eq!(id.short(), "abababa");
    }

    #[test]
    fn hash_commit_deterministic() -> TestResult {
        let commit = CommitObject::builder(ObjectId::ZERO, "test", "test-author", "initial commit")
            .timestamp(1_234_567_890)
            .build();
        let first = hash_commit(&commit)?;
        let second = hash_commit(&commit)?;
        assert_eq!(first, second);
        Ok(())
    }

    #[test]
    fn hash_theory_stability() -> TestResult {
        let theory = panproto_gat::Theory::new(
            "ThTest",
            vec![panproto_gat::Sort::simple("Vertex")],
            vec![],
            vec![],
        );
        let first = hash_theory(&theory)?;
        let second = hash_theory(&theory)?;
        assert_eq!(first, second, "same theory should produce the same hash");
        Ok(())
    }

    #[test]
    fn hash_theory_morphism_stability() -> TestResult {
        use std::sync::Arc;
        let morph = panproto_gat::TheoryMorphism::new(
            "test",
            "A",
            "B",
            HashMap::from([(Arc::from("S"), Arc::from("T"))]),
            HashMap::new(),
        );
        let first = hash_theory_morphism(&morph)?;
        let second = hash_theory_morphism(&morph)?;
        assert_eq!(first, second, "same morphism should produce the same hash");
        Ok(())
    }

    #[test]
    fn hash_migration_includes_endpoints() -> TestResult {
        let mig = Migration::empty();
        let src_one = ObjectId::from_bytes([1; 32]);
        let src_two = ObjectId::from_bytes([2; 32]);
        let tgt = ObjectId::from_bytes([3; 32]);

        let from_one = hash_migration(src_one, tgt, &mig)?;
        let from_two = hash_migration(src_two, tgt, &mig)?;
        assert_ne!(
            from_one, from_two,
            "different source schemas should produce different migration IDs"
        );
        Ok(())
    }

    #[test]
    fn hash_dataset_stability() -> TestResult {
        let ds = crate::object::DataSetObject {
            schema_id: ObjectId::from_bytes([1; 32]),
            data: vec![10, 20, 30],
            record_count: 3,
        };
        let first = hash_dataset(&ds)?;
        let second = hash_dataset(&ds)?;
        assert_eq!(first, second, "same dataset should produce the same hash");
        Ok(())
    }

    #[test]
    fn hash_complement_stability() -> TestResult {
        let comp = crate::object::ComplementObject {
            migration_id: ObjectId::from_bytes([1; 32]),
            data_id: ObjectId::from_bytes([2; 32]),
            complement: vec![42],
        };
        let first = hash_complement(&comp)?;
        let second = hash_complement(&comp)?;
        assert_eq!(first, second, "same complement should produce the same hash");
        Ok(())
    }

    #[test]
    fn hash_protocol_stability() -> TestResult {
        let proto = panproto_schema::Protocol {
            name: "test-proto".into(),
            ..Default::default()
        };
        let first = hash_protocol(&proto)?;
        let second = hash_protocol(&proto)?;
        assert_eq!(first, second, "same protocol should produce the same hash");
        Ok(())
    }

    #[test]
    fn hash_expr_stability() -> TestResult {
        let expr = panproto_expr::Expr::Lit(panproto_expr::Literal::Int(42));
        let first = hash_expr(&expr)?;
        let second = hash_expr(&expr)?;
        assert_eq!(first, second, "same expression should produce the same hash");
        Ok(())
    }

    #[test]
    fn hash_expr_differs_for_different_values() -> TestResult {
        let one = panproto_expr::Expr::Lit(panproto_expr::Literal::Int(1));
        let two = panproto_expr::Expr::Lit(panproto_expr::Literal::Int(2));
        let hash_one = hash_expr(&one)?;
        let hash_two = hash_expr(&two)?;
        assert_ne!(
            hash_one, hash_two,
            "different expressions should produce different hashes"
        );
        Ok(())
    }

    #[test]
    fn schema_tree_directory_hash_ignores_wire_order() -> TestResult {
        use crate::object::{SchemaTreeEntry, SchemaTreeObject};

        let a = ObjectId::from_bytes([1; 32]);
        let b = ObjectId::from_bytes([2; 32]);
        let c = ObjectId::from_bytes([3; 32]);

        let forward = SchemaTreeObject::Directory {
            entries: vec![
                ("a".to_owned(), SchemaTreeEntry::File(a)),
                ("b".to_owned(), SchemaTreeEntry::Tree(b)),
                ("c".to_owned(), SchemaTreeEntry::File(c)),
            ],
        };
        let shuffled = SchemaTreeObject::Directory {
            entries: vec![
                ("c".to_owned(), SchemaTreeEntry::File(c)),
                ("a".to_owned(), SchemaTreeEntry::File(a)),
                ("b".to_owned(), SchemaTreeEntry::Tree(b)),
            ],
        };

        let forward_id = hash_schema_tree(&forward)?;
        let shuffled_id = hash_schema_tree(&shuffled)?;
        assert_eq!(forward_id, shuffled_id);
        Ok(())
    }

    #[test]
    fn flat_schema_hash_stable_across_serde_round_trip() -> TestResult {
        // Encode a Schema, decode it again, and check that the
        // content-addressed hash of the decoded value matches the original.
        // This guards against canonicalization drift introduced by the serde
        // round trip.
        let original = make_schema(&[("alpha", "record"), ("beta", "string")], &[]);
        let before = hash_schema(&original)?;

        let wire = rmp_serde::to_vec(&original)?;
        let decoded: Schema = rmp_serde::from_slice(&wire)?;
        let after = hash_schema(&decoded)?;

        assert_eq!(before, after);
        Ok(())
    }
}