Skip to main content

panproto_vcs/
object.rs

//! Content-addressed objects stored in the VCS.
//!
//! The object store contains the following kinds of objects:
//! - [`Object::FileSchema`]: a per-file schema leaf
//! - [`Object::SchemaTree`]: an inner node of the project schema Merkle tree
//! - [`Object::FlatSchema`]: a flat schema stored as a migration endpoint
//! - [`Object::Migration`]: a morphism between two schemas
//! - [`Object::Commit`]: a point in the schema evolution DAG
//! - [`Object::Tag`]: an annotated tag pointing to another object
//! - [`Object::DataSet`]: a data snapshot conforming to a schema
//! - [`Object::Complement`]: a complement from data migration
//! - [`Object::CstComplement`]: a CST complement for format-preserving round-trips
//! - [`Object::Protocol`]: a protocol (metaschema) definition
//! - [`Object::Expr`]: a standalone expression (coercion, merge, default)
//! - [`Object::EditLog`]: an edit log for incremental migration
//! - [`Object::Theory`]: a GAT theory definition
//! - [`Object::TheoryMorphism`]: a structure-preserving map between theories
16
17use std::collections::BTreeMap;
18
19use panproto_gat::SiteRename;
20use panproto_mig::Migration;
21use panproto_schema::Schema;
22use serde::{Deserialize, Serialize};
23
24use crate::ObjectId;
25
26/// A content-addressed object in the store.
27///
28/// Marked `#[non_exhaustive]` so that adding new variants downstream
29/// (new on-disk object kinds, new enrichments) does not silently break
30/// exhaustive `match` expressions in consumers of this crate.
31#[non_exhaustive]
32#[derive(Clone, Debug, Serialize, Deserialize)]
33pub enum Object {
34    /// A migration between two schemas, identified by their object IDs.
35    Migration {
36        /// Object ID of the source schema.
37        src: ObjectId,
38        /// Object ID of the target schema.
39        tgt: ObjectId,
40        /// The migration morphism.
41        mapping: Migration,
42    },
43
44    /// A commit in the schema evolution DAG.
45    Commit(CommitObject),
46
47    /// An annotated tag pointing to another object.
48    Tag(TagObject),
49
50    /// A data snapshot: instances conforming to a specific schema.
51    DataSet(DataSetObject),
52
53    /// A complement from data migration, for backward migration.
54    Complement(ComplementObject),
55
56    /// A protocol (metaschema) definition.
57    Protocol(Box<panproto_schema::Protocol>),
58
59    /// A standalone expression (e.g., coercion, merge, default).
60    Expr(Box<panproto_expr::Expr>),
61
62    /// An edit log recording incremental edits against a schema.
63    EditLog(EditLogObject),
64
65    /// A GAT theory definition.
66    Theory(Box<panproto_gat::Theory>),
67
68    /// A structure-preserving map between two theories.
69    TheoryMorphism(Box<panproto_gat::TheoryMorphism>),
70
71    /// A CST complement for format-preserving round-trips.
72    ///
73    /// Stores the full tree-sitter CST Schema alongside a data set,
74    /// enabling byte-identical reconstruction of the original file
75    /// formatting after schema migration.
76    CstComplement(CstComplementObject),
77
78    /// A schema for a single file.
79    ///
80    /// Content-addressed independently so that unchanged files
81    /// deduplicate across commits. The project-level schema is
82    /// assembled by walking a tree of [`Self::FileSchema`] leaves
83    /// joined by [`Self::SchemaTree`] nodes; see [`crate::tree`].
84    FileSchema(Box<FileSchemaObject>),
85
86    /// A flat schema, stored only as a migration endpoint.
87    ///
88    /// [`Object::Migration`] references its source and target schemas
89    /// by [`ObjectId`]. Commits content-address their project schema
90    /// as an [`Object::SchemaTree`] whose leaves are
91    /// [`Object::FileSchema`]s, so a migration between two tree
92    /// roots cannot reuse that addressing: the migration morphism
93    /// speaks of a single flat vertex/edge space, not a tree. This
94    /// variant stores that flat form so that `gc::mark_reachable`
95    /// can find the referenced schema and migration composition has
96    /// an object to load from.
97    FlatSchema(Box<panproto_schema::Schema>),
98
99    /// A directory or project root in the schema Merkle tree.
100    ///
101    /// A sorted list of `(name, ObjectId)` entries where each
102    /// [`ObjectId`] points at either an [`Self::FileSchema`] leaf or
103    /// another [`Self::SchemaTree`]. Mirrors git's tree object
104    /// model: sibling entries are sorted lexicographically so the
105    /// resulting [`ObjectId`] is deterministic.
106    SchemaTree(Box<SchemaTreeObject>),
107}
108
109impl Object {
110    /// Returns the type name of this object (for error messages).
111    #[must_use]
112    pub const fn type_name(&self) -> &'static str {
113        match self {
114            Self::Migration { .. } => "migration",
115            Self::Commit(_) => "commit",
116            Self::Tag(_) => "tag",
117            Self::DataSet(_) => "dataset",
118            Self::Complement(_) => "complement",
119            Self::Protocol(_) => "protocol",
120            Self::Expr(_) => "expr",
121            Self::EditLog(_) => "editlog",
122            Self::Theory(_) => "theory",
123            Self::TheoryMorphism(_) => "theory_morphism",
124            Self::CstComplement(_) => "cst_complement",
125            Self::FileSchema(_) => "file_schema",
126            Self::SchemaTree(_) => "schema_tree",
127            Self::FlatSchema(_) => "flat_schema",
128        }
129    }
130}
131
132/// A per-file schema leaf in the project schema Merkle tree.
133///
134/// Carries the full parsed [`Schema`] for one file alongside the
135/// file's path and the protocol used to parse it. Stored as an
136/// [`Object::FileSchema`] so its [`ObjectId`] depends only on the
137/// per-file content.
138#[derive(Clone, Debug, Serialize, Deserialize)]
139pub struct FileSchemaObject {
140    /// Path of the file within the project, using forward slashes.
141    pub path: String,
142    /// Protocol used to parse this file (e.g., `"typescript"`,
143    /// `"raw_file"`).
144    pub protocol: String,
145    /// The per-file schema as produced by the protocol's parser.
146    pub schema: Schema,
147    /// Cross-file import edges rooted at a vertex in this file.
148    ///
149    /// Each edge carries vertex names already prefixed with the
150    /// owning file's project path (`<src_path>::<src_name>` and
151    /// `<tgt_path>::<tgt_name>`), so flat assembly can add them
152    /// without re-prefixing. Populated by
153    /// `panproto_project::ProjectBuilder::build_tree`; empty
154    /// otherwise.
155    #[serde(default, skip_serializing_if = "Vec::is_empty")]
156    pub cross_file_edges: Vec<panproto_schema::Edge>,
157}
158
159/// An entry in a [`SchemaTreeObject::Directory`] node.
160///
161/// Distinguishes leaf (`File`, pointing at a [`FileSchemaObject`])
162/// from inner (`Tree`, pointing at another [`SchemaTreeObject`]) so
163/// the tree walker does not need to re-fetch objects just to
164/// classify them.
165#[derive(Clone, Debug, Serialize, Deserialize)]
166pub enum SchemaTreeEntry {
167    /// A file-schema leaf named by its parent tree.
168    File(ObjectId),
169    /// A subtree named by its parent tree.
170    Tree(ObjectId),
171}
172
173/// An inner node of the project schema Merkle tree.
174///
175/// A [`SchemaTreeObject::Directory`] holds named entries sorted
176/// lexicographically by name so two directories with the same
177/// `(name, entry)` set hash to the same [`ObjectId`] regardless of
178/// how they were constructed. A [`SchemaTreeObject::SingleLeaf`] is a
179/// nameless wrapper over a single [`FileSchemaObject`], produced by
180/// [`crate::tree::store_schema_as_tree`] when a caller needs to store
181/// a flat [`panproto_schema::Schema`] under the tree-based commit
182/// model. Keeping the two shapes as distinct variants of the object
183/// itself (rather than a name slot alongside a `SingleLeaf` entry)
184/// means a peer cannot forge a `(name="x", SingleLeaf(id))` entry
185/// that walks identically to the canonical form but hashes
186/// differently.
187#[derive(Clone, Debug, Serialize, Deserialize)]
188pub enum SchemaTreeObject {
189    /// Nameless wrapper around a single file schema.
190    SingleLeaf {
191        /// [`ObjectId`] of the wrapped [`FileSchemaObject`].
192        file_schema_id: ObjectId,
193    },
194    /// Directory node holding named child entries.
195    Directory {
196        /// Child entries, canonically sorted lexicographically by name.
197        ///
198        /// Callers consuming a deserialized `Directory` should iterate
199        /// via [`SchemaTreeObject::sorted_entries`] to enforce the
200        /// canonical ordering regardless of how the bytes arrived on
201        /// the wire.
202        entries: Vec<(String, SchemaTreeEntry)>,
203    },
204}
205
206impl SchemaTreeObject {
207    /// Return a canonically ordered view of a directory's entries,
208    /// or an empty vector for a [`SchemaTreeObject::SingleLeaf`].
209    ///
210    /// Entries are sorted lexicographically by name. Use this anywhere
211    /// the flat-schema hash or walk order is observable, so a remote
212    /// cannot produce different flat hashes for the same tree
213    /// [`ObjectId`] by permuting wire-order.
214    #[must_use]
215    pub fn sorted_entries(&self) -> Vec<(&str, &SchemaTreeEntry)> {
216        match self {
217            Self::SingleLeaf { .. } => Vec::new(),
218            Self::Directory { entries } => {
219                let mut out: Vec<(&str, &SchemaTreeEntry)> =
220                    entries.iter().map(|(n, e)| (n.as_str(), e)).collect();
221                out.sort_by(|a, b| a.0.cmp(b.0));
222                out
223            }
224        }
225    }
226}
227
228/// A commit in the schema evolution DAG.
229///
230/// Commits form a DAG via parent pointers. A root commit has no parents,
231/// a normal commit has one parent, and a merge commit has two.
232#[derive(Clone, Debug, Serialize, Deserialize)]
233pub struct CommitObject {
234    /// Object ID of the schema at this commit.
235    ///
236    /// This always points at an [`Object::SchemaTree`] root whose
237    /// leaves are [`Object::FileSchema`] objects. Callers that need
238    /// a flat [`Schema`] should go through
239    /// [`resolve_commit_schema`](crate::resolve_commit_schema) (or
240    /// [`assemble_schema`](crate::assemble_schema) when they already
241    /// hold the tree id), which walks the tree and assembles the
242    /// project-coproduct schema.
243    pub schema_id: ObjectId,
244
245    /// Parent commit IDs (0 = root, 1 = normal, 2 = merge).
246    pub parents: Vec<ObjectId>,
247
248    /// Object ID of the migration from parent's schema to this commit's
249    /// schema. `None` for root commits.
250    pub migration_id: Option<ObjectId>,
251
252    /// The protocol this schema lineage tracks.
253    pub protocol: String,
254
255    /// Author identifier.
256    pub author: String,
257
258    /// Unix timestamp in seconds.
259    pub timestamp: u64,
260
261    /// Human-readable commit message.
262    pub message: String,
263
264    /// Renames detected or declared for this commit's migration.
265    pub renames: Vec<SiteRename>,
266
267    /// Object ID of the protocol definition at this commit.
268    pub protocol_id: Option<ObjectId>,
269
270    /// Object IDs of data sets tracked at this commit.
271    pub data_ids: Vec<ObjectId>,
272
273    /// Object IDs of complements from the migration at this commit.
274    pub complement_ids: Vec<ObjectId>,
275
276    /// Object IDs of edit logs for incremental migration at this commit.
277    pub edit_log_ids: Vec<ObjectId>,
278
279    /// Theory object IDs at this commit, keyed by theory name.
280    pub theory_ids: BTreeMap<String, ObjectId>,
281
282    /// Object IDs of CST complements for format-preserving round-trips.
283    #[serde(default, skip_serializing_if = "Vec::is_empty")]
284    pub cst_complement_ids: Vec<ObjectId>,
285}
286
287impl CommitObject {
288    /// Create a builder for `CommitObject` with required fields.
289    #[must_use]
290    pub fn builder(
291        schema_id: ObjectId,
292        protocol: impl Into<String>,
293        author: impl Into<String>,
294        message: impl Into<String>,
295    ) -> CommitObjectBuilder {
296        CommitObjectBuilder {
297            schema_id,
298            parents: Vec::new(),
299            migration_id: None,
300            protocol: protocol.into(),
301            author: author.into(),
302            timestamp: std::time::SystemTime::now()
303                .duration_since(std::time::UNIX_EPOCH)
304                .unwrap_or_default()
305                .as_secs(),
306            message: message.into(),
307            renames: Vec::new(),
308            protocol_id: None,
309            data_ids: Vec::new(),
310            complement_ids: Vec::new(),
311            edit_log_ids: Vec::new(),
312            theory_ids: BTreeMap::new(),
313            cst_complement_ids: Vec::new(),
314        }
315    }
316}
317
318/// Builder for [`CommitObject`] with sensible defaults for optional fields.
319pub struct CommitObjectBuilder {
320    schema_id: ObjectId,
321    parents: Vec<ObjectId>,
322    migration_id: Option<ObjectId>,
323    protocol: String,
324    author: String,
325    timestamp: u64,
326    message: String,
327    renames: Vec<SiteRename>,
328    protocol_id: Option<ObjectId>,
329    data_ids: Vec<ObjectId>,
330    complement_ids: Vec<ObjectId>,
331    edit_log_ids: Vec<ObjectId>,
332    theory_ids: BTreeMap<String, ObjectId>,
333    cst_complement_ids: Vec<ObjectId>,
334}
335
336impl CommitObjectBuilder {
337    /// Set the parent commit IDs.
338    #[must_use]
339    pub fn parents(mut self, parents: Vec<ObjectId>) -> Self {
340        self.parents = parents;
341        self
342    }
343
344    /// Set the migration object ID.
345    #[must_use]
346    pub const fn migration_id(mut self, id: ObjectId) -> Self {
347        self.migration_id = Some(id);
348        self
349    }
350
351    /// Set the unix timestamp (seconds).
352    #[must_use]
353    pub const fn timestamp(mut self, ts: u64) -> Self {
354        self.timestamp = ts;
355        self
356    }
357
358    /// Set the detected/declared renames.
359    #[must_use]
360    pub fn renames(mut self, renames: Vec<SiteRename>) -> Self {
361        self.renames = renames;
362        self
363    }
364
365    /// Set the protocol definition object ID.
366    #[must_use]
367    pub const fn protocol_id(mut self, id: ObjectId) -> Self {
368        self.protocol_id = Some(id);
369        self
370    }
371
372    /// Set the data set object IDs.
373    #[must_use]
374    pub fn data_ids(mut self, ids: Vec<ObjectId>) -> Self {
375        self.data_ids = ids;
376        self
377    }
378
379    /// Set the complement object IDs.
380    #[must_use]
381    pub fn complement_ids(mut self, ids: Vec<ObjectId>) -> Self {
382        self.complement_ids = ids;
383        self
384    }
385
386    /// Set the edit log object IDs.
387    #[must_use]
388    pub fn edit_log_ids(mut self, ids: Vec<ObjectId>) -> Self {
389        self.edit_log_ids = ids;
390        self
391    }
392
393    /// Set the theory object IDs.
394    #[must_use]
395    pub fn theory_ids(mut self, ids: BTreeMap<String, ObjectId>) -> Self {
396        self.theory_ids = ids;
397        self
398    }
399
400    /// Build the [`CommitObject`].
401    #[must_use]
402    pub fn build(self) -> CommitObject {
403        CommitObject {
404            schema_id: self.schema_id,
405            parents: self.parents,
406            migration_id: self.migration_id,
407            protocol: self.protocol,
408            author: self.author,
409            timestamp: self.timestamp,
410            message: self.message,
411            renames: self.renames,
412            protocol_id: self.protocol_id,
413            data_ids: self.data_ids,
414            complement_ids: self.complement_ids,
415            edit_log_ids: self.edit_log_ids,
416            theory_ids: self.theory_ids,
417            cst_complement_ids: self.cst_complement_ids,
418        }
419    }
420
421    /// Set the CST complement IDs.
422    #[must_use]
423    pub fn cst_complement_ids(mut self, ids: Vec<ObjectId>) -> Self {
424        self.cst_complement_ids = ids;
425        self
426    }
427}
428
429/// A data snapshot stored in the VCS.
430#[derive(Clone, Debug, Serialize, Deserialize)]
431pub struct DataSetObject {
432    /// Which schema this data conforms to.
433    pub schema_id: ObjectId,
434    /// MessagePack-encoded instance data.
435    pub data: Vec<u8>,
436    /// Number of records.
437    pub record_count: u64,
438}
439
440/// A complement from data migration, enabling backward migration.
441#[derive(Clone, Debug, Serialize, Deserialize)]
442pub struct ComplementObject {
443    /// The migration that produced this complement.
444    pub migration_id: ObjectId,
445    /// The data set this complement was computed from.
446    pub data_id: ObjectId,
447    /// MessagePack-encoded Complement data.
448    pub complement: Vec<u8>,
449}
450
451/// A CST complement for format-preserving round-trips.
452///
453/// Stores the tree-sitter CST Schema (which includes all formatting
454/// information as constraints) alongside a data set, enabling
455/// `emit_from_schema` to reconstruct the original file formatting
456/// after schema migration.
457#[derive(Clone, Debug, Serialize, Deserialize)]
458pub struct CstComplementObject {
459    /// The data set this CST complement was captured from.
460    pub data_id: ObjectId,
461    /// MessagePack-encoded `CstComplement` (from `panproto_io::cst_extract`).
462    pub cst_complement: Vec<u8>,
463}
464
465/// An edit log: a sequence of edits applied to a data set.
466///
467/// Edit logs are content-addressed by hashing the sequence of edits.
468/// Two edit logs with the same edits hash to the same object, enabling
469/// deduplication.
470#[derive(Clone, Debug, Serialize, Deserialize)]
471pub struct EditLogObject {
472    /// The schema these edits apply to.
473    pub schema_id: ObjectId,
474    /// The data set these edits were applied to.
475    pub data_id: ObjectId,
476    /// MessagePack-encoded `Vec<TreeEdit>`.
477    pub edits: Vec<u8>,
478    /// Number of edits in the log.
479    pub edit_count: u64,
480    /// Object ID of the complement state after all edits.
481    pub final_complement: ObjectId,
482}
483
484/// An annotated tag object.
485///
486/// Unlike lightweight tags (which are just refs pointing directly at a
487/// commit), annotated tags are stored as objects in the store and carry
488/// metadata: tagger, timestamp, and message.
489#[derive(Clone, Debug, Serialize, Deserialize)]
490pub struct TagObject {
491    /// Object ID of the tagged object (usually a commit).
492    pub target: ObjectId,
493
494    /// Who created the tag.
495    pub tagger: String,
496
497    /// Unix timestamp in seconds.
498    pub timestamp: u64,
499
500    /// Tag message.
501    pub message: String,
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// A [`DataSetObject`] survives an `rmp_serde` encode/decode cycle
    /// with every field intact.
    #[test]
    fn dataset_round_trip() -> Result<(), Box<dyn std::error::Error>> {
        let ds = DataSetObject {
            schema_id: ObjectId::ZERO,
            data: vec![1, 2, 3, 4],
            record_count: 42,
        };
        let bytes = rmp_serde::to_vec(&ds)?;
        let ds2: DataSetObject = rmp_serde::from_slice(&bytes)?;
        assert_eq!(ds.schema_id, ds2.schema_id);
        assert_eq!(ds.data, ds2.data);
        assert_eq!(ds.record_count, ds2.record_count);
        Ok(())
    }

    /// A [`ComplementObject`] survives an `rmp_serde` encode/decode
    /// cycle with every field intact.
    #[test]
    fn complement_round_trip() -> Result<(), Box<dyn std::error::Error>> {
        let comp = ComplementObject {
            migration_id: ObjectId::from_bytes([1; 32]),
            data_id: ObjectId::from_bytes([2; 32]),
            complement: vec![10, 20, 30],
        };
        let bytes = rmp_serde::to_vec(&comp)?;
        let comp2: ComplementObject = rmp_serde::from_slice(&bytes)?;
        assert_eq!(comp.migration_id, comp2.migration_id);
        assert_eq!(comp.data_id, comp2.data_id);
        assert_eq!(comp.complement, comp2.complement);
        Ok(())
    }

    /// An [`EditLogObject`] survives an `rmp_serde` encode/decode cycle
    /// with every field intact.
    #[test]
    fn edit_log_round_trip() -> Result<(), Box<dyn std::error::Error>> {
        let el = EditLogObject {
            schema_id: ObjectId::from_bytes([1; 32]),
            data_id: ObjectId::from_bytes([2; 32]),
            edits: vec![42, 43, 44],
            edit_count: 3,
            final_complement: ObjectId::from_bytes([3; 32]),
        };
        let bytes = rmp_serde::to_vec(&el)?;
        let el2: EditLogObject = rmp_serde::from_slice(&bytes)?;
        assert_eq!(el.schema_id, el2.schema_id);
        assert_eq!(el.data_id, el2.data_id);
        assert_eq!(el.edits, el2.edits);
        assert_eq!(el.edit_count, el2.edit_count);
        assert_eq!(el.final_complement, el2.final_complement);
        Ok(())
    }

    /// `edit_log_ids` set through the builder are preserved across an
    /// `rmp_serde` round trip of the commit.
    #[test]
    fn commit_with_edit_logs() -> Result<(), Box<dyn std::error::Error>> {
        let commit = CommitObject::builder(ObjectId::ZERO, "test", "test", "test")
            .timestamp(0)
            .edit_log_ids(vec![
                ObjectId::from_bytes([10; 32]),
                ObjectId::from_bytes([11; 32]),
            ])
            .build();
        let bytes = rmp_serde::to_vec(&commit)?;
        let commit2: CommitObject = rmp_serde::from_slice(&bytes)?;
        assert_eq!(commit.edit_log_ids, commit2.edit_log_ids);
        Ok(())
    }

    /// `theory_ids` set through the builder are preserved across an
    /// `rmp_serde` round trip and remain addressable by theory name.
    #[test]
    fn commit_with_theory_ids() -> Result<(), Box<dyn std::error::Error>> {
        let mut theories = BTreeMap::new();
        theories.insert("ThGraph".to_owned(), ObjectId::from_bytes([5; 32]));
        let commit = CommitObject::builder(ObjectId::ZERO, "test", "test", "test")
            .timestamp(0)
            .theory_ids(theories)
            .build();
        let bytes = rmp_serde::to_vec(&commit)?;
        let commit2: CommitObject = rmp_serde::from_slice(&bytes)?;
        assert_eq!(commit.theory_ids, commit2.theory_ids);
        assert_eq!(
            commit2.theory_ids.get("ThGraph"),
            Some(&ObjectId::from_bytes([5; 32]))
        );
        Ok(())
    }

    /// An [`Object::FlatSchema`] wrapping a one-vertex schema decodes
    /// back to the same variant with its protocol and vertex count
    /// preserved.
    #[test]
    fn flat_schema_round_trip_through_serde() -> Result<(), Box<dyn std::error::Error>> {
        use panproto_gat::Name;
        use panproto_schema::{Schema, Vertex};
        use std::collections::HashMap;

        let mut vertices = HashMap::new();
        vertices.insert(
            Name::from("root"),
            Vertex {
                id: Name::from("root"),
                kind: Name::from("object"),
                nsid: None,
            },
        );
        let schema = Schema {
            protocol: "flat-proto".into(),
            vertices,
            edges: HashMap::new(),
            hyper_edges: HashMap::new(),
            constraints: HashMap::new(),
            required: HashMap::new(),
            nsids: HashMap::new(),
            entries: Vec::new(),
            variants: HashMap::new(),
            orderings: HashMap::new(),
            recursion_points: HashMap::new(),
            spans: HashMap::new(),
            usage_modes: HashMap::new(),
            nominal: HashMap::new(),
            coercions: HashMap::new(),
            mergers: HashMap::new(),
            defaults: HashMap::new(),
            policies: HashMap::new(),
            outgoing: HashMap::new(),
            incoming: HashMap::new(),
            between: HashMap::new(),
        };
        let obj = Object::FlatSchema(Box::new(schema.clone()));
        let bytes = rmp_serde::to_vec(&obj)?;
        let obj2: Object = rmp_serde::from_slice(&bytes)?;
        match obj2 {
            Object::FlatSchema(s) => {
                assert_eq!(s.protocol, schema.protocol);
                assert_eq!(s.vertices.len(), 1);
            }
            other => panic!("expected FlatSchema, got {}", other.type_name()),
        }
        Ok(())
    }

    /// A `SingleLeaf` and a one-entry `Directory` that reference the
    /// same leaf id must not share a hash.
    #[test]
    fn single_leaf_and_directory_hash_distinctly() -> Result<(), Box<dyn std::error::Error>> {
        use crate::hash::hash_schema_tree;

        let leaf_id = ObjectId::from_bytes([9; 32]);

        let single = SchemaTreeObject::SingleLeaf {
            file_schema_id: leaf_id,
        };
        // A Directory with a single `File` entry carrying the same
        // leaf id: prior to the typed variants, the two forms could
        // have collapsed into ambiguous shapes.
        let dir = SchemaTreeObject::Directory {
            entries: vec![("only".to_owned(), SchemaTreeEntry::File(leaf_id))],
        };

        let h_single = hash_schema_tree(&single)?;
        let h_dir = hash_schema_tree(&dir)?;
        assert_ne!(
            h_single, h_dir,
            "SingleLeaf and Directory must hash to distinct ObjectIds"
        );
        Ok(())
    }

    /// A commit built without any theories round-trips with an empty
    /// `theory_ids` map.
    #[test]
    fn commit_backward_compat_no_theory_ids() -> Result<(), Box<dyn std::error::Error>> {
        // NOTE: the bytes decoded here come from the current serializer
        // applied to a commit whose `theory_ids` is empty. This checks
        // the "no theories" case; it does not decode a payload captured
        // from before `theory_ids` existed, i.e. one in which the field
        // is physically absent.
        let commit_old = CommitObject::builder(ObjectId::ZERO, "test", "test", "test")
            .timestamp(0)
            .build();
        let bytes = rmp_serde::to_vec(&commit_old)?;
        let commit2: CommitObject = rmp_serde::from_slice(&bytes)?;
        assert!(commit2.theory_ids.is_empty());
        Ok(())
    }
}