Skip to main content

clayers_repo/
object.rs

1//! Object model for the content-addressed Merkle DAG.
2//!
3//! All objects are content-addressed: identity = `SHA-256(ExclusiveC14N(xml_representation))`.
4//! Content objects represent XML Infoset nodes. Versioning objects (commits,
5//! tags, documents) are XML elements in `urn:clayers:repository`.
6
7use chrono::{DateTime, Utc};
8use clayers_xml::ContentHash;
9use xot::Xot;
10
/// Namespace URI under which the repository's versioning elements live
/// (`urn:clayers:repository`).
pub const REPO_NS: &str = "urn:clayers:repository";
13
/// A single XML attribute, identified by its namespace URI rather than by
/// the prefix that happened to be used in the source text.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Attribute {
    /// Local part of the attribute name.
    pub local_name: String,
    /// Namespace URI of the attribute; `None` when it is not namespaced.
    pub namespace_uri: Option<String>,
    /// Prefix the original XML used for this attribute (e.g. "app" for `app:id`).
    ///
    /// With the `serde` feature enabled, a missing field deserializes to `None`.
    #[cfg_attr(feature = "serde", serde(default))]
    pub namespace_prefix: Option<String>,
    /// Value carried by the attribute.
    pub value: String,
}
28
/// Identity of a person: used as the author of a commit and as the tagger
/// of an annotated tag.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Author {
    /// Name shown for this person.
    pub name: String,
    /// The person's email address.
    pub email: String,
}
38
39/// An element node in the Merkle DAG.
40#[derive(Debug, Clone, PartialEq, Eq)]
41#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
42pub struct ElementObject {
43    /// The element's local name.
44    pub local_name: String,
45    /// The element's namespace URI, if any.
46    pub namespace_uri: Option<String>,
47    /// The namespace prefix used in the original XML (e.g. "app" for `<app:item>`).
48    #[cfg_attr(feature = "serde", serde(default))]
49    pub namespace_prefix: Option<String>,
50    /// Extra namespace declarations on this element for descendant convenience
51    /// (prefix, URI pairs not used by this element itself).
52    #[cfg_attr(feature = "serde", serde(default))]
53    pub extra_namespaces: Vec<(String, String)>,
54    /// Attributes in canonical order.
55    pub attributes: Vec<Attribute>,
56    /// Ordered child object hashes for graph traversal.
57    pub children: Vec<ContentHash>,
58    /// Inclusive C14N hash, indexed for drift detection compatibility.
59    pub inclusive_hash: ContentHash,
60}
61
/// Character data: the object form of an XML text node.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TextObject {
    /// The text carried by the node.
    pub content: String,
}
69
/// The object form of an XML comment node.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CommentObject {
    /// Text of the comment.
    pub content: String,
}
77
/// The object form of an XML processing instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PIObject {
    /// Target of the processing instruction.
    pub target: String,
    /// Data following the target; `None` when the instruction carries none.
    pub data: Option<String>,
}
87
88/// A document object pointing to a root element.
89#[derive(Debug, Clone, PartialEq, Eq)]
90#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
91pub struct DocumentObject {
92    /// Hash of the root element object.
93    pub root: ContentHash,
94    /// Hashes of document-level children before the root element
95    /// (comments, processing instructions). Preserves prologues.
96    #[cfg_attr(feature = "serde", serde(default))]
97    pub prologue: Vec<ContentHash>,
98}
99
100/// An entry in a tree object, mapping a file path to a document hash.
101#[derive(Debug, Clone, PartialEq, Eq)]
102#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
103pub struct TreeEntry {
104    /// File path (e.g., "overview.xml").
105    pub path: String,
106    /// Hash of the `DocumentObject` for this file.
107    pub document: ContentHash,
108}
109
110/// A tree object mapping file paths to document hashes (like git's tree).
111#[derive(Debug, Clone, PartialEq, Eq)]
112#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
113pub struct TreeObject {
114    /// Entries sorted by path for deterministic hashing.
115    pub entries: Vec<TreeEntry>,
116}
117
118impl TreeObject {
119    /// Create a new tree with entries sorted by path.
120    #[must_use]
121    pub fn new(mut entries: Vec<TreeEntry>) -> Self {
122        entries.sort_by(|a, b| a.path.cmp(&b.path));
123        Self { entries }
124    }
125
126    /// Look up a document hash by path.
127    #[must_use]
128    pub fn get(&self, path: &str) -> Option<&TreeEntry> {
129        self.entries.iter().find(|e| e.path == path)
130    }
131
132    /// Return sorted list of all paths in the tree.
133    #[must_use]
134    pub fn paths(&self) -> Vec<&str> {
135        self.entries.iter().map(|e| e.path.as_str()).collect()
136    }
137
138    /// Serialize to XML in `urn:clayers:repository` namespace.
139    #[must_use]
140    #[allow(clippy::missing_panics_doc)]
141    pub fn to_xml(&self) -> String {
142        let mut xot = Xot::new();
143        let ns = xot.add_namespace(REPO_NS);
144        let prefix = xot.add_prefix("repo");
145        let tree_name = xot.add_name_ns("tree", ns);
146        let entry_name = xot.add_name_ns("entry", ns);
147        let path_attr = xot.add_name("path");
148
149        let tree_el = xot.new_element(tree_name);
150        xot.namespaces_mut(tree_el).insert(prefix, ns);
151
152        for entry in &self.entries {
153            let entry_el = xot.new_element(entry_name);
154            xot.attributes_mut(entry_el)
155                .insert(path_attr, entry.path.clone());
156            let text = xot.new_text(&entry.document.to_string());
157            xot.append(entry_el, text).expect("append text");
158            xot.append(tree_el, entry_el).expect("append entry");
159        }
160
161        xot.to_string(tree_el).expect("serialize tree")
162    }
163}
164
165/// A commit object with parent references.
166#[derive(Debug, Clone, PartialEq, Eq)]
167#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
168pub struct CommitObject {
169    /// Hash of the tree object this commit snapshots.
170    pub tree: ContentHash,
171    /// Parent commit hashes (empty for initial commit, 2+ for multi-parent commits).
172    pub parents: Vec<ContentHash>,
173    /// The commit author.
174    pub author: Author,
175    /// Commit timestamp.
176    pub timestamp: DateTime<Utc>,
177    /// Commit message.
178    pub message: String,
179}
180
181/// An annotated tag object.
182#[derive(Debug, Clone, PartialEq, Eq)]
183#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
184pub struct TagObject {
185    /// Hash of the tagged object (usually a commit).
186    pub target: ContentHash,
187    /// Tag name.
188    pub name: String,
189    /// The tagger.
190    pub tagger: Author,
191    /// Tag timestamp.
192    pub timestamp: DateTime<Utc>,
193    /// Tag message.
194    pub message: String,
195}
196
197/// A content-addressed object in the Merkle DAG.
198#[derive(Debug, Clone, PartialEq, Eq)]
199#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
200pub enum Object {
201    /// An XML element with its subtree.
202    Element(ElementObject),
203    /// Character data.
204    Text(TextObject),
205    /// An XML comment.
206    Comment(CommentObject),
207    /// A processing instruction.
208    PI(PIObject),
209    /// A document root pointer.
210    Document(DocumentObject),
211    /// A tree mapping file paths to documents.
212    Tree(TreeObject),
213    /// A commit (versioning).
214    Commit(CommitObject),
215    /// An annotated tag (versioning).
216    Tag(TagObject),
217}
218
219impl DocumentObject {
220    /// Serialize to XML in `urn:clayers:repository` namespace.
221    #[must_use]
222    #[allow(clippy::missing_panics_doc)]
223    pub fn to_xml(&self) -> String {
224        let mut xot = Xot::new();
225        let ns = xot.add_namespace(REPO_NS);
226        let prefix = xot.add_prefix("repo");
227        let doc_name = xot.add_name_ns("document", ns);
228        let root_name = xot.add_name_ns("root", ns);
229        let prologue_name = xot.add_name_ns("prologue", ns);
230        let version_attr = xot.add_name("version");
231        let encoding_attr = xot.add_name("encoding");
232
233        let doc_el = xot.new_element(doc_name);
234        xot.namespaces_mut(doc_el).insert(prefix, ns);
235        xot.attributes_mut(doc_el)
236            .insert(encoding_attr, "UTF-8".into());
237        xot.attributes_mut(doc_el)
238            .insert(version_attr, "1.0".into());
239
240        let root_el = xot.new_element(root_name);
241        let root_text = xot.new_text(&self.root.to_string());
242        xot.append(root_el, root_text).expect("append text");
243        xot.append(doc_el, root_el).expect("append root");
244
245        for h in &self.prologue {
246            let prologue_el = xot.new_element(prologue_name);
247            let text = xot.new_text(&h.to_string());
248            xot.append(prologue_el, text).expect("append text");
249            xot.append(doc_el, prologue_el).expect("append prologue");
250        }
251
252        xot.to_string(doc_el).expect("serialize document")
253    }
254}
255
256impl CommitObject {
257    /// Serialize to XML in `urn:clayers:repository` namespace.
258    #[must_use]
259    #[allow(clippy::missing_panics_doc)]
260    pub fn to_xml(&self) -> String {
261        let mut xot = Xot::new();
262        let ns = xot.add_namespace(REPO_NS);
263        let prefix = xot.add_prefix("repo");
264        let commit_name = xot.add_name_ns("commit", ns);
265        let tree_name = xot.add_name_ns("tree", ns);
266        let parent_name = xot.add_name_ns("parent", ns);
267        let author_name = xot.add_name_ns("author", ns);
268        let timestamp_name = xot.add_name_ns("timestamp", ns);
269        let message_name = xot.add_name_ns("message", ns);
270        let name_attr = xot.add_name("name");
271        let email_attr = xot.add_name("email");
272
273        let commit_el = xot.new_element(commit_name);
274        xot.namespaces_mut(commit_el).insert(prefix, ns);
275
276        // <repo:tree>
277        let tree_el = xot.new_element(tree_name);
278        let text = xot.new_text(&self.tree.to_string());
279        xot.append(tree_el, text).expect("append text");
280        xot.append(commit_el, tree_el).expect("append tree");
281
282        // <repo:parent>
283        for p in &self.parents {
284            let parent_el = xot.new_element(parent_name);
285            let text = xot.new_text(&p.to_string());
286            xot.append(parent_el, text).expect("append text");
287            xot.append(commit_el, parent_el).expect("append parent");
288        }
289
290        // <repo:author name="..." email="..."/>
291        let author_el = xot.new_element(author_name);
292        xot.attributes_mut(author_el)
293            .insert(email_attr, self.author.email.clone());
294        xot.attributes_mut(author_el)
295            .insert(name_attr, self.author.name.clone());
296        xot.append(commit_el, author_el).expect("append author");
297
298        // <repo:timestamp>
299        let ts_el = xot.new_element(timestamp_name);
300        let ts_text = xot.new_text(&self.timestamp.format("%Y-%m-%dT%H:%M:%SZ").to_string());
301        xot.append(ts_el, ts_text).expect("append text");
302        xot.append(commit_el, ts_el).expect("append timestamp");
303
304        // <repo:message>
305        let msg_el = xot.new_element(message_name);
306        let msg_text = xot.new_text(&self.message);
307        xot.append(msg_el, msg_text).expect("append text");
308        xot.append(commit_el, msg_el).expect("append message");
309
310        xot.to_string(commit_el).expect("serialize commit")
311    }
312}
313
314impl TagObject {
315    /// Serialize to XML in `urn:clayers:repository` namespace.
316    #[must_use]
317    #[allow(clippy::missing_panics_doc)]
318    pub fn to_xml(&self) -> String {
319        let mut xot = Xot::new();
320        let ns = xot.add_namespace(REPO_NS);
321        let prefix = xot.add_prefix("repo");
322        let tag_name = xot.add_name_ns("tag", ns);
323        let target_name = xot.add_name_ns("target", ns);
324        let name_name = xot.add_name_ns("name", ns);
325        let tagger_name = xot.add_name_ns("tagger", ns);
326        let timestamp_name = xot.add_name_ns("timestamp", ns);
327        let message_name = xot.add_name_ns("message", ns);
328        let name_attr = xot.add_name("name");
329        let email_attr = xot.add_name("email");
330
331        let tag_el = xot.new_element(tag_name);
332        xot.namespaces_mut(tag_el).insert(prefix, ns);
333
334        // <repo:target>
335        let target_el = xot.new_element(target_name);
336        let text = xot.new_text(&self.target.to_string());
337        xot.append(target_el, text).expect("append text");
338        xot.append(tag_el, target_el).expect("append target");
339
340        // <repo:name>
341        let name_el = xot.new_element(name_name);
342        let name_text = xot.new_text(&self.name);
343        xot.append(name_el, name_text).expect("append text");
344        xot.append(tag_el, name_el).expect("append name");
345
346        // <repo:tagger name="..." email="..."/>
347        let tagger_el = xot.new_element(tagger_name);
348        xot.attributes_mut(tagger_el)
349            .insert(email_attr, self.tagger.email.clone());
350        xot.attributes_mut(tagger_el)
351            .insert(name_attr, self.tagger.name.clone());
352        xot.append(tag_el, tagger_el).expect("append tagger");
353
354        // <repo:timestamp>
355        let ts_el = xot.new_element(timestamp_name);
356        let ts_text = xot.new_text(&self.timestamp.format("%Y-%m-%dT%H:%M:%SZ").to_string());
357        xot.append(ts_el, ts_text).expect("append text");
358        xot.append(tag_el, ts_el).expect("append timestamp");
359
360        // <repo:message>
361        let msg_el = xot.new_element(message_name);
362        let msg_text = xot.new_text(&self.message);
363        xot.append(msg_el, msg_text).expect("append text");
364        xot.append(tag_el, msg_el).expect("append message");
365
366        xot.to_string(tag_el).expect("serialize tag")
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Shorthand for building a tree entry in the fixtures below.
    fn entry(path: &str, document: ContentHash) -> TreeEntry {
        TreeEntry {
            path: path.into(),
            document,
        }
    }

    #[test]
    fn document_to_xml_contains_root_hash() {
        let root = ContentHash::from_canonical(b"test");
        let xml = DocumentObject {
            root,
            prologue: Vec::new(),
        }
        .to_xml();
        assert!(xml.contains(&root.to_string()));
        assert!(xml.contains(REPO_NS));
    }

    #[test]
    fn commit_to_xml_contains_all_fields() {
        let digest = ContentHash::from_canonical(b"test");
        let author = Author {
            name: "Alice".into(),
            email: "alice@example.com".into(),
        };
        let timestamp = DateTime::parse_from_rfc3339("2026-03-17T10:30:00Z")
            .expect("valid timestamp")
            .to_utc();
        let xml = CommitObject {
            tree: digest,
            parents: vec![digest],
            author,
            timestamp,
            message: "Test commit".into(),
        }
        .to_xml();

        for needle in ["repo:commit", "repo:tree", "repo:parent", "Alice", "Test commit"] {
            assert!(xml.contains(needle), "missing {needle:?}");
        }
    }

    #[test]
    fn tree_sorts_entries() {
        let first = ContentHash::from_canonical(b"doc1");
        let second = ContentHash::from_canonical(b"doc2");
        let tree = TreeObject::new(vec![entry("z.xml", first), entry("a.xml", second)]);

        let order: Vec<&str> = tree.entries.iter().map(|e| e.path.as_str()).collect();
        assert_eq!(order, ["a.xml", "z.xml"]);
    }

    #[test]
    fn tree_get_by_path() {
        let doc_hash = ContentHash::from_canonical(b"doc1");
        let tree = TreeObject::new(vec![entry("file.xml", doc_hash)]);

        let hit = tree.get("file.xml");
        assert!(hit.is_some());
        assert_eq!(hit.unwrap().document, doc_hash);
    }

    #[test]
    fn tree_get_missing() {
        let empty = TreeObject::new(Vec::new());
        assert!(empty.get("nonexistent.xml").is_none());
    }

    #[test]
    fn tree_to_xml_deterministic() {
        let first = ContentHash::from_canonical(b"doc1");
        let second = ContentHash::from_canonical(b"doc2");

        let descending = TreeObject::new(vec![entry("z.xml", first), entry("a.xml", second)]);
        let ascending = TreeObject::new(vec![entry("a.xml", second), entry("z.xml", first)]);

        assert_eq!(descending.to_xml(), ascending.to_xml());
    }

    #[test]
    fn tree_to_xml_empty() {
        let xml = TreeObject::new(Vec::new()).to_xml();
        assert!(xml.contains("repo:tree"));
        assert!(!xml.contains("repo:entry"));
    }

    #[test]
    fn tree_to_xml_contains_entries() {
        let doc_hash = ContentHash::from_canonical(b"doc1");
        let xml = TreeObject::new(vec![entry("file.xml", doc_hash)]).to_xml();

        assert!(xml.contains("repo:entry"));
        assert!(xml.contains("path=\"file.xml\""));
        assert!(xml.contains(&doc_hash.to_string()));
    }

    #[test]
    fn tree_paths() {
        let doc_hash = ContentHash::from_canonical(b"doc1");
        let unsorted: Vec<TreeEntry> = ["c.xml", "a.xml", "b.xml"]
            .into_iter()
            .map(|path| entry(path, doc_hash))
            .collect();
        let tree = TreeObject::new(unsorted);

        assert_eq!(tree.paths(), vec!["a.xml", "b.xml", "c.xml"]);
    }

    #[test]
    fn tag_to_xml_contains_all_fields() {
        let digest = ContentHash::from_canonical(b"test");
        let tagger = Author {
            name: "Bob".into(),
            email: "bob@example.com".into(),
        };
        let timestamp = DateTime::parse_from_rfc3339("2026-03-17T10:30:00Z")
            .expect("valid timestamp")
            .to_utc();
        let xml = TagObject {
            target: digest,
            name: "v1.0".into(),
            tagger,
            timestamp,
            message: "Release v1.0".into(),
        }
        .to_xml();

        assert!(xml.contains("repo:tag"));
        assert!(xml.contains("v1.0"));
        assert!(xml.contains("Bob"));
    }

    // -----------------------------------------------------------------------
    // Property-based tests (Group D)
    // -----------------------------------------------------------------------

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(256))]

        /// D1: Sort determinism - the same entries in forward and reversed
        /// order produce the same to_xml().
        #[test]
        fn prop_tree_sort_determinism(
            entries in prop::collection::hash_map(
                "[a-z]{1,8}\\.xml",
                crate::store::prop_strategies::arb_content_hash(),
                2..=10,
            )
        ) {
            let in_order: Vec<TreeEntry> = entries
                .iter()
                .map(|(path, hash)| TreeEntry { path: path.clone(), document: *hash })
                .collect();
            let flipped: Vec<TreeEntry> = in_order.iter().rev().cloned().collect();

            let tree1 = TreeObject::new(in_order);
            let tree2 = TreeObject::new(flipped);

            prop_assert_eq!(tree1.to_xml(), tree2.to_xml());
        }

        /// D2: Build tree hash determinism - forward and reversed entries
        /// give the same hash via Repo::build_tree.
        #[test]
        fn prop_build_tree_hash_determinism(
            entries in prop::collection::hash_map(
                "[a-z]{1,8}\\.xml",
                crate::store::prop_strategies::arb_content_hash(),
                2..=10,
            )
        ) {
            let rt = crate::store::prop_strategies::runtime();
            rt.block_on(async {
                let store = crate::store::memory::MemoryStore::new();
                let repo = crate::repo::Repo::init(store);

                let forward: Vec<(String, ContentHash)> = entries
                    .iter()
                    .map(|(path, hash)| (path.clone(), *hash))
                    .collect();
                let backward: Vec<(String, ContentHash)> =
                    forward.iter().rev().cloned().collect();

                let h1 = repo.build_tree(forward).await.unwrap();
                let h2 = repo.build_tree(backward).await.unwrap();
                prop_assert_eq!(h1, h2, "shuffled entries should produce same tree hash");
                Ok(())
            })?;
        }
    }
}