Skip to main content

binoc_sdk/
ir.rs

1use serde::{Deserialize, Serialize};
2use std::collections::{BTreeMap, BTreeSet};
3
4use crate::types::{ArtifactDescriptor, ItemPair};
5
6/// A node in the diff tree — the central data structure of the system.
7/// Every comparator emits it, every transformer rewrites it, and serializers
8/// or bindings read it.
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct DiffNode {
11    /// Open enum: "add", "remove", "modify", "move", "reorder",
12    /// "schema_change", etc. Plugins may define new actions.
13    pub action: String,
14
15    /// Open string: "directory", "file", "tabular", "zip_archive", etc.
16    /// No built-in types — conventions, not enforcement.
17    pub item_type: String,
18
19    /// Location within snapshot (logical path, including interior paths
20    /// like "archive.zip/data/file.csv").
21    pub path: String,
22
23    /// For moves/renames: the original path.
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub source_path: Option<String>,
26
27    /// Optional human-readable one-liner describing the change.
28    /// Set by comparator or transformer; used by renderers for narrative rendering.
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub summary: Option<String>,
31
32    /// Open bag of semantic tags, namespaced by convention.
33    /// e.g. "binoc.column-reorder", "biobinoc.gap-change"
34    #[serde(default, skip_serializing_if = "BTreeSet::is_empty")]
35    pub tags: BTreeSet<String>,
36
37    /// Child diff nodes forming the tree structure.
38    #[serde(default, skip_serializing_if = "Vec::is_empty")]
39    pub children: Vec<DiffNode>,
40
41    /// Comparator-specific payload, schema determined by item_type convention.
42    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
43    pub details: BTreeMap<String, serde_json::Value>,
44
45    /// Transformer-added metadata.
46    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
47    pub annotations: BTreeMap<String, serde_json::Value>,
48
49    /// Which comparator produced this node (provenance for extract chain).
50    #[serde(default, skip_serializing_if = "Option::is_none")]
51    pub comparator: Option<String>,
52
53    /// Transformers that modified this node, in order (provenance for extract chain).
54    #[serde(default, skip_serializing_if = "Vec::is_empty")]
55    pub transformed_by: Vec<String>,
56
57    /// The original item pair that produced this node. Transient — available
58    /// during the live diff/transform session for transformers and extractors
59    /// that need to re-read source data. Not serialized into changeset JSON.
60    #[serde(skip)]
61    pub source_items: Option<ItemPair>,
62
63    /// Published artifacts for this node. Transient session data — not
64    /// serialized into changeset JSON. Keyed by `(subject, format_id)`.
65    #[serde(skip)]
66    pub artifacts: Vec<ArtifactDescriptor>,
67}
68
69impl DiffNode {
70    pub fn new(
71        action: impl Into<String>,
72        item_type: impl Into<String>,
73        path: impl Into<String>,
74    ) -> Self {
75        Self {
76            action: action.into(),
77            item_type: item_type.into(),
78            path: path.into(),
79            source_path: None,
80            summary: None,
81            tags: BTreeSet::new(),
82            children: Vec::new(),
83            details: BTreeMap::new(),
84            annotations: BTreeMap::new(),
85            comparator: None,
86            transformed_by: Vec::new(),
87            source_items: None,
88            artifacts: Vec::new(),
89        }
90    }
91
92    pub fn with_summary(mut self, summary: impl Into<String>) -> Self {
93        self.summary = Some(summary.into());
94        self
95    }
96
97    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
98        self.tags.insert(tag.into());
99        self
100    }
101
102    pub fn with_detail(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
103        self.details.insert(key.into(), value);
104        self
105    }
106
107    pub fn with_children(mut self, children: Vec<DiffNode>) -> Self {
108        self.children = children;
109        self
110    }
111
112    pub fn with_source_path(mut self, source: impl Into<String>) -> Self {
113        self.source_path = Some(source.into());
114        self
115    }
116
117    pub fn with_source_items(mut self, items: ItemPair) -> Self {
118        self.source_items = Some(items);
119        self
120    }
121
122    pub fn with_artifact(mut self, artifact: ArtifactDescriptor) -> Self {
123        self.artifacts.push(artifact);
124        self
125    }
126
127    pub fn node_count(&self) -> usize {
128        1 + self.children.iter().map(|c| c.node_count()).sum::<usize>()
129    }
130
131    pub fn all_tags(&self) -> BTreeSet<String> {
132        let mut tags = self.tags.clone();
133        for child in &self.children {
134            tags.extend(child.all_tags());
135        }
136        tags
137    }
138}
139
140/// A structured description of how to get from one snapshot to the next.
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct Changeset {
143    pub from_snapshot: String,
144    pub to_snapshot: String,
145    pub root: Option<DiffNode>,
146    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
147    pub metadata: BTreeMap<String, String>,
148}
149
150impl Changeset {
151    pub fn new(from: impl Into<String>, to: impl Into<String>, root: Option<DiffNode>) -> Self {
152        Self {
153            from_snapshot: from.into(),
154            to_snapshot: to.into(),
155            root,
156            metadata: BTreeMap::new(),
157        }
158    }
159
160    pub fn node_count(&self) -> usize {
161        self.root.as_ref().map_or(0, |r| r.node_count())
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` must set the three required fields and leave everything else empty.
    #[test]
    fn diff_node_new_creates_node_with_correct_fields() {
        let node = DiffNode::new("modify", "file", "path/to/file.csv");
        assert_eq!(node.action, "modify");
        assert_eq!(node.item_type, "file");
        assert_eq!(node.path, "path/to/file.csv");
        assert!(node.source_path.is_none());
        assert!(node.summary.is_none());
        assert!(node.tags.is_empty());
        assert!(node.children.is_empty());
        assert!(node.details.is_empty());
        assert!(node.annotations.is_empty());
        assert!(node.comparator.is_none());
        assert!(node.transformed_by.is_empty());
        assert!(node.source_items.is_none());
        assert!(node.artifacts.is_empty());
    }

    #[test]
    fn diff_node_builder_methods_chain_correctly() {
        let child = DiffNode::new("add", "file", "child.txt");
        let node = DiffNode::new("modify", "directory", "dir")
            .with_summary("directory changed")
            .with_tag("binoc.column-reorder")
            .with_tag("binoc.whitespace")
            .with_detail("lines_changed", serde_json::json!(42))
            .with_children(vec![child])
            .with_source_path("old/dir");

        assert_eq!(node.summary.as_deref(), Some("directory changed"));
        assert_eq!(node.tags.len(), 2);
        assert!(node.tags.contains("binoc.column-reorder"));
        assert!(node.tags.contains("binoc.whitespace"));
        assert_eq!(
            node.details.get("lines_changed"),
            Some(&serde_json::json!(42))
        );
        assert_eq!(node.children.len(), 1);
        assert_eq!(node.children[0].path, "child.txt");
        assert_eq!(node.source_path.as_deref(), Some("old/dir"));
    }

    /// Tags are a set and details a map: repeats must not accumulate.
    #[test]
    fn repeated_tag_and_detail_do_not_duplicate() {
        let node = DiffNode::new("modify", "file", "f")
            .with_tag("dup")
            .with_tag("dup")
            .with_detail("k", serde_json::json!(1))
            .with_detail("k", serde_json::json!(2));

        assert_eq!(node.tags.len(), 1);
        assert_eq!(node.details.len(), 1);
        assert_eq!(node.details.get("k"), Some(&serde_json::json!(2)));
    }

    #[test]
    fn node_count_leaf_returns_one() {
        let node = DiffNode::new("add", "file", "file.txt");
        assert_eq!(node.node_count(), 1);
    }

    #[test]
    fn node_count_tree_returns_correct_total() {
        let node = DiffNode::new("modify", "dir", "dir").with_children(vec![
            DiffNode::new("add", "file", "a.txt"),
            DiffNode::new("modify", "dir", "sub").with_children(vec![DiffNode::new(
                "remove",
                "file",
                "sub/b.txt",
            )]),
        ]);
        assert_eq!(node.node_count(), 4);
    }

    #[test]
    fn all_tags_collects_from_entire_subtree() {
        let node = DiffNode::new("modify", "dir", "dir")
            .with_tag("root-tag")
            .with_children(vec![
                DiffNode::new("add", "file", "a").with_tag("child-tag"),
                DiffNode::new("remove", "file", "b").with_children(vec![DiffNode::new(
                    "modify", "file", "c",
                )
                .with_tag("grandchild-tag")]),
            ]);
        let tags = node.all_tags();
        assert_eq!(tags.len(), 3);
        assert!(tags.contains("root-tag"));
        assert!(tags.contains("child-tag"));
        assert!(tags.contains("grandchild-tag"));
    }

    /// A tag that appears on several nodes must show up once in the union.
    #[test]
    fn all_tags_deduplicates_across_nodes() {
        let node = DiffNode::new("modify", "dir", "dir")
            .with_tag("shared")
            .with_children(vec![
                DiffNode::new("add", "file", "a").with_tag("shared"),
                DiffNode::new("add", "file", "b").with_tag("only-b"),
            ]);
        let tags = node.all_tags();
        assert_eq!(tags.len(), 2);
        assert!(tags.contains("shared"));
        assert!(tags.contains("only-b"));
    }

    #[test]
    fn serde_round_trip_preserves_equality() {
        let node = DiffNode::new("move", "file", "new/path.csv")
            .with_summary("moved file")
            .with_tag("binoc.move")
            .with_detail("distance", serde_json::json!(10))
            .with_source_path("old/path.csv")
            .with_children(vec![DiffNode::new("modify", "file", "new/path.csv/inner")]);
        let json = serde_json::to_string(&node).unwrap();
        let restored: DiffNode = serde_json::from_str(&json).unwrap();
        assert_eq!(node.action, restored.action);
        assert_eq!(node.item_type, restored.item_type);
        assert_eq!(node.path, restored.path);
        assert_eq!(node.source_path, restored.source_path);
        assert_eq!(node.summary, restored.summary);
        assert_eq!(node.tags, restored.tags);
        assert_eq!(node.details, restored.details);
        assert_eq!(restored.children.len(), 1);
        assert_eq!(restored.children[0].path, "new/path.csv/inner");
        assert_eq!(restored.node_count(), node.node_count());
    }

    /// Empty optional fields and the transient fields must not appear in JSON.
    #[test]
    fn minimal_node_serializes_only_required_fields() {
        let node = DiffNode::new("add", "file", "a.txt");
        let json = serde_json::to_string(&node).unwrap();
        assert_eq!(json, r#"{"action":"add","item_type":"file","path":"a.txt"}"#);
    }

    /// JSON carrying only the required keys must deserialize with empty defaults.
    #[test]
    fn minimal_json_deserializes_with_defaults() {
        let json = r#"{"action":"add","item_type":"file","path":"a.txt"}"#;
        let node: DiffNode = serde_json::from_str(json).unwrap();
        assert_eq!(node.action, "add");
        assert_eq!(node.item_type, "file");
        assert_eq!(node.path, "a.txt");
        assert!(node.source_path.is_none());
        assert!(node.summary.is_none());
        assert!(node.tags.is_empty());
        assert!(node.children.is_empty());
        assert!(node.details.is_empty());
        assert!(node.annotations.is_empty());
        assert!(node.comparator.is_none());
        assert!(node.transformed_by.is_empty());
        assert!(node.source_items.is_none());
        assert!(node.artifacts.is_empty());
    }

    #[test]
    fn changeset_construction_and_node_count() {
        let root = DiffNode::new("modify", "dir", "root").with_children(vec![
            DiffNode::new("add", "file", "root/a.txt"),
            DiffNode::new("remove", "file", "root/b.txt"),
        ]);
        let changeset = Changeset::new("v1", "v2", Some(root));
        assert_eq!(changeset.from_snapshot, "v1");
        assert_eq!(changeset.to_snapshot, "v2");
        assert!(changeset.metadata.is_empty());
        assert_eq!(changeset.node_count(), 3);
    }

    #[test]
    fn changeset_node_count_none_root() {
        let changeset = Changeset::new("v1", "v2", None);
        assert_eq!(changeset.node_count(), 0);
    }

    #[test]
    fn changeset_serde_round_trip() {
        let root = DiffNode::new("modify", "dir", "root")
            .with_children(vec![DiffNode::new("add", "file", "root/a.txt")]);
        let changeset = Changeset::new("v1", "v2", Some(root));
        let json = serde_json::to_string(&changeset).unwrap();
        // Empty metadata is skipped on serialization.
        assert!(!json.contains("metadata"));

        let restored: Changeset = serde_json::from_str(&json).unwrap();
        assert_eq!(restored.from_snapshot, "v1");
        assert_eq!(restored.to_snapshot, "v2");
        assert!(restored.metadata.is_empty());
        assert_eq!(restored.node_count(), 2);
    }
}