Skip to main content

binoc_sdk/
types.rs

1use serde::{Deserialize, Serialize};
2
3use crate::ir::DiffNode;
4
5// ── Artifact types ──────────────────────────────────────────────────
6
7/// Which side of a comparison an artifact describes.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
9pub enum ArtifactSubject {
10    #[serde(rename = "left")]
11    Left,
12    #[serde(rename = "right")]
13    Right,
14    #[serde(rename = "pair")]
15    Pair,
16}
17
/// Identifies an artifact's data format as a structured tuple of
/// (package, name, version).
///
/// - **`package`** — the package that owns and defines this format,
///   resolvable through the language's normal package system
///   (e.g. `"binoc"`, `"binoc-csv"`, `"acme-parquet"`).
/// - **`name`** — the format name within that package
///   (e.g. `"tabular"`, `"relational-schema"`).
/// - **`version`** — a single integer. Bump only for breaking schema
///   changes. Adding optional fields to an existing version is fine
///   and does not require a bump (JSON/serde naturally ignore unknown
///   fields and default missing ones).
///
/// Two formats are equal only when all three components are equal; the
/// type is also hashable, so it can be used as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ArtifactFormat {
    /// Package that owns and defines the format.
    pub package: String,
    /// Format name within `package`.
    pub name: String,
    /// Schema version; bumped only for breaking changes.
    pub version: u32,
}
36
37impl ArtifactFormat {
38    pub fn new(package: impl Into<String>, name: impl Into<String>, version: u32) -> Self {
39        Self {
40            package: package.into(),
41            name: name.into(),
42            version,
43        }
44    }
45}
46
47impl std::fmt::Display for ArtifactFormat {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        write!(f, "{}.{}.v{}", self.package, self.name, self.version)
50    }
51}
52
/// Descriptor for a published artifact attached to a node.
///
/// Artifacts are the unified mechanism for both private reuse and
/// cross-plugin composition. A comparator or transformer publishes
/// zero or more artifacts; downstream plugins consume them by format.
///
/// The descriptor carries only metadata; the artifact's bytes are looked
/// up through `handle`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArtifactDescriptor {
    /// Data format of the artifact; consumers select artifacts by this.
    pub format: ArtifactFormat,
    /// Which side of the comparison the artifact describes.
    pub subject: ArtifactSubject,
    /// Identifies who published the artifact.
    /// NOTE(review): presumably the publishing plugin's name — confirm.
    pub producer: String,
    /// Opaque handle managed by the SDK's DataAccess implementation.
    /// Plugins should not create or interpret this value directly.
    pub handle: String,
}
67
68// ── Standard artifact formats ───────────────────────────────────────
69
70/// Standard format for tabular data artifacts.
71///
72/// Any comparator that parses a tabular source format (CSV, TSV, Excel,
73/// Parquet, …) should publish artifacts with this format so that
74/// generic tabular transformers and extractors can consume them without
75/// knowing the source format.
76pub fn tabular_v1() -> ArtifactFormat {
77    ArtifactFormat::new("binoc", "tabular", 1)
78}
79
80// ── Format-neutral data types ───────────────────────────────────────
81
/// Format-neutral tabular data. Produced by CSV, Excel, Parquet comparators;
/// consumed by tabular transformers and extractors.
///
/// This is the codec type for the [`tabular_v1`] artifact format.
/// Serialize with `serde_json::to_vec`, deserialize with `serde_json::from_slice`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TabularData {
    /// Column names, in column order.
    pub headers: Vec<String>,
    /// Data rows; each row is a list of cell values as strings. Nothing in
    /// this type forces a row to have the same length as `headers`.
    pub rows: Vec<Vec<String>>,
}
92
93impl TabularData {
94    pub fn column_index(&self, name: &str) -> Option<usize> {
95        self.headers.iter().position(|h| h == name)
96    }
97
98    pub fn column_values(&self, name: &str) -> Option<Vec<&str>> {
99        let idx = self.column_index(name)?;
100        Some(
101            self.rows
102                .iter()
103                .map(|r| r.get(idx).map(|s| s.as_str()).unwrap_or(""))
104                .collect(),
105        )
106    }
107
108    pub fn to_csv(&self) -> String {
109        let mut out = self.headers.join(",");
110        out.push('\n');
111        for row in &self.rows {
112            out.push_str(&row.join(","));
113            out.push('\n');
114        }
115        out
116    }
117}
118
/// A pair of tabular data (left/right sides of a comparison).
///
/// Either side may be absent; `None` means no table is available for that
/// side.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TabularDataPair {
    /// Table for the left side, if any.
    pub left: Option<TabularData>,
    /// Table for the right side, if any.
    pub right: Option<TabularData>,
}
125
126impl TabularDataPair {
127    /// Build a `TabularDataPair` from [`tabular_v1`] artifacts on a node.
128    ///
129    /// Returns `None` if neither left nor right artifact is present.
130    /// This is the standard way for transformers and extractors to obtain
131    /// tabular data without knowing the source format.
132    pub fn from_artifacts(
133        node: &crate::ir::DiffNode,
134        data: &dyn crate::traits::DataAccess,
135    ) -> Option<Self> {
136        let fmt = tabular_v1();
137        let left = node
138            .artifacts
139            .iter()
140            .find(|a| a.format == fmt && a.subject == ArtifactSubject::Left)
141            .and_then(|desc| data.get_artifact(desc).ok()?)
142            .and_then(|bytes| serde_json::from_slice(&bytes).ok());
143        let right = node
144            .artifacts
145            .iter()
146            .find(|a| a.format == fmt && a.subject == ArtifactSubject::Right)
147            .and_then(|desc| data.get_artifact(desc).ok()?)
148            .and_then(|bytes| serde_json::from_slice(&bytes).ok());
149        if left.is_none() && right.is_none() {
150            return None;
151        }
152        Some(Self { left, right })
153    }
154}
155
156// ── Tabular extraction ──────────────────────────────────────────────
157
158/// Shared extraction logic for tabular data.
159///
160/// Given a `TabularDataPair` and an aspect name, produces the
161/// corresponding `ExtractResult`. This is format-neutral — any
162/// comparator or transformer that works with tabular artifacts can
163/// delegate extraction here.
164pub fn tabular_extract(
165    pair: &TabularDataPair,
166    _node: &DiffNode,
167    aspect: &str,
168) -> Option<ExtractResult> {
169    match aspect {
170        "rows_added" => {
171            let right = pair.right.as_ref()?;
172            let left_len = pair.left.as_ref().map_or(0, |l| l.rows.len());
173            if left_len >= right.rows.len() {
174                return Some(ExtractResult::Text("No rows added.\n".into()));
175            }
176            let added = TabularData {
177                headers: right.headers.clone(),
178                rows: right.rows[left_len..].to_vec(),
179            };
180            Some(ExtractResult::Text(added.to_csv()))
181        }
182        "rows_removed" => {
183            let left = pair.left.as_ref()?;
184            let right_len = pair.right.as_ref().map_or(0, |r| r.rows.len());
185            if right_len >= left.rows.len() {
186                return Some(ExtractResult::Text("No rows removed.\n".into()));
187            }
188            let removed = TabularData {
189                headers: left.headers.clone(),
190                rows: left.rows[right_len..].to_vec(),
191            };
192            Some(ExtractResult::Text(removed.to_csv()))
193        }
194        "cells_changed" => {
195            let left = pair.left.as_ref()?;
196            let right = pair.right.as_ref()?;
197            let common_cols = tabular_columns_in_common(left, right);
198            let min_rows = left.rows.len().min(right.rows.len());
199
200            let mut out = String::from("row,column,old_value,new_value\n");
201            for i in 0..min_rows {
202                for col in &common_cols {
203                    let li = left.column_index(col)?;
204                    let ri = right.column_index(col)?;
205                    let lv = left.rows[i].get(li).map(|s| s.as_str()).unwrap_or("");
206                    let rv = right.rows[i].get(ri).map(|s| s.as_str()).unwrap_or("");
207                    if lv != rv {
208                        out.push_str(&format!("{i},{col},{lv},{rv}\n"));
209                    }
210                }
211            }
212            Some(ExtractResult::Text(out))
213        }
214        "columns_added" => {
215            let left = pair.left.as_ref()?;
216            let right = pair.right.as_ref()?;
217            let left_set: std::collections::BTreeSet<&str> =
218                left.headers.iter().map(|s| s.as_str()).collect();
219            let added: Vec<&str> = right
220                .headers
221                .iter()
222                .filter(|h| !left_set.contains(h.as_str()))
223                .map(|h| h.as_str())
224                .collect();
225            if added.is_empty() {
226                return Some(ExtractResult::Text("No columns added.\n".into()));
227            }
228            let mut out = String::new();
229            for col in &added {
230                out.push_str(&format!("{col}\n"));
231                if let Some(vals) = right.column_values(col) {
232                    for val in vals {
233                        out.push_str(&format!("  {val}\n"));
234                    }
235                }
236            }
237            Some(ExtractResult::Text(out))
238        }
239        "columns_removed" => {
240            let left = pair.left.as_ref()?;
241            let right = pair.right.as_ref()?;
242            let right_set: std::collections::BTreeSet<&str> =
243                right.headers.iter().map(|s| s.as_str()).collect();
244            let removed: Vec<&str> = left
245                .headers
246                .iter()
247                .filter(|h| !right_set.contains(h.as_str()))
248                .map(|h| h.as_str())
249                .collect();
250            if removed.is_empty() {
251                return Some(ExtractResult::Text("No columns removed.\n".into()));
252            }
253            let mut out = String::new();
254            for col in &removed {
255                out.push_str(&format!("{col}\n"));
256                if let Some(vals) = left.column_values(col) {
257                    for val in vals {
258                        out.push_str(&format!("  {val}\n"));
259                    }
260                }
261            }
262            Some(ExtractResult::Text(out))
263        }
264        "content" | "full" => {
265            let mut out = String::new();
266            if let Some(left) = &pair.left {
267                out.push_str("--- left\n");
268                out.push_str(&left.to_csv());
269            }
270            if let Some(right) = &pair.right {
271                out.push_str("+++ right\n");
272                out.push_str(&right.to_csv());
273            }
274            Some(ExtractResult::Text(out))
275        }
276        _ => None,
277    }
278}
279
280fn tabular_columns_in_common(left: &TabularData, right: &TabularData) -> Vec<String> {
281    let left_set: std::collections::BTreeSet<&str> =
282        left.headers.iter().map(|s| s.as_str()).collect();
283    right
284        .headers
285        .iter()
286        .filter(|h| left_set.contains(h.as_str()))
287        .cloned()
288        .collect()
289}
290
291// ── Item types ──────────────────────────────────────────────────────
292
/// Metadata-only view of one side of a comparison. Carries logical identity
/// and content metadata but NOT a filesystem path — data access goes through
/// [`DataAccess`](crate::traits::DataAccess).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ItemRef {
    /// Logical path that identifies the item (not a filesystem path).
    pub logical_path: String,
    /// Whether the item is a directory.
    pub is_dir: bool,
    /// Content hash, when known. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content_hash: Option<String>,
    /// Media type, when known. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub media_type: Option<String>,
    /// Opaque identifier used by DataAccess implementations to locate data.
    /// Plugin authors should not create or interpret this value directly.
    /// Defaults to an empty string when missing from deserialized input.
    #[serde(default)]
    pub handle: String,
}
309
310impl ItemRef {
311    pub fn extension(&self) -> Option<String> {
312        std::path::Path::new(&self.logical_path)
313            .extension()
314            .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
315    }
316}
317
/// A pair of items to compare. Either side may be None (add/remove).
///
/// A pair with only `right` set represents an added item; a pair with only
/// `left` set represents a removed item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ItemPair {
    /// Item on the left side, absent for an added item.
    pub left: Option<ItemRef>,
    /// Item on the right side, absent for a removed item.
    pub right: Option<ItemRef>,
}
324
325impl ItemPair {
326    pub fn both(left: ItemRef, right: ItemRef) -> Self {
327        Self {
328            left: Some(left),
329            right: Some(right),
330        }
331    }
332
333    pub fn added(right: ItemRef) -> Self {
334        Self {
335            left: None,
336            right: Some(right),
337        }
338    }
339
340    pub fn removed(left: ItemRef) -> Self {
341        Self {
342            left: Some(left),
343            right: None,
344        }
345    }
346
347    pub fn logical_path(&self) -> &str {
348        self.right
349            .as_ref()
350            .or(self.left.as_ref())
351            .map(|i| i.logical_path.as_str())
352            .unwrap_or("")
353    }
354
355    pub fn extension(&self) -> Option<String> {
356        self.right
357            .as_ref()
358            .or(self.left.as_ref())
359            .and_then(|i| i.extension())
360    }
361
362    pub fn media_type(&self) -> Option<&str> {
363        self.right
364            .as_ref()
365            .or(self.left.as_ref())
366            .and_then(|i| i.media_type.as_deref())
367    }
368
369    pub fn is_dir(&self) -> bool {
370        self.right.as_ref().is_some_and(|i| i.is_dir)
371            || self.left.as_ref().is_some_and(|i| i.is_dir)
372    }
373
374    pub fn matching_content_hash(&self) -> Option<&str> {
375        match (&self.left, &self.right) {
376            (Some(l), Some(r)) => match (&l.content_hash, &r.content_hash) {
377                (Some(hl), Some(hr)) if hl == hr => Some(hl.as_str()),
378                _ => None,
379            },
380            _ => None,
381        }
382    }
383}
384
385// ── Result enums ────────────────────────────────────────────────────
386
/// Result of a comparator's compare operation.
///
/// Marked `#[non_exhaustive]`: code in other crates that matches on this
/// enum must include a wildcard arm, so variants can be added later
/// without breaking them.
#[derive(Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum CompareResult {
    /// Items are identical — no diff node produced.
    Identical,
    /// Terminal diff — no further expansion needed.
    Leaf(DiffNode),
    /// Container node with children to recursively process.
    /// The second field lists the child pairs to compare.
    Expand(DiffNode, Vec<ItemPair>),
    /// Comparator cannot handle this item after all — try the next one.
    Skip,
}
400
401/// Result of a transformer's transform operation.
402#[non_exhaustive]
403pub enum TransformResult {
404    /// Node unchanged — zero cost.
405    Unchanged,
406    /// Replace this node with a new one.
407    Replace(Box<DiffNode>),
408    /// Replace this node with multiple sibling nodes.
409    ReplaceMany(Vec<DiffNode>),
410    /// Remove this node entirely.
411    Remove,
412}
413
/// Scope at which a transformer operates.
///
/// Defaults to [`TransformScope::Node`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum TransformScope {
    /// Transformer receives individual matched nodes; controller recurses into children.
    #[default]
    Node,
    /// Transformer receives the whole subtree; controller does NOT recurse.
    Subtree,
}
423
/// Dispatch filter on node shape for transformer matching.
///
/// Defaults to [`NodeShapeFilter::Any`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeShapeFilter {
    /// Match any node regardless of children.
    #[default]
    Any,
    /// Match only container nodes (those with children).
    Container,
    /// Match only leaf nodes (those without children).
    Leaf,
}
435
/// Whether a comparator handles files, containers (directories), or both.
///
/// Defaults to [`ItemScope::Files`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum ItemScope {
    /// Non-directory items only (most comparators).
    #[default]
    Files,
    /// Directories only (directory comparator).
    Containers,
    /// Both files and directories.
    Any,
}
447
/// Result of an extract (on-demand detail retrieval) operation.
///
/// Implements `Debug`, `Clone`, `PartialEq` and `Eq` so results can be
/// logged, duplicated and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExtractResult {
    /// Extracted detail as text.
    Text(String),
    /// Extracted detail as raw bytes.
    Binary(Vec<u8>),
}
453
#[cfg(test)]
mod tests {
    use super::*;

    /// Test fixture: an `ItemRef` with the given path, directory flag and
    /// content hash; the media type is unset and the handle is empty.
    fn item(path: &str, is_dir: bool, hash: Option<&str>) -> ItemRef {
        ItemRef {
            logical_path: path.to_owned(),
            is_dir,
            content_hash: hash.map(str::to_owned),
            media_type: None,
            handle: String::new(),
        }
    }

    /// Shorthand for a plain file without a content hash.
    fn file(path: &str) -> ItemRef {
        item(path, false, None)
    }

    #[test]
    fn item_ref_extension() {
        assert_eq!(file("data.csv").extension(), Some(".csv".into()));
    }

    #[test]
    fn item_ref_extension_none() {
        assert_eq!(file("Makefile").extension(), None);
    }

    #[test]
    fn item_pair_logical_path_prefers_right() {
        let pair = ItemPair::both(file("left.txt"), file("right.txt"));
        assert_eq!(pair.logical_path(), "right.txt");
    }

    #[test]
    fn item_pair_logical_path_falls_back_to_left() {
        let pair = ItemPair::removed(file("only.txt"));
        assert_eq!(pair.logical_path(), "only.txt");
    }

    #[test]
    fn item_pair_is_dir() {
        let pair = ItemPair::added(item("sub", true, None));
        assert!(pair.is_dir());
    }

    #[test]
    fn item_pair_matching_hash() {
        let pair = ItemPair::both(
            item("f", false, Some("abc")),
            item("f", false, Some("abc")),
        );
        assert_eq!(pair.matching_content_hash(), Some("abc"));
    }
}
547}