gitsheets/
lib.rs

1// git-sheets: Version control for spreadsheets
2// A tool for Excel sufferers who deserve better
3
4use std::collections::HashMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use sha2::{Sha256, Digest};
10
11// ============================================================================
12// CORE PRIMITIVES
13// ============================================================================
14
/// A snapshot represents the complete state of a table at a point in time.
///
/// The type derives serde's `Serialize`/`Deserialize`; `Snapshot::save` writes
/// it as TOML and `Snapshot::load` reads it back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
    /// Unique identifier for this snapshot.
    ///
    /// `Snapshot::new` builds it as `<unix seconds>-<first 8 chars of the table hash>`.
    pub id: String,
    /// When this snapshot was taken (UTC).
    pub timestamp: DateTime<Utc>,
    /// User-provided message explaining the snapshot, if any.
    pub message: Option<String>,
    /// The table data captured by this snapshot.
    pub table: Table,
    /// Hashes for integrity verification, computed from `table` when the
    /// snapshot is created. `Snapshot::verify` compares only `table_hash`.
    pub hashes: TableHashes,
    /// Dependencies on other tables/files. Empty after `Snapshot::new`;
    /// entries are appended by `Snapshot::add_dependency`.
    pub dependencies: Vec<Dependency>,
}
31
/// A table is just headers + rows, nothing fancy.
///
/// Nothing in this type enforces that every row has as many cells as there
/// are headers; code that reads cells by column index must tolerate short rows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Table {
    /// Column names, in column order. `Table::from_csv` stores them with
    /// surrounding whitespace trimmed.
    pub headers: Vec<String>,
    /// Raw row data, one `Vec<String>` of cells per row. `Table::from_csv`
    /// stores each cell with surrounding whitespace trimmed.
    pub rows: Vec<Vec<String>>,
    /// Optional: zero-based column indices that together form the primary key.
    /// `None` until `Table::set_primary_key` is called; `Table::get_row_key`
    /// uses these indices to pick cells out of a row.
    pub primary_key: Option<Vec<usize>>,
}
42
/// Hashes for verifying table integrity.
///
/// Every hash is a SHA-256 digest rendered as lowercase hexadecimal.
//
// NOTE(review): `row_hashes` (a plain array) is declared after `header_hashes`
// (a map, i.e. a TOML table). Some versions of the `toml` serializer reject a
// value emitted after a table — confirm `Snapshot::save` works with the pinned
// `toml` version before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableHashes {
    /// Hash of the entire table (quick integrity check).
    pub table_hash: String,
    /// Per-column hashes keyed by header name (column-level verification).
    /// Because the key is the header text, columns that share a name collapse
    /// into a single entry (the later column's hash wins).
    pub header_hashes: HashMap<String, String>,
    /// Optional: per-row hashes in row order (fine-grained verification).
    /// `TableHashes::compute` always fills this with `Some`.
    pub row_hashes: Option<Vec<String>>,
}
53
/// A dependency represents a reference to another table or file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dependency {
    /// Name or identifier of the dependency.
    pub name: String,
    /// File path if it's external; `None` otherwise.
    pub path: Option<PathBuf>,
    /// Hash of the dependency at snapshot time. This is supplied by the caller
    /// of `Snapshot::add_dependency`; the code visible in this file neither
    /// computes nor re-checks it.
    pub hash: String,
}
64
65// ============================================================================
66// SNAPSHOT OPERATIONS
67// ============================================================================
68
69impl Snapshot {
70    /// Create a new snapshot from a table
71    pub fn new(table: Table, message: Option<String>) -> Self {
72        let hashes = TableHashes::compute(&table);
73        let id = format!("{}-{}",
74            Utc::now().timestamp(),
75            &hashes.table_hash[..8]
76        );
77
78        Self {
79            id,
80            timestamp: Utc::now(),
81            message,
82            table,
83            hashes,
84            dependencies: Vec::new(),
85        }
86    }
87
88    /// Add a dependency to this snapshot
89    pub fn add_dependency(&mut self, name: String, path: Option<PathBuf>, hash: String) {
90        self.dependencies.push(Dependency { name, path, hash });
91    }
92
93    /// Save snapshot to disk as TOML
94    pub fn save(&self, output_path: &Path) -> Result<(), Box<dyn std::error::Error>> {
95        let toml_string = toml::to_string_pretty(self)?;
96        fs::write(output_path, toml_string)?;
97        Ok(())
98    }
99
100    /// Load snapshot from disk
101    pub fn load(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
102        let content = fs::read_to_string(path)?;
103        let snapshot: Snapshot = toml::from_str(&content)?;
104        Ok(snapshot)
105    }
106
107    /// Verify integrity of this snapshot
108    pub fn verify(&self) -> bool {
109        let computed = TableHashes::compute(&self.table);
110        computed.table_hash == self.hashes.table_hash
111    }
112}
113
114// ============================================================================
115// HASH COMPUTATION
116// ============================================================================
117
118impl TableHashes {
119    /// Compute all hashes for a table
120    pub fn compute(table: &Table) -> Self {
121        let mut header_hashes = HashMap::new();
122
123        // Hash each column
124        for (idx, header) in table.headers.iter().enumerate() {
125            let column_data: Vec<&str> = table.rows
126                .iter()
127                .map(|row| row.get(idx).map(|s| s.as_str()).unwrap_or(""))
128                .collect();
129
130            let hash = Self::hash_column(header, &column_data);
131            header_hashes.insert(header.clone(), hash);
132        }
133
134        // Hash entire table
135        let table_hash = Self::hash_table(&table.headers, &table.rows);
136
137        // Optional: per-row hashes
138        let row_hashes = Some(
139            table.rows
140                .iter()
141                .map(|row| Self::hash_row(row))
142                .collect()
143        );
144
145        Self {
146            table_hash,
147            header_hashes,
148            row_hashes,
149        }
150    }
151
152    fn hash_column(header: &str, data: &[&str]) -> String {
153        let mut hasher = Sha256::new();
154        hasher.update(header.as_bytes());
155        for value in data {
156            hasher.update(value.as_bytes());
157        }
158        format!("{:x}", hasher.finalize())
159    }
160
161    fn hash_row(row: &[String]) -> String {
162        let mut hasher = Sha256::new();
163        for cell in row {
164            hasher.update(cell.as_bytes());
165        }
166        format!("{:x}", hasher.finalize())
167    }
168
169    fn hash_table(headers: &[String], rows: &[Vec<String>]) -> String {
170        let mut hasher = Sha256::new();
171
172        // Hash headers
173        for h in headers {
174            hasher.update(h.as_bytes());
175        }
176
177        // Hash all row data
178        for row in rows {
179            for cell in row {
180                hasher.update(cell.as_bytes());
181            }
182        }
183
184        format!("{:x}", hasher.finalize())
185    }
186}
187
188// ============================================================================
189// TABLE OPERATIONS
190// ============================================================================
191
192impl Table {
193    /// Create a table from CSV data
194    pub fn from_csv(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
195        let mut reader = csv::Reader::from_path(path)?;
196
197        // Get headers
198        let headers: Vec<String> = reader
199            .headers()?
200            .iter()
201            .map(|h| h.trim().to_string())
202            .collect();
203
204        // Get rows
205        let mut rows = Vec::new();
206        for result in reader.records() {
207            let record = result?;
208            let row: Vec<String> = record
209                .iter()
210                .map(|cell| cell.trim().to_string())
211                .collect();
212            rows.push(row);
213        }
214
215        Ok(Self {
216            headers,
217            rows,
218            primary_key: None,
219        })
220    }
221
222    /// Set which columns form the primary key
223    pub fn set_primary_key(&mut self, column_indices: Vec<usize>) {
224        self.primary_key = Some(column_indices);
225    }
226
227    /// Get the primary key for a specific row
228    pub fn get_row_key(&self, row_idx: usize) -> Option<Vec<String>> {
229        let pk_indices = self.primary_key.as_ref()?;
230        let row = self.rows.get(row_idx)?;
231
232        Some(
233            pk_indices
234                .iter()
235                .filter_map(|&idx| row.get(idx).cloned())
236                .collect()
237        )
238    }
239}
240
241// ============================================================================
242// DIFF OPERATIONS
243// ============================================================================
244
/// A diff between two snapshots.
///
/// Built by `SnapshotDiff::compute` and written to disk as JSON by
/// `SnapshotDiff::save`.
#[derive(Debug, Serialize, Deserialize)]
pub struct SnapshotDiff {
    /// `id` of the snapshot the diff starts from (the older side).
    pub from_id: String,
    /// `id` of the snapshot the diff leads to (the newer side).
    pub to_id: String,
    /// Aggregate counts of the changes found.
    pub summary: DiffSummary,
    /// Individual changes: column changes first, then row/cell changes in
    /// ascending row order.
    pub changes: Vec<Change>,
}
253
/// Aggregate counts describing a [`SnapshotDiff`].
#[derive(Debug, Serialize, Deserialize)]
pub struct DiffSummary {
    /// Rows present only in the target snapshot (by row index).
    pub rows_added: usize,
    /// Rows present only in the source snapshot (by row index).
    pub rows_removed: usize,
    /// Rows present at the same index in both snapshots whose contents differ.
    pub rows_modified: usize,
    /// Header names found in the target snapshot but not in the source.
    pub columns_added: usize,
    /// Header names found in the source snapshot but not in the target.
    pub columns_removed: usize,
}
262
/// A single change recorded in a [`SnapshotDiff`].
///
/// Row and column positions are zero-based.
#[derive(Debug, Serialize, Deserialize)]
pub enum Change {
    /// A row exists only in the target snapshot; `index` is its position there.
    RowAdded { index: usize, data: Vec<String> },
    /// A row exists only in the source snapshot; `index` is its position there.
    RowRemoved { index: usize, data: Vec<String> },
    /// The cell at (`row`, `col`) differs between the two snapshots.
    CellChanged { row: usize, col: usize, old: String, new: String },
    /// A header name appears only in the target snapshot; `index` is its
    /// position in the target's header list.
    ColumnAdded { name: String, index: usize },
    /// A header name appears only in the source snapshot; `index` is its
    /// position in the source's header list.
    ColumnRemoved { name: String, index: usize },
}
271
272impl SnapshotDiff {
273    /// Compute diff between two snapshots
274    pub fn compute(from: &Snapshot, to: &Snapshot) -> Self {
275        let mut changes = Vec::new();
276        let mut summary = DiffSummary {
277            rows_added: 0,
278            rows_removed: 0,
279            rows_modified: 0,
280            columns_added: 0,
281            columns_removed: 0,
282        };
283
284        // Compare headers
285        let from_headers: std::collections::HashSet<_> = from.table.headers.iter().collect();
286        let to_headers: std::collections::HashSet<_> = to.table.headers.iter().collect();
287
288        for (idx, header) in to.table.headers.iter().enumerate() {
289            if !from_headers.contains(header) {
290                changes.push(Change::ColumnAdded {
291                    name: header.clone(),
292                    index: idx
293                });
294                summary.columns_added += 1;
295            }
296        }
297
298        for (idx, header) in from.table.headers.iter().enumerate() {
299            if !to_headers.contains(header) {
300                changes.push(Change::ColumnRemoved {
301                    name: header.clone(),
302                    index: idx
303                });
304                summary.columns_removed += 1;
305            }
306        }
307
308        // Simple row comparison (could be smarter with primary keys)
309        let max_rows = from.table.rows.len().max(to.table.rows.len());
310
311        for i in 0..max_rows {
312            match (from.table.rows.get(i), to.table.rows.get(i)) {
313                (None, Some(row)) => {
314                    changes.push(Change::RowAdded {
315                        index: i,
316                        data: row.clone()
317                    });
318                    summary.rows_added += 1;
319                }
320                (Some(row), None) => {
321                    changes.push(Change::RowRemoved {
322                        index: i,
323                        data: row.clone()
324                    });
325                    summary.rows_removed += 1;
326                }
327                (Some(from_row), Some(to_row)) => {
328                    if from_row != to_row {
329                        summary.rows_modified += 1;
330                        // Find specific cell changes
331                        for (col, (old, new)) in from_row.iter().zip(to_row.iter()).enumerate() {
332                            if old != new {
333                                changes.push(Change::CellChanged {
334                                    row: i,
335                                    col,
336                                    old: old.clone(),
337                                    new: new.clone(),
338                                });
339                            }
340                        }
341                    }
342                }
343                (None, None) => unreachable!(),
344            }
345        }
346
347        Self {
348            from_id: from.id.clone(),
349            to_id: to.id.clone(),
350            summary,
351            changes,
352        }
353    }
354
355    /// Save diff to disk
356    pub fn save(&self, path: &Path) -> Result<(), Box<dyn std::error::Error>> {
357        let json = serde_json::to_string_pretty(self)?;
358        fs::write(path, json)?;
359        Ok(())
360    }
361}
362
363// ============================================================================
// TESTS
365// ============================================================================
366
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn a slice of string literals into owned cells.
    fn owned(values: &[&str]) -> Vec<String> {
        values.iter().map(|value| value.to_string()).collect()
    }

    /// A freshly created snapshot verifies and keeps the table's shape.
    #[test]
    fn test_snapshot_creation() {
        let table = Table {
            headers: owned(&["ID", "Name", "Amount"]),
            rows: vec![owned(&["1", "Alice", "100"]), owned(&["2", "Bob", "200"])],
            primary_key: Some(vec![0]),
        };

        let snapshot = Snapshot::new(table, Some("Initial snapshot".to_string()));

        assert!(snapshot.verify());
        assert_eq!(snapshot.table.headers.len(), 3);
        assert_eq!(snapshot.table.rows.len(), 2);
    }

    /// Hashing the same table twice yields the same table hash.
    #[test]
    fn test_hash_consistency() {
        let table = Table {
            headers: owned(&["A", "B"]),
            rows: vec![owned(&["1", "2"])],
            primary_key: None,
        };

        let first = TableHashes::compute(&table);
        let second = TableHashes::compute(&table);

        assert_eq!(first.table_hash, second.table_hash);
    }
}
405
406// ============================================================================
407// USAGE NOTES
408// ============================================================================
409
410/*
411Example usage:
412
413// 1. Load a CSV
414let table = Table::from_csv(Path::new("sales.csv"))?;
415
416// 2. Create a snapshot
417let mut snapshot = Snapshot::new(table, Some("Initial import".to_string()));
418
419// 3. Add dependencies if needed
420snapshot.add_dependency(
421    "customers.csv".to_string(),
422    Some(PathBuf::from("../data/customers.csv")),
423    "abc123...".to_string()
424);
425
426// 4. Save snapshot
427snapshot.save(Path::new("snapshots/sales_001.toml"))?;
428
429// 5. Later: verify integrity
430let loaded = Snapshot::load(Path::new("snapshots/sales_001.toml"))?;
431assert!(loaded.verify());
432
433// 6. Compare two snapshots
434let old_snapshot = Snapshot::load(Path::new("snapshots/sales_001.toml"))?;
435let new_snapshot = Snapshot::load(Path::new("snapshots/sales_002.toml"))?;
436let diff = SnapshotDiff::compute(&old_snapshot, &new_snapshot);
437diff.save(Path::new("diffs/sales_001_to_002.json"))?;
438
439*/