1use std::collections::HashMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use sha2::{Sha256, Digest};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct Snapshot {
18 pub id: String,
20 pub timestamp: DateTime<Utc>,
22 pub message: Option<String>,
24 pub table: Table,
26 pub hashes: TableHashes,
28 pub dependencies: Vec<Dependency>,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct Table {
35 pub headers: Vec<String>,
37 pub rows: Vec<Vec<String>>,
39 pub primary_key: Option<Vec<usize>>,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct TableHashes {
46 pub table_hash: String,
48 pub header_hashes: HashMap<String, String>,
50 pub row_hashes: Option<Vec<String>>,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct Dependency {
57 pub name: String,
59 pub path: Option<PathBuf>,
61 pub hash: String,
63}
64
65impl Snapshot {
70 pub fn new(table: Table, message: Option<String>) -> Self {
72 let hashes = TableHashes::compute(&table);
73 let id = format!("{}-{}",
74 Utc::now().timestamp(),
75 &hashes.table_hash[..8]
76 );
77
78 Self {
79 id,
80 timestamp: Utc::now(),
81 message,
82 table,
83 hashes,
84 dependencies: Vec::new(),
85 }
86 }
87
88 pub fn add_dependency(&mut self, name: String, path: Option<PathBuf>, hash: String) {
90 self.dependencies.push(Dependency { name, path, hash });
91 }
92
93 pub fn save(&self, output_path: &Path) -> Result<(), Box<dyn std::error::Error>> {
95 let toml_string = toml::to_string_pretty(self)?;
96 fs::write(output_path, toml_string)?;
97 Ok(())
98 }
99
100 pub fn load(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
102 let content = fs::read_to_string(path)?;
103 let snapshot: Snapshot = toml::from_str(&content)?;
104 Ok(snapshot)
105 }
106
107 pub fn verify(&self) -> bool {
109 let computed = TableHashes::compute(&self.table);
110 computed.table_hash == self.hashes.table_hash
111 }
112}
113
114impl TableHashes {
119 pub fn compute(table: &Table) -> Self {
121 let mut header_hashes = HashMap::new();
122
123 for (idx, header) in table.headers.iter().enumerate() {
125 let column_data: Vec<&str> = table.rows
126 .iter()
127 .map(|row| row.get(idx).map(|s| s.as_str()).unwrap_or(""))
128 .collect();
129
130 let hash = Self::hash_column(header, &column_data);
131 header_hashes.insert(header.clone(), hash);
132 }
133
134 let table_hash = Self::hash_table(&table.headers, &table.rows);
136
137 let row_hashes = Some(
139 table.rows
140 .iter()
141 .map(|row| Self::hash_row(row))
142 .collect()
143 );
144
145 Self {
146 table_hash,
147 header_hashes,
148 row_hashes,
149 }
150 }
151
152 fn hash_column(header: &str, data: &[&str]) -> String {
153 let mut hasher = Sha256::new();
154 hasher.update(header.as_bytes());
155 for value in data {
156 hasher.update(value.as_bytes());
157 }
158 format!("{:x}", hasher.finalize())
159 }
160
161 fn hash_row(row: &[String]) -> String {
162 let mut hasher = Sha256::new();
163 for cell in row {
164 hasher.update(cell.as_bytes());
165 }
166 format!("{:x}", hasher.finalize())
167 }
168
169 fn hash_table(headers: &[String], rows: &[Vec<String>]) -> String {
170 let mut hasher = Sha256::new();
171
172 for h in headers {
174 hasher.update(h.as_bytes());
175 }
176
177 for row in rows {
179 for cell in row {
180 hasher.update(cell.as_bytes());
181 }
182 }
183
184 format!("{:x}", hasher.finalize())
185 }
186}
187
188impl Table {
193 pub fn from_csv(path: &Path) -> Result<Self, Box<dyn std::error::Error>> {
195 let mut reader = csv::Reader::from_path(path)?;
196
197 let headers: Vec<String> = reader
199 .headers()?
200 .iter()
201 .map(|h| h.trim().to_string())
202 .collect();
203
204 let mut rows = Vec::new();
206 for result in reader.records() {
207 let record = result?;
208 let row: Vec<String> = record
209 .iter()
210 .map(|cell| cell.trim().to_string())
211 .collect();
212 rows.push(row);
213 }
214
215 Ok(Self {
216 headers,
217 rows,
218 primary_key: None,
219 })
220 }
221
222 pub fn set_primary_key(&mut self, column_indices: Vec<usize>) {
224 self.primary_key = Some(column_indices);
225 }
226
227 pub fn get_row_key(&self, row_idx: usize) -> Option<Vec<String>> {
229 let pk_indices = self.primary_key.as_ref()?;
230 let row = self.rows.get(row_idx)?;
231
232 Some(
233 pk_indices
234 .iter()
235 .filter_map(|&idx| row.get(idx).cloned())
236 .collect()
237 )
238 }
239}
240
241#[derive(Debug, Serialize, Deserialize)]
247pub struct SnapshotDiff {
248 pub from_id: String,
249 pub to_id: String,
250 pub summary: DiffSummary,
251 pub changes: Vec<Change>,
252}
253
254#[derive(Debug, Serialize, Deserialize)]
255pub struct DiffSummary {
256 pub rows_added: usize,
257 pub rows_removed: usize,
258 pub rows_modified: usize,
259 pub columns_added: usize,
260 pub columns_removed: usize,
261}
262
263#[derive(Debug, Serialize, Deserialize)]
264pub enum Change {
265 RowAdded { index: usize, data: Vec<String> },
266 RowRemoved { index: usize, data: Vec<String> },
267 CellChanged { row: usize, col: usize, old: String, new: String },
268 ColumnAdded { name: String, index: usize },
269 ColumnRemoved { name: String, index: usize },
270}
271
272impl SnapshotDiff {
273 pub fn compute(from: &Snapshot, to: &Snapshot) -> Self {
275 let mut changes = Vec::new();
276 let mut summary = DiffSummary {
277 rows_added: 0,
278 rows_removed: 0,
279 rows_modified: 0,
280 columns_added: 0,
281 columns_removed: 0,
282 };
283
284 let from_headers: std::collections::HashSet<_> = from.table.headers.iter().collect();
286 let to_headers: std::collections::HashSet<_> = to.table.headers.iter().collect();
287
288 for (idx, header) in to.table.headers.iter().enumerate() {
289 if !from_headers.contains(header) {
290 changes.push(Change::ColumnAdded {
291 name: header.clone(),
292 index: idx
293 });
294 summary.columns_added += 1;
295 }
296 }
297
298 for (idx, header) in from.table.headers.iter().enumerate() {
299 if !to_headers.contains(header) {
300 changes.push(Change::ColumnRemoved {
301 name: header.clone(),
302 index: idx
303 });
304 summary.columns_removed += 1;
305 }
306 }
307
308 let max_rows = from.table.rows.len().max(to.table.rows.len());
310
311 for i in 0..max_rows {
312 match (from.table.rows.get(i), to.table.rows.get(i)) {
313 (None, Some(row)) => {
314 changes.push(Change::RowAdded {
315 index: i,
316 data: row.clone()
317 });
318 summary.rows_added += 1;
319 }
320 (Some(row), None) => {
321 changes.push(Change::RowRemoved {
322 index: i,
323 data: row.clone()
324 });
325 summary.rows_removed += 1;
326 }
327 (Some(from_row), Some(to_row)) => {
328 if from_row != to_row {
329 summary.rows_modified += 1;
330 for (col, (old, new)) in from_row.iter().zip(to_row.iter()).enumerate() {
332 if old != new {
333 changes.push(Change::CellChanged {
334 row: i,
335 col,
336 old: old.clone(),
337 new: new.clone(),
338 });
339 }
340 }
341 }
342 }
343 (None, None) => unreachable!(),
344 }
345 }
346
347 Self {
348 from_id: from.id.clone(),
349 to_id: to.id.clone(),
350 summary,
351 changes,
352 }
353 }
354
355 pub fn save(&self, path: &Path) -> Result<(), Box<dyn std::error::Error>> {
357 let json = serde_json::to_string_pretty(self)?;
358 fs::write(path, json)?;
359 Ok(())
360 }
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns a slice of string literals into an owned header/row vector.
    fn owned(cells: &[&str]) -> Vec<String> {
        cells.iter().map(|&cell| cell.to_owned()).collect()
    }

    /// A freshly created snapshot verifies and keeps the table's shape.
    #[test]
    fn test_snapshot_creation() {
        let table = Table {
            headers: owned(&["ID", "Name", "Amount"]),
            rows: vec![
                owned(&["1", "Alice", "100"]),
                owned(&["2", "Bob", "200"]),
            ],
            primary_key: Some(vec![0]),
        };

        let snapshot = Snapshot::new(table, Some(String::from("Initial snapshot")));

        assert!(snapshot.verify());
        assert_eq!(snapshot.table.headers.len(), 3);
        assert_eq!(snapshot.table.rows.len(), 2);
    }

    /// Hashing the same table twice yields the same table hash.
    #[test]
    fn test_hash_consistency() {
        let table = Table {
            headers: owned(&["A", "B"]),
            rows: vec![owned(&["1", "2"])],
            primary_key: None,
        };

        let first = TableHashes::compute(&table);
        let second = TableHashes::compute(&table);

        assert_eq!(first.table_hash, second.table_hash);
    }
}
405
406