Skip to main content

task_graph_mcp/export/
diff.rs

1//! Diff functionality for comparing snapshots and databases.
2//!
3//! This module provides:
4//! - Comparison between a snapshot file and the current database state
5//! - Comparison between two snapshot files
6//! - Human-readable diff output suitable for review
7
8use super::{EXPORTED_TABLES, Snapshot, get_table_primary_key};
9use crate::db::Database;
10use anyhow::Result;
11use serde::{Deserialize, Serialize};
12use serde_json::Value;
13use std::collections::{BTreeMap, HashSet};
14use std::fmt;
15
16/// A single field change within a record.
17#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18pub struct FieldChange {
19    pub field: String,
20    pub old_value: Value,
21    pub new_value: Value,
22}
23
24/// A modified record showing which fields changed.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct ModifiedRecord {
27    /// The primary key value(s) identifying this record
28    pub key: Value,
29    /// List of field changes
30    pub changes: Vec<FieldChange>,
31}
32
33/// Diff results for a single table.
34#[derive(Debug, Clone, Serialize, Deserialize, Default)]
35pub struct TableDiff {
36    /// Records present in target but not in source (added)
37    pub added: Vec<Value>,
38    /// Records present in source but not in target (removed)
39    pub removed: Vec<Value>,
40    /// Records present in both but with different values
41    pub modified: Vec<ModifiedRecord>,
42}
43
44impl TableDiff {
45    /// Check if there are any changes in this table.
46    pub fn is_empty(&self) -> bool {
47        self.added.is_empty() && self.removed.is_empty() && self.modified.is_empty()
48    }
49
50    /// Total number of changes.
51    pub fn change_count(&self) -> usize {
52        self.added.len() + self.removed.len() + self.modified.len()
53    }
54}
55
56/// Complete diff between two data sources.
57#[derive(Debug, Clone, Serialize, Deserialize, Default)]
58pub struct SnapshotDiff {
59    /// Source description (e.g., "snapshot.json" or "database")
60    pub source_label: String,
61    /// Target description
62    pub target_label: String,
63    /// Diff results per table
64    pub tables: BTreeMap<String, TableDiff>,
65}
66
67impl SnapshotDiff {
68    /// Check if there are any changes.
69    pub fn is_empty(&self) -> bool {
70        self.tables.values().all(|t| t.is_empty())
71    }
72
73    /// Total number of changes across all tables.
74    pub fn total_changes(&self) -> usize {
75        self.tables.values().map(|t| t.change_count()).sum()
76    }
77
78    /// Get a summary of changes per table.
79    pub fn summary(&self) -> Vec<(String, usize, usize, usize)> {
80        self.tables
81            .iter()
82            .filter(|(_, diff)| !diff.is_empty())
83            .map(|(name, diff)| {
84                (
85                    name.clone(),
86                    diff.added.len(),
87                    diff.removed.len(),
88                    diff.modified.len(),
89                )
90            })
91            .collect()
92    }
93}
94
95impl fmt::Display for SnapshotDiff {
96    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
97        if self.is_empty() {
98            writeln!(f, "No differences found.")?;
99            return Ok(());
100        }
101
102        writeln!(f, "Diff: {} -> {}", self.source_label, self.target_label)?;
103        writeln!(f, "{}", "=".repeat(60))?;
104
105        for (table_name, diff) in &self.tables {
106            if diff.is_empty() {
107                continue;
108            }
109
110            writeln!(f)?;
111            writeln!(f, "Table: {}", table_name)?;
112            writeln!(f, "{}", "-".repeat(40))?;
113
114            if !diff.added.is_empty() {
115                writeln!(f, "  Added ({}):", diff.added.len())?;
116                for record in &diff.added {
117                    writeln!(f, "    + {}", format_record_brief(record))?;
118                }
119            }
120
121            if !diff.removed.is_empty() {
122                writeln!(f, "  Removed ({}):", diff.removed.len())?;
123                for record in &diff.removed {
124                    writeln!(f, "    - {}", format_record_brief(record))?;
125                }
126            }
127
128            if !diff.modified.is_empty() {
129                writeln!(f, "  Modified ({}):", diff.modified.len())?;
130                for modified in &diff.modified {
131                    writeln!(f, "    ~ {}", modified.key)?;
132                    for change in &modified.changes {
133                        writeln!(
134                            f,
135                            "        {}: {} -> {}",
136                            change.field, change.old_value, change.new_value
137                        )?;
138                    }
139                }
140            }
141        }
142
143        writeln!(f)?;
144        writeln!(f, "Summary: {} total changes", self.total_changes())?;
145
146        Ok(())
147    }
148}
149
150/// Format a record for brief display (showing key fields).
151fn format_record_brief(record: &Value) -> String {
152    if let Some(obj) = record.as_object() {
153        // Try to show id and title if available
154        let id = obj.get("id").map(|v| v.to_string()).unwrap_or_default();
155        let title = obj
156            .get("title")
157            .and_then(|v| v.as_str())
158            .map(|s| {
159                if s.len() > 40 {
160                    format!("{}...", &s[..37])
161                } else {
162                    s.to_string()
163                }
164            })
165            .unwrap_or_default();
166
167        if !title.is_empty() {
168            format!("{} ({})", id, title)
169        } else {
170            id
171        }
172    } else {
173        record.to_string()
174    }
175}
176
177/// Extract the primary key value from a record.
178fn extract_key(record: &Value, key_columns: &[&str]) -> Value {
179    if key_columns.len() == 1 {
180        record.get(key_columns[0]).cloned().unwrap_or(Value::Null)
181    } else {
182        // Composite key - return as array
183        Value::Array(
184            key_columns
185                .iter()
186                .map(|col| record.get(*col).cloned().unwrap_or(Value::Null))
187                .collect(),
188        )
189    }
190}
191
192/// Create a string key for hash map lookups.
193fn key_to_string(key: &Value) -> String {
194    match key {
195        Value::Array(arr) => arr
196            .iter()
197            .map(|v| v.to_string())
198            .collect::<Vec<_>>()
199            .join("|"),
200        _ => key.to_string(),
201    }
202}
203
204/// Compare two values, ignoring floating point precision issues.
205fn values_equal(a: &Value, b: &Value) -> bool {
206    match (a, b) {
207        (Value::Number(na), Value::Number(nb)) => {
208            // Compare numbers with tolerance for floats
209            if let (Some(fa), Some(fb)) = (na.as_f64(), nb.as_f64()) {
210                (fa - fb).abs() < 1e-10
211            } else {
212                na == nb
213            }
214        }
215        _ => a == b,
216    }
217}
218
219/// Compare two records and return field differences.
220fn diff_records(source: &Value, target: &Value, key_columns: &[&str]) -> Vec<FieldChange> {
221    let mut changes = Vec::new();
222
223    let source_obj = source.as_object();
224    let target_obj = target.as_object();
225
226    if let (Some(src), Some(tgt)) = (source_obj, target_obj) {
227        // Get all field names from both records
228        let mut all_fields: HashSet<&str> = src.keys().map(|s| s.as_str()).collect();
229        all_fields.extend(tgt.keys().map(|s| s.as_str()));
230
231        for field in all_fields {
232            // Skip primary key columns
233            if key_columns.contains(&field) {
234                continue;
235            }
236
237            let src_val = src.get(field).unwrap_or(&Value::Null);
238            let tgt_val = tgt.get(field).unwrap_or(&Value::Null);
239
240            if !values_equal(src_val, tgt_val) {
241                changes.push(FieldChange {
242                    field: field.to_string(),
243                    old_value: src_val.clone(),
244                    new_value: tgt_val.clone(),
245                });
246            }
247        }
248    }
249
250    changes
251}
252
253/// Diff a single table's data.
254fn diff_table(source_rows: &[Value], target_rows: &[Value], key_columns: &[&str]) -> TableDiff {
255    // Build lookup maps by key
256    let source_by_key: BTreeMap<String, &Value> = source_rows
257        .iter()
258        .map(|row| (key_to_string(&extract_key(row, key_columns)), row))
259        .collect();
260
261    let target_by_key: BTreeMap<String, &Value> = target_rows
262        .iter()
263        .map(|row| (key_to_string(&extract_key(row, key_columns)), row))
264        .collect();
265
266    let mut diff = TableDiff::default();
267
268    // Find added records (in target but not in source)
269    for (key, row) in &target_by_key {
270        if !source_by_key.contains_key(key) {
271            diff.added.push((*row).clone());
272        }
273    }
274
275    // Find removed records (in source but not in target)
276    for (key, row) in &source_by_key {
277        if !target_by_key.contains_key(key) {
278            diff.removed.push((*row).clone());
279        }
280    }
281
282    // Find modified records (present in both but different)
283    for (key, source_row) in &source_by_key {
284        if let Some(target_row) = target_by_key.get(key) {
285            let changes = diff_records(source_row, target_row, key_columns);
286            if !changes.is_empty() {
287                diff.modified.push(ModifiedRecord {
288                    key: extract_key(source_row, key_columns),
289                    changes,
290                });
291            }
292        }
293    }
294
295    diff
296}
297
298/// Compare a snapshot against the current database state.
299///
300/// Returns a diff where:
301/// - "added" = records in DB but not in snapshot
302/// - "removed" = records in snapshot but not in DB
303/// - "modified" = records with same key but different values
304pub fn diff_snapshot_vs_database(snapshot: &Snapshot, db: &Database) -> Result<SnapshotDiff> {
305    let mut result = SnapshotDiff {
306        source_label: "snapshot".to_string(),
307        target_label: "database".to_string(),
308        tables: BTreeMap::new(),
309    };
310
311    // Get tables to compare
312    let tables: Vec<&str> = EXPORTED_TABLES
313        .iter()
314        .filter(|t| snapshot.tables.contains_key(**t))
315        .copied()
316        .collect();
317
318    let empty_vec: Vec<Value> = Vec::new();
319    for table_name in tables {
320        let key_columns = get_table_primary_key(table_name);
321        let snapshot_rows = snapshot.get_table(table_name).unwrap_or(&empty_vec);
322
323        // Query database for current state
324        let db_rows = query_table_as_json(db, table_name)?;
325
326        let table_diff = diff_table(snapshot_rows, &db_rows, key_columns);
327
328        if !table_diff.is_empty() {
329            result.tables.insert(table_name.to_string(), table_diff);
330        }
331    }
332
333    Ok(result)
334}
335
336/// Compare two snapshots.
337///
338/// Returns a diff where:
339/// - "added" = records in target but not in source
340/// - "removed" = records in source but not in target
341/// - "modified" = records with same key but different values
342pub fn diff_snapshots(source: &Snapshot, target: &Snapshot) -> SnapshotDiff {
343    let mut result = SnapshotDiff {
344        source_label: "source".to_string(),
345        target_label: "target".to_string(),
346        tables: BTreeMap::new(),
347    };
348
349    // Get all tables present in either snapshot
350    let mut all_tables: HashSet<&str> = source.tables.keys().map(|s| s.as_str()).collect();
351    all_tables.extend(target.tables.keys().map(|s| s.as_str()));
352
353    for table_name in all_tables {
354        let key_columns = get_table_primary_key(table_name);
355        let source_rows = source
356            .get_table(table_name)
357            .map(|v| v.as_slice())
358            .unwrap_or(&[]);
359        let target_rows = target
360            .get_table(table_name)
361            .map(|v| v.as_slice())
362            .unwrap_or(&[]);
363
364        let table_diff = diff_table(source_rows, target_rows, key_columns);
365
366        if !table_diff.is_empty() {
367            result.tables.insert(table_name.to_string(), table_diff);
368        }
369    }
370
371    result
372}
373
374/// Query a table and return rows as JSON values.
375///
376/// This is a generic query that returns all columns as a JSON object per row.
377fn query_table_as_json(db: &Database, table_name: &str) -> Result<Vec<Value>> {
378    use super::get_table_ordering;
379
380    let ordering = get_table_ordering(table_name);
381    let query = format!("SELECT * FROM {} {}", table_name, ordering);
382
383    db.with_conn(|conn| {
384        let mut stmt = conn.prepare(&query)?;
385        let column_names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
386
387        let rows: Vec<Value> = stmt
388            .query_map([], |row| {
389                let mut obj = serde_json::Map::new();
390                for (i, col_name) in column_names.iter().enumerate() {
391                    let value = row_value_to_json(row, i)?;
392                    obj.insert(col_name.clone(), value);
393                }
394                Ok(Value::Object(obj))
395            })?
396            .filter_map(|r| r.ok())
397            .collect();
398
399        Ok(rows)
400    })
401}
402
403/// Convert a SQLite row value to JSON.
404fn row_value_to_json(row: &rusqlite::Row, idx: usize) -> rusqlite::Result<Value> {
405    use rusqlite::types::ValueRef;
406
407    match row.get_ref(idx)? {
408        ValueRef::Null => Ok(Value::Null),
409        ValueRef::Integer(i) => Ok(Value::Number(i.into())),
410        ValueRef::Real(f) => Ok(serde_json::Number::from_f64(f)
411            .map(Value::Number)
412            .unwrap_or(Value::Null)),
413        ValueRef::Text(s) => {
414            let text = String::from_utf8_lossy(s).to_string();
415            Ok(Value::String(text))
416        }
417        ValueRef::Blob(b) => {
418            // Encode blob as base64
419            use base64::{Engine, engine::general_purpose::STANDARD};
420            Ok(Value::String(STANDARD.encode(b)))
421        }
422    }
423}
424
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A single key column yields the column's value itself.
    #[test]
    fn test_extract_key_single() {
        let row = json!({"id": "task-1", "title": "Test"});

        assert_eq!(extract_key(&row, &["id"]), json!("task-1"));
    }

    /// Several key columns yield an array in column order.
    #[test]
    fn test_extract_key_composite() {
        let columns = ["from_task_id", "to_task_id", "dep_type"];
        let row = json!({
            "from_task_id": "a",
            "to_task_id": "b",
            "dep_type": "blocks"
        });

        assert_eq!(extract_key(&row, &columns), json!(["a", "b", "blocks"]));
    }

    /// Only the field that actually differs is reported.
    #[test]
    fn test_diff_records() {
        let before = json!({
            "id": "task-1",
            "title": "Old Title",
            "status": "pending"
        });
        let after = json!({
            "id": "task-1",
            "title": "New Title",
            "status": "pending"
        });

        let changes = diff_records(&before, &after, &["id"]);

        assert_eq!(changes.len(), 1);
        let change = &changes[0];
        assert_eq!(change.field, "title");
        assert_eq!(change.old_value, json!("Old Title"));
        assert_eq!(change.new_value, json!("New Title"));
    }

    /// One row each is classified as added, removed, and modified.
    #[test]
    fn test_diff_table() {
        let before = vec![
            json!({"id": "1", "title": "Keep"}),
            json!({"id": "2", "title": "Remove"}),
            json!({"id": "3", "title": "Old"}),
        ];
        let after = vec![
            json!({"id": "1", "title": "Keep"}),
            json!({"id": "3", "title": "New"}),
            json!({"id": "4", "title": "Added"}),
        ];

        let result = diff_table(&before, &after, &["id"]);

        assert_eq!(result.added.len(), 1);
        assert_eq!(result.removed.len(), 1);
        assert_eq!(result.modified.len(), 1);

        assert_eq!(result.added[0]["id"], json!("4"));
        assert_eq!(result.removed[0]["id"], json!("2"));
        assert_eq!(result.modified[0].key, json!("3"));
    }

    /// Snapshot-level diff reports per-table changes.
    #[test]
    fn test_diff_snapshots() {
        let mut before = Snapshot::new();
        before.tables.insert(
            "tasks".to_string(),
            vec![
                json!({"id": "1", "title": "Task 1"}),
                json!({"id": "2", "title": "Task 2"}),
            ],
        );

        let mut after = Snapshot::new();
        after.tables.insert(
            "tasks".to_string(),
            vec![
                json!({"id": "1", "title": "Task 1 Updated"}),
                json!({"id": "3", "title": "Task 3"}),
            ],
        );

        let result = diff_snapshots(&before, &after);

        assert!(!result.is_empty());
        let tasks = result.tables.get("tasks").unwrap();
        assert_eq!(tasks.added.len(), 1);
        assert_eq!(tasks.removed.len(), 1);
        assert_eq!(tasks.modified.len(), 1);
    }

    /// Equal values compare equal; different values do not.
    #[test]
    fn test_values_equal() {
        assert!(values_equal(&json!(1), &json!(1)));
        assert!(values_equal(&json!(1.0), &json!(1.0)));
        assert!(values_equal(&json!("a"), &json!("a")));

        assert!(!values_equal(&json!(1), &json!(2)));
        assert!(!values_equal(&json!("a"), &json!("b")));
    }

    /// The text report names both sides, the table, and the added count.
    #[test]
    fn test_snapshot_diff_display() {
        let mut tables = BTreeMap::new();
        tables.insert(
            "tasks".to_string(),
            TableDiff {
                added: vec![json!({"id": "new-task", "title": "New Task"})],
                removed: vec![],
                modified: vec![],
            },
        );
        let report = SnapshotDiff {
            source_label: "old.json".to_string(),
            target_label: "new.json".to_string(),
            tables,
        };

        let rendered = report.to_string();

        assert!(rendered.contains("old.json -> new.json"));
        assert!(rendered.contains("Table: tasks"));
        assert!(rendered.contains("Added (1)"));
    }
}