Skip to main content

reflex/pulse/
diff.rs

1//! Diff engine: cross-snapshot structural comparison
2//!
3//! Takes two snapshot databases and produces a deterministic delta of all
4//! structural changes: files added/removed/modified, dependency edges
5//! added/removed, hotspot shifts, cycle changes, and threshold alerts.
6
7use anyhow::{Context, Result};
8use rusqlite::Connection;
9use serde::{Deserialize, Serialize};
10use std::collections::HashSet;
11use std::path::Path;
12
13use crate::dependency::DependencyIndex;
14
/// A file that was added or removed
///
/// Rows are read from the `files` table of whichever snapshot contains the
/// file (the current snapshot for additions, the baseline for removals).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDelta {
    /// Path of the file as stored in the snapshot's `files` table.
    pub path: String,
    /// Language recorded for the file; `None` when the column is NULL.
    pub language: Option<String>,
    /// Line count recorded for the file (a NULL count is read as 0).
    pub line_count: usize,
}
22
/// A file that was modified between snapshots
///
/// A file is listed here when it exists in both snapshots under the same path
/// and the diff query sees a different line count or language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileModDelta {
    /// Path shared by the file in both snapshots.
    pub path: String,
    /// Language as recorded in the baseline snapshot; `None` when NULL.
    pub language: Option<String>,
    /// Line count in the baseline snapshot (a NULL count is read as 0).
    pub old_line_count: usize,
    /// Line count in the current snapshot (a NULL count is read as 0).
    pub new_line_count: usize,
}
31
/// A dependency edge change
///
/// Edges are identified by the paths of their endpoints rather than by file
/// IDs, which are resolved through each snapshot's own `files` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeDelta {
    /// Path of the file the edge originates from.
    pub source_path: String,
    /// Path of the file the edge points to.
    pub target_path: String,
    /// Value of the edge's `import_type` column, passed through unchanged.
    pub import_type: String,
}
39
/// A hotspot that changed fan-in
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotspotDelta {
    /// Path of the hotspot file.
    pub path: String,
    /// Fan-in in the baseline snapshot; 0 when the path was not among the
    /// baseline hotspots.
    pub old_fan_in: usize,
    /// Fan-in in the current snapshot.
    pub new_fan_in: usize,
}
47
/// Changes in disconnected components
///
/// Only the number of islands on each side is recorded, not their membership.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IslandDelta {
    /// Number of islands found in the baseline snapshot.
    pub old_count: usize,
    /// Number of islands found in the current snapshot.
    pub new_count: usize,
}
54
/// Module-level metrics change
///
/// Each `old_*` / `new_*` field is `None` when the corresponding value is NULL
/// in the diff query, which is the case when the module has no row in that
/// snapshot's `metrics` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleMetricsDelta {
    /// Module path used to match rows between the two snapshots.
    pub module_path: String,
    /// File count in the baseline snapshot.
    pub old_file_count: Option<usize>,
    /// File count in the current snapshot.
    pub new_file_count: Option<usize>,
    /// Total line count in the baseline snapshot.
    pub old_total_lines: Option<usize>,
    /// Total line count in the current snapshot.
    pub new_total_lines: Option<usize>,
}
64
/// A threshold alert triggered by a metric crossing a boundary
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThresholdAlert {
    /// How serious the alert is.
    pub severity: AlertSeverity,
    /// Machine-readable alert kind. This module emits `"fan_in"`,
    /// `"circular_dependency"`, `"module_size"`, and `"line_growth"`.
    pub category: String,
    /// Human-readable description including the offending value.
    pub message: String,
    /// File or module path the alert refers to. `None` for cycle alerts, and
    /// for fan-in alerts whose file ID could not be resolved to a path.
    pub path: Option<String>,
}
73
/// Severity level attached to a [`ThresholdAlert`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertSeverity {
    /// A threshold was reached; used for every alert category in this module.
    Warning,
    /// Used in this module only for fan-in at or above the critical threshold.
    Critical,
}
79
/// Complete diff between two snapshots
///
/// NOTE(review): `compute_diff` currently fills the `*_id` and `*_timestamp`
/// fields from the same `timestamp` metadata value, so each pair is identical.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotDiff {
    /// Identifier of the baseline snapshot (`"unknown"` if metadata is missing).
    pub baseline_id: String,
    /// Identifier of the current snapshot (`"unknown"` if metadata is missing).
    pub current_id: String,
    /// Timestamp metadata of the baseline snapshot.
    pub baseline_timestamp: String,
    /// Timestamp metadata of the current snapshot.
    pub current_timestamp: String,

    // File-level changes
    /// Files present only in the current snapshot, ordered by path.
    pub files_added: Vec<FileDelta>,
    /// Files present only in the baseline snapshot, ordered by path.
    pub files_removed: Vec<FileDelta>,
    /// Files present in both snapshots whose line count or language differs.
    pub files_modified: Vec<FileModDelta>,

    // Dependency graph changes
    /// Edges (by source/target path) present only in the current snapshot.
    pub edges_added: Vec<EdgeDelta>,
    /// Edges (by source/target path) present only in the baseline snapshot.
    pub edges_removed: Vec<EdgeDelta>,

    // Structural analysis deltas
    /// Hotspots whose fan-in differs between the snapshots.
    pub hotspot_changes: Vec<HotspotDelta>,
    /// Cycles found in the current snapshot but not in the baseline; each
    /// cycle is a sorted list of member paths.
    pub new_cycles: Vec<Vec<String>>,
    /// Cycles found in the baseline but not in the current snapshot; each
    /// cycle is a sorted list of member paths.
    pub resolved_cycles: Vec<Vec<String>>,
    /// Island counts before and after.
    pub island_changes: IslandDelta,

    // Module metrics
    /// Modules that were added, removed, or whose metrics changed.
    pub module_changes: Vec<ModuleMetricsDelta>,

    // Threshold alerts
    /// Alerts raised against the configured thresholds.
    pub threshold_alerts: Vec<ThresholdAlert>,

    // Summary stats
    /// Aggregate counts derived from the lists above.
    pub summary: DiffSummary,
}
112
/// Aggregate counts for a [`SnapshotDiff`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffSummary {
    /// Number of entries in `files_added`.
    pub files_added: usize,
    /// Number of entries in `files_removed`.
    pub files_removed: usize,
    /// Number of entries in `files_modified`.
    pub files_modified: usize,
    /// Number of entries in `edges_added`.
    pub edges_added: usize,
    /// Number of entries in `edges_removed`.
    pub edges_removed: usize,
    /// Lines in added files, minus lines in removed files, plus the per-file
    /// line delta of modified files. May be negative.
    pub net_line_change: i64,
}
122
123/// Compute the diff between two snapshot databases
124pub fn compute_diff(
125    baseline_path: &Path,
126    current_path: &Path,
127    thresholds: &super::config::ThresholdConfig,
128) -> Result<SnapshotDiff> {
129    // Open in-memory connection and attach both snapshots
130    let conn = Connection::open_in_memory().context("Failed to open in-memory database")?;
131
132    conn.execute(
133        "ATTACH DATABASE ?1 AS baseline",
134        [baseline_path.to_str().unwrap()],
135    )?;
136    conn.execute(
137        "ATTACH DATABASE ?1 AS current",
138        [current_path.to_str().unwrap()],
139    )?;
140
141    // Read metadata
142    let baseline_id = read_meta(&conn, "baseline", "timestamp")?;
143    let current_id = read_meta(&conn, "current", "timestamp")?;
144
145    // File diffs
146    let files_added = query_file_deltas(
147        &conn,
148        "SELECT c.path, c.language, COALESCE(c.line_count, 0)
149         FROM current.files c
150         LEFT JOIN baseline.files b ON c.path = b.path
151         WHERE b.path IS NULL
152         ORDER BY c.path",
153    )?;
154
155    let files_removed = query_file_deltas(
156        &conn,
157        "SELECT b.path, b.language, COALESCE(b.line_count, 0)
158         FROM baseline.files b
159         LEFT JOIN current.files c ON b.path = c.path
160         WHERE c.path IS NULL
161         ORDER BY b.path",
162    )?;
163
164    let files_modified = query_file_mod_deltas(
165        &conn,
166        "SELECT b.path, b.language, COALESCE(b.line_count, 0), COALESCE(c.line_count, 0)
167         FROM baseline.files b
168         JOIN current.files c ON b.path = c.path
169         WHERE b.line_count != c.line_count OR b.language != c.language
170         ORDER BY b.path",
171    )?;
172
173    // Edge diffs (compare by path, not by file ID)
174    let edges_added = query_edge_deltas(
175        &conn,
176        "SELECT sf.path, tf.path, ce.import_type
177         FROM current.dependency_edges ce
178         JOIN current.files sf ON ce.source_file_id = sf.id
179         JOIN current.files tf ON ce.target_file_id = tf.id
180         WHERE NOT EXISTS (
181             SELECT 1 FROM baseline.dependency_edges be
182             JOIN baseline.files bsf ON be.source_file_id = bsf.id
183             JOIN baseline.files btf ON be.target_file_id = btf.id
184             WHERE bsf.path = sf.path AND btf.path = tf.path
185         )
186         ORDER BY sf.path, tf.path",
187    )?;
188
189    let edges_removed = query_edge_deltas(
190        &conn,
191        "SELECT sf.path, tf.path, be.import_type
192         FROM baseline.dependency_edges be
193         JOIN baseline.files sf ON be.source_file_id = sf.id
194         JOIN baseline.files tf ON be.target_file_id = tf.id
195         WHERE NOT EXISTS (
196             SELECT 1 FROM current.dependency_edges ce
197             JOIN current.files csf ON ce.source_file_id = csf.id
198             JOIN current.files ctf ON ce.target_file_id = ctf.id
199             WHERE csf.path = sf.path AND ctf.path = tf.path
200         )
201         ORDER BY sf.path, tf.path",
202    )?;
203
204    // Module metric diffs
205    let module_changes = query_module_deltas(&conn)?;
206
207    // Hotspot analysis via DependencyIndex on each snapshot
208    let baseline_deps = DependencyIndex::from_db_path(baseline_path);
209    let current_deps = DependencyIndex::from_db_path(current_path);
210
211    let baseline_hotspots = baseline_deps.find_hotspots(None, 1).unwrap_or_default();
212    let current_hotspots = current_deps.find_hotspots(None, 1).unwrap_or_default();
213
214    let hotspot_changes = compute_hotspot_changes(
215        &baseline_deps,
216        &current_deps,
217        &baseline_hotspots,
218        &current_hotspots,
219    );
220
221    // Cycle analysis
222    let baseline_cycles = baseline_deps
223        .detect_circular_dependencies()
224        .unwrap_or_default();
225    let current_cycles = current_deps
226        .detect_circular_dependencies()
227        .unwrap_or_default();
228
229    let (new_cycles, resolved_cycles) = compute_cycle_changes(
230        &baseline_deps,
231        &current_deps,
232        &baseline_cycles,
233        &current_cycles,
234    );
235
236    // Island analysis
237    let baseline_islands = baseline_deps.find_islands().unwrap_or_default();
238    let current_islands = current_deps.find_islands().unwrap_or_default();
239
240    let island_changes = IslandDelta {
241        old_count: baseline_islands.len(),
242        new_count: current_islands.len(),
243    };
244
245    // Compute net line change
246    let net_line_change: i64 = files_added.iter().map(|f| f.line_count as i64).sum::<i64>()
247        - files_removed
248            .iter()
249            .map(|f| f.line_count as i64)
250            .sum::<i64>()
251        + files_modified
252            .iter()
253            .map(|f| f.new_line_count as i64 - f.old_line_count as i64)
254            .sum::<i64>();
255
256    // Threshold alerts
257    let threshold_alerts = compute_threshold_alerts(
258        thresholds,
259        &current_hotspots,
260        &current_deps,
261        &current_cycles,
262        &module_changes,
263        &files_modified,
264    );
265
266    let summary = DiffSummary {
267        files_added: files_added.len(),
268        files_removed: files_removed.len(),
269        files_modified: files_modified.len(),
270        edges_added: edges_added.len(),
271        edges_removed: edges_removed.len(),
272        net_line_change,
273    };
274
275    Ok(SnapshotDiff {
276        baseline_id: baseline_id.clone(),
277        current_id: current_id.clone(),
278        baseline_timestamp: baseline_id,
279        current_timestamp: current_id,
280        files_added,
281        files_removed,
282        files_modified,
283        edges_added,
284        edges_removed,
285        hotspot_changes,
286        new_cycles,
287        resolved_cycles,
288        island_changes,
289        module_changes,
290        threshold_alerts,
291        summary,
292    })
293}
294
295fn read_meta(conn: &Connection, db: &str, key: &str) -> Result<String> {
296    let sql = format!("SELECT value FROM {}.metadata WHERE key = ?1", db);
297    conn.query_row(&sql, [key], |row| row.get(0))
298        .unwrap_or_else(|_| "unknown".to_string())
299        .pipe(Ok)
300}
301
302// Helper trait for pipe syntax
303trait Pipe: Sized {
304    fn pipe<T>(self, f: impl FnOnce(Self) -> T) -> T {
305        f(self)
306    }
307}
308impl<T> Pipe for T {}
309
310fn query_file_deltas(conn: &Connection, sql: &str) -> Result<Vec<FileDelta>> {
311    let mut stmt = conn.prepare(sql)?;
312    let results = stmt
313        .query_map([], |row| {
314            Ok(FileDelta {
315                path: row.get(0)?,
316                language: row.get(1)?,
317                line_count: row.get::<_, i64>(2)? as usize,
318            })
319        })?
320        .collect::<Result<Vec<_>, _>>()?;
321    Ok(results)
322}
323
324fn query_file_mod_deltas(conn: &Connection, sql: &str) -> Result<Vec<FileModDelta>> {
325    let mut stmt = conn.prepare(sql)?;
326    let results = stmt
327        .query_map([], |row| {
328            Ok(FileModDelta {
329                path: row.get(0)?,
330                language: row.get(1)?,
331                old_line_count: row.get::<_, i64>(2)? as usize,
332                new_line_count: row.get::<_, i64>(3)? as usize,
333            })
334        })?
335        .collect::<Result<Vec<_>, _>>()?;
336    Ok(results)
337}
338
339fn query_edge_deltas(conn: &Connection, sql: &str) -> Result<Vec<EdgeDelta>> {
340    let mut stmt = conn.prepare(sql)?;
341    let results = stmt
342        .query_map([], |row| {
343            Ok(EdgeDelta {
344                source_path: row.get(0)?,
345                target_path: row.get(1)?,
346                import_type: row.get(2)?,
347            })
348        })?
349        .collect::<Result<Vec<_>, _>>()?;
350    Ok(results)
351}
352
353fn query_module_deltas(conn: &Connection) -> Result<Vec<ModuleMetricsDelta>> {
354    // SQLite doesn't support FULL OUTER JOIN directly, simulate with UNION
355    let sql = "SELECT module_path, old_file_count, new_file_count, old_total_lines, new_total_lines FROM (
356        SELECT
357            COALESCE(b.module_path, c.module_path) AS module_path,
358            b.file_count AS old_file_count, c.file_count AS new_file_count,
359            b.total_lines AS old_total_lines, c.total_lines AS new_total_lines
360        FROM baseline.metrics b
361        LEFT JOIN current.metrics c ON b.module_path = c.module_path
362        UNION ALL
363        SELECT
364            c.module_path,
365            NULL AS old_file_count, c.file_count AS new_file_count,
366            NULL AS old_total_lines, c.total_lines AS new_total_lines
367        FROM current.metrics c
368        LEFT JOIN baseline.metrics b ON c.module_path = b.module_path
369        WHERE b.module_path IS NULL
370    )
371    WHERE old_file_count IS NULL OR new_file_count IS NULL
372       OR old_file_count != new_file_count OR old_total_lines != new_total_lines
373    ORDER BY module_path";
374
375    let mut stmt = conn.prepare(sql)?;
376    let results = stmt
377        .query_map([], |row| {
378            Ok(ModuleMetricsDelta {
379                module_path: row.get(0)?,
380                old_file_count: row.get::<_, Option<i64>>(1)?.map(|v| v as usize),
381                new_file_count: row.get::<_, Option<i64>>(2)?.map(|v| v as usize),
382                old_total_lines: row.get::<_, Option<i64>>(3)?.map(|v| v as usize),
383                new_total_lines: row.get::<_, Option<i64>>(4)?.map(|v| v as usize),
384            })
385        })?
386        .collect::<Result<Vec<_>, _>>()?;
387    Ok(results)
388}
389
390fn compute_hotspot_changes(
391    baseline_deps: &DependencyIndex,
392    current_deps: &DependencyIndex,
393    baseline_hotspots: &[(i64, usize)],
394    current_hotspots: &[(i64, usize)],
395) -> Vec<HotspotDelta> {
396    let mut changes = Vec::new();
397
398    // Build path-based maps for comparison
399    let baseline_map: std::collections::HashMap<String, usize> = baseline_hotspots
400        .iter()
401        .filter_map(|(id, count)| {
402            baseline_deps
403                .get_file_paths(&[*id])
404                .ok()
405                .and_then(|paths| paths.get(id).map(|p| (p.clone(), *count)))
406        })
407        .collect();
408
409    let current_map: std::collections::HashMap<String, usize> = current_hotspots
410        .iter()
411        .filter_map(|(id, count)| {
412            current_deps
413                .get_file_paths(&[*id])
414                .ok()
415                .and_then(|paths| paths.get(id).map(|p| (p.clone(), *count)))
416        })
417        .collect();
418
419    // Find changes
420    for (path, &new_count) in &current_map {
421        let old_count = baseline_map.get(path).copied().unwrap_or(0);
422        if old_count != new_count {
423            changes.push(HotspotDelta {
424                path: path.clone(),
425                old_fan_in: old_count,
426                new_fan_in: new_count,
427            });
428        }
429    }
430
431    // Sort by fan-in change magnitude
432    changes.sort_by(|a, b| {
433        let a_delta = (a.new_fan_in as i64 - a.old_fan_in as i64).unsigned_abs();
434        let b_delta = (b.new_fan_in as i64 - b.old_fan_in as i64).unsigned_abs();
435        b_delta.cmp(&a_delta)
436    });
437
438    changes
439}
440
441fn compute_cycle_changes(
442    baseline_deps: &DependencyIndex,
443    current_deps: &DependencyIndex,
444    baseline_cycles: &[Vec<i64>],
445    current_cycles: &[Vec<i64>],
446) -> (Vec<Vec<String>>, Vec<Vec<String>>) {
447    // Convert cycles to path-based representation for comparison
448    let to_path_cycle = |deps: &DependencyIndex, cycle: &[i64]| -> Option<Vec<String>> {
449        let paths = deps.get_file_paths(cycle).ok()?;
450        let path_cycle: Vec<String> = cycle
451            .iter()
452            .filter_map(|id| paths.get(id).cloned())
453            .collect();
454        if path_cycle.len() == cycle.len() {
455            Some(path_cycle)
456        } else {
457            None
458        }
459    };
460
461    let baseline_set: HashSet<Vec<String>> = baseline_cycles
462        .iter()
463        .filter_map(|c| to_path_cycle(baseline_deps, c))
464        .map(|mut c| {
465            c.sort();
466            c
467        })
468        .collect();
469
470    let current_set: HashSet<Vec<String>> = current_cycles
471        .iter()
472        .filter_map(|c| to_path_cycle(current_deps, c))
473        .map(|mut c| {
474            c.sort();
475            c
476        })
477        .collect();
478
479    let new_cycles: Vec<Vec<String>> = current_set.difference(&baseline_set).cloned().collect();
480    let resolved_cycles: Vec<Vec<String>> =
481        baseline_set.difference(&current_set).cloned().collect();
482
483    (new_cycles, resolved_cycles)
484}
485
486fn compute_threshold_alerts(
487    thresholds: &super::config::ThresholdConfig,
488    current_hotspots: &[(i64, usize)],
489    current_deps: &DependencyIndex,
490    current_cycles: &[Vec<i64>],
491    module_changes: &[ModuleMetricsDelta],
492    files_modified: &[FileModDelta],
493) -> Vec<ThresholdAlert> {
494    let mut alerts = Vec::new();
495
496    // Fan-in alerts
497    for &(file_id, count) in current_hotspots {
498        if count >= thresholds.fan_in_critical {
499            let path = current_deps
500                .get_file_paths(&[file_id])
501                .ok()
502                .and_then(|paths| paths.get(&file_id).cloned());
503            alerts.push(ThresholdAlert {
504                severity: AlertSeverity::Critical,
505                category: "fan_in".to_string(),
506                message: format!(
507                    "Critical fan-in: {} imports ({} threshold)",
508                    count, thresholds.fan_in_critical
509                ),
510                path,
511            });
512        } else if count >= thresholds.fan_in_warning {
513            let path = current_deps
514                .get_file_paths(&[file_id])
515                .ok()
516                .and_then(|paths| paths.get(&file_id).cloned());
517            alerts.push(ThresholdAlert {
518                severity: AlertSeverity::Warning,
519                category: "fan_in".to_string(),
520                message: format!(
521                    "High fan-in: {} imports ({} threshold)",
522                    count, thresholds.fan_in_warning
523                ),
524                path,
525            });
526        }
527    }
528
529    // Cycle length alerts
530    for cycle in current_cycles {
531        if cycle.len() >= thresholds.cycle_length {
532            alerts.push(ThresholdAlert {
533                severity: AlertSeverity::Warning,
534                category: "circular_dependency".to_string(),
535                message: format!("Circular dependency chain of length {}", cycle.len()),
536                path: None,
537            });
538        }
539    }
540
541    // Module size alerts
542    for change in module_changes {
543        if let Some(count) = change.new_file_count {
544            if count >= thresholds.module_file_count {
545                alerts.push(ThresholdAlert {
546                    severity: AlertSeverity::Warning,
547                    category: "module_size".to_string(),
548                    message: format!(
549                        "Module has {} files (threshold: {})",
550                        count, thresholds.module_file_count
551                    ),
552                    path: Some(change.module_path.clone()),
553                });
554            }
555        }
556    }
557
558    // Line count growth alerts
559    for file in files_modified {
560        if file.old_line_count > 0 {
561            let growth = file.new_line_count as f64 / file.old_line_count as f64;
562            if growth >= thresholds.line_count_growth {
563                alerts.push(ThresholdAlert {
564                    severity: AlertSeverity::Warning,
565                    category: "line_growth".to_string(),
566                    message: format!(
567                        "Line count grew {:.1}x ({} -> {})",
568                        growth, file.old_line_count, file.new_line_count
569                    ),
570                    path: Some(file.path.clone()),
571                });
572            }
573        }
574    }
575
576    alerts
577}