Skip to main content

reflex/pulse/
diff.rs

1//! Diff engine: cross-snapshot structural comparison
2//!
3//! Takes two snapshot databases and produces a deterministic delta of all
4//! structural changes: files added/removed/modified, dependency edges
5//! added/removed, hotspot shifts, cycle changes, and threshold alerts.
6
7use anyhow::{Context, Result};
8use rusqlite::Connection;
9use serde::{Deserialize, Serialize};
10use std::collections::HashSet;
11use std::path::Path;
12
13use crate::dependency::DependencyIndex;
14
/// A file that exists in only one of the two snapshots (added or removed).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDelta {
    /// Path of the file as stored in the snapshot's `files` table.
    pub path: String,
    /// Language recorded for the file; `None` when the snapshot stores NULL.
    pub language: Option<String>,
    /// Line count recorded for the file (the diff queries substitute 0 for NULL).
    pub line_count: usize,
}
22
/// A file that is present in both snapshots but whose recorded data differs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileModDelta {
    /// Path of the file (identical in both snapshots).
    pub path: String,
    /// Language as recorded in the baseline snapshot; `None` when NULL.
    pub language: Option<String>,
    /// Line count in the baseline snapshot (0 when the stored value is NULL).
    pub old_line_count: usize,
    /// Line count in the current snapshot (0 when the stored value is NULL).
    pub new_line_count: usize,
}
31
/// A dependency edge that was added or removed between snapshots.
///
/// Edges are identified by the paths of their endpoints rather than by
/// snapshot-local file IDs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeDelta {
    /// Path of the importing file.
    pub source_path: String,
    /// Path of the imported file.
    pub target_path: String,
    /// Value of the edge's `import_type` column in the snapshot it was read from.
    pub import_type: String,
}
39
/// A file whose fan-in (hotspot count) differs between the two snapshots.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HotspotDelta {
    /// Path of the file.
    pub path: String,
    /// Fan-in in the baseline snapshot; 0 when the file was not among the
    /// baseline hotspots.
    pub old_fan_in: usize,
    /// Fan-in in the current snapshot.
    pub new_fan_in: usize,
}
47
/// Change in the number of disconnected components ("islands") of the
/// dependency graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IslandDelta {
    /// Number of islands found in the baseline snapshot.
    pub old_count: usize,
    /// Number of islands found in the current snapshot.
    pub new_count: usize,
}
54
/// Module-level metrics change.
///
/// The `old_*` fields come from the baseline snapshot's `metrics` table and
/// the `new_*` fields from the current one. A field is `None` when that
/// snapshot has no row for the module (or the stored value is NULL).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleMetricsDelta {
    /// Path identifying the module.
    pub module_path: String,
    /// File count in the baseline snapshot.
    pub old_file_count: Option<usize>,
    /// File count in the current snapshot.
    pub new_file_count: Option<usize>,
    /// Total line count in the baseline snapshot.
    pub old_total_lines: Option<usize>,
    /// Total line count in the current snapshot.
    pub new_total_lines: Option<usize>,
}
64
/// A threshold alert triggered by a metric crossing a configured boundary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThresholdAlert {
    /// How serious the alert is.
    pub severity: AlertSeverity,
    /// Machine-readable alert kind. This module emits `"fan_in"`,
    /// `"circular_dependency"`, `"module_size"` and `"line_growth"`.
    pub category: String,
    /// Human-readable description including the measured value.
    pub message: String,
    /// File or module path the alert refers to; `None` when the alert is not
    /// tied to a single path (e.g. circular-dependency alerts) or the path
    /// could not be resolved.
    pub path: Option<String>,
}
73
/// Severity level attached to a [`ThresholdAlert`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertSeverity {
    /// A warning-level threshold was reached.
    Warning,
    /// A critical-level threshold was reached (in this module only fan-in
    /// has a critical level).
    Critical,
}
79
/// Complete diff between two snapshots, as produced by `compute_diff`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotDiff {
    /// Identifier of the baseline snapshot. `compute_diff` fills this from
    /// the snapshot's `timestamp` metadata value, so it currently equals
    /// `baseline_timestamp`.
    pub baseline_id: String,
    /// Identifier of the current snapshot; currently equals `current_timestamp`.
    pub current_id: String,
    /// `timestamp` metadata value of the baseline snapshot, or `"unknown"`
    /// when it cannot be read.
    pub baseline_timestamp: String,
    /// `timestamp` metadata value of the current snapshot, or `"unknown"`
    /// when it cannot be read.
    pub current_timestamp: String,

    // File-level changes
    /// Files present only in the current snapshot, ordered by path.
    pub files_added: Vec<FileDelta>,
    /// Files present only in the baseline snapshot, ordered by path.
    pub files_removed: Vec<FileDelta>,
    /// Files present in both snapshots whose line count or language differs,
    /// ordered by path.
    pub files_modified: Vec<FileModDelta>,

    // Dependency graph changes
    /// Edges (by source/target path) present only in the current snapshot.
    pub edges_added: Vec<EdgeDelta>,
    /// Edges (by source/target path) present only in the baseline snapshot.
    pub edges_removed: Vec<EdgeDelta>,

    // Structural analysis deltas
    /// Files whose fan-in changed.
    pub hotspot_changes: Vec<HotspotDelta>,
    /// Cycles found only in the current snapshot. Each cycle is a list of
    /// file paths sorted alphabetically (not in traversal order).
    pub new_cycles: Vec<Vec<String>>,
    /// Cycles found only in the baseline snapshot, in the same representation.
    pub resolved_cycles: Vec<Vec<String>>,
    /// Island (disconnected component) counts before and after.
    pub island_changes: IslandDelta,

    // Module metrics
    /// Modules whose metrics differ or that exist in only one snapshot.
    pub module_changes: Vec<ModuleMetricsDelta>,

    // Threshold alerts
    /// Alerts raised against the configured thresholds.
    pub threshold_alerts: Vec<ThresholdAlert>,

    // Summary stats
    /// Aggregate counts derived from the vectors above.
    pub summary: DiffSummary,
}
112
/// Aggregate counts summarising a [`SnapshotDiff`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffSummary {
    /// Number of entries in `SnapshotDiff::files_added`.
    pub files_added: usize,
    /// Number of entries in `SnapshotDiff::files_removed`.
    pub files_removed: usize,
    /// Number of entries in `SnapshotDiff::files_modified`.
    pub files_modified: usize,
    /// Number of entries in `SnapshotDiff::edges_added`.
    pub edges_added: usize,
    /// Number of entries in `SnapshotDiff::edges_removed`.
    pub edges_removed: usize,
    /// Lines in added files, minus lines in removed files, plus the per-file
    /// line-count differences of modified files.
    pub net_line_change: i64,
}
122
123/// Compute the diff between two snapshot databases
124pub fn compute_diff(
125    baseline_path: &Path,
126    current_path: &Path,
127    thresholds: &super::config::ThresholdConfig,
128) -> Result<SnapshotDiff> {
129    // Open in-memory connection and attach both snapshots
130    let conn = Connection::open_in_memory()
131        .context("Failed to open in-memory database")?;
132
133    conn.execute(
134        "ATTACH DATABASE ?1 AS baseline",
135        [baseline_path.to_str().unwrap()],
136    )?;
137    conn.execute(
138        "ATTACH DATABASE ?1 AS current",
139        [current_path.to_str().unwrap()],
140    )?;
141
142    // Read metadata
143    let baseline_id = read_meta(&conn, "baseline", "timestamp")?;
144    let current_id = read_meta(&conn, "current", "timestamp")?;
145
146    // File diffs
147    let files_added = query_file_deltas(&conn,
148        "SELECT c.path, c.language, COALESCE(c.line_count, 0)
149         FROM current.files c
150         LEFT JOIN baseline.files b ON c.path = b.path
151         WHERE b.path IS NULL
152         ORDER BY c.path"
153    )?;
154
155    let files_removed = query_file_deltas(&conn,
156        "SELECT b.path, b.language, COALESCE(b.line_count, 0)
157         FROM baseline.files b
158         LEFT JOIN current.files c ON b.path = c.path
159         WHERE c.path IS NULL
160         ORDER BY b.path"
161    )?;
162
163    let files_modified = query_file_mod_deltas(&conn,
164        "SELECT b.path, b.language, COALESCE(b.line_count, 0), COALESCE(c.line_count, 0)
165         FROM baseline.files b
166         JOIN current.files c ON b.path = c.path
167         WHERE b.line_count != c.line_count OR b.language != c.language
168         ORDER BY b.path"
169    )?;
170
171    // Edge diffs (compare by path, not by file ID)
172    let edges_added = query_edge_deltas(&conn,
173        "SELECT sf.path, tf.path, ce.import_type
174         FROM current.dependency_edges ce
175         JOIN current.files sf ON ce.source_file_id = sf.id
176         JOIN current.files tf ON ce.target_file_id = tf.id
177         WHERE NOT EXISTS (
178             SELECT 1 FROM baseline.dependency_edges be
179             JOIN baseline.files bsf ON be.source_file_id = bsf.id
180             JOIN baseline.files btf ON be.target_file_id = btf.id
181             WHERE bsf.path = sf.path AND btf.path = tf.path
182         )
183         ORDER BY sf.path, tf.path"
184    )?;
185
186    let edges_removed = query_edge_deltas(&conn,
187        "SELECT sf.path, tf.path, be.import_type
188         FROM baseline.dependency_edges be
189         JOIN baseline.files sf ON be.source_file_id = sf.id
190         JOIN baseline.files tf ON be.target_file_id = tf.id
191         WHERE NOT EXISTS (
192             SELECT 1 FROM current.dependency_edges ce
193             JOIN current.files csf ON ce.source_file_id = csf.id
194             JOIN current.files ctf ON ce.target_file_id = ctf.id
195             WHERE csf.path = sf.path AND ctf.path = tf.path
196         )
197         ORDER BY sf.path, tf.path"
198    )?;
199
200    // Module metric diffs
201    let module_changes = query_module_deltas(&conn)?;
202
203    // Hotspot analysis via DependencyIndex on each snapshot
204    let baseline_deps = DependencyIndex::from_db_path(baseline_path);
205    let current_deps = DependencyIndex::from_db_path(current_path);
206
207    let baseline_hotspots = baseline_deps.find_hotspots(None, 1).unwrap_or_default();
208    let current_hotspots = current_deps.find_hotspots(None, 1).unwrap_or_default();
209
210    let hotspot_changes = compute_hotspot_changes(
211        &baseline_deps, &current_deps,
212        &baseline_hotspots, &current_hotspots,
213    );
214
215    // Cycle analysis
216    let baseline_cycles = baseline_deps.detect_circular_dependencies().unwrap_or_default();
217    let current_cycles = current_deps.detect_circular_dependencies().unwrap_or_default();
218
219    let (new_cycles, resolved_cycles) = compute_cycle_changes(
220        &baseline_deps, &current_deps,
221        &baseline_cycles, &current_cycles,
222    );
223
224    // Island analysis
225    let baseline_islands = baseline_deps.find_islands().unwrap_or_default();
226    let current_islands = current_deps.find_islands().unwrap_or_default();
227
228    let island_changes = IslandDelta {
229        old_count: baseline_islands.len(),
230        new_count: current_islands.len(),
231    };
232
233    // Compute net line change
234    let net_line_change: i64 = files_added.iter().map(|f| f.line_count as i64).sum::<i64>()
235        - files_removed.iter().map(|f| f.line_count as i64).sum::<i64>()
236        + files_modified.iter().map(|f| f.new_line_count as i64 - f.old_line_count as i64).sum::<i64>();
237
238    // Threshold alerts
239    let threshold_alerts = compute_threshold_alerts(
240        thresholds,
241        &current_hotspots,
242        &current_deps,
243        &current_cycles,
244        &module_changes,
245        &files_modified,
246    );
247
248    let summary = DiffSummary {
249        files_added: files_added.len(),
250        files_removed: files_removed.len(),
251        files_modified: files_modified.len(),
252        edges_added: edges_added.len(),
253        edges_removed: edges_removed.len(),
254        net_line_change,
255    };
256
257    Ok(SnapshotDiff {
258        baseline_id: baseline_id.clone(),
259        current_id: current_id.clone(),
260        baseline_timestamp: baseline_id,
261        current_timestamp: current_id,
262        files_added,
263        files_removed,
264        files_modified,
265        edges_added,
266        edges_removed,
267        hotspot_changes,
268        new_cycles,
269        resolved_cycles,
270        island_changes,
271        module_changes,
272        threshold_alerts,
273        summary,
274    })
275}
276
277fn read_meta(conn: &Connection, db: &str, key: &str) -> Result<String> {
278    let sql = format!("SELECT value FROM {}.metadata WHERE key = ?1", db);
279    conn.query_row(&sql, [key], |row| row.get(0))
280        .unwrap_or_else(|_| "unknown".to_string())
281        .pipe(Ok)
282}
283
// Helper trait for pipe syntax: `value.pipe(f)` is the same as `f(value)`,
// which lets a function be applied at the end of a method chain.
trait Pipe: Sized {
    /// Consume `self` and return the result of calling `f` on it.
    fn pipe<T>(self, f: impl FnOnce(Self) -> T) -> T {
        f(self)
    }
}
// Blanket impl: every sized type gets `.pipe(..)`.
impl<T> Pipe for T {}
291
292fn query_file_deltas(conn: &Connection, sql: &str) -> Result<Vec<FileDelta>> {
293    let mut stmt = conn.prepare(sql)?;
294    let results = stmt.query_map([], |row| {
295        Ok(FileDelta {
296            path: row.get(0)?,
297            language: row.get(1)?,
298            line_count: row.get::<_, i64>(2)? as usize,
299        })
300    })?
301    .collect::<Result<Vec<_>, _>>()?;
302    Ok(results)
303}
304
305fn query_file_mod_deltas(conn: &Connection, sql: &str) -> Result<Vec<FileModDelta>> {
306    let mut stmt = conn.prepare(sql)?;
307    let results = stmt.query_map([], |row| {
308        Ok(FileModDelta {
309            path: row.get(0)?,
310            language: row.get(1)?,
311            old_line_count: row.get::<_, i64>(2)? as usize,
312            new_line_count: row.get::<_, i64>(3)? as usize,
313        })
314    })?
315    .collect::<Result<Vec<_>, _>>()?;
316    Ok(results)
317}
318
319fn query_edge_deltas(conn: &Connection, sql: &str) -> Result<Vec<EdgeDelta>> {
320    let mut stmt = conn.prepare(sql)?;
321    let results = stmt.query_map([], |row| {
322        Ok(EdgeDelta {
323            source_path: row.get(0)?,
324            target_path: row.get(1)?,
325            import_type: row.get(2)?,
326        })
327    })?
328    .collect::<Result<Vec<_>, _>>()?;
329    Ok(results)
330}
331
332fn query_module_deltas(conn: &Connection) -> Result<Vec<ModuleMetricsDelta>> {
333    // SQLite doesn't support FULL OUTER JOIN directly, simulate with UNION
334    let sql = "SELECT module_path, old_file_count, new_file_count, old_total_lines, new_total_lines FROM (
335        SELECT
336            COALESCE(b.module_path, c.module_path) AS module_path,
337            b.file_count AS old_file_count, c.file_count AS new_file_count,
338            b.total_lines AS old_total_lines, c.total_lines AS new_total_lines
339        FROM baseline.metrics b
340        LEFT JOIN current.metrics c ON b.module_path = c.module_path
341        UNION ALL
342        SELECT
343            c.module_path,
344            NULL AS old_file_count, c.file_count AS new_file_count,
345            NULL AS old_total_lines, c.total_lines AS new_total_lines
346        FROM current.metrics c
347        LEFT JOIN baseline.metrics b ON c.module_path = b.module_path
348        WHERE b.module_path IS NULL
349    )
350    WHERE old_file_count IS NULL OR new_file_count IS NULL
351       OR old_file_count != new_file_count OR old_total_lines != new_total_lines
352    ORDER BY module_path";
353
354    let mut stmt = conn.prepare(sql)?;
355    let results = stmt.query_map([], |row| {
356        Ok(ModuleMetricsDelta {
357            module_path: row.get(0)?,
358            old_file_count: row.get::<_, Option<i64>>(1)?.map(|v| v as usize),
359            new_file_count: row.get::<_, Option<i64>>(2)?.map(|v| v as usize),
360            old_total_lines: row.get::<_, Option<i64>>(3)?.map(|v| v as usize),
361            new_total_lines: row.get::<_, Option<i64>>(4)?.map(|v| v as usize),
362        })
363    })?
364    .collect::<Result<Vec<_>, _>>()?;
365    Ok(results)
366}
367
368fn compute_hotspot_changes(
369    baseline_deps: &DependencyIndex,
370    current_deps: &DependencyIndex,
371    baseline_hotspots: &[(i64, usize)],
372    current_hotspots: &[(i64, usize)],
373) -> Vec<HotspotDelta> {
374    let mut changes = Vec::new();
375
376    // Build path-based maps for comparison
377    let baseline_map: std::collections::HashMap<String, usize> = baseline_hotspots.iter()
378        .filter_map(|(id, count)| {
379            baseline_deps.get_file_paths(&[*id]).ok()
380                .and_then(|paths| paths.get(id).map(|p| (p.clone(), *count)))
381        })
382        .collect();
383
384    let current_map: std::collections::HashMap<String, usize> = current_hotspots.iter()
385        .filter_map(|(id, count)| {
386            current_deps.get_file_paths(&[*id]).ok()
387                .and_then(|paths| paths.get(id).map(|p| (p.clone(), *count)))
388        })
389        .collect();
390
391    // Find changes
392    for (path, &new_count) in &current_map {
393        let old_count = baseline_map.get(path).copied().unwrap_or(0);
394        if old_count != new_count {
395            changes.push(HotspotDelta {
396                path: path.clone(),
397                old_fan_in: old_count,
398                new_fan_in: new_count,
399            });
400        }
401    }
402
403    // Sort by fan-in change magnitude
404    changes.sort_by(|a, b| {
405        let a_delta = (a.new_fan_in as i64 - a.old_fan_in as i64).unsigned_abs();
406        let b_delta = (b.new_fan_in as i64 - b.old_fan_in as i64).unsigned_abs();
407        b_delta.cmp(&a_delta)
408    });
409
410    changes
411}
412
413fn compute_cycle_changes(
414    baseline_deps: &DependencyIndex,
415    current_deps: &DependencyIndex,
416    baseline_cycles: &[Vec<i64>],
417    current_cycles: &[Vec<i64>],
418) -> (Vec<Vec<String>>, Vec<Vec<String>>) {
419    // Convert cycles to path-based representation for comparison
420    let to_path_cycle = |deps: &DependencyIndex, cycle: &[i64]| -> Option<Vec<String>> {
421        let paths = deps.get_file_paths(cycle).ok()?;
422        let path_cycle: Vec<String> = cycle.iter()
423            .filter_map(|id| paths.get(id).cloned())
424            .collect();
425        if path_cycle.len() == cycle.len() { Some(path_cycle) } else { None }
426    };
427
428    let baseline_set: HashSet<Vec<String>> = baseline_cycles.iter()
429        .filter_map(|c| to_path_cycle(baseline_deps, c))
430        .map(|mut c| { c.sort(); c })
431        .collect();
432
433    let current_set: HashSet<Vec<String>> = current_cycles.iter()
434        .filter_map(|c| to_path_cycle(current_deps, c))
435        .map(|mut c| { c.sort(); c })
436        .collect();
437
438    let new_cycles: Vec<Vec<String>> = current_set.difference(&baseline_set).cloned().collect();
439    let resolved_cycles: Vec<Vec<String>> = baseline_set.difference(&current_set).cloned().collect();
440
441    (new_cycles, resolved_cycles)
442}
443
444fn compute_threshold_alerts(
445    thresholds: &super::config::ThresholdConfig,
446    current_hotspots: &[(i64, usize)],
447    current_deps: &DependencyIndex,
448    current_cycles: &[Vec<i64>],
449    module_changes: &[ModuleMetricsDelta],
450    files_modified: &[FileModDelta],
451) -> Vec<ThresholdAlert> {
452    let mut alerts = Vec::new();
453
454    // Fan-in alerts
455    for &(file_id, count) in current_hotspots {
456        if count >= thresholds.fan_in_critical {
457            let path = current_deps.get_file_paths(&[file_id]).ok()
458                .and_then(|paths| paths.get(&file_id).cloned());
459            alerts.push(ThresholdAlert {
460                severity: AlertSeverity::Critical,
461                category: "fan_in".to_string(),
462                message: format!("Critical fan-in: {} imports ({} threshold)", count, thresholds.fan_in_critical),
463                path,
464            });
465        } else if count >= thresholds.fan_in_warning {
466            let path = current_deps.get_file_paths(&[file_id]).ok()
467                .and_then(|paths| paths.get(&file_id).cloned());
468            alerts.push(ThresholdAlert {
469                severity: AlertSeverity::Warning,
470                category: "fan_in".to_string(),
471                message: format!("High fan-in: {} imports ({} threshold)", count, thresholds.fan_in_warning),
472                path,
473            });
474        }
475    }
476
477    // Cycle length alerts
478    for cycle in current_cycles {
479        if cycle.len() >= thresholds.cycle_length {
480            alerts.push(ThresholdAlert {
481                severity: AlertSeverity::Warning,
482                category: "circular_dependency".to_string(),
483                message: format!("Circular dependency chain of length {}", cycle.len()),
484                path: None,
485            });
486        }
487    }
488
489    // Module size alerts
490    for change in module_changes {
491        if let Some(count) = change.new_file_count {
492            if count >= thresholds.module_file_count {
493                alerts.push(ThresholdAlert {
494                    severity: AlertSeverity::Warning,
495                    category: "module_size".to_string(),
496                    message: format!("Module has {} files (threshold: {})", count, thresholds.module_file_count),
497                    path: Some(change.module_path.clone()),
498                });
499            }
500        }
501    }
502
503    // Line count growth alerts
504    for file in files_modified {
505        if file.old_line_count > 0 {
506            let growth = file.new_line_count as f64 / file.old_line_count as f64;
507            if growth >= thresholds.line_count_growth {
508                alerts.push(ThresholdAlert {
509                    severity: AlertSeverity::Warning,
510                    category: "line_growth".to_string(),
511                    message: format!(
512                        "Line count grew {:.1}x ({} -> {})",
513                        growth, file.old_line_count, file.new_line_count
514                    ),
515                    path: Some(file.path.clone()),
516                });
517            }
518        }
519    }
520
521    alerts
522}