fossil-mcp 0.1.8

Multi-language static analysis toolkit with MCP server. Detects dead code, code clones, and AI scaffolding.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
//! Clone evolution tracking across codebase snapshots.
//!
//! Compares function fingerprints between two snapshots to detect how clone
//! relationships change over time: new clones appearing, existing clones diverging,
//! independent functions converging, or functions being added/removed entirely.
//!
//! The primary consumer is maintenance tooling that needs to flag diverging clones
//! (functions that were once similar but are drifting apart), since those represent
//! the highest risk of inconsistent bug fixes.

use std::collections::{HashMap, HashSet};

use super::fingerprint_store::{FingerprintStore, FunctionFingerprint, SnapshotId};
use super::minhash::MinHashDetector;

/// The type of evolution event detected between two snapshots.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EvolutionEventType {
    /// A new function appeared in the later snapshot (not present in the earlier one).
    Appeared,
    /// A function from the earlier snapshot is absent in the later one.
    Disappeared,
    /// A function pair that was a clone (similarity >= threshold) has dropped below
    /// the threshold -- they are diverging and may need synchronized maintenance.
    Diverged,
    /// A function pair that was NOT a clone has risen above the threshold --
    /// independently-written code is converging (possible copy-paste in progress).
    Converged,
    /// A function pair that was a clone and remains a clone (above threshold in both
    /// snapshots). Similarity may have changed slightly but the pair is still coupled.
    Unchanged,
}

/// A single clone evolution event between two snapshots.
#[derive(Debug, Clone)]
pub struct CloneEvolutionEvent {
    /// The pair of functions involved, identified as `(file::name, file::name)`.
    pub clone_pair: (String, String),
    /// What happened to this pair between the two snapshots.
    pub event_type: EvolutionEventType,
    /// Earlier snapshot ID.
    pub snapshot_from: SnapshotId,
    /// Later snapshot ID.
    pub snapshot_to: SnapshotId,
    /// Jaccard similarity of the pair in the earlier snapshot (0.0 if not present).
    pub similarity_before: f64,
    /// Jaccard similarity of the pair in the later snapshot (0.0 if not present).
    pub similarity_after: f64,
}

/// Tracks how clone relationships evolve between codebase snapshots.
///
/// Given a [`FingerprintStore`] containing multiple snapshots, this tracker
/// compares function pairs across two snapshots and emits [`CloneEvolutionEvent`]s
/// describing what changed.
pub struct CloneEvolutionTracker {
    /// The underlying fingerprint store with snapshot data.
    pub store: FingerprintStore,
    /// Jaccard similarity threshold for considering a pair a "clone".
    similarity_threshold: f64,
}

impl CloneEvolutionTracker {
    /// Creates a new tracker with an empty store and the given similarity threshold.
    ///
    /// # Arguments
    ///
    /// * `similarity_threshold` - Jaccard similarity at or above which a function
    ///   pair is considered a clone. Typical values: 0.5-0.8.
    pub fn new(similarity_threshold: f64) -> Self {
        Self {
            store: FingerprintStore::new(),
            similarity_threshold,
        }
    }

    /// Compares two snapshots and returns all evolution events.
    ///
    /// # Algorithm
    ///
    /// 1. Build a lookup of functions by `(file, name)` for both snapshots.
    /// 2. Identify functions that appeared or disappeared.
    /// 3. For every pair of functions present in BOTH snapshots, compute Jaccard
    ///    similarity (via `MinHashDetector::exact_jaccard` on the minhash sketches)
    ///    in both the "before" and "after" snapshots.
    /// 4. Classify each pair as Diverged, Converged, or Unchanged based on whether
    ///    the pair crosses the similarity threshold.
    ///
    /// Returns an empty `Vec` if either snapshot ID is not found.
    pub fn track_evolution(&self, snapshot_a: &str, snapshot_b: &str) -> Vec<CloneEvolutionEvent> {
        let fps_a = match self.store.get_snapshot(snapshot_a) {
            Some(fps) => fps,
            None => return Vec::new(),
        };
        let fps_b = match self.store.get_snapshot(snapshot_b) {
            Some(fps) => fps,
            None => return Vec::new(),
        };

        // Index fingerprints by (file, name) for O(1) lookup.
        let index_a = build_function_index(fps_a);
        let index_b = build_function_index(fps_b);

        let keys_a: HashSet<&(String, String)> = index_a.keys().collect();
        let keys_b: HashSet<&(String, String)> = index_b.keys().collect();

        let mut events = Vec::new();

        // Functions that appeared (in B but not in A).
        for key in keys_b.difference(&keys_a) {
            events.push(CloneEvolutionEvent {
                clone_pair: (format!("{}::{}", key.0, key.1), String::new()),
                event_type: EvolutionEventType::Appeared,
                snapshot_from: snapshot_a.to_string(),
                snapshot_to: snapshot_b.to_string(),
                similarity_before: 0.0,
                similarity_after: 0.0,
            });
        }

        // Functions that disappeared (in A but not in B).
        for key in keys_a.difference(&keys_b) {
            events.push(CloneEvolutionEvent {
                clone_pair: (format!("{}::{}", key.0, key.1), String::new()),
                event_type: EvolutionEventType::Disappeared,
                snapshot_from: snapshot_a.to_string(),
                snapshot_to: snapshot_b.to_string(),
                similarity_before: 0.0,
                similarity_after: 0.0,
            });
        }

        // Functions present in both snapshots -- compare all pairs.
        let common_keys: Vec<&(String, String)> = keys_a.intersection(&keys_b).copied().collect();

        // For each unique pair of common functions, compare their evolution.
        for i in 0..common_keys.len() {
            for j in (i + 1)..common_keys.len() {
                let key_i = common_keys[i];
                let key_j = common_keys[j];

                let fp_i_a = &index_a[key_i];
                let fp_j_a = &index_a[key_j];
                let fp_i_b = &index_b[key_i];
                let fp_j_b = &index_b[key_j];

                let sim_before =
                    MinHashDetector::exact_jaccard(&fp_i_a.minhash_sketch, &fp_j_a.minhash_sketch);
                let sim_after =
                    MinHashDetector::exact_jaccard(&fp_i_b.minhash_sketch, &fp_j_b.minhash_sketch);

                let was_clone = sim_before >= self.similarity_threshold;
                let is_clone = sim_after >= self.similarity_threshold;

                let event_type = match (was_clone, is_clone) {
                    (true, false) => EvolutionEventType::Diverged,
                    (false, true) => EvolutionEventType::Converged,
                    (true, true) => EvolutionEventType::Unchanged,
                    // Both below threshold in both snapshots -- not interesting.
                    (false, false) => continue,
                };

                events.push(CloneEvolutionEvent {
                    clone_pair: (
                        format!("{}::{}", key_i.0, key_i.1),
                        format!("{}::{}", key_j.0, key_j.1),
                    ),
                    event_type,
                    snapshot_from: snapshot_a.to_string(),
                    snapshot_to: snapshot_b.to_string(),
                    similarity_before: sim_before,
                    similarity_after: sim_after,
                });
            }
        }

        events
    }

    /// Returns only the Diverged events between two snapshots.
    ///
    /// Diverging clones are the highest-priority maintenance risk: code that was
    /// once duplicated and similar is drifting apart, meaning bug fixes applied to
    /// one copy may be missing from the other.
    pub fn detect_diverging_clones(
        &self,
        snapshot_a: &str,
        snapshot_b: &str,
    ) -> Vec<CloneEvolutionEvent> {
        self.track_evolution(snapshot_a, snapshot_b)
            .into_iter()
            .filter(|e| e.event_type == EvolutionEventType::Diverged)
            .collect()
    }
}

/// Builds a lookup from `(file, name)` to the corresponding fingerprint.
///
/// If multiple fingerprints share the same `(file, name)`, the last one wins.
fn build_function_index(
    fingerprints: &[FunctionFingerprint],
) -> HashMap<(String, String), &FunctionFingerprint> {
    let mut index = HashMap::new();
    for fp in fingerprints {
        index.insert((fp.file.clone(), fp.name.clone()), fp);
    }
    index
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::clones::fingerprint_store::FunctionFingerprint;

    /// Helper: creates a fingerprint with the given minhash sketch.
    fn make_fp(file: &str, name: &str, minhash: Vec<u64>) -> FunctionFingerprint {
        FunctionFingerprint {
            file: file.to_string(),
            name: name.to_string(),
            start_line: 1,
            end_line: 10,
            minhash_sketch: minhash,
            simhash: 0,
            content_hash: 0,
            timestamp: "t0".to_string(),
        }
    }

    #[test]
    fn test_function_appeared() {
        let mut tracker = CloneEvolutionTracker::new(0.5);

        // Snapshot A has one function; snapshot B adds a second.
        tracker
            .store
            .add_snapshot("a".to_string(), vec![make_fp("f.py", "foo", vec![1, 2, 3])]);
        tracker.store.add_snapshot(
            "b".to_string(),
            vec![
                make_fp("f.py", "foo", vec![1, 2, 3]),
                make_fp("f.py", "bar", vec![10, 20, 30]),
            ],
        );

        let events = tracker.track_evolution("a", "b");
        let appeared: Vec<_> = events
            .iter()
            .filter(|e| e.event_type == EvolutionEventType::Appeared)
            .collect();

        assert_eq!(appeared.len(), 1);
        assert!(appeared[0].clone_pair.0.contains("bar"));
    }

    #[test]
    fn test_function_disappeared() {
        let mut tracker = CloneEvolutionTracker::new(0.5);

        tracker.store.add_snapshot(
            "a".to_string(),
            vec![
                make_fp("f.py", "foo", vec![1, 2, 3]),
                make_fp("f.py", "bar", vec![10, 20, 30]),
            ],
        );
        tracker
            .store
            .add_snapshot("b".to_string(), vec![make_fp("f.py", "foo", vec![1, 2, 3])]);

        let events = tracker.track_evolution("a", "b");
        let disappeared: Vec<_> = events
            .iter()
            .filter(|e| e.event_type == EvolutionEventType::Disappeared)
            .collect();

        assert_eq!(disappeared.len(), 1);
        assert!(disappeared[0].clone_pair.0.contains("bar"));
    }

    #[test]
    fn test_clone_pair_diverged() {
        let mut tracker = CloneEvolutionTracker::new(0.5);

        // In snapshot A, foo and bar share most shingles (high Jaccard).
        // In snapshot B, bar has completely different shingles (low Jaccard).
        let shared: Vec<u64> = (0..100).collect();
        let diverged: Vec<u64> = (500..600).collect();

        tracker.store.add_snapshot(
            "a".to_string(),
            vec![
                make_fp("f.py", "foo", shared.clone()),
                make_fp("f.py", "bar", shared.clone()),
            ],
        );
        tracker.store.add_snapshot(
            "b".to_string(),
            vec![
                make_fp("f.py", "foo", shared),
                make_fp("f.py", "bar", diverged),
            ],
        );

        let events = tracker.track_evolution("a", "b");
        let diverged_events: Vec<_> = events
            .iter()
            .filter(|e| e.event_type == EvolutionEventType::Diverged)
            .collect();

        assert_eq!(diverged_events.len(), 1);
        assert!(
            diverged_events[0].similarity_before >= 0.5,
            "Before similarity should be >= threshold, got {}",
            diverged_events[0].similarity_before
        );
        assert!(
            diverged_events[0].similarity_after < 0.5,
            "After similarity should be < threshold, got {}",
            diverged_events[0].similarity_after
        );
    }

    #[test]
    fn test_clone_pair_converged() {
        let mut tracker = CloneEvolutionTracker::new(0.5);

        // In snapshot A, foo and bar are completely different.
        // In snapshot B, bar has become similar to foo.
        let set_a: Vec<u64> = (0..100).collect();
        let set_b_different: Vec<u64> = (500..600).collect();

        tracker.store.add_snapshot(
            "a".to_string(),
            vec![
                make_fp("f.py", "foo", set_a.clone()),
                make_fp("f.py", "bar", set_b_different),
            ],
        );
        tracker.store.add_snapshot(
            "b".to_string(),
            vec![
                make_fp("f.py", "foo", set_a.clone()),
                make_fp("f.py", "bar", set_a),
            ],
        );

        let events = tracker.track_evolution("a", "b");
        let converged: Vec<_> = events
            .iter()
            .filter(|e| e.event_type == EvolutionEventType::Converged)
            .collect();

        assert_eq!(converged.len(), 1);
        assert!(
            converged[0].similarity_before < 0.5,
            "Before similarity should be < threshold, got {}",
            converged[0].similarity_before
        );
        assert!(
            converged[0].similarity_after >= 0.5,
            "After similarity should be >= threshold, got {}",
            converged[0].similarity_after
        );
    }

    #[test]
    fn test_clone_pair_unchanged() {
        let mut tracker = CloneEvolutionTracker::new(0.5);

        // In both snapshots, foo and bar are identical clones.
        let shared: Vec<u64> = (0..100).collect();

        tracker.store.add_snapshot(
            "a".to_string(),
            vec![
                make_fp("f.py", "foo", shared.clone()),
                make_fp("f.py", "bar", shared.clone()),
            ],
        );
        tracker.store.add_snapshot(
            "b".to_string(),
            vec![
                make_fp("f.py", "foo", shared.clone()),
                make_fp("f.py", "bar", shared),
            ],
        );

        let events = tracker.track_evolution("a", "b");
        let unchanged: Vec<_> = events
            .iter()
            .filter(|e| e.event_type == EvolutionEventType::Unchanged)
            .collect();

        assert_eq!(unchanged.len(), 1);
        assert!(
            (unchanged[0].similarity_before - 1.0).abs() < f64::EPSILON,
            "Identical clones should have similarity 1.0 before"
        );
    }

    #[test]
    fn test_detect_diverging_clones_filters_correctly() {
        let mut tracker = CloneEvolutionTracker::new(0.5);

        let shared: Vec<u64> = (0..100).collect();
        let diverged: Vec<u64> = (500..600).collect();
        let unrelated_a: Vec<u64> = (1000..1100).collect();
        let unrelated_b: Vec<u64> = (2000..2100).collect();

        tracker.store.add_snapshot(
            "a".to_string(),
            vec![
                make_fp("f.py", "foo", shared.clone()),
                make_fp("f.py", "bar", shared.clone()),
                make_fp("f.py", "baz", unrelated_a),
            ],
        );
        tracker.store.add_snapshot(
            "b".to_string(),
            vec![
                make_fp("f.py", "foo", shared),
                make_fp("f.py", "bar", diverged),
                make_fp("f.py", "baz", unrelated_b),
            ],
        );

        let diverging = tracker.detect_diverging_clones("a", "b");
        assert_eq!(diverging.len(), 1);
        // The diverging pair should be foo and bar.
        let pair = &diverging[0].clone_pair;
        let combined = format!("{} {}", pair.0, pair.1);
        assert!(
            combined.contains("foo") && combined.contains("bar"),
            "Expected foo and bar in diverging pair, got: {combined}"
        );
    }

    #[test]
    fn test_track_evolution_missing_snapshot() {
        let tracker = CloneEvolutionTracker::new(0.5);
        let events = tracker.track_evolution("nonexistent_a", "nonexistent_b");
        assert!(events.is_empty());
    }

    #[test]
    fn test_empty_snapshots() {
        let mut tracker = CloneEvolutionTracker::new(0.5);
        tracker.store.add_snapshot("a".to_string(), vec![]);
        tracker.store.add_snapshot("b".to_string(), vec![]);

        let events = tracker.track_evolution("a", "b");
        assert!(events.is_empty());
    }
}