srcwalk 0.1.0

Tree-sitter indexed lookups — smart code reading for AI agents
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
// Graphwalk-style BFS over the call graph.
// Reuses `find_callers_batch` from the single-hop module — one walk per hop,
// Aho-Corasick multi-pattern.

use std::collections::HashSet;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};

use crate::cache::OutlineCache;
use crate::error::SrcwalkError;

use super::single::{find_callers_batch, TOP_LEVEL};

// ═══════════════════════════════════════════════════════════════════════════
// Graphwalk-style BFS over call graph (v1 — safe, reuses find_callers_batch)
// ═══════════════════════════════════════════════════════════════════════════

/// A single directed edge in the caller graph: `from` is the enclosing
/// function that contains a call to `to` at `from_loc`.
#[derive(Debug, Clone)]
pub struct BfsEdge {
    pub hop: usize,
    pub from: String,
    pub from_file: PathBuf,
    pub from_line: u32,
    pub to: String,
    /// Call-site source text (single line). Matches legacy `--callers` output:
    /// for a call spanning multiple lines, the first line is kept. Empty only
    /// when the underlying `CallerMatch.call_text` was empty.
    pub call_text: String,
}

/// Aggregated BFS result.
#[derive(Debug, Default)]
pub struct BfsStats {
    pub depth_reached: usize,
    pub edges_total: usize,
    pub frontier_cut_hops: Vec<(usize, usize)>, // (hop, frontier_size_before_cut)
    pub edges_cut_at_hop: Option<usize>,
    pub top_level_terminal: usize,
    pub unresolved_symbols: usize, // frontier symbols that produced zero callers
    pub hubs_skipped: Vec<String>, // hub symbols dropped from frontier (user override)
    pub auto_hubs_skipped: Vec<String>, // auto-promoted hub drops on later hops
    pub auto_hubs_promoted: Vec<(String, usize)>, // (symbol, edge_count) promoted this run
    pub per_hop: Vec<HopStats>,
    pub elapsed_ms: u128,
}

#[derive(Debug, Clone, Default)]
pub struct HopStats {
    pub hop: usize,
    pub frontier_size: usize, // after hub-skip + cap
    pub edges: usize,
}

/// Hops where most edges land outside directories seen in the previous hop —
/// strong signal of cross-package name collision (e.g. hop 1 finds `pool.New`,
/// hop 2 then matches every unrelated `errors.New` / `bytes.NewBuffer` / ...).
/// Lang-agnostic: uses filesystem path proximity, no package concept.
#[derive(Debug, Clone)]
pub struct SuspicionInfo {
    pub hop: usize,
    pub total_edges: usize,
    pub related_edges: usize, // share parent dir with any hop N-1 file
}

/// Threshold above which low related-ratio is flagged. Small hops are ignored —
/// 5 spurious edges out of 5 is noise, 490 out of 500 is a real collision.
const SUSPICION_MIN_EDGES: usize = 50;
/// Related-ratio cutoff: if fewer than 1 in 5 edges share a dir with the
/// previous hop, flag the hop as suspect.
const SUSPICION_RELATED_NUM: usize = 1;
const SUSPICION_RELATED_DEN: usize = 5;

/// Scan completed BFS edges for hops whose matches are mostly "far" from the
/// previous hop's file paths. Pure function — unit-testable in isolation.
pub fn compute_suspicious_hops(edges: &[BfsEdge]) -> Vec<SuspicionInfo> {
    use std::collections::{BTreeMap, HashSet};
    let mut by_hop: BTreeMap<usize, Vec<&BfsEdge>> = BTreeMap::new();
    for e in edges {
        by_hop.entry(e.hop).or_default().push(e);
    }
    let mut out = Vec::new();
    for (&hop, list) in &by_hop {
        if hop < 2 {
            continue;
        }
        let total = list.len();
        if total < SUSPICION_MIN_EDGES {
            continue;
        }
        let prev = match by_hop.get(&(hop - 1)) {
            Some(p) if !p.is_empty() => p,
            _ => continue,
        };
        let prev_dirs: HashSet<&Path> = prev
            .iter()
            .filter_map(|e| e.from_file.parent())
            .collect();
        if prev_dirs.is_empty() {
            continue;
        }
        let related = list
            .iter()
            .filter(|e| {
                e.from_file
                    .parent()
                    .is_some_and(|p| prev_dirs.contains(p))
            })
            .count();
        // related / total < NUM / DEN  ⇔  related * DEN < total * NUM
        if related * SUSPICION_RELATED_DEN < total * SUSPICION_RELATED_NUM {
            out.push(SuspicionInfo {
                hop,
                total_edges: total,
                related_edges: related,
            });
        }
    }
    out
}

/// Threshold for auto-hub promotion: if a single symbol produces this many
/// edges in one hop, treat it as a hub and drop from the next frontier.
/// Data-driven, language-agnostic — no hard-coded hub list.
const AUTO_HUB_THRESHOLD: usize = 200;

/// Parse user-provided hub overrides. Returns empty set by default;
/// `--skip-hubs "A,B,C"` adds explicit skips on top of auto-promotion.
fn parse_hubs(skip_hubs: Option<&str>) -> HashSet<String> {
    match skip_hubs {
        None | Some("") => HashSet::new(),
        Some(csv) => csv
            .split(',')
            .map(str::trim)
            .filter(|s| !s.is_empty())
            .map(String::from)
            .collect(),
    }
}

/// BFS backward over callers up to `max_depth` hops.
/// Reuses `find_callers_batch` per hop — one walk per hop, Aho-Corasick multi-pattern.
///
/// Guards:
/// - `skip_hubs`: explicit user override for symbols to drop from frontier.
///   Auto-hub promotion (>= `AUTO_HUB_THRESHOLD` edges/hop) is always on.
/// - `max_frontier`: cap symbols explored per hop (drop lowest-priority excess).
/// - `max_edges`: hard stop on total edges.
/// - Visited set on `(symbol, path)` avoids revisits and loops.
/// - Unsupported-lang files become leaf nodes (inherited from `find_callers_batch`).
#[allow(clippy::too_many_arguments)]
pub fn search_callers_bfs(
    target: &str,
    scope: &Path,
    cache: &OutlineCache,
    bloom: &crate::index::bloom::BloomFilterCache,
    max_depth: usize,
    max_frontier: usize,
    max_edges: usize,
    glob: Option<&str>,
    skip_hubs: Option<&str>,
    json: bool,
) -> Result<String, SrcwalkError> {
    let t_start = std::time::Instant::now();
    let user_hubs = parse_hubs(skip_hubs);
    // Auto-promoted hubs: symbols producing >= AUTO_HUB_THRESHOLD edges per hop.
    // Dropped from frontier in subsequent hops. Data-driven, language-agnostic.
    let mut auto_hubs: HashSet<String> = HashSet::new();

    let mut edges: Vec<BfsEdge> = Vec::new();
    let mut stats = BfsStats::default();
    // Visited: (calling_function, path) — a function body in a given file
    // already enumerated as source of edges at some hop.
    let mut visited: HashSet<(String, PathBuf)> = HashSet::new();

    // Frontier at hop k: set of symbol names whose callers we want next.
    let mut frontier: HashSet<String> = HashSet::from([target.to_string()]);

    'outer: for hop in 1..=max_depth {
        if frontier.is_empty() {
            break;
        }

        // Drop hub symbols from frontier BEFORE the cap — they dominate fan-out.
        // Two sources: explicit user override (`--skip-hubs`) and auto-promoted
        // hubs detected in previous hops (>= AUTO_HUB_THRESHOLD edges/hop).
        // Root symbol is always explored even if it's a hub.
        let mut frontier_vec: Vec<String> = frontier
            .iter()
            .filter(|s| {
                if hop == 1 {
                    true
                } else if user_hubs.contains(s.as_str()) {
                    stats.hubs_skipped.push((*s).clone());
                    false
                } else if auto_hubs.contains(s.as_str()) {
                    stats.auto_hubs_skipped.push((*s).clone());
                    false
                } else {
                    true
                }
            })
            .cloned()
            .collect();
        frontier_vec.sort();
        if frontier_vec.len() > max_frontier {
            stats.frontier_cut_hops.push((hop, frontier_vec.len()));
            frontier_vec.truncate(max_frontier);
        }
        let frontier_this_hop: HashSet<String> = frontier_vec.into_iter().collect();
        let frontier_size = frontier_this_hop.len();

        if frontier_this_hop.is_empty() {
            break;
        }

        let mut hop_matches =
            find_callers_batch(&frontier_this_hop, scope, bloom, glob, Some(cache), None)?;
        // Parallel walker returns matches in thread-scheduling order. Sort
        // deterministically so `--max-edges` truncation is reproducible across runs.
        // Key matches the final edge sort below: (from_file, from_line, callee, caller).
        hop_matches.sort_by(|(a_to, a), (b_to, b)| {
            a.path
                .cmp(&b.path)
                .then(a.line.cmp(&b.line))
                .then(a_to.cmp(b_to))
                .then(a.calling_function.cmp(&b.calling_function))
        });

        let mut next_frontier: HashSet<String> = HashSet::new();
        let mut hit_targets_this_hop: HashSet<String> = HashSet::new();
        let edges_before_hop = edges.len();
        // Count edges per callee in this hop — used for auto-hub promotion.
        let mut per_callee_count: std::collections::HashMap<String, usize> =
            std::collections::HashMap::new();

        for (callee, m) in hop_matches {
            hit_targets_this_hop.insert(callee.clone());
            *per_callee_count.entry(callee.clone()).or_insert(0) += 1;

            let key = (m.calling_function.clone(), m.path.clone());

            if m.calling_function == TOP_LEVEL {
                stats.top_level_terminal += 1;
            } else if visited.insert(key) {
                next_frontier.insert(m.calling_function.clone());
            }

            // Reduce call_text to a single line — multi-line calls collapse to
            // their first line. Mirrors legacy --callers convention; bounds
            // per-edge token cost without truncating mid-token.
            let call_text = m
                .call_text
                .lines()
                .next()
                .unwrap_or("")
                .trim()
                .to_string();

            edges.push(BfsEdge {
                hop,
                from: m.calling_function,
                from_file: m.path,
                from_line: m.line,
                to: callee,
                call_text,
            });

            if edges.len() >= max_edges {
                stats.edges_cut_at_hop = Some(hop);
                stats.depth_reached = hop;
                stats.per_hop.push(HopStats {
                    hop,
                    frontier_size,
                    edges: edges.len() - edges_before_hop,
                });
                break 'outer;
            }
        }

        // Promote high-fan-out symbols to auto-hubs for subsequent hops.
        for (sym, count) in &per_callee_count {
            if *count >= AUTO_HUB_THRESHOLD && auto_hubs.insert(sym.clone()) {
                stats.auto_hubs_promoted.push((sym.clone(), *count));
            }
        }

        // Unresolved = frontier symbols nobody matched in the repo (by-name miss,
        // unsupported-lang file-only, indirect dispatch, etc).
        for s in &frontier_this_hop {
            if !hit_targets_this_hop.contains(s) {
                stats.unresolved_symbols += 1;
            }
        }

        stats.per_hop.push(HopStats {
            hop,
            frontier_size,
            edges: edges.len() - edges_before_hop,
        });
        stats.depth_reached = hop;
        frontier = next_frontier;
    }

    stats.edges_total = edges.len();
    stats.elapsed_ms = t_start.elapsed().as_millis();

    // Deterministic sort across ALL edges (stable for both text + JSON).
    edges.sort_by(|a, b| {
        a.hop
            .cmp(&b.hop)
            .then_with(|| a.from_file.cmp(&b.from_file))
            .then_with(|| a.from_line.cmp(&b.from_line))
            .then_with(|| a.to.cmp(&b.to))
            .then_with(|| a.from.cmp(&b.from))
    });

    if json {
        Ok(format_bfs_json(target, scope, &edges, &stats, max_depth))
    } else {
        Ok(format_bfs(target, scope, &edges, &stats, max_depth))
    }
}

/// Deterministic sort + pretty-print BFS edges grouped by hop.
fn format_bfs(
    target: &str,
    scope: &Path,
    edges: &[BfsEdge],
    stats: &BfsStats,
    max_depth: usize,
) -> String {
    let mut out = String::new();
    let _ = writeln!(
        out,
        "# BFS callers of \"{}\" in {} — depth={}/{}, {} edge{}, {} ms",
        target,
        scope.display(),
        stats.depth_reached,
        max_depth,
        stats.edges_total,
        if stats.edges_total == 1 { "" } else { "s" },
        stats.elapsed_ms
    );

    if edges.is_empty() {
        let _ = writeln!(
            out,
            "\nNo call sites found. Symbol may be invoked via indirect dispatch \
             (trait objects, interfaces, callbacks, reflection, macros) — srcwalk \
             only sees direct by-name calls."
        );
        return out;
    }

    // Group edges by hop, sort deterministically within each group.
    let mut by_hop: std::collections::BTreeMap<usize, Vec<&BfsEdge>> =
        std::collections::BTreeMap::new();
    for e in edges {
        by_hop.entry(e.hop).or_default().push(e);
    }

    for (hop, mut list) in by_hop {
        list.sort_by(|a, b| {
            a.from_file
                .cmp(&b.from_file)
                .then_with(|| a.from_line.cmp(&b.from_line))
                .then_with(|| a.to.cmp(&b.to))
                .then_with(|| a.from.cmp(&b.from))
        });
        let _ = writeln!(out, "\n── hop {} ({} edge{}) ──", hop, list.len(),
            if list.len() == 1 { "" } else { "s" });
        for e in list {
            let rel = e.from_file.strip_prefix(scope).unwrap_or(&e.from_file);
            // Match legacy `--callers` convention: payload is the call-site
            // source text, not the bare callee symbol. Fall back to the
            // symbol only when call_text is unavailable.
            let payload = if e.call_text.is_empty() {
                e.to.as_str()
            } else {
                e.call_text.as_str()
            };
            let _ = writeln!(
                out,
                "  {:<28} {}:{}{}",
                e.from,
                rel.display(),
                e.from_line,
                payload
            );
        }
    }

    // Banner — truthfulness about what was cut.
    let mut notes: Vec<String> = Vec::new();
    if let Some(h) = stats.edges_cut_at_hop {
        notes.push(format!("edges capped at hop {h}"));
    }
    for (h, n) in &stats.frontier_cut_hops {
        notes.push(format!("frontier at hop {h} truncated from {n}"));
    }
    if !stats.auto_hubs_promoted.is_empty() {
        let mut parts: Vec<String> = stats
            .auto_hubs_promoted
            .iter()
            .map(|(s, c)| format!("{s}({c})"))
            .collect();
        parts.sort();
        let preview: Vec<String> = parts.iter().take(6).cloned().collect();
        let more = parts.len().saturating_sub(preview.len());
        let suffix = if more > 0 { format!(", +{more} more") } else { String::new() };
        notes.push(format!(
            "{} symbol(s) auto-promoted to hub (≥{} edges/hop): {}{}",
            parts.len(),
            AUTO_HUB_THRESHOLD,
            preview.join(", "),
            suffix
        ));
    }
    if !stats.hubs_skipped.is_empty() {
        let mut uniq: Vec<&String> = stats.hubs_skipped.iter().collect();
        uniq.sort();
        uniq.dedup();
        let preview: Vec<String> = uniq.iter().take(6).map(|s| (*s).clone()).collect();
        let more = uniq.len().saturating_sub(preview.len());
        let suffix = if more > 0 { format!(", +{more} more") } else { String::new() };
        notes.push(format!(
            "{} hub symbol(s) skipped: {}{}",
            uniq.len(),
            preview.join(","),
            suffix
        ));
    }
    if stats.top_level_terminal > 0 {
        notes.push(format!("{} top-level terminal edge(s)", stats.top_level_terminal));
    }
    if stats.unresolved_symbols > 0 {
        notes.push(format!(
            "{} frontier symbol(s) unresolved (unsupported lang / indirect / orphan)",
            stats.unresolved_symbols
        ));
    }
    let suspicious = compute_suspicious_hops(edges);
    for s in &suspicious {
        notes.push(format!(
            "⚠ hop {}: {} edges, only {} share a directory with hop {} — likely cross-package name collision (try qualifying the target, e.g. `pkg::{}`)",
            s.hop,
            s.total_edges,
            s.related_edges,
            s.hop - 1,
            target
        ));
    }
    if !notes.is_empty() {
        let _ = writeln!(out, "\n── budget ──");
        for n in notes {
            let _ = writeln!(out, "{n}");
        }
    }
    let _ = writeln!(
        out,
        "\nStatic by-name call graph only. May miss indirect dispatch, reflection, macros, \
         and calls from files > 500KB or from languages without a tree-sitter call query."
    );

    let tokens = crate::types::estimate_tokens(out.len() as u64);
    let token_str = if tokens >= 1000 {
        format!("~{}.{}k", tokens / 1000, (tokens % 1000) / 100)
    } else {
        format!("~{tokens}")
    };
    let _ = write!(out, "\n({token_str} tokens)");
    out
}

/// Emit the BFS result as JSON (edge-list schema compatible with GraphWalks-style consumers).
fn format_bfs_json(
    target: &str,
    scope: &Path,
    edges: &[BfsEdge],
    stats: &BfsStats,
    max_depth: usize,
) -> String {
    let edges_json: Vec<serde_json::Value> = edges
        .iter()
        .map(|e| {
            let rel = e
                .from_file
                .strip_prefix(scope)
                .unwrap_or(&e.from_file)
                .display()
                .to_string();
            serde_json::json!({
                "hop": e.hop,
                "from": e.from,
                "from_file": rel,
                "from_line": e.from_line,
                "to": e.to,
                "call_text": e.call_text,
            })
        })
        .collect();

    let per_hop: Vec<serde_json::Value> = stats
        .per_hop
        .iter()
        .map(|h| {
            serde_json::json!({
                "hop": h.hop,
                "frontier_size": h.frontier_size,
                "edges": h.edges,
            })
        })
        .collect();

    let frontier_cuts: Vec<serde_json::Value> = stats
        .frontier_cut_hops
        .iter()
        .map(|(h, n)| serde_json::json!({"hop": h, "frontier_size_before_cut": n}))
        .collect();

    let mut hubs_sorted: Vec<String> = stats.hubs_skipped.clone();
    hubs_sorted.sort();
    hubs_sorted.dedup();

    let auto_hubs_json: Vec<serde_json::Value> = stats
        .auto_hubs_promoted
        .iter()
        .map(|(s, c)| serde_json::json!({"symbol": s, "edges": c}))
        .collect();

    let suspicious_json: Vec<serde_json::Value> = compute_suspicious_hops(edges)
        .into_iter()
        .map(|s| {
            serde_json::json!({
                "hop": s.hop,
                "total_edges": s.total_edges,
                "related_edges": s.related_edges,
                "reason": "name_collision",
            })
        })
        .collect();

    let payload = serde_json::json!({
        "root": target,
        "scope": scope.display().to_string(),
        "max_depth": max_depth,
        "depth_reached": stats.depth_reached,
        "edges_total": stats.edges_total,
        "elapsed_ms": stats.elapsed_ms,
        "edges": edges_json,
        "stats": {
            "per_hop": per_hop,
            "top_level_terminal": stats.top_level_terminal,
            "unresolved_symbols": stats.unresolved_symbols,
            "suspicious_hops": suspicious_json,
        },
        "elided": {
            "edges_cut_at_hop": stats.edges_cut_at_hop,
            "frontier_cuts": frontier_cuts,
            "hubs_skipped": hubs_sorted,
            "auto_hubs_promoted": auto_hubs_json,
            "auto_hub_threshold": AUTO_HUB_THRESHOLD,
        },
        "disclaimer": "Static by-name call graph only. May miss indirect dispatch, reflection, macros, and calls from files > 500KB or from languages without a tree-sitter call query.",
    });

    serde_json::to_string_pretty(&payload)
        .expect("serde_json::Value is always serializable")
}

#[cfg(test)]
mod suspicion_tests {
    use super::*;

    fn edge(hop: usize, file: &str, line: u32, to: &str) -> BfsEdge {
        BfsEdge {
            hop,
            from: "f".into(),
            from_file: PathBuf::from(file),
            from_line: line,
            to: to.into(),
            call_text: String::new(),
        }
    }

    #[test]
    fn ignores_small_hops() {
        // 10 edges, all in distant dirs — below SUSPICION_MIN_EDGES.
        let mut edges = vec![edge(1, "a/root.rs", 1, "x")];
        for i in 0..10 {
            edges.push(edge(2, &format!("z/far{i}.rs"), i, "y"));
        }
        assert!(compute_suspicious_hops(&edges).is_empty());
    }

    #[test]
    fn flags_cross_package_collision() {
        // hop 1 rooted in pkg/a, hop 2 has 60 matches in unrelated dirs.
        let mut edges = vec![edge(1, "pkg/a/root.go", 1, "New")];
        for i in 0..60 {
            edges.push(edge(2, &format!("vendor/errors/err{i}.go"), i, "New"));
        }
        let sus = compute_suspicious_hops(&edges);
        assert_eq!(sus.len(), 1);
        assert_eq!(sus[0].hop, 2);
        assert_eq!(sus[0].total_edges, 60);
        assert_eq!(sus[0].related_edges, 0);
    }

    #[test]
    fn no_flag_when_related_majority() {
        // hop 2 matches all live in same dir as hop 1 callers.
        let mut edges = vec![edge(1, "pkg/a/root.go", 1, "X")];
        for i in 0..60 {
            edges.push(edge(2, &format!("pkg/a/file{i}.go"), i, "Y"));
        }
        assert!(compute_suspicious_hops(&edges).is_empty());
    }

    #[test]
    fn threshold_boundary() {
        // 50 edges, 10 related = 20% = exactly at cutoff (1/5). Should NOT flag.
        let mut edges = vec![edge(1, "pkg/a/root.go", 1, "X")];
        for i in 0..10 {
            edges.push(edge(2, &format!("pkg/a/rel{i}.go"), i, "Y"));
        }
        for i in 0..40 {
            edges.push(edge(2, &format!("far/dir/far{i}.go"), i, "Y"));
        }
        // 10/50 = 0.2, 10*5 = 50 = 50*1 → not strictly less → no flag.
        assert!(compute_suspicious_hops(&edges).is_empty());
        // 9/50 → flag.
        let mut edges = vec![edge(1, "pkg/a/root.go", 1, "X")];
        for i in 0..9 {
            edges.push(edge(2, &format!("pkg/a/rel{i}.go"), i, "Y"));
        }
        for i in 0..41 {
            edges.push(edge(2, &format!("far/dir/far{i}.go"), i, "Y"));
        }
        assert_eq!(compute_suspicious_hops(&edges).len(), 1);
    }

    #[test]
    fn never_flags_hop_1() {
        let mut edges = Vec::new();
        for i in 0..100 {
            edges.push(edge(1, &format!("any/dir{i}/f.go"), i, "X"));
        }
        assert!(compute_suspicious_hops(&edges).is_empty());
    }
}