Skip to main content

har/analysis/
duplicates.rs

1use crate::filter::Filter;
2use crate::grouping::{group_by_fingerprint, group_has_retry};
3use crate::model::Capture;
4use serde::Serialize;
5use std::collections::BTreeMap;
6
7#[derive(Debug, Serialize)]
8pub struct DuplicatesResult {
9    pub groups: Vec<DuplicateGroup>,
10}
11
12#[derive(Debug, Serialize)]
13pub struct DuplicateGroup {
14    pub fingerprint: String,
15    pub count: usize,
16    pub method: String,
17    pub host: String,
18    pub norm_path: String,
19    pub statuses: BTreeMap<String, usize>,
20    pub entry_ids: Vec<String>,
21    pub first_offset_ms: f64,
22    pub last_offset_ms: f64,
23    pub is_retry_pattern: bool,
24}
25
26/// Group repeated requests (count >= 2) by fingerprint. `top` bounds the list.
27pub fn compute_duplicates(cap: &Capture, filter: &Filter, top: usize) -> DuplicatesResult {
28    let entries: Vec<&crate::model::Entry> =
29        cap.entries.iter().filter(|e| filter.matches(e)).collect();
30
31    let mut groups: Vec<DuplicateGroup> = group_by_fingerprint(&entries)
32        .into_iter()
33        .filter(|(_, g)| g.len() >= 2)
34        .map(|(fp, g)| {
35            let mut statuses: BTreeMap<String, usize> = BTreeMap::new();
36            for e in &g {
37                *statuses.entry(e.status.to_string()).or_default() += 1;
38            }
39            DuplicateGroup {
40                fingerprint: fp,
41                count: g.len(),
42                method: g[0].method.to_ascii_uppercase(),
43                host: g[0].host.clone(),
44                norm_path: g[0].norm_path.clone(),
45                statuses,
46                entry_ids: g.iter().map(|e| e.id.clone()).collect(),
47                first_offset_ms: g.first().map(|e| e.started_offset_ms).unwrap_or(0.0),
48                last_offset_ms: g.last().map(|e| e.started_offset_ms).unwrap_or(0.0),
49                is_retry_pattern: group_has_retry(&g),
50            }
51        })
52        .collect();
53
54    groups.sort_by(|a, b| {
55        b.count
56            .cmp(&a.count)
57            .then(a.fingerprint.cmp(&b.fingerprint))
58    });
59    groups.truncate(top);
60    DuplicatesResult { groups }
61}
62
63/// Render duplicates as deterministic terminal text.
64pub fn render_duplicates_text(r: &DuplicatesResult) -> String {
65    let mut out = String::new();
66    out.push_str("== wiretrail duplicates ==\n");
67    for g in &r.groups {
68        let tag = if g.is_retry_pattern {
69            " [retry pattern]"
70        } else {
71            ""
72        };
73        out.push_str(&format!(
74            "\n{:>4}x{}  {} {}{}\n",
75            g.count, tag, g.method, g.host, g.norm_path
76        ));
77        let statuses: Vec<String> = g.statuses.iter().map(|(s, c)| format!("{s}:{c}")).collect();
78        out.push_str(&format!("  statuses: {}\n", statuses.join(" ")));
79        out.push_str(&format!("  entries: {}\n", g.entry_ids.join(", ")));
80    }
81    out
82}
83
#[cfg(test)]
mod tests {
    use super::compute_duplicates;
    use crate::filter::Filter;
    use crate::model::{sample_capture, sample_entry, Capture};

    /// Filter parsed from an empty argument list; shared by every test here.
    fn empty_filter() -> Filter {
        Filter::parse(&[]).unwrap()
    }

    /// Three identical `POST /resolve` calls plus a single `GET /once`.
    fn resolve_capture() -> Capture {
        sample_capture(vec![
            sample_entry(0, "h", "POST", "/resolve", 200),
            sample_entry(1, "h", "POST", "/resolve", 200),
            sample_entry(2, "h", "POST", "/resolve", 200),
            // Appears only once, so it must not show up as a duplicate.
            sample_entry(3, "h", "GET", "/once", 200),
        ])
    }

    #[test]
    fn reports_only_groups_with_repeats() {
        let result = compute_duplicates(&resolve_capture(), &empty_filter(), 10);
        assert_eq!(result.groups.len(), 1);

        let group = &result.groups[0];
        assert_eq!(group.count, 3);
        assert_eq!(group.method, "POST");
        assert_eq!(group.norm_path, "/resolve");
        assert_eq!(group.entry_ids, vec!["e000000", "e000001", "e000002"]);
        // Every response was a 200, so this is not flagged as a retry.
        assert!(!group.is_retry_pattern);
    }

    #[test]
    fn flags_retry_pattern() {
        // A 500 followed by a 200 on the same request.
        let failing_then_ok = sample_capture(vec![
            sample_entry(0, "h", "POST", "/x", 500),
            sample_entry(1, "h", "POST", "/x", 200),
        ]);
        let result = compute_duplicates(&failing_then_ok, &empty_filter(), 10);
        assert!(result.groups[0].is_retry_pattern);
    }
}