har/analysis/
duplicates.rs1use crate::filter::Filter;
2use crate::grouping::{group_by_fingerprint, group_has_retry};
3use crate::model::Capture;
4use serde::Serialize;
5use std::collections::BTreeMap;
6
7#[derive(Debug, Serialize)]
8pub struct DuplicatesResult {
9 pub groups: Vec<DuplicateGroup>,
10}
11
12#[derive(Debug, Serialize)]
13pub struct DuplicateGroup {
14 pub fingerprint: String,
15 pub count: usize,
16 pub method: String,
17 pub host: String,
18 pub norm_path: String,
19 pub statuses: BTreeMap<String, usize>,
20 pub entry_ids: Vec<String>,
21 pub first_offset_ms: f64,
22 pub last_offset_ms: f64,
23 pub is_retry_pattern: bool,
24}
25
26pub fn compute_duplicates(cap: &Capture, filter: &Filter, top: usize) -> DuplicatesResult {
28 let entries: Vec<&crate::model::Entry> =
29 cap.entries.iter().filter(|e| filter.matches(e)).collect();
30
31 let mut groups: Vec<DuplicateGroup> = group_by_fingerprint(&entries)
32 .into_iter()
33 .filter(|(_, g)| g.len() >= 2)
34 .map(|(fp, g)| {
35 let mut statuses: BTreeMap<String, usize> = BTreeMap::new();
36 for e in &g {
37 *statuses.entry(e.status.to_string()).or_default() += 1;
38 }
39 DuplicateGroup {
40 fingerprint: fp,
41 count: g.len(),
42 method: g[0].method.to_ascii_uppercase(),
43 host: g[0].host.clone(),
44 norm_path: g[0].norm_path.clone(),
45 statuses,
46 entry_ids: g.iter().map(|e| e.id.clone()).collect(),
47 first_offset_ms: g.first().map(|e| e.started_offset_ms).unwrap_or(0.0),
48 last_offset_ms: g.last().map(|e| e.started_offset_ms).unwrap_or(0.0),
49 is_retry_pattern: group_has_retry(&g),
50 }
51 })
52 .collect();
53
54 groups.sort_by(|a, b| {
55 b.count
56 .cmp(&a.count)
57 .then(a.fingerprint.cmp(&b.fingerprint))
58 });
59 groups.truncate(top);
60 DuplicatesResult { groups }
61}
62
63pub fn render_duplicates_text(r: &DuplicatesResult) -> String {
65 let mut out = String::new();
66 out.push_str("== wiretrail duplicates ==\n");
67 for g in &r.groups {
68 let tag = if g.is_retry_pattern {
69 " [retry pattern]"
70 } else {
71 ""
72 };
73 out.push_str(&format!(
74 "\n{:>4}x{} {} {}{}\n",
75 g.count, tag, g.method, g.host, g.norm_path
76 ));
77 let statuses: Vec<String> = g.statuses.iter().map(|(s, c)| format!("{s}:{c}")).collect();
78 out.push_str(&format!(" statuses: {}\n", statuses.join(" ")));
79 out.push_str(&format!(" entries: {}\n", g.entry_ids.join(", ")));
80 }
81 out
82}
83
#[cfg(test)]
mod tests {
    use super::compute_duplicates;
    use crate::filter::Filter;
    use crate::model::{sample_capture, sample_entry};

    /// Fixture: three identical `POST /resolve` requests plus one `GET /once`
    /// that has no repeat.
    fn repeated_resolve_capture() -> crate::model::Capture {
        sample_capture(vec![
            sample_entry(0, "h", "POST", "/resolve", 200),
            sample_entry(1, "h", "POST", "/resolve", 200),
            sample_entry(2, "h", "POST", "/resolve", 200),
            sample_entry(3, "h", "GET", "/once", 200),
        ])
    }

    #[test]
    fn reports_only_groups_with_repeats() {
        let filter = Filter::parse(&[]).unwrap();
        let result = compute_duplicates(&repeated_resolve_capture(), &filter, 10);

        // The single `GET /once` request must not show up as a group.
        assert_eq!(result.groups.len(), 1);

        let group = &result.groups[0];
        assert_eq!(group.count, 3);
        assert_eq!(group.method, "POST");
        assert_eq!(group.norm_path, "/resolve");
        assert_eq!(group.entry_ids, vec!["e000000", "e000001", "e000002"]);
        assert!(!group.is_retry_pattern);
    }

    #[test]
    fn flags_retry_pattern() {
        // Same request twice: first answered with 500, then with 200.
        let capture = sample_capture(vec![
            sample_entry(0, "h", "POST", "/x", 500),
            sample_entry(1, "h", "POST", "/x", 200),
        ]);
        let filter = Filter::parse(&[]).unwrap();
        let result = compute_duplicates(&capture, &filter, 10);

        assert!(result.groups[0].is_retry_pattern);
    }
}