Skip to main content

har/
grouping.rs

1use crate::fingerprint::fingerprint;
2use crate::model::Entry;
3use ahash::{AHashMap, AHashSet};
4
5/// Group entries by duplicate fingerprint. Each group's entries are sorted by
6/// (started_offset_ms, index). Groups are returned sorted by descending size,
7/// then fingerprint, for determinism.
8pub fn group_by_fingerprint<'a>(entries: &[&'a Entry]) -> Vec<(String, Vec<&'a Entry>)> {
9    let mut map: AHashMap<String, Vec<&'a Entry>> = AHashMap::new();
10    for e in entries {
11        map.entry(fingerprint(e)).or_default().push(e);
12    }
13    let mut groups: Vec<(String, Vec<&'a Entry>)> = map.into_iter().collect();
14    for (_, g) in groups.iter_mut() {
15        g.sort_by(|a, b| {
16            a.started_offset_ms
17                .partial_cmp(&b.started_offset_ms)
18                .unwrap_or(std::cmp::Ordering::Equal)
19                .then(a.index.cmp(&b.index))
20        });
21    }
22    groups.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then(a.0.cmp(&b.0)));
23    groups
24}
25
26/// A request whose status indicates a transient failure worth retrying.
27pub fn is_retry_trigger(e: &Entry) -> bool {
28    e.status == 0 || e.status == 429 || e.status_class() == 5
29}
30
31/// True if a time-ordered fingerprint group contains an attempt that follows a
32/// failed earlier attempt (i.e. the group exhibits retry behavior).
33pub fn group_has_retry(group: &[&Entry]) -> bool {
34    let mut seen_failure = false;
35    for e in group {
36        if seen_failure {
37            return true;
38        }
39        if is_retry_trigger(e) {
40            seen_failure = true;
41        }
42    }
43    false
44}
45
46/// IDs of entries classified as retries across all fingerprint groups.
47pub fn retry_entry_ids(entries: &[&Entry]) -> AHashSet<String> {
48    let mut out = AHashSet::new();
49    for (_, group) in group_by_fingerprint(entries) {
50        let mut seen_failure = false;
51        for e in &group {
52            if seen_failure {
53                out.insert(e.id.clone());
54            }
55            if is_retry_trigger(e) {
56                seen_failure = true;
57            }
58        }
59    }
60    out
61}
62
63/// Densest sliding window over entries pre-sorted by `started_offset_ms`.
64/// Returns `(count, left_idx, right_idx)` (inclusive) of the most populous
65/// window no wider than `window_ms`. Returns `(0, 0, 0)` for an empty slice.
66pub fn densest_window(entries: &[&Entry], window_ms: f64) -> (usize, usize, usize) {
67    if entries.is_empty() {
68        return (0, 0, 0);
69    }
70    let mut best = (1usize, 0usize, 0usize);
71    let mut l = 0usize;
72    for r in 0..entries.len() {
73        while entries[r].started_offset_ms - entries[l].started_offset_ms > window_ms {
74            l += 1;
75        }
76        let count = r - l + 1;
77        if count > best.0 {
78            best = (count, l, r);
79        }
80    }
81    best
82}
83
84#[cfg(test)]
85mod tests {
86    use super::{densest_window, group_by_fingerprint, group_has_retry, retry_entry_ids};
87    use crate::model::{Entry, sample_capture, sample_entry};
88
89    fn refs(cap: &crate::model::Capture) -> Vec<&Entry> {
90        cap.entries.iter().collect()
91    }
92
93    #[test]
94    fn groups_and_sorts_by_size() {
95        let cap = sample_capture(vec![
96            sample_entry(0, "h", "GET", "/a", 200),
97            sample_entry(1, "h", "GET", "/a", 200),
98            sample_entry(2, "h", "GET", "/b", 200),
99        ]);
100        let groups = group_by_fingerprint(&refs(&cap));
101        // /a group (2) sorts before /b group (1)
102        assert_eq!(groups[0].1.len(), 2);
103        assert_eq!(groups[1].1.len(), 1);
104    }
105
106    #[test]
107    fn retries_need_a_prior_failure() {
108        // three identical calls: 500, 500, 200 -> 2nd and 3rd are retries
109        let cap = sample_capture(vec![
110            sample_entry(0, "h", "POST", "/x", 500),
111            sample_entry(1, "h", "POST", "/x", 500),
112            sample_entry(2, "h", "POST", "/x", 200),
113        ]);
114        let ids = retry_entry_ids(&refs(&cap));
115        assert!(ids.contains("e000001"));
116        assert!(ids.contains("e000002"));
117        assert!(!ids.contains("e000000"));
118    }
119
120    #[test]
121    fn pure_duplicates_are_not_retries() {
122        // all 200 -> wasteful duplicates, no retries
123        let cap = sample_capture(vec![
124            sample_entry(0, "h", "POST", "/x", 200),
125            sample_entry(1, "h", "POST", "/x", 200),
126        ]);
127        assert!(retry_entry_ids(&refs(&cap)).is_empty());
128        let groups = group_by_fingerprint(&refs(&cap));
129        assert!(!group_has_retry(&groups[0].1));
130    }
131
132    fn off(index: usize, offset_ms: f64) -> Entry {
133        let mut e = sample_entry(index, "h", "GET", "/x", 200);
134        e.started_offset_ms = offset_ms;
135        e
136    }
137
138    #[test]
139    fn densest_window_finds_burst() {
140        // offsets 0,50,100,150,1000 with a 200ms window -> densest is the first 4
141        let cap = sample_capture(vec![
142            off(0, 0.0),
143            off(1, 50.0),
144            off(2, 100.0),
145            off(3, 150.0),
146            off(4, 1000.0),
147        ]);
148        let refs: Vec<&Entry> = cap.entries.iter().collect();
149        let (count, l, r) = densest_window(&refs, 200.0);
150        assert_eq!(count, 4);
151        assert_eq!((l, r), (0, 3));
152    }
153}