Skip to main content

har/analysis/
pagination.rs

1use crate::filter::Filter;
2use crate::grouping::densest_window;
3use crate::model::{Capture, Entry};
4use ahash::{AHashMap, AHashSet};
5use serde::Serialize;
6
7const PAGE_KEYS: &[&str] = &[
8    "page",
9    "offset",
10    "cursor",
11    "page_token",
12    "after",
13    "before",
14    "start",
15    "limit",
16    "p",
17    "pagenumber",
18    "page_number",
19];
20
21#[derive(Debug, Serialize)]
22pub struct PaginationResult {
23    pub pages: Vec<PageSeq>,
24    pub nplus1: Vec<NPlusOne>,
25}
26
27#[derive(Debug, Serialize)]
28pub struct PageSeq {
29    pub host: String,
30    pub method: String,
31    pub norm_path: String,
32    pub param_keys: Vec<String>,
33    pub page_count: usize,
34    pub repeated_cursor: bool,
35    pub excessive: bool,
36    pub entry_ids: Vec<String>,
37}
38
39#[derive(Debug, Serialize)]
40pub struct NPlusOne {
41    pub host: String,
42    pub method: String,
43    pub norm_path: String,
44    pub fanout: usize,
45    pub parent_id: Option<String>,
46    pub first_offset_ms: f64,
47    pub last_offset_ms: f64,
48    pub entry_ids: Vec<String>,
49}
50
51fn is_id_bearing(np: &str) -> bool {
52    np.contains("{id}") || np.contains("{blob}")
53}
54
55/// Detect pagination sequences and N+1 fan-out clusters.
56pub fn compute_pagination(
57    cap: &Capture,
58    filter: &Filter,
59    max_pages: usize,
60    fanout_min: usize,
61    window_ms: u64,
62    top: usize,
63) -> PaginationResult {
64    let entries: Vec<&Entry> = cap.entries.iter().filter(|e| filter.matches(e)).collect();
65
66    let mut by_route: AHashMap<(String, String, String), Vec<&Entry>> = AHashMap::new();
67    for e in &entries {
68        by_route
69            .entry((
70                e.method.to_ascii_uppercase(),
71                e.host.clone(),
72                e.norm_path.clone(),
73            ))
74            .or_default()
75            .push(e);
76    }
77
78    let mut pages = Vec::new();
79    let mut nplus1 = Vec::new();
80
81    for ((method, host, np), mut group) in by_route {
82        group.sort_by(|a, b| {
83            a.started_offset_ms
84                .partial_cmp(&b.started_offset_ms)
85                .unwrap_or(std::cmp::Ordering::Equal)
86                .then(a.index.cmp(&b.index))
87        });
88
89        // --- pagination sequence ---
90        // A group of repeated calls to the same route that carry a pagination
91        // key, where nothing *other* than pagination keys varies. Covers both a
92        // normal page walk (cursor varies) and a loop (cursor repeats).
93        if group.len() >= 2 {
94            let varying = varying_query_keys(&group);
95            let non_page_varies = varying
96                .iter()
97                .any(|k| !PAGE_KEYS.contains(&k.to_ascii_lowercase().as_str()));
98            let mut page_keys_present: Vec<String> = page_keys_in(&group);
99            page_keys_present.sort();
100
101            if !page_keys_present.is_empty() && !non_page_varies {
102                let mut values: Vec<String> = Vec::new();
103                for e in &group {
104                    for (k, v) in &e.query {
105                        if page_keys_present.iter().any(|pk| pk == k) {
106                            values.push(v.clone());
107                        }
108                    }
109                }
110                let repeated_cursor = has_duplicate(&values);
111                pages.push(PageSeq {
112                    host: host.clone(),
113                    method: method.clone(),
114                    norm_path: np.clone(),
115                    param_keys: page_keys_present,
116                    page_count: group.len(),
117                    repeated_cursor,
118                    excessive: group.len() > max_pages,
119                    entry_ids: group.iter().map(|e| e.id.clone()).collect(),
120                });
121            }
122        }
123
124        // --- N+1 fan-out ---
125        if is_id_bearing(&np) && group.len() >= fanout_min {
126            let (count, l, r) = densest_window(&group, window_ms as f64);
127            if count >= fanout_min {
128                let win = &group[l..=r];
129                let first = win.first().unwrap().started_offset_ms;
130                let parent_id = parent_list_call(&entries, &host, first);
131                nplus1.push(NPlusOne {
132                    host: host.clone(),
133                    method: method.clone(),
134                    norm_path: np.clone(),
135                    fanout: count,
136                    parent_id,
137                    first_offset_ms: first,
138                    last_offset_ms: win.last().unwrap().started_offset_ms,
139                    entry_ids: win.iter().map(|e| e.id.clone()).collect(),
140                });
141            }
142        }
143    }
144
145    pages.sort_by(|a, b| {
146        b.page_count
147            .cmp(&a.page_count)
148            .then(a.norm_path.cmp(&b.norm_path))
149    });
150    nplus1.sort_by(|a, b| b.fanout.cmp(&a.fanout).then(a.norm_path.cmp(&b.norm_path)));
151    pages.truncate(top);
152    nplus1.truncate(top);
153    PaginationResult { pages, nplus1 }
154}
155
156/// Query keys whose value differs across the group (missing counts as a value).
157fn varying_query_keys(members: &[&Entry]) -> Vec<String> {
158    let all_keys: AHashSet<String> = members
159        .iter()
160        .flat_map(|e| e.query.iter().map(|(k, _)| k.clone()))
161        .collect();
162    let mut varying: Vec<String> = Vec::new();
163    for k in all_keys {
164        let mut vals: AHashSet<String> = AHashSet::new();
165        for e in members {
166            let v = e
167                .query
168                .iter()
169                .find(|(qk, _)| *qk == k)
170                .map(|(_, v)| v.clone())
171                .unwrap_or_default();
172            vals.insert(v);
173        }
174        if vals.len() > 1 {
175            varying.push(k);
176        }
177    }
178    varying.sort();
179    varying
180}
181
182/// Distinct pagination query keys present anywhere in the group.
183fn page_keys_in(members: &[&Entry]) -> Vec<String> {
184    let mut set: AHashSet<String> = AHashSet::new();
185    for e in members {
186        for (k, _) in &e.query {
187            if PAGE_KEYS.contains(&k.to_ascii_lowercase().as_str()) {
188                set.insert(k.clone());
189            }
190        }
191    }
192    set.into_iter().collect()
193}
194
195fn has_duplicate(values: &[String]) -> bool {
196    let mut seen: AHashSet<&String> = AHashSet::new();
197    for v in values {
198        if !seen.insert(v) {
199            return true;
200        }
201    }
202    false
203}
204
205/// The most recent non-id-bearing call to the same host before `before_offset`.
206fn parent_list_call(entries: &[&Entry], host: &str, before_offset: f64) -> Option<String> {
207    entries
208        .iter()
209        .filter(|e| {
210            e.host == host && e.started_offset_ms < before_offset && !is_id_bearing(&e.norm_path)
211        })
212        .max_by(|a, b| {
213            a.started_offset_ms
214                .partial_cmp(&b.started_offset_ms)
215                .unwrap_or(std::cmp::Ordering::Equal)
216        })
217        .map(|e| e.id.clone())
218}
219
220/// Render pagination/N+1 as deterministic terminal text.
221pub fn render_pagination_text(r: &PaginationResult) -> String {
222    let mut out = String::new();
223    out.push_str("== wiretrail pagination ==\n");
224    if !r.pages.is_empty() {
225        out.push_str("\npagination sequences:\n");
226        for p in &r.pages {
227            let mut tags = Vec::new();
228            if p.repeated_cursor {
229                tags.push("repeated-cursor");
230            }
231            if p.excessive {
232                tags.push("excessive");
233            }
234            let tagstr = if tags.is_empty() {
235                String::new()
236            } else {
237                format!(" [{}]", tags.join(", "))
238            };
239            out.push_str(&format!(
240                "  {} pages  {} {}{}  (by {}){}\n",
241                p.page_count,
242                p.method,
243                p.host,
244                p.norm_path,
245                p.param_keys.join(","),
246                tagstr
247            ));
248        }
249    }
250    if !r.nplus1.is_empty() {
251        out.push_str("\nN+1 fan-out:\n");
252        for n in &r.nplus1 {
253            let parent = n.parent_id.as_deref().unwrap_or("-");
254            out.push_str(&format!(
255                "  {}x  {} {}{}  (after {})\n",
256                n.fanout, n.method, n.host, n.norm_path, parent
257            ));
258        }
259    }
260    out
261}
262
263#[cfg(test)]
264mod tests {
265    use super::compute_pagination;
266    use crate::filter::Filter;
267    use crate::model::{Entry, sample_capture, sample_entry};
268
269    fn page(index: usize, page: &str) -> Entry {
270        let mut e = sample_entry(index, "api.x", "GET", "/items", 200);
271        e.query = vec![("page".to_string(), page.to_string())];
272        e.started_offset_ms = index as f64 * 10.0;
273        e
274    }
275
276    #[test]
277    fn detects_pagination_sequence() {
278        let cap = sample_capture(vec![page(0, "1"), page(1, "2"), page(2, "3")]);
279        let r = compute_pagination(&cap, &Filter::parse(&[]).unwrap(), 20, 5, 2000, 10);
280        assert_eq!(r.pages.len(), 1);
281        let p = &r.pages[0];
282        assert_eq!(p.page_count, 3);
283        assert_eq!(p.param_keys, vec!["page".to_string()]);
284        assert!(!p.repeated_cursor);
285        assert!(!p.excessive);
286    }
287
288    #[test]
289    fn flags_repeated_cursor() {
290        let cap = sample_capture(vec![page(0, "abc"), page(1, "abc")]);
291        let r = compute_pagination(&cap, &Filter::parse(&[]).unwrap(), 20, 5, 2000, 10);
292        assert!(r.pages[0].repeated_cursor);
293    }
294
295    #[test]
296    fn detects_nplus1_fanout() {
297        // one list call, then 5 detail calls to an {id} endpoint within window
298        let mut es = Vec::new();
299        let mut list = sample_entry(0, "api.x", "GET", "/items", 200);
300        list.started_offset_ms = 0.0;
301        es.push(list);
302        for i in 1..=5 {
303            let mut e = sample_entry(i, "api.x", "GET", "/items/{id}", 200);
304            e.started_offset_ms = i as f64 * 20.0;
305            es.push(e);
306        }
307        let r = compute_pagination(
308            &sample_capture(es),
309            &Filter::parse(&[]).unwrap(),
310            20,
311            5,
312            2000,
313            10,
314        );
315        assert_eq!(r.nplus1.len(), 1);
316        let n = &r.nplus1[0];
317        assert_eq!(n.fanout, 5);
318        assert_eq!(n.norm_path, "/items/{id}");
319        assert_eq!(n.parent_id.as_deref(), Some("e000000"));
320    }
321}