1use crate::filter::Filter;
2use crate::grouping::densest_window;
3use crate::model::{Capture, Entry};
4use ahash::{AHashMap, AHashSet};
5use serde::Serialize;
6
7const PAGE_KEYS: &[&str] = &[
8 "page",
9 "offset",
10 "cursor",
11 "page_token",
12 "after",
13 "before",
14 "start",
15 "limit",
16 "p",
17 "pagenumber",
18 "page_number",
19];
20
21#[derive(Debug, Serialize)]
22pub struct PaginationResult {
23 pub pages: Vec<PageSeq>,
24 pub nplus1: Vec<NPlusOne>,
25}
26
27#[derive(Debug, Serialize)]
28pub struct PageSeq {
29 pub host: String,
30 pub method: String,
31 pub norm_path: String,
32 pub param_keys: Vec<String>,
33 pub page_count: usize,
34 pub repeated_cursor: bool,
35 pub excessive: bool,
36 pub entry_ids: Vec<String>,
37}
38
39#[derive(Debug, Serialize)]
40pub struct NPlusOne {
41 pub host: String,
42 pub method: String,
43 pub norm_path: String,
44 pub fanout: usize,
45 pub parent_id: Option<String>,
46 pub first_offset_ms: f64,
47 pub last_offset_ms: f64,
48 pub entry_ids: Vec<String>,
49}
50
51fn is_id_bearing(np: &str) -> bool {
52 np.contains("{id}") || np.contains("{blob}")
53}
54
55pub fn compute_pagination(
57 cap: &Capture,
58 filter: &Filter,
59 max_pages: usize,
60 fanout_min: usize,
61 window_ms: u64,
62 top: usize,
63) -> PaginationResult {
64 let entries: Vec<&Entry> = cap.entries.iter().filter(|e| filter.matches(e)).collect();
65
66 let mut by_route: AHashMap<(String, String, String), Vec<&Entry>> = AHashMap::new();
67 for e in &entries {
68 by_route
69 .entry((
70 e.method.to_ascii_uppercase(),
71 e.host.clone(),
72 e.norm_path.clone(),
73 ))
74 .or_default()
75 .push(e);
76 }
77
78 let mut pages = Vec::new();
79 let mut nplus1 = Vec::new();
80
81 for ((method, host, np), mut group) in by_route {
82 group.sort_by(|a, b| {
83 a.started_offset_ms
84 .partial_cmp(&b.started_offset_ms)
85 .unwrap_or(std::cmp::Ordering::Equal)
86 .then(a.index.cmp(&b.index))
87 });
88
89 if group.len() >= 2 {
94 let varying = varying_query_keys(&group);
95 let non_page_varies = varying
96 .iter()
97 .any(|k| !PAGE_KEYS.contains(&k.to_ascii_lowercase().as_str()));
98 let mut page_keys_present: Vec<String> = page_keys_in(&group);
99 page_keys_present.sort();
100
101 if !page_keys_present.is_empty() && !non_page_varies {
102 let mut values: Vec<String> = Vec::new();
103 for e in &group {
104 for (k, v) in &e.query {
105 if page_keys_present.iter().any(|pk| pk == k) {
106 values.push(v.clone());
107 }
108 }
109 }
110 let repeated_cursor = has_duplicate(&values);
111 pages.push(PageSeq {
112 host: host.clone(),
113 method: method.clone(),
114 norm_path: np.clone(),
115 param_keys: page_keys_present,
116 page_count: group.len(),
117 repeated_cursor,
118 excessive: group.len() > max_pages,
119 entry_ids: group.iter().map(|e| e.id.clone()).collect(),
120 });
121 }
122 }
123
124 if is_id_bearing(&np) && group.len() >= fanout_min {
126 let (count, l, r) = densest_window(&group, window_ms as f64);
127 if count >= fanout_min {
128 let win = &group[l..=r];
129 let first = win.first().unwrap().started_offset_ms;
130 let parent_id = parent_list_call(&entries, &host, first);
131 nplus1.push(NPlusOne {
132 host: host.clone(),
133 method: method.clone(),
134 norm_path: np.clone(),
135 fanout: count,
136 parent_id,
137 first_offset_ms: first,
138 last_offset_ms: win.last().unwrap().started_offset_ms,
139 entry_ids: win.iter().map(|e| e.id.clone()).collect(),
140 });
141 }
142 }
143 }
144
145 pages.sort_by(|a, b| {
146 b.page_count
147 .cmp(&a.page_count)
148 .then(a.norm_path.cmp(&b.norm_path))
149 });
150 nplus1.sort_by(|a, b| b.fanout.cmp(&a.fanout).then(a.norm_path.cmp(&b.norm_path)));
151 pages.truncate(top);
152 nplus1.truncate(top);
153 PaginationResult { pages, nplus1 }
154}
155
156fn varying_query_keys(members: &[&Entry]) -> Vec<String> {
158 let all_keys: AHashSet<String> = members
159 .iter()
160 .flat_map(|e| e.query.iter().map(|(k, _)| k.clone()))
161 .collect();
162 let mut varying: Vec<String> = Vec::new();
163 for k in all_keys {
164 let mut vals: AHashSet<String> = AHashSet::new();
165 for e in members {
166 let v = e
167 .query
168 .iter()
169 .find(|(qk, _)| *qk == k)
170 .map(|(_, v)| v.clone())
171 .unwrap_or_default();
172 vals.insert(v);
173 }
174 if vals.len() > 1 {
175 varying.push(k);
176 }
177 }
178 varying.sort();
179 varying
180}
181
182fn page_keys_in(members: &[&Entry]) -> Vec<String> {
184 let mut set: AHashSet<String> = AHashSet::new();
185 for e in members {
186 for (k, _) in &e.query {
187 if PAGE_KEYS.contains(&k.to_ascii_lowercase().as_str()) {
188 set.insert(k.clone());
189 }
190 }
191 }
192 set.into_iter().collect()
193}
194
195fn has_duplicate(values: &[String]) -> bool {
196 let mut seen: AHashSet<&String> = AHashSet::new();
197 for v in values {
198 if !seen.insert(v) {
199 return true;
200 }
201 }
202 false
203}
204
205fn parent_list_call(entries: &[&Entry], host: &str, before_offset: f64) -> Option<String> {
207 entries
208 .iter()
209 .filter(|e| {
210 e.host == host && e.started_offset_ms < before_offset && !is_id_bearing(&e.norm_path)
211 })
212 .max_by(|a, b| {
213 a.started_offset_ms
214 .partial_cmp(&b.started_offset_ms)
215 .unwrap_or(std::cmp::Ordering::Equal)
216 })
217 .map(|e| e.id.clone())
218}
219
220pub fn render_pagination_text(r: &PaginationResult) -> String {
222 let mut out = String::new();
223 out.push_str("== wiretrail pagination ==\n");
224 if !r.pages.is_empty() {
225 out.push_str("\npagination sequences:\n");
226 for p in &r.pages {
227 let mut tags = Vec::new();
228 if p.repeated_cursor {
229 tags.push("repeated-cursor");
230 }
231 if p.excessive {
232 tags.push("excessive");
233 }
234 let tagstr = if tags.is_empty() {
235 String::new()
236 } else {
237 format!(" [{}]", tags.join(", "))
238 };
239 out.push_str(&format!(
240 " {} pages {} {}{} (by {}){}\n",
241 p.page_count,
242 p.method,
243 p.host,
244 p.norm_path,
245 p.param_keys.join(","),
246 tagstr
247 ));
248 }
249 }
250 if !r.nplus1.is_empty() {
251 out.push_str("\nN+1 fan-out:\n");
252 for n in &r.nplus1 {
253 let parent = n.parent_id.as_deref().unwrap_or("-");
254 out.push_str(&format!(
255 " {}x {} {}{} (after {})\n",
256 n.fanout, n.method, n.host, n.norm_path, parent
257 ));
258 }
259 }
260 out
261}
262
263#[cfg(test)]
264mod tests {
265 use super::compute_pagination;
266 use crate::filter::Filter;
267 use crate::model::{Entry, sample_capture, sample_entry};
268
269 fn page(index: usize, page: &str) -> Entry {
270 let mut e = sample_entry(index, "api.x", "GET", "/items", 200);
271 e.query = vec![("page".to_string(), page.to_string())];
272 e.started_offset_ms = index as f64 * 10.0;
273 e
274 }
275
276 #[test]
277 fn detects_pagination_sequence() {
278 let cap = sample_capture(vec![page(0, "1"), page(1, "2"), page(2, "3")]);
279 let r = compute_pagination(&cap, &Filter::parse(&[]).unwrap(), 20, 5, 2000, 10);
280 assert_eq!(r.pages.len(), 1);
281 let p = &r.pages[0];
282 assert_eq!(p.page_count, 3);
283 assert_eq!(p.param_keys, vec!["page".to_string()]);
284 assert!(!p.repeated_cursor);
285 assert!(!p.excessive);
286 }
287
288 #[test]
289 fn flags_repeated_cursor() {
290 let cap = sample_capture(vec![page(0, "abc"), page(1, "abc")]);
291 let r = compute_pagination(&cap, &Filter::parse(&[]).unwrap(), 20, 5, 2000, 10);
292 assert!(r.pages[0].repeated_cursor);
293 }
294
295 #[test]
296 fn detects_nplus1_fanout() {
297 let mut es = Vec::new();
299 let mut list = sample_entry(0, "api.x", "GET", "/items", 200);
300 list.started_offset_ms = 0.0;
301 es.push(list);
302 for i in 1..=5 {
303 let mut e = sample_entry(i, "api.x", "GET", "/items/{id}", 200);
304 e.started_offset_ms = i as f64 * 20.0;
305 es.push(e);
306 }
307 let r = compute_pagination(
308 &sample_capture(es),
309 &Filter::parse(&[]).unwrap(),
310 20,
311 5,
312 2000,
313 10,
314 );
315 assert_eq!(r.nplus1.len(), 1);
316 let n = &r.nplus1[0];
317 assert_eq!(n.fanout, 5);
318 assert_eq!(n.norm_path, "/items/{id}");
319 assert_eq!(n.parent_id.as_deref(), Some("e000000"));
320 }
321}