1use crate::filter::Filter;
2use crate::model::{Capture, Entry};
3use crate::redact::redact_query_value;
4use ahash::{AHashMap, AHashSet};
5use serde::Serialize;
6
/// Key names whose values are expected to change between otherwise equal
/// requests (timestamps, nonces, cache busters, request ids).
///
/// Every entry is lower-case; `is_volatile` lower-cases its input before
/// comparing it against this list.
const VOLATILE_KEYS: &[&str] = &[
    "timestamp", "ts", "nonce", "date", "cb", "cachebuster", "requestid", "request_id", "_",
];

/// Maximum number of sample values kept for each varying query key.
const SAMPLE_CAP: usize = 3;
19
20#[derive(Debug, Serialize)]
21pub struct DiffResult {
22 pub groups: Vec<DiffGroup>,
23}
24
25#[derive(Debug, Serialize)]
26pub struct DiffGroup {
27 pub host: String,
28 pub method: String,
29 pub norm_path: String,
30 pub count: usize,
31 pub varying_query: Vec<QueryVariance>,
32 pub varying_header_names: Vec<String>,
33 pub body_verdict: String,
34 pub entry_ids: Vec<String>,
35}
36
37#[derive(Debug, Serialize)]
38pub struct QueryVariance {
39 pub key: String,
40 pub samples: Vec<String>,
41}
42
43pub fn compute_diff(
45 cap: &Capture,
46 filter: &Filter,
47 top: usize,
48 unsafe_include: bool,
49) -> DiffResult {
50 let entries: Vec<&Entry> = cap.entries.iter().filter(|e| filter.matches(e)).collect();
51
52 let mut by_route: AHashMap<(String, String, String), Vec<&Entry>> = AHashMap::new();
53 for e in &entries {
54 by_route
55 .entry((
56 e.method.to_ascii_uppercase(),
57 e.host.clone(),
58 e.norm_path.clone(),
59 ))
60 .or_default()
61 .push(e);
62 }
63
64 let mut groups: Vec<DiffGroup> = Vec::new();
65 for ((method, host, norm_path), mut g) in by_route {
66 if g.len() < 2 {
67 continue;
68 }
69 g.sort_by_key(|a| a.index);
70
71 let varying_query = varying_query(&g, unsafe_include);
72 let varying_header_names = varying_header_names(&g);
73 let body_verdict = body_verdict(&g);
74
75 let has_variance = !varying_query.is_empty()
76 || !varying_header_names.is_empty()
77 || body_verdict == "volatile-only"
78 || body_verdict == "meaningful";
79 if !has_variance {
80 continue;
81 }
82
83 groups.push(DiffGroup {
84 host,
85 method,
86 norm_path,
87 count: g.len(),
88 varying_query,
89 varying_header_names,
90 body_verdict,
91 entry_ids: g.iter().map(|e| e.id.clone()).collect(),
92 });
93 }
94
95 groups.sort_by(|a, b| {
96 b.count
97 .cmp(&a.count)
98 .then(a.host.cmp(&b.host))
99 .then(a.norm_path.cmp(&b.norm_path))
100 });
101 groups.truncate(top);
102 DiffResult { groups }
103}
104
105fn varying_query(members: &[&Entry], unsafe_include: bool) -> Vec<QueryVariance> {
106 let all_keys: AHashSet<String> = members
107 .iter()
108 .flat_map(|e| e.query.iter().map(|(k, _)| k.clone()))
109 .collect();
110 let mut out = Vec::new();
111 for k in all_keys {
112 let mut values: Vec<String> = Vec::new();
113 let mut distinct: AHashSet<String> = AHashSet::new();
114 for e in members {
115 let v = e
116 .query
117 .iter()
118 .find(|(qk, _)| *qk == k)
119 .map(|(_, v)| v.clone())
120 .unwrap_or_default();
121 if distinct.insert(v.clone()) {
122 values.push(redact_query_value(&k, &v, unsafe_include));
123 }
124 }
125 if distinct.len() > 1 {
126 values.truncate(SAMPLE_CAP);
127 out.push(QueryVariance {
128 key: k,
129 samples: values,
130 });
131 }
132 }
133 out.sort_by(|a, b| a.key.cmp(&b.key));
134 out
135}
136
137fn varying_header_names(members: &[&Entry]) -> Vec<String> {
138 let all_names: AHashSet<String> = members
139 .iter()
140 .flat_map(|e| e.req_headers.iter().map(|(n, _)| n.to_ascii_lowercase()))
141 .collect();
142 let mut out = Vec::new();
143 for name in all_names {
144 let mut distinct: AHashSet<String> = AHashSet::new();
145 for e in members {
146 let v = e
147 .req_headers
148 .iter()
149 .find(|(n, _)| n.eq_ignore_ascii_case(&name))
150 .map(|(_, v)| v.clone())
151 .unwrap_or_default();
152 distinct.insert(v);
153 }
154 if distinct.len() > 1 {
155 out.push(name);
156 }
157 }
158 out.sort();
159 out
160}
161
162fn is_volatile(key: &str) -> bool {
163 let lk = key.to_ascii_lowercase();
164 VOLATILE_KEYS.iter().any(|v| lk == *v || lk.contains(v))
165}
166
167fn body_verdict(members: &[&Entry]) -> String {
168 let bodies: Vec<&String> = members
169 .iter()
170 .filter_map(|e| e.req_body.as_ref().filter(|b| !b.is_empty()))
171 .collect();
172 if bodies.len() < 2 {
173 return "none".to_string();
174 }
175 if bodies.iter().all(|b| *b == bodies[0]) {
176 return "identical".to_string();
177 }
178 let objs: Option<Vec<serde_json::Map<String, serde_json::Value>>> = bodies
180 .iter()
181 .map(|b| {
182 serde_json::from_str::<serde_json::Value>(b)
183 .ok()
184 .and_then(|v| v.as_object().cloned())
185 })
186 .collect();
187 if let Some(objs) = objs {
188 let mut keys: AHashSet<String> = AHashSet::new();
189 for o in &objs {
190 for k in o.keys() {
191 keys.insert(k.clone());
192 }
193 }
194 let mut differing: Vec<String> = Vec::new();
195 for k in &keys {
196 let mut distinct: AHashSet<String> = AHashSet::new();
197 for o in &objs {
198 distinct.insert(o.get(k).map(|v| v.to_string()).unwrap_or_default());
199 }
200 if distinct.len() > 1 {
201 differing.push(k.clone());
202 }
203 }
204 if differing.iter().all(|k| is_volatile(k)) {
205 return "volatile-only".to_string();
206 }
207 return "meaningful".to_string();
208 }
209 "meaningful".to_string()
210}
211
212pub fn render_diff_text(r: &DiffResult) -> String {
214 let mut out = String::new();
215 out.push_str("== wiretrail diff ==\n");
216 for g in &r.groups {
217 out.push_str(&format!(
218 "\n{} {}{} ({} calls, body: {})\n",
219 g.method, g.host, g.norm_path, g.count, g.body_verdict
220 ));
221 for q in &g.varying_query {
222 out.push_str(&format!(
223 " query {} varies: {}\n",
224 q.key,
225 q.samples.join(", ")
226 ));
227 }
228 if !g.varying_header_names.is_empty() {
229 out.push_str(&format!(
230 " headers vary: {}\n",
231 g.varying_header_names.join(", ")
232 ));
233 }
234 }
235 out
236}
237
#[cfg(test)]
mod tests {
    use super::compute_diff;
    use crate::filter::Filter;
    use crate::model::{Entry, sample_capture, sample_entry};

    /// Sample entry created with `sample_entry(index, "api.x", "POST", "/items", 200)`
    /// whose request body is set to `body`.
    fn post(index: usize, body: &str) -> Entry {
        let mut entry = sample_entry(index, "api.x", "POST", "/items", 200);
        entry.req_body = Some(body.to_string());
        entry
    }

    /// Runs `compute_diff` over `entries` with an empty filter, `top = 10`
    /// and redaction left enabled (`unsafe_include = false`).
    fn diff_of(entries: Vec<Entry>) -> super::DiffResult {
        let capture = sample_capture(entries);
        let filter = Filter::parse(&[]).unwrap();
        compute_diff(&capture, &filter, 10, false)
    }

    #[test]
    fn body_volatile_only() {
        let result = diff_of(vec![
            post(0, r#"{"name":"a","ts":1}"#),
            post(1, r#"{"name":"a","ts":2}"#),
        ]);
        let group = result
            .groups
            .iter()
            .find(|g| g.norm_path == "/items")
            .unwrap();
        assert_eq!(group.body_verdict, "volatile-only");
    }

    #[test]
    fn body_meaningful() {
        let result = diff_of(vec![post(0, r#"{"name":"a"}"#), post(1, r#"{"name":"b"}"#)]);
        let group = result
            .groups
            .iter()
            .find(|g| g.norm_path == "/items")
            .unwrap();
        assert_eq!(group.body_verdict, "meaningful");
    }

    #[test]
    fn varying_query_is_reported_and_redacted() {
        let mut first = sample_entry(0, "api.x", "GET", "/y", 200);
        first.query = vec![("page".into(), "1".into()), ("token".into(), "AAA".into())];
        let mut second = sample_entry(1, "api.x", "GET", "/y", 200);
        second.query = vec![("page".into(), "2".into()), ("token".into(), "BBB".into())];

        let result = diff_of(vec![first, second]);
        let group = result.groups.iter().find(|g| g.norm_path == "/y").unwrap();

        // The non-sensitive key is reported as varying.
        assert!(group.varying_query.iter().any(|q| q.key == "page"));

        // The token key is reported too, but its samples must be redacted.
        let token = group
            .varying_query
            .iter()
            .find(|q| q.key == "token")
            .unwrap();
        assert!(token.samples.iter().all(|s| s == "<redacted>"));
    }

    #[test]
    fn identical_group_is_skipped() {
        let result = diff_of(vec![post(0, r#"{"name":"a"}"#), post(1, r#"{"name":"a"}"#)]);
        assert!(result.groups.is_empty());
    }
}