Skip to main content

har/analysis/
diff.rs

1use crate::filter::Filter;
2use crate::model::{Capture, Entry};
3use crate::redact::redact_query_value;
4use ahash::{AHashMap, AHashSet};
5use serde::Serialize;
6
/// Lower-case key names treated as "volatile" when classifying request-body
/// differences: values under these keys are expected to change from call to
/// call without the request itself being meaningfully different.
///
/// Consulted by `is_volatile`, which lower-cases the candidate key first, so
/// every entry here must already be lower-case.
const VOLATILE_KEYS: &[&str] = &[
    // Time-like values.
    "timestamp",
    "ts",
    "nonce",
    "date",
    // Cache busters.
    "cb",
    "cachebuster",
    // Per-request identifiers.
    "requestid",
    "request_id",
    // Bare underscore key.
    "_",
];
/// Maximum number of sample values kept per varying query key.
const SAMPLE_CAP: usize = 3;
19
20#[derive(Debug, Serialize)]
21pub struct DiffResult {
22    pub groups: Vec<DiffGroup>,
23}
24
25#[derive(Debug, Serialize)]
26pub struct DiffGroup {
27    pub host: String,
28    pub method: String,
29    pub norm_path: String,
30    pub count: usize,
31    pub varying_query: Vec<QueryVariance>,
32    pub varying_header_names: Vec<String>,
33    pub body_verdict: String,
34    pub entry_ids: Vec<String>,
35}
36
37#[derive(Debug, Serialize)]
38pub struct QueryVariance {
39    pub key: String,
40    pub samples: Vec<String>,
41}
42
43/// Show what varies across repeated calls to the same (method, host, norm_path).
44pub fn compute_diff(
45    cap: &Capture,
46    filter: &Filter,
47    top: usize,
48    unsafe_include: bool,
49) -> DiffResult {
50    let entries: Vec<&Entry> = cap.entries.iter().filter(|e| filter.matches(e)).collect();
51
52    let mut by_route: AHashMap<(String, String, String), Vec<&Entry>> = AHashMap::new();
53    for e in &entries {
54        by_route
55            .entry((
56                e.method.to_ascii_uppercase(),
57                e.host.clone(),
58                e.norm_path.clone(),
59            ))
60            .or_default()
61            .push(e);
62    }
63
64    let mut groups: Vec<DiffGroup> = Vec::new();
65    for ((method, host, norm_path), mut g) in by_route {
66        if g.len() < 2 {
67            continue;
68        }
69        g.sort_by_key(|a| a.index);
70
71        let varying_query = varying_query(&g, unsafe_include);
72        let varying_header_names = varying_header_names(&g);
73        let body_verdict = body_verdict(&g);
74
75        let has_variance = !varying_query.is_empty()
76            || !varying_header_names.is_empty()
77            || body_verdict == "volatile-only"
78            || body_verdict == "meaningful";
79        if !has_variance {
80            continue;
81        }
82
83        groups.push(DiffGroup {
84            host,
85            method,
86            norm_path,
87            count: g.len(),
88            varying_query,
89            varying_header_names,
90            body_verdict,
91            entry_ids: g.iter().map(|e| e.id.clone()).collect(),
92        });
93    }
94
95    groups.sort_by(|a, b| {
96        b.count
97            .cmp(&a.count)
98            .then(a.host.cmp(&b.host))
99            .then(a.norm_path.cmp(&b.norm_path))
100    });
101    groups.truncate(top);
102    DiffResult { groups }
103}
104
105fn varying_query(members: &[&Entry], unsafe_include: bool) -> Vec<QueryVariance> {
106    let all_keys: AHashSet<String> = members
107        .iter()
108        .flat_map(|e| e.query.iter().map(|(k, _)| k.clone()))
109        .collect();
110    let mut out = Vec::new();
111    for k in all_keys {
112        let mut values: Vec<String> = Vec::new();
113        let mut distinct: AHashSet<String> = AHashSet::new();
114        for e in members {
115            let v = e
116                .query
117                .iter()
118                .find(|(qk, _)| *qk == k)
119                .map(|(_, v)| v.clone())
120                .unwrap_or_default();
121            if distinct.insert(v.clone()) {
122                values.push(redact_query_value(&k, &v, unsafe_include));
123            }
124        }
125        if distinct.len() > 1 {
126            values.truncate(SAMPLE_CAP);
127            out.push(QueryVariance {
128                key: k,
129                samples: values,
130            });
131        }
132    }
133    out.sort_by(|a, b| a.key.cmp(&b.key));
134    out
135}
136
137fn varying_header_names(members: &[&Entry]) -> Vec<String> {
138    let all_names: AHashSet<String> = members
139        .iter()
140        .flat_map(|e| e.req_headers.iter().map(|(n, _)| n.to_ascii_lowercase()))
141        .collect();
142    let mut out = Vec::new();
143    for name in all_names {
144        let mut distinct: AHashSet<String> = AHashSet::new();
145        for e in members {
146            let v = e
147                .req_headers
148                .iter()
149                .find(|(n, _)| n.eq_ignore_ascii_case(&name))
150                .map(|(_, v)| v.clone())
151                .unwrap_or_default();
152            distinct.insert(v);
153        }
154        if distinct.len() > 1 {
155            out.push(name);
156        }
157    }
158    out.sort();
159    out
160}
161
162fn is_volatile(key: &str) -> bool {
163    let lk = key.to_ascii_lowercase();
164    VOLATILE_KEYS.iter().any(|v| lk == *v || lk.contains(v))
165}
166
167fn body_verdict(members: &[&Entry]) -> String {
168    let bodies: Vec<&String> = members
169        .iter()
170        .filter_map(|e| e.req_body.as_ref().filter(|b| !b.is_empty()))
171        .collect();
172    if bodies.len() < 2 {
173        return "none".to_string();
174    }
175    if bodies.iter().all(|b| *b == bodies[0]) {
176        return "identical".to_string();
177    }
178    // Try parsing every body as a JSON object.
179    let objs: Option<Vec<serde_json::Map<String, serde_json::Value>>> = bodies
180        .iter()
181        .map(|b| {
182            serde_json::from_str::<serde_json::Value>(b)
183                .ok()
184                .and_then(|v| v.as_object().cloned())
185        })
186        .collect();
187    if let Some(objs) = objs {
188        let mut keys: AHashSet<String> = AHashSet::new();
189        for o in &objs {
190            for k in o.keys() {
191                keys.insert(k.clone());
192            }
193        }
194        let mut differing: Vec<String> = Vec::new();
195        for k in &keys {
196            let mut distinct: AHashSet<String> = AHashSet::new();
197            for o in &objs {
198                distinct.insert(o.get(k).map(|v| v.to_string()).unwrap_or_default());
199            }
200            if distinct.len() > 1 {
201                differing.push(k.clone());
202            }
203        }
204        if differing.iter().all(|k| is_volatile(k)) {
205            return "volatile-only".to_string();
206        }
207        return "meaningful".to_string();
208    }
209    "meaningful".to_string()
210}
211
212/// Render diff groups as deterministic terminal text.
213pub fn render_diff_text(r: &DiffResult) -> String {
214    let mut out = String::new();
215    out.push_str("== wiretrail diff ==\n");
216    for g in &r.groups {
217        out.push_str(&format!(
218            "\n{} {}{}  ({} calls, body: {})\n",
219            g.method, g.host, g.norm_path, g.count, g.body_verdict
220        ));
221        for q in &g.varying_query {
222            out.push_str(&format!(
223                "  query {} varies: {}\n",
224                q.key,
225                q.samples.join(", ")
226            ));
227        }
228        if !g.varying_header_names.is_empty() {
229            out.push_str(&format!(
230                "  headers vary: {}\n",
231                g.varying_header_names.join(", ")
232            ));
233        }
234    }
235    out
236}
237
#[cfg(test)]
mod tests {
    use super::{DiffGroup, DiffResult, compute_diff};
    use crate::filter::Filter;
    use crate::model::{Entry, sample_capture, sample_entry};

    /// Build a `POST api.x/items` entry carrying `body` as its request body.
    fn post(index: usize, body: &str) -> Entry {
        let mut entry = sample_entry(index, "api.x", "POST", "/items", 200);
        entry.req_body = Some(body.to_string());
        entry
    }

    /// Diff `entries` with an empty filter, a limit of 10 groups and
    /// `unsafe_include` switched off.
    fn diff_all(entries: Vec<Entry>) -> DiffResult {
        let capture = sample_capture(entries);
        let everything = Filter::parse(&[]).unwrap();
        compute_diff(&capture, &everything, 10, false)
    }

    /// Fetch the reported group for `path`, panicking when it is missing.
    fn group_at<'a>(result: &'a DiffResult, path: &str) -> &'a DiffGroup {
        result
            .groups
            .iter()
            .find(|group| group.norm_path == path)
            .unwrap()
    }

    #[test]
    fn body_volatile_only() {
        let result = diff_all(vec![
            post(0, r#"{"name":"a","ts":1}"#),
            post(1, r#"{"name":"a","ts":2}"#),
        ]);
        assert_eq!(group_at(&result, "/items").body_verdict, "volatile-only");
    }

    #[test]
    fn body_meaningful() {
        let result = diff_all(vec![
            post(0, r#"{"name":"a"}"#),
            post(1, r#"{"name":"b"}"#),
        ]);
        assert_eq!(group_at(&result, "/items").body_verdict, "meaningful");
    }

    #[test]
    fn varying_query_is_reported_and_redacted() {
        let mut first = sample_entry(0, "api.x", "GET", "/y", 200);
        first.query = vec![("page".into(), "1".into()), ("token".into(), "AAA".into())];
        let mut second = sample_entry(1, "api.x", "GET", "/y", 200);
        second.query = vec![("page".into(), "2".into()), ("token".into(), "BBB".into())];

        let result = diff_all(vec![first, second]);
        let group = group_at(&result, "/y");
        assert!(group.varying_query.iter().any(|q| q.key == "page"));

        // The sensitive query value must never appear verbatim in the samples.
        let token = group
            .varying_query
            .iter()
            .find(|q| q.key == "token")
            .unwrap();
        assert!(token.samples.iter().all(|sample| sample == "<redacted>"));
    }

    #[test]
    fn identical_group_is_skipped() {
        let result = diff_all(vec![
            post(0, r#"{"name":"a"}"#),
            post(1, r#"{"name":"a"}"#),
        ]);
        // No variance -> not reported (duplicates covers it).
        assert!(result.groups.is_empty());
    }
}