Skip to main content

har/analysis/
compare.rs

1use crate::filter::Filter;
2use crate::model::{Capture, Entry};
3use crate::stats::percentiles;
4use ahash::{AHashMap, AHashSet};
5use serde::Serialize;
6
/// Outcome of diffing a new capture against a baseline, as built by `compute_compare`.
///
/// Every list is sorted and then cut to the caller's `top` limit before it is returned.
#[derive(Debug, Serialize)]
pub struct CompareResult {
    /// Hosts present in the new capture but absent from the baseline.
    pub new_hosts: Vec<String>,
    /// Hosts present in the baseline but absent from the new capture.
    pub removed_hosts: Vec<String>,
    /// Endpoint keys (`METHOD host+path`) seen only in the new capture.
    pub new_endpoints: Vec<String>,
    /// Endpoint keys (`METHOD host+path`) seen only in the baseline.
    pub removed_endpoints: Vec<String>,
    /// Endpoints with 4xx/5xx responses in the new capture and none in the baseline.
    pub new_errors: Vec<EndpointDelta>,
    /// Endpoints whose median duration grew past the regression thresholds.
    pub latency_regressions: Vec<LatencyDelta>,
    /// Endpoints whose median response size more than doubled.
    pub payload_growth: Vec<SizeDelta>,
    /// Highest severity label among the findings: `"critical"`, `"high"`, `"medium"`,
    /// `"low"`, or `"none"` when there are no findings.
    pub max_severity: String,
}
18
/// An endpoint that returns 4xx/5xx responses in the new capture but had none in the baseline.
#[derive(Debug, Serialize)]
pub struct EndpointDelta {
    /// Endpoint key in the form `METHOD host+path`.
    pub endpoint: String,
    /// Numerically largest error status recorded for this endpoint in the new capture.
    pub status: i64,
    /// Number of 4xx/5xx responses recorded for this endpoint in the new capture.
    pub count: usize,
    /// `"high"` when `status` is a 5xx code, otherwise `"medium"`.
    pub severity: String,
}
26
/// An endpoint whose median (p50) duration regressed relative to the baseline.
#[derive(Debug, Serialize)]
pub struct LatencyDelta {
    /// Endpoint key in the form `METHOD host+path`.
    pub endpoint: String,
    /// p50 of the endpoint's durations in the baseline capture, in milliseconds.
    pub base_p50_ms: f64,
    /// p50 of the endpoint's durations in the new capture, in milliseconds.
    pub new_p50_ms: f64,
    /// Severity label; `compute_compare` always assigns `"medium"` to latency regressions.
    pub severity: String,
}
34
/// An endpoint whose median (p50) response size grew relative to the baseline.
#[derive(Debug, Serialize)]
pub struct SizeDelta {
    /// Endpoint key in the form `METHOD host+path`.
    pub endpoint: String,
    /// p50 response size in the baseline capture, cast from `f64` to whole bytes.
    pub base_bytes: i64,
    /// p50 response size in the new capture, cast from `f64` to whole bytes.
    pub new_bytes: i64,
    /// Severity label; `compute_compare` always assigns `"low"` to payload growth.
    pub severity: String,
}
42
/// Severity ordering shared with the CLI `--fail-on` gate.
///
/// Maps a severity label to its numeric rank: `"medium"` is 1, `"high"` is 2 and
/// `"critical"` is 3. Any other string (including `"low"` and `"none"`) ranks 0.
/// Matching is exact and case-sensitive.
pub fn sev_rank(s: &str) -> u8 {
    // Labels that carry a non-zero rank; everything else falls through to 0.
    const RANKED: [(&str, u8); 3] = [("critical", 3), ("high", 2), ("medium", 1)];

    RANKED
        .iter()
        .find(|(label, _)| *label == s)
        .map_or(0, |&(_, rank)| rank)
}
52
/// Per-endpoint accumulator filled by `aggregate`, one sample per matching entry.
#[derive(Default)]
struct Agg {
    /// `duration_ms` of every entry recorded for the endpoint.
    durations: Vec<f64>,
    /// Response size of every entry: the larger of `sizes.resp_content` and
    /// `sizes.resp_body`, floored at 0.
    bytes: Vec<f64>,
    /// Status of every entry whose `status_class()` is 4 or 5.
    error_statuses: Vec<i64>,
}
59
60fn endpoint_key(e: &Entry) -> String {
61    format!(
62        "{} {}{}",
63        e.method.to_ascii_uppercase(),
64        e.host,
65        e.norm_path
66    )
67}
68
69fn aggregate(cap: &Capture, filter: &Filter) -> (AHashSet<String>, AHashMap<String, Agg>) {
70    let mut hosts = AHashSet::new();
71    let mut map: AHashMap<String, Agg> = AHashMap::new();
72    for e in cap.entries.iter().filter(|e| filter.matches(e)) {
73        hosts.insert(e.host.clone());
74        let a = map.entry(endpoint_key(e)).or_default();
75        a.durations.push(e.duration_ms);
76        a.bytes
77            .push(e.sizes.resp_content.max(e.sizes.resp_body).max(0) as f64);
78        let cls = e.status_class();
79        if cls == 4 || cls == 5 {
80            a.error_statuses.push(e.status);
81        }
82    }
83    (hosts, map)
84}
85
86/// Diff a new capture against a baseline; severity-score the regressions.
87pub fn compute_compare(
88    new: &Capture,
89    base: &Capture,
90    filter: &Filter,
91    top: usize,
92) -> CompareResult {
93    let (new_hosts_set, new_map) = aggregate(new, filter);
94    let (base_hosts_set, base_map) = aggregate(base, filter);
95
96    let mut new_hosts: Vec<String> = new_hosts_set.difference(&base_hosts_set).cloned().collect();
97    let mut removed_hosts: Vec<String> =
98        base_hosts_set.difference(&new_hosts_set).cloned().collect();
99
100    let new_keys: AHashSet<&String> = new_map.keys().collect();
101    let base_keys: AHashSet<&String> = base_map.keys().collect();
102    let mut new_endpoints: Vec<String> = new_keys
103        .difference(&base_keys)
104        .map(|s| (*s).clone())
105        .collect();
106    let mut removed_endpoints: Vec<String> = base_keys
107        .difference(&new_keys)
108        .map(|s| (*s).clone())
109        .collect();
110
111    let mut new_errors = Vec::new();
112    let mut latency_regressions = Vec::new();
113    let mut payload_growth = Vec::new();
114
115    for (ep, a) in &new_map {
116        // new errors: 4xx/5xx present in new but not in baseline for this endpoint
117        if !a.error_statuses.is_empty() {
118            let base_had = base_map
119                .get(ep)
120                .map(|b| !b.error_statuses.is_empty())
121                .unwrap_or(false);
122            if !base_had {
123                let worst = *a.error_statuses.iter().max().unwrap();
124                let severity = if worst / 100 == 5 { "high" } else { "medium" };
125                new_errors.push(EndpointDelta {
126                    endpoint: ep.clone(),
127                    status: worst,
128                    count: a.error_statuses.len(),
129                    severity: severity.into(),
130                });
131            }
132        }
133
134        if let Some(b) = base_map.get(ep) {
135            let np = percentiles(&a.durations).p50;
136            let bp = percentiles(&b.durations).p50;
137            if bp > 0.0 && np > bp * 2.0 && (np - bp) > 200.0 {
138                latency_regressions.push(LatencyDelta {
139                    endpoint: ep.clone(),
140                    base_p50_ms: bp,
141                    new_p50_ms: np,
142                    severity: "medium".into(),
143                });
144            }
145
146            let nb = percentiles(&a.bytes).p50;
147            let bb = percentiles(&b.bytes).p50;
148            if bb > 0.0 && nb > bb * 2.0 {
149                payload_growth.push(SizeDelta {
150                    endpoint: ep.clone(),
151                    base_bytes: bb as i64,
152                    new_bytes: nb as i64,
153                    severity: "low".into(),
154                });
155            }
156        }
157    }
158
159    new_hosts.sort();
160    removed_hosts.sort();
161    new_endpoints.sort();
162    removed_endpoints.sort();
163    new_errors.sort_by(|a, b| {
164        sev_rank(&b.severity)
165            .cmp(&sev_rank(&a.severity))
166            .then(b.count.cmp(&a.count))
167            .then(a.endpoint.cmp(&b.endpoint))
168    });
169    latency_regressions.sort_by(|a, b| {
170        (b.new_p50_ms - b.base_p50_ms)
171            .partial_cmp(&(a.new_p50_ms - a.base_p50_ms))
172            .unwrap_or(std::cmp::Ordering::Equal)
173            .then(a.endpoint.cmp(&b.endpoint))
174    });
175    payload_growth.sort_by(|a, b| {
176        (b.new_bytes - b.base_bytes)
177            .cmp(&(a.new_bytes - a.base_bytes))
178            .then(a.endpoint.cmp(&b.endpoint))
179    });
180
181    new_hosts.truncate(top);
182    removed_hosts.truncate(top);
183    new_endpoints.truncate(top);
184    removed_endpoints.truncate(top);
185    new_errors.truncate(top);
186    latency_regressions.truncate(top);
187    payload_growth.truncate(top);
188
189    let mut rank = 0u8;
190    for s in new_errors
191        .iter()
192        .map(|d| d.severity.as_str())
193        .chain(latency_regressions.iter().map(|d| d.severity.as_str()))
194        .chain(payload_growth.iter().map(|d| d.severity.as_str()))
195    {
196        rank = rank.max(sev_rank(s));
197    }
198    let any =
199        !new_errors.is_empty() || !latency_regressions.is_empty() || !payload_growth.is_empty();
200    let max_severity = match rank {
201        3 => "critical",
202        2 => "high",
203        1 => "medium",
204        _ if any => "low",
205        _ => "none",
206    }
207    .to_string();
208
209    CompareResult {
210        new_hosts,
211        removed_hosts,
212        new_endpoints,
213        removed_endpoints,
214        new_errors,
215        latency_regressions,
216        payload_growth,
217        max_severity,
218    }
219}
220
221/// Render the comparison as deterministic terminal text.
222pub fn render_compare_text(r: &CompareResult) -> String {
223    let mut out = String::new();
224    out.push_str("== wiretrail compare ==\n");
225    out.push_str(&format!("max severity: {}\n", r.max_severity));
226    if !r.new_hosts.is_empty() {
227        out.push_str(&format!("new hosts: {}\n", r.new_hosts.join(", ")));
228    }
229    if !r.removed_hosts.is_empty() {
230        out.push_str(&format!("removed hosts: {}\n", r.removed_hosts.join(", ")));
231    }
232    if !r.new_endpoints.is_empty() {
233        out.push_str(&format!("new endpoints: {}\n", r.new_endpoints.len()));
234    }
235    if !r.removed_endpoints.is_empty() {
236        out.push_str(&format!(
237            "removed endpoints: {}\n",
238            r.removed_endpoints.len()
239        ));
240    }
241    if !r.new_errors.is_empty() {
242        out.push_str("\nnew errors:\n");
243        for d in &r.new_errors {
244            out.push_str(&format!(
245                "  [{}] {} -> {} ({}x)\n",
246                d.severity, d.endpoint, d.status, d.count
247            ));
248        }
249    }
250    if !r.latency_regressions.is_empty() {
251        out.push_str("\nlatency regressions:\n");
252        for d in &r.latency_regressions {
253            out.push_str(&format!(
254                "  [{}] {} p50 {:.0}ms -> {:.0}ms\n",
255                d.severity, d.endpoint, d.base_p50_ms, d.new_p50_ms
256            ));
257        }
258    }
259    if !r.payload_growth.is_empty() {
260        out.push_str("\npayload growth:\n");
261        for d in &r.payload_growth {
262            out.push_str(&format!(
263                "  [{}] {} {}B -> {}B\n",
264                d.severity, d.endpoint, d.base_bytes, d.new_bytes
265            ));
266        }
267    }
268    out
269}
270
#[cfg(test)]
mod tests {
    use super::compute_compare;
    use crate::filter::Filter;
    use crate::model::{Entry, sample_capture, sample_entry};

    /// Filter parsed from an empty expression list.
    fn no_filter() -> Filter {
        Filter::parse(&[]).unwrap()
    }

    /// The entry shared by every test: `GET api.x/a` answering 200.
    fn ok_get_a() -> Entry {
        sample_entry(0, "api.x", "GET", "/a", 200)
    }

    #[test]
    fn detects_new_host() {
        let baseline = sample_capture(vec![ok_get_a()]);
        let candidate = sample_capture(vec![
            ok_get_a(),
            sample_entry(1, "api.y", "GET", "/b", 200),
        ]);

        let result = compute_compare(&candidate, &baseline, &no_filter(), 50);

        assert!(result.new_hosts.iter().any(|h| h == "api.y"));
        assert!(!result.new_hosts.iter().any(|h| h == "api.x"));
    }

    #[test]
    fn detects_new_5xx_as_high() {
        let baseline = sample_capture(vec![ok_get_a()]);
        let candidate = sample_capture(vec![sample_entry(0, "api.x", "GET", "/a", 500)]);

        let result = compute_compare(&candidate, &baseline, &no_filter(), 50);

        let has_high_500 = result
            .new_errors
            .iter()
            .any(|d| d.status == 500 && d.severity == "high");
        assert!(has_high_500);
        assert_eq!(result.max_severity, "high");
    }

    #[test]
    fn detects_latency_regression() {
        let mut slow = ok_get_a();
        slow.duration_ms = 900.0; // > 2x and > 200ms over baseline
        let mut fast = ok_get_a();
        fast.duration_ms = 100.0;

        let baseline = sample_capture(vec![fast]);
        let candidate = sample_capture(vec![slow]);
        let result = compute_compare(&candidate, &baseline, &no_filter(), 50);

        assert_eq!(result.latency_regressions.len(), 1);
        assert_eq!(result.latency_regressions[0].severity, "medium");
    }

    #[test]
    fn detects_payload_growth() {
        let mut large = ok_get_a();
        large.sizes.resp_content = 500; // > 2x
        let mut small = ok_get_a();
        small.sizes.resp_content = 100;

        let baseline = sample_capture(vec![small]);
        let candidate = sample_capture(vec![large]);
        let result = compute_compare(&candidate, &baseline, &no_filter(), 50);

        assert_eq!(result.payload_growth.len(), 1);
        assert_eq!(result.payload_growth[0].severity, "low");
    }

    #[test]
    fn no_findings_is_none() {
        let baseline = sample_capture(vec![ok_get_a()]);
        let candidate = sample_capture(vec![ok_get_a()]);

        let result = compute_compare(&candidate, &baseline, &no_filter(), 50);

        assert_eq!(result.max_severity, "none");
    }
}