Skip to main content

har/
assemble.rs

1use crate::classify::classify;
2use crate::correlate::extract_correlation;
3use crate::model::{Capture, CaptureMeta, Entry, Phases, Sizes, format_entry_id};
4use crate::normalize::normalize_path;
5use crate::raw::{RawDoc, RawEntry, RawNameValue};
6use chrono::DateTime;
7
8/// Transform a parsed raw document into the normalized analysis model.
9pub fn assemble(doc: RawDoc) -> Capture {
10    let log = doc.log;
11
12    // First pass: epoch-ms timestamps, to compute capture window + offsets.
13    let starts: Vec<Option<i64>> = log
14        .entries
15        .iter()
16        .map(|e| parse_epoch_ms(&e.started_date_time))
17        .collect();
18
19    let capture_start = starts.iter().flatten().copied().min();
20    let mut capture_end: Option<i64> = None;
21    for (e, s) in log.entries.iter().zip(&starts) {
22        if let Some(s) = s {
23            let end = s + e.time.round() as i64;
24            capture_end = Some(capture_end.map_or(end, |c: i64| c.max(end)));
25        }
26    }
27
28    let entries: Vec<Entry> = log
29        .entries
30        .iter()
31        .enumerate()
32        .map(|(i, e)| build_entry(i, e, starts[i], capture_start))
33        .collect();
34
35    let meta = CaptureMeta {
36        har_version: log.version,
37        creator: log.creator.name,
38        creator_version: log.creator.version,
39        browser: log.browser.map(|b| b.name),
40        entry_count: entries.len(),
41        start_ms: capture_start,
42        end_ms: capture_end,
43        duration_ms: match (capture_start, capture_end) {
44            (Some(s), Some(end)) => (end - s) as f64,
45            _ => 0.0,
46        },
47    };
48
49    Capture { meta, entries }
50}
51
52fn build_entry(
53    index: usize,
54    e: &RawEntry,
55    start_ms: Option<i64>,
56    capture_start: Option<i64>,
57) -> Entry {
58    let url = e.request.url.clone();
59    let (host, path, query) = split_url(&url);
60    let content_type = e.response.content.mime_type.clone();
61    let resource_type = classify(content_type.as_deref(), &url);
62
63    let req_headers = name_values(&e.request.headers);
64    let resp_headers = name_values(&e.response.headers);
65    let correlation = extract_correlation(&resp_headers);
66
67    let started_offset_ms = match (start_ms, capture_start) {
68        (Some(s), Some(c)) => (s - c) as f64,
69        _ => 0.0,
70    };
71
72    Entry {
73        id: format_entry_id(index),
74        index,
75        started_offset_ms,
76        duration_ms: e.time,
77        method: e.request.method.clone(),
78        norm_path: normalize_path(&path),
79        path,
80        host,
81        query,
82        url,
83        status: e.response.status,
84        status_text: e.response.status_text.clone(),
85        resource_type,
86        content_type,
87        req_headers,
88        resp_headers,
89        req_body: e.request.post_data.as_ref().and_then(|p| p.text.clone()),
90        resp_body: e.response.content.text.clone(),
91        timings: Phases {
92            blocked: clamp_phase(e.timings.blocked),
93            dns: clamp_phase(e.timings.dns),
94            connect: clamp_phase(e.timings.connect),
95            ssl: clamp_phase(e.timings.ssl),
96            send: e.timings.send.max(0.0),
97            wait: e.timings.wait.max(0.0),
98            receive: e.timings.receive.max(0.0),
99        },
100        sizes: Sizes {
101            req_body: e.request.body_size,
102            resp_body: e.response.body_size,
103            resp_content: e.response.content.size,
104            resp_headers: e.response.headers_size,
105        },
106        server_ip: e.server_ip_address.clone(),
107        http_version: e.response.http_version_or_request(&e.request.http_version),
108        redirect_url: e.response.redirect_url.clone().filter(|s| !s.is_empty()),
109        correlation,
110    }
111}
112
/// HAR timing phases use -1 to mean "not applicable"; map negatives to None.
///
/// Returns the value unchanged when it is present and `>= 0.0`. A missing
/// value, any negative value, and NaN all yield `None`.
fn clamp_phase(v: Option<f64>) -> Option<f64> {
    v.filter(|phase| *phase >= 0.0)
}
120
121fn name_values(items: &[RawNameValue]) -> Vec<(String, String)> {
122    items
123        .iter()
124        .map(|h| (h.name.clone(), h.value.clone()))
125        .collect()
126}
127
128fn split_url(url: &str) -> (String, String, Vec<(String, String)>) {
129    match url::Url::parse(url) {
130        Ok(u) => {
131            let host = u.host_str().unwrap_or("").to_string();
132            let path = u.path().to_string();
133            let query = u
134                .query_pairs()
135                .map(|(k, v)| (k.into_owned(), v.into_owned()))
136                .collect();
137            (host, path, query)
138        }
139        Err(_) => (String::new(), url.to_string(), Vec::new()),
140    }
141}
142
143fn parse_epoch_ms(s: &str) -> Option<i64> {
144    DateTime::parse_from_rfc3339(s)
145        .ok()
146        .map(|dt| dt.timestamp_millis())
147}
148
#[cfg(test)]
mod tests {
    use super::assemble;
    use crate::loader::load;
    use std::path::PathBuf;

    /// Resolve `name` inside the crate's `tests/fixtures` directory.
    fn fixture(name: &str) -> PathBuf {
        let mut location = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        location.push("tests/fixtures");
        location.push(name);
        location
    }

    /// End-to-end check: load a fixture, assemble it, and verify the entry
    /// count, the first entry's id and offset, and that every host parsed.
    #[test]
    fn assembles_capture_with_ids_and_hosts() {
        let har_path = fixture("someapi123.har");
        let capture = assemble(load(&har_path).unwrap());

        // Metadata must agree with the number of entries actually built.
        assert_eq!(capture.meta.entry_count, capture.entries.len());

        let first = &capture.entries[0];
        assert_eq!(first.id, "e000000");
        // Offsets are measured from the earliest start in the capture; this
        // assumes the fixture's first entry is that earliest request.
        assert_eq!(first.started_offset_ms, 0.0);

        // Every URL in the fixture is expected to parse to a non-empty host.
        assert!(capture.entries.iter().all(|entry| !entry.host.is_empty()));
    }
}