huginn_net_http/
http1_process.rs

1use crate::error::HuginnNetHttpError;
2use crate::http::Header;
3use crate::http_common::HttpProcessor;
4use crate::observable::{ObservableHttpRequest, ObservableHttpResponse};
5use crate::{http, http1_parser, http2_parser, http2_process, http_common, http_languages};
6use tracing::debug;
7
8/// HTTP/1.x Protocol Processor
9///
10/// Implements the HttpProcessor trait for HTTP/1.0 and HTTP/1.1 protocols.
11/// Handles both request and response processing with proper protocol detection.
12/// Contains a parser instance that is created once and reused.
13pub struct Http1Processor {
14    parser: http1_parser::Http1Parser,
15}
16
17impl Http1Processor {
18    pub fn new() -> Self {
19        Self {
20            parser: http1_parser::Http1Parser::new(),
21        }
22    }
23}
24
25impl Default for Http1Processor {
26    fn default() -> Self {
27        Self::new()
28    }
29}
30
31impl HttpProcessor for Http1Processor {
32    fn can_process_request(&self, data: &[u8]) -> bool {
33        if data.len() < 16 {
34            // Minimum for "GET / HTTP/1.1\r\n"
35            return false;
36        }
37
38        // VERY SPECIFIC: Must NOT be HTTP/2 first
39        if http2_parser::is_http2_traffic(data) {
40            return false;
41        }
42
43        let data_str = String::from_utf8_lossy(data);
44        let first_line = data_str.lines().next().unwrap_or("");
45
46        // SPECIFIC: Must be exact HTTP/1.x request line format
47        let parts: Vec<&str> = first_line.split_whitespace().collect();
48        if parts.len() != 3 {
49            return false;
50        }
51
52        // SPECIFIC: Valid HTTP/1.x methods only
53        let methods = [
54            "GET",
55            "POST",
56            "PUT",
57            "DELETE",
58            "HEAD",
59            "OPTIONS",
60            "PATCH",
61            "TRACE",
62            "CONNECT",
63            "PROPFIND",
64            "PROPPATCH",
65            "MKCOL",
66            "COPY",
67            "MOVE",
68            "LOCK",
69            "UNLOCK",
70        ];
71
72        // SPECIFIC: Must be exact HTTP/1.0 or HTTP/1.1
73        methods.contains(&parts[0])
74            && (parts[2] == "HTTP/1.0" || parts[2] == "HTTP/1.1")
75            && !parts[1].is_empty() // Must have URI
76    }
77
78    fn can_process_response(&self, data: &[u8]) -> bool {
79        if data.len() < 12 {
80            // Minimum for "HTTP/1.1 200"
81            return false;
82        }
83
84        // VERY SPECIFIC: Must NOT look like HTTP/2 frames
85        if data.len() >= 9 && http2_process::looks_like_http2_response(data) {
86            return false;
87        }
88
89        let data_str = String::from_utf8_lossy(data);
90        let first_line = data_str.lines().next().unwrap_or("");
91
92        // SPECIFIC: Must be exact HTTP/1.x response line format
93        let parts: Vec<&str> = first_line.splitn(3, ' ').collect();
94        if parts.len() < 2 {
95            return false;
96        }
97
98        // SPECIFIC: Must be exact HTTP/1.0 or HTTP/1.1 with valid status code
99        (parts[0] == "HTTP/1.0" || parts[0] == "HTTP/1.1")
100            && parts[1].len() == 3  // Status code must be 3 digits
101            && parts[1].chars().all(|c| c.is_ascii_digit()) // Must be numeric
102    }
103
104    fn has_complete_data(&self, data: &[u8]) -> bool {
105        has_complete_headers(data)
106    }
107
108    fn process_request(
109        &self,
110        data: &[u8],
111    ) -> Result<Option<ObservableHttpRequest>, HuginnNetHttpError> {
112        parse_http1_request(data, &self.parser)
113    }
114
115    fn process_response(
116        &self,
117        data: &[u8],
118    ) -> Result<Option<ObservableHttpResponse>, HuginnNetHttpError> {
119        parse_http1_response(data, &self.parser)
120    }
121
122    fn supported_version(&self) -> http::Version {
123        http::Version::V11 // Primary version, but also supports V10
124    }
125
126    fn name(&self) -> &'static str {
127        "HTTP/1.x"
128    }
129}
130
/// Reports whether `data` contains the `\r\n\r\n` sequence that terminates an
/// HTTP/1.x header block.
///
/// This is a raw byte scan only: nothing is parsed or validated, and any bytes
/// after the terminator (a body, for instance) are ignored. Inputs shorter than
/// four bytes can never contain the terminator and yield `false`.
fn has_complete_headers(data: &[u8]) -> bool {
    const HEADER_TERMINATOR: &[u8] = b"\r\n\r\n";

    // `windows` yields nothing when the slice is shorter than the window,
    // which covers the empty and too-short cases without a separate check.
    data.windows(HEADER_TERMINATOR.len())
        .any(|window| window == HEADER_TERMINATOR)
}
150
151fn convert_http1_request_to_observable(req: http1_parser::Http1Request) -> ObservableHttpRequest {
152    let lang = req
153        .accept_language
154        .and_then(http_languages::get_highest_quality_language);
155
156    let headers_in_order = convert_headers_to_http_format(&req.headers, true);
157    let headers_absent = build_absent_headers_from_new_parser(&req.headers, true);
158
159    ObservableHttpRequest {
160        matching: huginn_net_db::observable_signals::HttpRequestObservation {
161            version: req.version,
162            horder: headers_in_order,
163            habsent: headers_absent,
164            expsw: extract_traffic_classification(req.user_agent.as_deref()),
165        },
166        lang,
167        user_agent: req.user_agent.clone(),
168        headers: req.headers,
169        cookies: req.cookies.clone(),
170        referer: req.referer.clone(),
171        method: Some(req.method),
172        uri: Some(req.uri),
173    }
174}
175
176fn convert_http1_response_to_observable(
177    res: http1_parser::Http1Response,
178) -> ObservableHttpResponse {
179    let headers_in_order = convert_headers_to_http_format(&res.headers, false);
180    let headers_absent = build_absent_headers_from_new_parser(&res.headers, false);
181
182    ObservableHttpResponse {
183        matching: huginn_net_db::observable_signals::HttpResponseObservation {
184            version: res.version,
185            horder: headers_in_order,
186            habsent: headers_absent,
187            expsw: extract_traffic_classification(res.server.as_deref()),
188        },
189        headers: res.headers,
190        status_code: Some(res.status_code),
191    }
192}
193
194fn convert_headers_to_http_format(
195    headers: &[http_common::HttpHeader],
196    is_request: bool,
197) -> Vec<Header> {
198    let mut headers_in_order: Vec<Header> = Vec::new();
199    let optional_list = if is_request {
200        http::request_optional_headers()
201    } else {
202        http::response_optional_headers()
203    };
204    let skip_value_list = if is_request {
205        http::request_skip_value_headers()
206    } else {
207        http::response_skip_value_headers()
208    };
209
210    for header in headers {
211        if optional_list.contains(&header.name.as_str()) {
212            headers_in_order.push(http::Header::new(&header.name).optional());
213        } else if skip_value_list.contains(&header.name.as_str()) {
214            headers_in_order.push(http::Header::new(&header.name));
215        } else {
216            headers_in_order
217                .push(Header::new(&header.name).with_optional_value(header.value.clone()));
218        }
219    }
220
221    headers_in_order
222}
223
224fn build_absent_headers_from_new_parser(
225    headers: &[http_common::HttpHeader],
226    is_request: bool,
227) -> Vec<Header> {
228    let mut headers_absent: Vec<http::Header> = Vec::new();
229    let common_list: Vec<&str> = if is_request {
230        http::request_common_headers()
231    } else {
232        http::response_common_headers()
233    };
234    let current_headers: Vec<String> = headers.iter().map(|h| h.name.to_lowercase()).collect();
235
236    for header in &common_list {
237        if !current_headers.contains(&header.to_lowercase()) {
238            headers_absent.push(http::Header::new(header));
239        }
240    }
241    headers_absent
242}
243
244fn parse_http1_request(
245    data: &[u8],
246    parser: &http1_parser::Http1Parser,
247) -> Result<Option<ObservableHttpRequest>, HuginnNetHttpError> {
248    match parser.parse_request(data) {
249        Ok(Some(req)) => {
250            let observable = convert_http1_request_to_observable(req);
251            Ok(Some(observable))
252        }
253        Ok(None) => {
254            debug!("Incomplete HTTP/1.x request data");
255            Ok(None)
256        }
257        Err(e) => {
258            debug!("Failed to parse HTTP/1.x request: {}", e);
259            Err(HuginnNetHttpError::Parse(format!(
260                "Failed to parse HTTP/1.x request: {e}"
261            )))
262        }
263    }
264}
265
266fn parse_http1_response(
267    data: &[u8],
268    parser: &http1_parser::Http1Parser,
269) -> Result<Option<ObservableHttpResponse>, HuginnNetHttpError> {
270    match parser.parse_response(data) {
271        Ok(Some(res)) => {
272            let observable = convert_http1_response_to_observable(res);
273            Ok(Some(observable))
274        }
275        Ok(None) => {
276            debug!("Incomplete HTTP/1.x response data");
277            Ok(None)
278        }
279        Err(e) => {
280            debug!("Failed to parse HTTP/1.x response: {}", e);
281            Err(HuginnNetHttpError::Parse(format!(
282                "Failed to parse HTTP/1.x response: {e}"
283            )))
284        }
285    }
286}
287
/// Returns the software string used in a matching observation: an owned copy
/// of `value` when present, or the placeholder `"???"` when it is `None`.
fn extract_traffic_classification(value: Option<&str>) -> String {
    match value {
        Some(software) => software.to_owned(),
        None => String::from("???"),
    }
}
291
292/// Check if data looks like HTTP/1.x response
293pub fn looks_like_http1_response(data: &[u8]) -> bool {
294    if data.len() < 12 {
295        // Minimum for "HTTP/1.1 200"
296        return false;
297    }
298
299    // Must NOT look like HTTP/2 frames
300    if data.len() >= 9 && http2_process::looks_like_http2_response(data) {
301        return false;
302    }
303
304    let data_str = String::from_utf8_lossy(data);
305    let first_line = data_str.lines().next().unwrap_or("");
306
307    // Must be exact HTTP/1.x response line format
308    let parts: Vec<&str> = first_line.split_whitespace().collect();
309    if parts.len() < 2 {
310        return false;
311    }
312
313    // Check HTTP version
314    let version_str = parts[0];
315    if version_str != "HTTP/1.0" && version_str != "HTTP/1.1" {
316        return false;
317    }
318
319    // Check status code (must be 3 digits)
320    let status_str = parts[1];
321    status_str.len() == 3 && status_str.chars().all(|c| c.is_ascii_digit())
322}
323
#[cfg(test)]
mod tests {
    use super::*;
    use huginn_net_db;

    /// `User-Agent` value carried by `SAMPLE_REQUEST`.
    const CHROME_USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

    /// Complete header-only GET request used by the request parsing test.
    const SAMPLE_REQUEST: &[u8] = b"GET / HTTP/1.1\r\n\
        Host: example.com\r\n\
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\r\n\
        Accept-Language: en-US,en;q=0.9,es;q=0.8\r\n\
        Cache-Control: max-age=0\r\n\
        Connection: keep-alive\r\n\
        If-Modified-Since: Thu, 17 Oct 2019 07:18:26 GMT\r\n\
        If-None-Match: \"3147526947\"\r\n\
        Upgrade-Insecure-Requests: 1\r\n\
        User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36\r\n\
        \r\n";

    /// Complete response (headers plus a short body) used by the response parsing test.
    const SAMPLE_RESPONSE: &[u8] = b"HTTP/1.1 200 OK\r\n\
        Server: Apache\r\n\
        Content-Type: text/html; charset=UTF-8\r\n\
        Content-Length: 112\r\n\
        Connection: keep-alive\r\n\
        \r\n\
        <html><body><h1>It works!</h1></body></html>";

    /// Builds the `Linux` label that the diagnostic tests use as the signature match.
    fn linux_label() -> huginn_net_db::Label {
        huginn_net_db::Label {
            ty: huginn_net_db::Type::Specified,
            class: None,
            name: "Linux".to_string(),
            flavor: None,
        }
    }

    /// A full request yields language, user agent, version, header order and absent headers.
    #[test]
    fn test_parse_http1_request() {
        let parser = http1_parser::Http1Parser::new();
        let request = match parse_http1_request(SAMPLE_REQUEST, &parser) {
            Ok(Some(request)) => request,
            Ok(None) => panic!("Incomplete HTTP request"),
            Err(e) => panic!("Failed to parse HTTP request: {e}"),
        };

        assert_eq!(request.lang, Some("English".to_string()));
        assert_eq!(request.user_agent, Some(CHROME_USER_AGENT.to_string()));
        assert_eq!(request.matching.version, http::Version::V11);

        let expected_horder = vec![
            Header::new("Host"),
            Header::new("Accept").with_value("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"),
            Header::new("Accept-Language").with_value("en-US,en;q=0.9,es;q=0.8"),
            Header::new("Cache-Control").optional(),
            Header::new("Connection").with_value("keep-alive"),
            Header::new("If-Modified-Since").optional(),
            Header::new("If-None-Match").optional(),
            Header::new("Upgrade-Insecure-Requests").with_value("1"),
            Header::new("User-Agent"),
        ];
        assert_eq!(request.matching.horder, expected_horder);

        let expected_habsent = vec![
            Header::new("Accept-Encoding"),
            Header::new("Accept-Charset"),
            Header::new("Keep-Alive"),
        ];
        assert_eq!(request.matching.habsent, expected_habsent);

        assert_eq!(request.matching.expsw, CHROME_USER_AGENT);
    }

    /// A full response yields the server string, version, header order and absent headers.
    #[test]
    fn test_parse_http1_response() {
        let parser = http1_parser::Http1Parser::new();
        let response = match parse_http1_response(SAMPLE_RESPONSE, &parser) {
            Ok(Some(response)) => response,
            Ok(None) => panic!("Incomplete HTTP response"),
            Err(e) => panic!("Failed to parse HTTP response: {e}"),
        };

        assert_eq!(response.matching.expsw, "Apache");
        assert_eq!(response.matching.version, http::Version::V11);

        let expected_horder = vec![
            Header::new("Server"),
            Header::new("Content-Type"),
            Header::new("Content-Length").optional(),
            Header::new("Connection").with_value("keep-alive"),
        ];
        assert_eq!(response.matching.horder, expected_horder);

        let expected_absent = vec![
            Header::new("Keep-Alive"),
            Header::new("Accept-Ranges"),
            Header::new("Date"),
        ];
        assert_eq!(response.matching.habsent, expected_absent);
    }

    /// No user agent and no matches at all is diagnosed as `Anonymous`.
    #[test]
    fn test_get_diagnostic_for_empty_sw() {
        let diagnosis: http::HttpDiagnosis = http_common::get_diagnostic(None, None, None);
        assert_eq!(diagnosis, http::HttpDiagnosis::Anonymous);
    }

    /// A user-agent OS equal to the signature OS is diagnosed as `Generic`.
    #[test]
    fn test_get_diagnostic_with_existing_signature_matcher() {
        let claimed_os = "Linux".to_string();
        let claimed_browser = Some("Firefox".to_string());
        let signature_label = linux_label();

        let diagnosis = http_common::get_diagnostic(
            Some("Mozilla/5.0".to_string()),
            Some((&claimed_os, &claimed_browser)),
            Some(&signature_label),
        );
        assert_eq!(diagnosis, http::HttpDiagnosis::Generic);
    }

    /// A user-agent OS that differs from the signature OS is diagnosed as `Dishonest`.
    #[test]
    fn test_get_diagnostic_with_dishonest_user_agent() {
        let claimed_os = "Windows".to_string();
        let claimed_browser = Some("Firefox".to_string());
        let signature_label = linux_label();

        let diagnosis = http_common::get_diagnostic(
            Some("Mozilla/5.0".to_string()),
            Some((&claimed_os, &claimed_browser)),
            Some(&signature_label),
        );
        assert_eq!(diagnosis, http::HttpDiagnosis::Dishonest);
    }

    /// A user agent with neither a user-agent match nor a signature match is diagnosed as `None`.
    #[test]
    fn test_get_diagnostic_without_user_agent_and_signature_matcher() {
        let diagnosis =
            http_common::get_diagnostic(Some("Mozilla/5.0".to_string()), None, None);
        assert_eq!(diagnosis, http::HttpDiagnosis::None);
    }

    /// Headers without the terminating blank line are not complete.
    #[test]
    fn test_incomplete_headers() {
        assert!(!has_complete_headers(
            b"GET /path HTTP/1.1\r\nHost: example.com\r\n"
        ));
    }

    /// Headers followed by the blank line are complete.
    #[test]
    fn test_complete_headers() {
        assert!(has_complete_headers(
            b"GET /path HTTP/1.1\r\nHost: example.com\r\nCookie: session=abc\r\n\r\n"
        ));
    }

    /// Trailing body bytes do not affect header completeness.
    #[test]
    fn test_complete_headers_with_body() {
        assert!(has_complete_headers(
            b"GET /path HTTP/1.1\r\nHost: example.com\r\n\r\nbody data here"
        ));
    }

    /// An empty buffer has no complete headers.
    #[test]
    fn test_empty_data() {
        assert!(!has_complete_headers(b""));
    }

    /// A buffer shorter than the terminator has no complete headers.
    #[test]
    fn test_too_short_data() {
        assert!(!has_complete_headers(b"GET"));
    }

    /// Response header blocks are detected the same way as request ones.
    #[test]
    fn test_response_headers() {
        assert!(has_complete_headers(
            b"HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nSet-Cookie: id=123\r\n\r\n"
        ));
    }
}