1use crate::error::HuginnNetHttpError;
2use crate::http::Header;
3use crate::http_common::HttpProcessor;
4use crate::observable::{ObservableHttpRequest, ObservableHttpResponse};
5use crate::{http, http1_parser, http2_parser, http2_process, http_common, http_languages};
6use tracing::debug;
7
/// HTTP/1.x protocol processor.
///
/// Implements [`HttpProcessor`] for HTTP/1.0 and HTTP/1.1 traffic,
/// delegating the actual message parsing to an [`http1_parser::Http1Parser`].
pub struct Http1Processor {
    // Underlying HTTP/1.x message parser used by process_request/response.
    parser: http1_parser::Http1Parser,
}
16
17impl Http1Processor {
18 pub fn new() -> Self {
19 Self {
20 parser: http1_parser::Http1Parser::new(),
21 }
22 }
23}
24
25impl Default for Http1Processor {
26 fn default() -> Self {
27 Self::new()
28 }
29}
30
31impl HttpProcessor for Http1Processor {
32 fn can_process_request(&self, data: &[u8]) -> bool {
33 if data.len() < 16 {
34 return false;
36 }
37
38 if http2_parser::is_http2_traffic(data) {
40 return false;
41 }
42
43 let data_str = String::from_utf8_lossy(data);
44 let first_line = data_str.lines().next().unwrap_or("");
45
46 let parts: Vec<&str> = first_line.split_whitespace().collect();
48 if parts.len() != 3 {
49 return false;
50 }
51
52 let methods = [
54 "GET",
55 "POST",
56 "PUT",
57 "DELETE",
58 "HEAD",
59 "OPTIONS",
60 "PATCH",
61 "TRACE",
62 "CONNECT",
63 "PROPFIND",
64 "PROPPATCH",
65 "MKCOL",
66 "COPY",
67 "MOVE",
68 "LOCK",
69 "UNLOCK",
70 ];
71
72 methods.contains(&parts[0])
74 && (parts[2] == "HTTP/1.0" || parts[2] == "HTTP/1.1")
75 && !parts[1].is_empty() }
77
78 fn can_process_response(&self, data: &[u8]) -> bool {
79 if data.len() < 12 {
80 return false;
82 }
83
84 if data.len() >= 9 && http2_process::looks_like_http2_response(data) {
86 return false;
87 }
88
89 let data_str = String::from_utf8_lossy(data);
90 let first_line = data_str.lines().next().unwrap_or("");
91
92 let parts: Vec<&str> = first_line.splitn(3, ' ').collect();
94 if parts.len() < 2 {
95 return false;
96 }
97
98 (parts[0] == "HTTP/1.0" || parts[0] == "HTTP/1.1")
100 && parts[1].len() == 3 && parts[1].chars().all(|c| c.is_ascii_digit()) }
103
104 fn has_complete_data(&self, data: &[u8]) -> bool {
105 has_complete_headers(data)
106 }
107
108 fn process_request(
109 &self,
110 data: &[u8],
111 ) -> Result<Option<ObservableHttpRequest>, HuginnNetHttpError> {
112 parse_http1_request(data, &self.parser)
113 }
114
115 fn process_response(
116 &self,
117 data: &[u8],
118 ) -> Result<Option<ObservableHttpResponse>, HuginnNetHttpError> {
119 parse_http1_response(data, &self.parser)
120 }
121
122 fn supported_version(&self) -> http::Version {
123 http::Version::V11 }
125
126 fn name(&self) -> &'static str {
127 "HTTP/1.x"
128 }
129}
130
/// Returns `true` once `data` contains the end-of-headers marker
/// (`\r\n\r\n`), i.e. a complete HTTP/1.x header section.
fn has_complete_headers(data: &[u8]) -> bool {
    // `windows(4)` yields nothing for slices shorter than 4 bytes, so no
    // explicit length guard is needed; this replaces the previous manual
    // index scan with repeated bounds-checked `get` calls.
    data.windows(4).any(|window| window == b"\r\n\r\n")
}
150
151fn convert_http1_request_to_observable(req: http1_parser::Http1Request) -> ObservableHttpRequest {
152 let lang = req
153 .accept_language
154 .and_then(http_languages::get_highest_quality_language);
155
156 let headers_in_order = convert_headers_to_http_format(&req.headers, true);
157 let headers_absent = build_absent_headers_from_new_parser(&req.headers, true);
158
159 ObservableHttpRequest {
160 matching: huginn_net_db::observable_signals::HttpRequestObservation {
161 version: req.version,
162 horder: headers_in_order,
163 habsent: headers_absent,
164 expsw: extract_traffic_classification(req.user_agent.as_deref()),
165 },
166 lang,
167 user_agent: req.user_agent.clone(),
168 headers: req.headers,
169 cookies: req.cookies.clone(),
170 referer: req.referer.clone(),
171 method: Some(req.method),
172 uri: Some(req.uri),
173 }
174}
175
176fn convert_http1_response_to_observable(
177 res: http1_parser::Http1Response,
178) -> ObservableHttpResponse {
179 let headers_in_order = convert_headers_to_http_format(&res.headers, false);
180 let headers_absent = build_absent_headers_from_new_parser(&res.headers, false);
181
182 ObservableHttpResponse {
183 matching: huginn_net_db::observable_signals::HttpResponseObservation {
184 version: res.version,
185 horder: headers_in_order,
186 habsent: headers_absent,
187 expsw: extract_traffic_classification(res.server.as_deref()),
188 },
189 headers: res.headers,
190 status_code: Some(res.status_code),
191 }
192}
193
194fn convert_headers_to_http_format(
195 headers: &[http_common::HttpHeader],
196 is_request: bool,
197) -> Vec<Header> {
198 let mut headers_in_order: Vec<Header> = Vec::new();
199 let optional_list = if is_request {
200 http::request_optional_headers()
201 } else {
202 http::response_optional_headers()
203 };
204 let skip_value_list = if is_request {
205 http::request_skip_value_headers()
206 } else {
207 http::response_skip_value_headers()
208 };
209
210 for header in headers {
211 if optional_list.contains(&header.name.as_str()) {
212 headers_in_order.push(http::Header::new(&header.name).optional());
213 } else if skip_value_list.contains(&header.name.as_str()) {
214 headers_in_order.push(http::Header::new(&header.name));
215 } else {
216 headers_in_order
217 .push(Header::new(&header.name).with_optional_value(header.value.clone()));
218 }
219 }
220
221 headers_in_order
222}
223
224fn build_absent_headers_from_new_parser(
225 headers: &[http_common::HttpHeader],
226 is_request: bool,
227) -> Vec<Header> {
228 let mut headers_absent: Vec<http::Header> = Vec::new();
229 let common_list: Vec<&str> = if is_request {
230 http::request_common_headers()
231 } else {
232 http::response_common_headers()
233 };
234 let current_headers: Vec<String> = headers.iter().map(|h| h.name.to_lowercase()).collect();
235
236 for header in &common_list {
237 if !current_headers.contains(&header.to_lowercase()) {
238 headers_absent.push(http::Header::new(header));
239 }
240 }
241 headers_absent
242}
243
244fn parse_http1_request(
245 data: &[u8],
246 parser: &http1_parser::Http1Parser,
247) -> Result<Option<ObservableHttpRequest>, HuginnNetHttpError> {
248 match parser.parse_request(data) {
249 Ok(Some(req)) => {
250 let observable = convert_http1_request_to_observable(req);
251 Ok(Some(observable))
252 }
253 Ok(None) => {
254 debug!("Incomplete HTTP/1.x request data");
255 Ok(None)
256 }
257 Err(e) => {
258 debug!("Failed to parse HTTP/1.x request: {}", e);
259 Err(HuginnNetHttpError::Parse(format!(
260 "Failed to parse HTTP/1.x request: {e}"
261 )))
262 }
263 }
264}
265
266fn parse_http1_response(
267 data: &[u8],
268 parser: &http1_parser::Http1Parser,
269) -> Result<Option<ObservableHttpResponse>, HuginnNetHttpError> {
270 match parser.parse_response(data) {
271 Ok(Some(res)) => {
272 let observable = convert_http1_response_to_observable(res);
273 Ok(Some(observable))
274 }
275 Ok(None) => {
276 debug!("Incomplete HTTP/1.x response data");
277 Ok(None)
278 }
279 Err(e) => {
280 debug!("Failed to parse HTTP/1.x response: {}", e);
281 Err(HuginnNetHttpError::Parse(format!(
282 "Failed to parse HTTP/1.x response: {e}"
283 )))
284 }
285 }
286}
287
/// Returns the software string for signature matching, or the `"???"`
/// placeholder when no value is available.
fn extract_traffic_classification(value: Option<&str>) -> String {
    match value {
        Some(software) => software.to_string(),
        None => "???".to_string(),
    }
}
291
292pub fn looks_like_http1_response(data: &[u8]) -> bool {
294 if data.len() < 12 {
295 return false;
297 }
298
299 if data.len() >= 9 && http2_process::looks_like_http2_response(data) {
301 return false;
302 }
303
304 let data_str = String::from_utf8_lossy(data);
305 let first_line = data_str.lines().next().unwrap_or("");
306
307 let parts: Vec<&str> = first_line.split_whitespace().collect();
309 if parts.len() < 2 {
310 return false;
311 }
312
313 let version_str = parts[0];
315 if version_str != "HTTP/1.0" && version_str != "HTTP/1.1" {
316 return false;
317 }
318
319 let status_str = parts[1];
321 status_str.len() == 3 && status_str.chars().all(|c| c.is_ascii_digit())
322}
323
#[cfg(test)]
mod tests {
    use super::*;
    use huginn_net_db;

    // Full-pipeline test: parse a realistic Chrome-on-Linux request and
    // check every derived observable field (language, user agent, header
    // order, absent headers and the expected-software string).
    #[test]
    fn test_parse_http1_request() {
        let valid_request = b"GET / HTTP/1.1\r\n\
        Host: example.com\r\n\
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\r\n\
        Accept-Language: en-US,en;q=0.9,es;q=0.8\r\n\
        Cache-Control: max-age=0\r\n\
        Connection: keep-alive\r\n\
        If-Modified-Since: Thu, 17 Oct 2019 07:18:26 GMT\r\n\
        If-None-Match: \"3147526947\"\r\n\
        Upgrade-Insecure-Requests: 1\r\n\
        User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36\r\n\
        \r\n";
        let parser = http1_parser::Http1Parser::new();
        match parse_http1_request(valid_request, &parser) {
            Ok(Some(request)) => {
                // "en-US,en;q=0.9,…" should resolve to English.
                assert_eq!(request.lang, Some("English".to_string()));
                assert_eq!(request.user_agent, Some("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36".to_string()));
                assert_eq!(request.matching.version, http::Version::V11);

                // Headers in wire order; optional and skip-value headers
                // are represented without their concrete values.
                let expected_horder = vec![
                    http::Header::new("Host"),
                    http::Header::new("Accept").with_value("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"),
                    http::Header::new("Accept-Language").with_value("en-US,en;q=0.9,es;q=0.8"),
                    http::Header::new("Cache-Control").optional(),
                    http::Header::new("Connection").with_value("keep-alive"),
                    http::Header::new("If-Modified-Since").optional(),
                    Header::new("If-None-Match").optional(),
                    http::Header::new("Upgrade-Insecure-Requests").with_value("1"),
                    http::Header::new("User-Agent"),
                ];
                assert_eq!(request.matching.horder, expected_horder);

                // Common request headers this request does not send.
                let expected_habsent = vec![
                    http::Header::new("Accept-Encoding"),
                    http::Header::new("Accept-Charset"),
                    http::Header::new("Keep-Alive"),
                ];
                assert_eq!(request.matching.habsent, expected_habsent);

                assert_eq!(request.matching.expsw, "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36");
            }
            Ok(None) => panic!("Incomplete HTTP request"),
            Err(e) => panic!("Failed to parse HTTP request: {e}"),
        }
    }

    // Full-pipeline test for a simple Apache response, including a body
    // after the header section.
    #[test]
    fn test_parse_http1_response() {
        let valid_response = b"HTTP/1.1 200 OK\r\n\
        Server: Apache\r\n\
        Content-Type: text/html; charset=UTF-8\r\n\
        Content-Length: 112\r\n\
        Connection: keep-alive\r\n\
        \r\n\
        <html><body><h1>It works!</h1></body></html>";

        let parser = http1_parser::Http1Parser::new();
        match parse_http1_response(valid_response, &parser) {
            Ok(Some(response)) => {
                // expsw is taken from the Server header.
                assert_eq!(response.matching.expsw, "Apache");
                assert_eq!(response.matching.version, http::Version::V11);

                let expected_horder = vec![
                    http::Header::new("Server"),
                    http::Header::new("Content-Type"),
                    http::Header::new("Content-Length").optional(),
                    http::Header::new("Connection").with_value("keep-alive"),
                ];
                assert_eq!(response.matching.horder, expected_horder);

                // Common response headers this response does not send.
                let expected_absent = vec![
                    http::Header::new("Keep-Alive"),
                    http::Header::new("Accept-Ranges"),
                    http::Header::new("Date"),
                ];
                assert_eq!(response.matching.habsent, expected_absent);
            }
            Ok(None) => panic!("Incomplete HTTP response"),
            Err(e) => panic!("Failed to parse HTTP response: {e}"),
        }
    }

    // No user agent at all -> Anonymous diagnosis.
    #[test]
    fn test_get_diagnostic_for_empty_sw() {
        let diagnosis: http::HttpDiagnosis = http_common::get_diagnostic(None, None, None);
        assert_eq!(diagnosis, http::HttpDiagnosis::Anonymous);
    }

    // UA-derived OS matches the signature-derived OS -> Generic.
    #[test]
    fn test_get_diagnostic_with_existing_signature_matcher() {
        let user_agent: Option<String> = Some("Mozilla/5.0".to_string());
        let os = "Linux".to_string();
        let browser = Some("Firefox".to_string());
        let ua_matcher: Option<(&String, &Option<String>)> = Some((&os, &browser));
        let label = huginn_net_db::Label {
            ty: huginn_net_db::Type::Specified,
            class: None,
            name: "Linux".to_string(),
            flavor: None,
        };
        let signature_os_matcher: Option<&huginn_net_db::Label> = Some(&label);

        let diagnosis = http_common::get_diagnostic(user_agent, ua_matcher, signature_os_matcher);
        assert_eq!(diagnosis, http::HttpDiagnosis::Generic);
    }

    // UA claims Windows but the signature says Linux -> Dishonest.
    #[test]
    fn test_get_diagnostic_with_dishonest_user_agent() {
        let user_agent = Some("Mozilla/5.0".to_string());
        let os = "Windows".to_string();
        let browser = Some("Firefox".to_string());
        let ua_matcher: Option<(&String, &Option<String>)> = Some((&os, &browser));
        let label = huginn_net_db::Label {
            ty: huginn_net_db::Type::Specified,
            class: None,
            name: "Linux".to_string(),
            flavor: None,
        };
        let signature_os_matcher: Option<&huginn_net_db::Label> = Some(&label);

        let diagnosis = http_common::get_diagnostic(user_agent, ua_matcher, signature_os_matcher);
        assert_eq!(diagnosis, http::HttpDiagnosis::Dishonest);
    }

    // UA present but no matchers available -> None (no diagnosis possible).
    #[test]
    fn test_get_diagnostic_without_user_agent_and_signature_matcher() {
        let user_agent = Some("Mozilla/5.0".to_string());

        let diagnosis = http_common::get_diagnostic(user_agent, None, None);
        assert_eq!(diagnosis, http::HttpDiagnosis::None);
    }

    // Header section not yet terminated by \r\n\r\n.
    #[test]
    fn test_incomplete_headers() {
        let data = b"GET /path HTTP/1.1\r\nHost: example.com\r\n";
        assert!(!has_complete_headers(data));
    }

    // Terminator present at the very end of the buffer.
    #[test]
    fn test_complete_headers() {
        let data = b"GET /path HTTP/1.1\r\nHost: example.com\r\nCookie: session=abc\r\n\r\n";
        assert!(has_complete_headers(data));
    }

    // Terminator followed by body bytes still counts as complete.
    #[test]
    fn test_complete_headers_with_body() {
        let data = b"GET /path HTTP/1.1\r\nHost: example.com\r\n\r\nbody data here";
        assert!(has_complete_headers(data));
    }

    // Edge case: empty input.
    #[test]
    fn test_empty_data() {
        let data = b"";
        assert!(!has_complete_headers(data));
    }

    // Edge case: shorter than the 4-byte terminator itself.
    #[test]
    fn test_too_short_data() {
        let data = b"GET";
        assert!(!has_complete_headers(data));
    }

    // Completeness detection works for responses too.
    #[test]
    fn test_response_headers() {
        let data = b"HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nSet-Cookie: id=123\r\n\r\n";
        assert!(has_complete_headers(data));
    }
}