huginn_net_db/
observable_http_signals_matching.rs

1use crate::db::HttpIndexKey;
2use crate::db_matching_trait::{DatabaseSignature, MatchQuality};
3use crate::http::{self, Header, HttpMatchQuality, Version};
4use crate::observable_signals::{HttpRequestObservation, HttpResponseObservation};
5
6pub trait HttpDistance {
7    fn get_version(&self) -> Version;
8    fn get_horder(&self) -> &[Header];
9    fn get_habsent(&self) -> &[Header];
10    fn get_expsw(&self) -> &str;
11
12    fn distance_ip_version(&self, other: &http::Signature) -> Option<u32> {
13        if other.version == Version::Any || self.get_version() == other.version {
14            Some(HttpMatchQuality::High.as_score())
15        } else {
16            None
17        }
18    }
19
20    // Compare two header vectors respecting order and allowing optional header skips
21    //
22    // This function implements a sophisticated two-pointer algorithm to compare HTTP headers
23    // from observed traffic against database signatures while preserving order and handling
24    // optional headers that may be missing from the observed traffic.
25    //
26    // Algorithm Overview:
27    // 1. Use two pointers to traverse both lists simultaneously
28    // 2. When headers match perfectly (name + value), advance both pointers
29    // 3. When names match but values differ, count as error only if header is required
30    // 4. When names differ, skip optional signature headers or count required ones as errors
31    // 5. Handle remaining headers at the end of either list
32    //
33    // Parameters:
34    // - observed: Headers from actual HTTP traffic (never marked as optional)
35    // - signature: Headers from database signature (may have optional headers marked with ?)
36    //
37    // Returns:
38    // - Some(score) based on error count converted to quality score
39    // - None if too many errors (unmatchable)
40    fn distance_header(observed: &[Header], signature: &[Header]) -> Option<u32> {
41        let mut obs_idx = 0usize; // Index pointer for observed headers
42        let mut sig_idx = 0usize; // Index pointer for signature headers
43        let mut errors: u32 = 0; // Running count of matching errors
44
45        while obs_idx < observed.len() && sig_idx < signature.len() {
46            let obs_header = &observed[obs_idx];
47            let sig_header = &signature[sig_idx];
48
49            if obs_header.name == sig_header.name && obs_header.value == sig_header.value {
50                obs_idx = obs_idx.saturating_add(1);
51                sig_idx = sig_idx.saturating_add(1);
52            } else if obs_header.name == sig_header.name {
53                if !sig_header.optional {
54                    errors = errors.saturating_add(1);
55                }
56                obs_idx = obs_idx.saturating_add(1);
57                sig_idx = sig_idx.saturating_add(1);
58            } else if sig_header.optional {
59                sig_idx = sig_idx.saturating_add(1);
60            } else {
61                errors = errors.saturating_add(1);
62                sig_idx = sig_idx.saturating_add(1);
63            }
64        }
65
66        while obs_idx < observed.len() {
67            errors = errors.saturating_add(1);
68            obs_idx = obs_idx.saturating_add(1);
69        }
70
71        while sig_idx < signature.len() {
72            if !signature[sig_idx].optional {
73                errors = errors.saturating_add(1);
74            }
75            sig_idx = sig_idx.saturating_add(1);
76        }
77
78        match errors {
79            0..=2 => Some(HttpMatchQuality::High.as_score()), // 0-2 errors: High quality match
80            3..=5 => Some(HttpMatchQuality::Medium.as_score()), // 3-5 errors: Medium quality match
81            6..=8 => Some(HttpMatchQuality::Low.as_score()),  // 6-8 errors: Low quality match
82            9..=11 => Some(HttpMatchQuality::Bad.as_score()), // 9-11 errors: Bad quality match
83            _ => None, // 12+ errors: Too many differences, not a viable match
84        }
85    }
86
87    fn distance_horder(&self, other: &http::Signature) -> Option<u32> {
88        Self::distance_header(self.get_horder(), &other.horder)
89    }
90
91    fn distance_habsent(&self, other: &http::Signature) -> Option<u32> {
92        Self::distance_header(self.get_habsent(), &other.habsent)
93    }
94
95    fn distance_expsw(&self, other: &http::Signature) -> Option<u32> {
96        if other.expsw.as_str().contains(self.get_expsw()) {
97            Some(HttpMatchQuality::High.as_score())
98        } else {
99            Some(HttpMatchQuality::Bad.as_score())
100        }
101    }
102}
103
104impl HttpDistance for HttpRequestObservation {
105    fn get_version(&self) -> Version {
106        self.version
107    }
108    fn get_horder(&self) -> &[Header] {
109        &self.horder
110    }
111    fn get_habsent(&self) -> &[Header] {
112        &self.habsent
113    }
114    fn get_expsw(&self) -> &str {
115        &self.expsw
116    }
117}
118
119impl HttpDistance for HttpResponseObservation {
120    fn get_version(&self) -> Version {
121        self.version
122    }
123    fn get_horder(&self) -> &[Header] {
124        &self.horder
125    }
126    fn get_habsent(&self) -> &[Header] {
127        &self.habsent
128    }
129    fn get_expsw(&self) -> &str {
130        &self.expsw
131    }
132}
133
134trait HttpSignatureHelper {
135    fn calculate_http_distance<T: HttpDistance>(&self, observed: &T) -> Option<u32>;
136
137    fn generate_http_index_keys(&self) -> Vec<HttpIndexKey>;
138
139    /// Returns the quality score based on the distance.
140    ///
141    /// The score is a value between 0.0 and 1.0, where 1.0 is a perfect match.
142    ///
143    /// The score is calculated based on the distance of the observed signal to the database signature.
144    /// The distance is a value between 0 and 12, where 0 is a perfect match and 12 is the maximum possible distance.
145    fn get_quality_score_by_distance(&self, distance: u32) -> f32 {
146        http::HttpMatchQuality::distance_to_score(distance)
147    }
148}
149
150impl HttpSignatureHelper for http::Signature {
151    fn calculate_http_distance<T: HttpDistance>(&self, observed: &T) -> Option<u32> {
152        let signature: &http::Signature = self;
153        let distance = observed
154            .distance_ip_version(signature)?
155            .saturating_add(observed.distance_horder(signature)?)
156            .saturating_add(observed.distance_habsent(signature)?)
157            .saturating_add(observed.distance_expsw(signature)?);
158        Some(distance)
159    }
160    fn generate_http_index_keys(&self) -> Vec<HttpIndexKey> {
161        let mut keys = Vec::new();
162        if self.version == Version::Any {
163            keys.push(HttpIndexKey {
164                http_version_key: Version::V10,
165            });
166            keys.push(HttpIndexKey {
167                http_version_key: Version::V11,
168            });
169        } else {
170            keys.push(HttpIndexKey {
171                http_version_key: self.version,
172            });
173        }
174        keys
175    }
176}
177
178impl DatabaseSignature<HttpRequestObservation> for http::Signature {
179    fn calculate_distance(&self, observed: &HttpRequestObservation) -> Option<u32> {
180        self.calculate_http_distance(observed)
181    }
182    fn get_quality_score(&self, distance: u32) -> f32 {
183        self.get_quality_score_by_distance(distance)
184    }
185    fn generate_index_keys_for_db_entry(&self) -> Vec<HttpIndexKey> {
186        self.generate_http_index_keys()
187    }
188}
189
190impl DatabaseSignature<HttpResponseObservation> for http::Signature {
191    fn calculate_distance(&self, observed: &HttpResponseObservation) -> Option<u32> {
192        self.calculate_http_distance(observed)
193    }
194    fn get_quality_score(&self, distance: u32) -> f32 {
195        self.get_quality_score_by_distance(distance)
196    }
197    fn generate_index_keys_for_db_entry(&self) -> Vec<HttpIndexKey> {
198        self.generate_http_index_keys()
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205
206    #[test]
207    fn test_distance_header_with_one_optional_header_mismatch() {
208        let a = vec![
209            Header::new("Date"),
210            Header::new("Server"),
211            Header::new("Last-Modified").optional(),
212            Header::new("Accept-Ranges").optional().with_value("bytes"),
213            Header::new("Content-Length").optional(),
214            Header::new("Content-Range").optional(),
215            Header::new("Keep-Alive").optional().with_value("timeout"),
216            Header::new("Connection").with_value("Keep-Alive"),
217            Header::new("Transfer-Encoding")
218                .optional()
219                .with_value("chunked"),
220            Header::new("Content-Type"),
221        ];
222
223        let b = vec![
224            Header::new("Date"),
225            Header::new("Server"),
226            Header::new("Last-Modified").optional(),
227            Header::new("Accept-Ranges").optional().with_value("bytes"),
228            Header::new("Content-Length").optional(),
229            Header::new("Content-Range").optional(),
230            Header::new("Keep-Alive").with_value("timeout"),
231            Header::new("Connection").with_value("Keep-Alive"),
232            Header::new("Transfer-Encoding")
233                .optional()
234                .with_value("chunked"),
235            Header::new("Content-Type"),
236        ];
237
238        assert!(a[6].optional);
239        assert!(!b[6].optional);
240        assert_ne!(a[6], b[6]);
241
242        let result = <HttpResponseObservation as HttpDistance>::distance_header(&a, &b);
243        assert_eq!(
244            result,
245            Some(HttpMatchQuality::High.as_score()),
246            "Expected Medium quality for 1 error in lists of 10"
247        );
248    }
249
250    #[test]
251    fn test_distance_header_optional_skip_in_middle() {
252        let observed = vec![
253            Header::new("Host"),
254            Header::new("User-Agent").with_value("Mozilla/5.0"),
255            Header::new("Connection").with_value("keep-alive"),
256        ];
257
258        let signature = vec![
259            Header::new("Host"),
260            Header::new("Accept-Language")
261                .optional()
262                .with_value("en-US"),
263            Header::new("User-Agent").with_value("Mozilla/5.0"),
264            Header::new("Connection").with_value("keep-alive"),
265        ];
266
267        let result =
268            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
269        assert_eq!(
270            result,
271            Some(HttpMatchQuality::High.as_score()),
272            "Optional header in middle should be skipped for perfect alignment"
273        );
274    }
275
276    #[test]
277    fn test_distance_header_multiple_optional_skips() {
278        let observed = vec![
279            Header::new("Host"),
280            Header::new("Connection").with_value("keep-alive"),
281        ];
282
283        let signature = vec![
284            Header::new("Host"),
285            Header::new("Accept-Language")
286                .optional()
287                .with_value("en-US"),
288            Header::new("Accept-Encoding").optional().with_value("gzip"),
289            Header::new("Connection").with_value("keep-alive"),
290        ];
291
292        let result =
293            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
294        assert_eq!(
295            result,
296            Some(HttpMatchQuality::High.as_score()),
297            "Multiple optional headers should be skipped"
298        );
299    }
300
301    #[test]
302    fn test_distance_header_required_in_middle_causes_error() {
303        // Required header in middle should cause error and misalignment
304        let observed = vec![
305            Header::new("Host"),
306            Header::new("Connection").with_value("keep-alive"),
307        ];
308
309        let signature = vec![
310            Header::new("Host"),
311            Header::new("User-Agent").with_value("Mozilla/5.0"), // Required, missing
312            Header::new("Connection").with_value("keep-alive"),
313        ];
314
315        let result =
316            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
317        assert_eq!(
318            result,
319            Some(HttpMatchQuality::High.as_score()), // 1 error falls in High range (0-2 errors)
320            "Required header missing should cause 1 error"
321        );
322    }
323
324    #[test]
325    fn test_distance_header_realistic_browser_with_optional_skips() {
326        let observed = vec![
327            Header::new("Host"),
328            Header::new("User-Agent").with_value("Mozilla/5.0"),
329            Header::new("Accept").with_value("text/html"),
330            Header::new("Connection").with_value("keep-alive"),
331        ];
332
333        let signature = vec![
334            Header::new("Host"),
335            Header::new("User-Agent").with_value("Mozilla/5.0"),
336            Header::new("Accept").with_value("text/html"),
337            Header::new("Accept-Language")
338                .optional()
339                .with_value("en-US"), // Optional, missing
340            Header::new("Accept-Encoding").optional().with_value("gzip"), // Optional, missing
341            Header::new("Cookie").optional(),                             // Optional, missing
342            Header::new("Connection").with_value("keep-alive"),
343        ];
344
345        let result =
346            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
347        assert_eq!(
348            result,
349            Some(HttpMatchQuality::High.as_score()),
350            "Browser should match signature even with optional headers missing"
351        );
352    }
353
354    #[test]
355    fn test_distance_header_missing_optional_header() {
356        let observed = vec![
357            Header::new("Host"),
358            Header::new("User-Agent").with_value("Mozilla/5.0"),
359        ];
360
361        let signature = vec![
362            Header::new("Host"),
363            Header::new("User-Agent").with_value("Mozilla/5.0"),
364            Header::new("Accept-Language")
365                .optional()
366                .with_value("en-US"), // Missing but optional
367        ];
368
369        let result =
370            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
371        assert_eq!(
372            result,
373            Some(HttpMatchQuality::High.as_score()),
374            "Missing optional headers should not cause errors"
375        );
376    }
377
378    #[test]
379    fn test_distance_header_missing_required_header() {
380        let observed = vec![Header::new("Host")];
381
382        let signature = vec![
383            Header::new("Host"),
384            Header::new("User-Agent").with_value("Mozilla/5.0"), // Missing and NOT optional
385        ];
386
387        let result =
388            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
389        assert_eq!(
390            result,
391            Some(HttpMatchQuality::High.as_score()), // 1 error out of many
392            "Missing required headers should cause errors"
393        );
394    }
395
396    #[test]
397    fn test_distance_header_extra_headers_in_observed() {
398        let observed = vec![
399            Header::new("Host"),
400            Header::new("User-Agent").with_value("Mozilla/5.0"),
401            Header::new("X-Custom-Header").with_value("custom"), // Extra header
402        ];
403
404        let signature = vec![
405            Header::new("Host"),
406            Header::new("User-Agent").with_value("Mozilla/5.0"),
407        ];
408
409        let result =
410            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
411        assert_eq!(
412            result,
413            Some(HttpMatchQuality::High.as_score()), // 1 error for extra header
414            "Extra headers in observed should cause errors"
415        );
416    }
417
418    #[test]
419    fn test_distance_header_optional_header_at_end() {
420        let observed = vec![
421            Header::new("Host"),
422            Header::new("User-Agent").with_value("Mozilla/5.0"),
423        ];
424
425        let signature = vec![
426            Header::new("Host"),
427            Header::new("User-Agent").with_value("Mozilla/5.0"),
428            Header::new("Accept-Language")
429                .optional()
430                .with_value("en-US"), // Optional, missing
431        ];
432
433        let result =
434            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
435        assert_eq!(
436            result,
437            Some(HttpMatchQuality::High.as_score()),
438            "Missing optional headers at end should not cause errors"
439        );
440    }
441
442    #[test]
443    fn test_distance_header_required_header_at_end() {
444        let observed = vec![Header::new("Host")];
445
446        let signature = vec![
447            Header::new("Host"),
448            Header::new("User-Agent").with_value("Mozilla/5.0"), // Required, missing
449        ];
450
451        let result =
452            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
453        assert_eq!(
454            result,
455            Some(HttpMatchQuality::High.as_score()),
456            "Missing required headers should cause 1 error"
457        );
458    }
459
460    #[test]
461    fn test_distance_header_observed_vs_signature_with_optional() {
462        let observed = vec![
463            Header::new("Host"),
464            Header::new("User-Agent").with_value("Mozilla/5.0"),
465            Header::new("Accept").with_value("text/html"),
466            Header::new("Accept-Language").with_value("en-US"),
467        ];
468
469        let signature = vec![
470            Header::new("Host"),
471            Header::new("User-Agent").with_value("Mozilla/5.0"),
472            Header::new("Accept").with_value("text/html"),
473            Header::new("Accept-Language")
474                .optional()
475                .with_value("en-US"), // Optional but value must match
476        ];
477
478        let result =
479            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
480        assert_eq!(
481            result,
482            Some(HttpMatchQuality::High.as_score()),
483            "Should match perfectly: all headers match including values for optional headers"
484        );
485    }
486
487    #[test]
488    fn test_distance_header_value_mismatch_not_optional() {
489        let observed = vec![
490            Header::new("Host"),
491            Header::new("Connection").with_value("keep-alive"),
492        ];
493
494        let signature = vec![
495            Header::new("Host"),
496            Header::new("Connection").with_value("close"), // Different value, not optional
497        ];
498
499        let result =
500            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
501        assert_eq!(
502            result,
503            Some(HttpMatchQuality::High.as_score()),
504            "Should have 1 error out of 2 headers"
505        );
506    }
507
508    #[test]
509    fn test_distance_header_realistic_browser_scenario() {
510        let observed = vec![
511            Header::new("Host"),
512            Header::new("User-Agent")
513                .with_value("Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0"),
514            Header::new("Accept").with_value("text/html,application/xhtml+xml"),
515            Header::new("Accept-Language").with_value("en-US,en;q=0.9"),
516            Header::new("Accept-Encoding").with_value("gzip, deflate"),
517            Header::new("Connection").with_value("keep-alive"),
518        ];
519
520        // Database signature for Chrome
521        let signature = vec![
522            Header::new("Host"),
523            Header::new("User-Agent")
524                .with_value("Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0"),
525            Header::new("Accept").with_value("text/html,application/xhtml+xml"),
526            Header::new("Accept-Language")
527                .optional()
528                .with_value("en-US,en;q=0.9"), // Optional but value must match
529            Header::new("Accept-Encoding").with_value("gzip, deflate"),
530            Header::new("Connection").with_value("keep-alive"),
531        ];
532
533        let result =
534            <HttpRequestObservation as HttpDistance>::distance_header(&observed, &signature);
535        assert_eq!(
536            result,
537            Some(HttpMatchQuality::High.as_score()),
538            "Should match perfectly for realistic Chrome signature with value matching"
539        );
540    }
541}