huginn_net_db/
db.rs

1use crate::db_matching_trait::{DatabaseSignature, FingerprintDb, IndexKey, ObservedFingerprint};
2use crate::http::{self, Version as HttpVersion};
3use crate::observable_signals::{HttpRequestObservation, HttpResponseObservation, TcpObservation};
4use crate::tcp::{self, IpVersion, PayloadSize};
5use std::collections::HashMap;
6use std::fmt;
7use std::fmt::Display;
8use std::marker::PhantomData;
9use std::str::FromStr;
10use tracing::debug;
11
12/// Represents the database used by `P0f` to store signatures and associated metadata.
13/// The database contains signatures for analyzing TCP and HTTP traffic, as well as
14/// other metadata such as MTU mappings and user agent-to-operating system mappings.
15#[derive(Debug)]
16pub struct Database {
17    pub classes: Vec<String>,
18    pub mtu: Vec<(String, Vec<u16>)>,
19    pub ua_os: Vec<(String, Option<String>)>,
20    pub tcp_request: FingerprintCollection<TcpObservation, tcp::Signature, TcpIndexKey>,
21    pub tcp_response: FingerprintCollection<TcpObservation, tcp::Signature, TcpIndexKey>,
22    pub http_request: FingerprintCollection<HttpRequestObservation, http::Signature, HttpIndexKey>,
23    pub http_response:
24        FingerprintCollection<HttpResponseObservation, http::Signature, HttpIndexKey>,
25}
26
27/// Represents a label associated with a signature, which provides metadata about
28/// the signature, such as type, class, name, and optional flavor details.
29#[derive(Clone, Debug, PartialEq)]
30pub struct Label {
31    pub ty: Type,
32    pub class: Option<String>,
33    pub name: String,
34    pub flavor: Option<String>,
35}
36
/// Kind of a [`Label`].
///
/// - `Specified`: a specific label with well-defined characteristics.
/// - `Generic`: a generic label with broader characteristics.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
    /// Specific label.
    Specified,
    /// Generic label.
    Generic,
}
45
46impl fmt::Display for Type {
47    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
48        write!(f, "{self:?}")
49    }
50}
51
52impl Database {
53    /// Creates a default instance of the `Database` by parsing an embedded configuration file.
54    /// This file (`config/p0f.fp` relative to the crate root) is expected to define the default
55    /// signatures and mappings used for analysis.
56    ///
57    /// # Errors
58    /// Returns `HuginnNetError::MissConfiguration` if the embedded default fingerprint file
59    /// cannot be parsed. This indicates a critical issue with the bundled fingerprint data
60    /// or the parser itself.
61    pub fn load_default() -> Result<Self, crate::error::DatabaseError> {
62        const DEFAULT_FP_CONTENTS: &str = include_str!("../config/p0f.fp");
63
64        Database::from_str(DEFAULT_FP_CONTENTS).map_err(|e| {
65            crate::error::DatabaseError::InvalidConfiguration(format!(
66                "Failed to parse embedded default p0f database: {e}"
67            ))
68        })
69    }
70}
71
72/// Index key for TCP signatures, used to optimize database lookups.
73///
74/// This key is generated from a `tcp::Signature` and combines several
75/// of its most discriminative fields to allow for a fast initial filtering
76/// of potential matches in the signature database. The goal is to quickly
77/// narrow down the search space before performing more detailed and costly
78/// distance calculations.
79///
80/// The fields included are chosen for their balance of providing good
81/// discrimination while not being overly specific to avoid missing matches
82/// due to minor variations (which are handled by the distance calculation).
83#[derive(Debug, Clone, PartialEq, Eq, Hash)]
84pub struct TcpIndexKey {
85    pub ip_version_key: IpVersion,
86    pub olayout_key: String,
87    pub pclass_key: PayloadSize,
88}
89
90impl IndexKey for TcpIndexKey {}
91
92/// Index key for HTTP signatures, used to optimize database lookups.
93///
94/// This key is generated from a `http::Signature`
95/// to enable faster filtering of HTTP signatures. It combines key characteristics
96/// of an HTTP request or response.
97#[derive(Debug, Clone, PartialEq, Eq, Hash)]
98pub struct HttpIndexKey {
99    pub http_version_key: HttpVersion,
100}
101
102impl IndexKey for HttpIndexKey {}
103
104#[derive(Debug)]
105pub struct FingerprintCollection<OF, DS, K>
106where
107    OF: ObservedFingerprint<Key = K>,
108    DS: DatabaseSignature<OF>,
109    K: IndexKey,
110{
111    pub entries: Vec<(Label, Vec<DS>)>,
112    pub(crate) index: HashMap<K, Vec<(usize, usize)>>,
113    _observed_marker: PhantomData<OF>,
114    _database_sig_marker: PhantomData<DS>,
115    _key_marker: PhantomData<K>,
116}
117
118impl<OF, DS, K> Default for FingerprintCollection<OF, DS, K>
119where
120    OF: ObservedFingerprint<Key = K>,
121    DS: DatabaseSignature<OF>,
122    K: IndexKey,
123{
124    fn default() -> Self {
125        Self {
126            entries: Vec::new(),
127            index: HashMap::new(),
128            _observed_marker: PhantomData,
129            _database_sig_marker: PhantomData,
130            _key_marker: PhantomData,
131        }
132    }
133}
134
135impl<OF, DS, K> FingerprintCollection<OF, DS, K>
136where
137    OF: ObservedFingerprint<Key = K>,
138    DS: DatabaseSignature<OF>,
139    K: IndexKey,
140{
141    /// Creates a new collection and builds an index for it.
142    pub fn new(entries: Vec<(Label, Vec<DS>)>) -> Self {
143        let mut index_map = HashMap::new();
144        for (label_idx, (_label, sig_vec)) in entries.iter().enumerate() {
145            for (sig_idx, db_sig) in sig_vec.iter().enumerate() {
146                for key in db_sig.generate_index_keys_for_db_entry() {
147                    index_map
148                        .entry(key)
149                        .or_insert_with(Vec::new)
150                        .push((label_idx, sig_idx));
151                }
152            }
153        }
154        FingerprintCollection {
155            entries,
156            index: index_map,
157            _observed_marker: PhantomData,
158            _database_sig_marker: PhantomData,
159            _key_marker: PhantomData,
160        }
161    }
162}
163
164impl<OF, DS, K> FingerprintDb<OF, DS> for FingerprintCollection<OF, DS, K>
165where
166    OF: ObservedFingerprint<Key = K>,
167    DS: DatabaseSignature<OF> + Display,
168    K: IndexKey,
169{
170    fn find_best_match(&self, observed: &OF) -> Option<(&Label, &DS, f32)> {
171        let observed_key = observed.generate_index_key();
172
173        let candidate_indices = match self.index.get(&observed_key) {
174            Some(indices) => indices,
175            None => {
176                return None;
177            }
178        };
179
180        if candidate_indices.is_empty() {
181            return None;
182        }
183
184        let mut best_label_ref = None;
185        let mut best_sig_ref = None;
186        let mut min_distance = u32::MAX;
187
188        for &(label_idx, sig_idx) in candidate_indices {
189            let (label, sig_vec) = &self.entries[label_idx];
190            let db_sig = &sig_vec[sig_idx];
191
192            if let Some(distance) = db_sig.calculate_distance(observed) {
193                if distance < min_distance {
194                    min_distance = distance;
195                    best_label_ref = Some(label);
196                    best_sig_ref = Some(db_sig);
197                }
198                debug!(
199                    "distance: {}, label: {}, flavor: {:?}, sig: {}",
200                    distance, label.name, label.flavor, db_sig
201                );
202            }
203        }
204
205        if let (Some(label), Some(sig)) = (best_label_ref, best_sig_ref) {
206            Some((label, sig, sig.get_quality_score(min_distance)))
207        } else {
208            None
209        }
210    }
211}