Skip to main content

nexus_shield/
fingerprint.rs

1// ============================================================================
2// File: fingerprint.rs
3// Description: HTTP request fingerprinting for behavioral analysis and bot detection
4// Author: Andrew Jewell Sr. - AutomataNexus
5// Updated: March 24, 2026
6//
7// DISCLAIMER: This software is provided "as is", without warranty of any kind,
8// express or implied. Use at your own risk. AutomataNexus and the author assume
9// no liability for any damages arising from the use of this software.
10// ============================================================================
11//! Request Fingerprinting — Behavioral analysis and bot detection.
12//!
13//! Extracts features from HTTP requests to build a behavioral fingerprint.
14//! Automated attack tools typically have distinctive patterns: missing standard
15//! headers, unusual header ordering, rapid request cadence, and low entropy
16//! in request parameters.
17
18use axum::http::HeaderMap;
19use parking_lot::RwLock;
20use sha2::{Digest, Sha256};
21use std::collections::HashMap;
22use std::time::Instant;
23
/// Fingerprint of a single HTTP request.
///
/// Built by `Fingerprinter::analyze` from the request headers alone; it does
/// not include any per-client behavioral history.
#[derive(Debug, Clone)]
pub struct RequestFingerprint {
    /// Stable hash of the client's fingerprint signals.
    ///
    /// The first 32 hex characters of a SHA-256 digest computed over the
    /// (truncated) User-Agent, the header count, the header-order hash and the
    /// presence of the `Accept` / `Accept-Language` headers. Identical header
    /// maps always yield the same value.
    pub hash: String,
    /// Individual signals extracted from the request.
    pub signals: FingerprintSignals,
    /// Anomaly score (0.0 = normal, 1.0 = definitely automated/malicious).
    ///
    /// Sum of the header-based penalties, capped at 1.0.
    pub anomaly_score: f64,
}
34
/// Raw, header-derived observations that feed the anomaly score and the
/// fingerprint hash.
#[derive(Debug, Clone)]
pub struct FingerprintSignals {
    /// `true` when a `User-Agent` header is present (even if its value is empty).
    pub has_user_agent: bool,
    /// `true` when an `Accept` header is present.
    pub has_accept: bool,
    /// `true` when an `Accept-Language` header is present.
    pub has_accept_language: bool,
    /// `true` when an `Accept-Encoding` header is present.
    pub has_accept_encoding: bool,
    /// `true` when a `Referer` header is present. Recorded for callers; the
    /// scoring and hashing code in this file does not read it.
    pub has_referer: bool,
    /// Value of `HeaderMap::len()` for the request.
    pub header_count: usize,
    /// Hash of header names in order (different tools produce different orderings).
    ///
    /// First 16 hex characters of a SHA-256 digest over the header names joined
    /// with `|`, in the order yielded by `HeaderMap::keys()`.
    pub header_order_hash: String,
    /// User-Agent string (truncated, for pattern matching).
    ///
    /// At most 200 characters. Empty when the header is missing or its value
    /// cannot be read as a string.
    pub user_agent: String,
}
48
/// Tracks client behavior patterns over time for anomaly detection.
///
/// One instance is kept per client key inside `Fingerprinter`; it is created
/// by `record_request` and dropped by `prune_stale`.
struct ClientBehavior {
    /// Number of requests seen.
    request_count: u64,
    /// First request time.
    first_seen: Instant,
    /// Last request time.
    last_seen: Instant,
    /// Number of distinct endpoints hit.
    ///
    /// NOTE(review): initialised to 1 by `record_request` and never updated
    /// anywhere in this file, so the endpoint-scanning rule in
    /// `behavioral_score` cannot fire as written.
    distinct_endpoints: u32,
    /// Number of 4xx/5xx errors triggered.
    ///
    /// Incremented by `record_error`; the caller decides what counts as an error.
    error_count: u32,
    /// Distinct source_type values tried in connect requests.
    ///
    /// NOTE(review): initialised to 0 and never updated anywhere in this file,
    /// so the source-type enumeration rule in `behavioral_score` cannot fire
    /// as written.
    distinct_source_types: u32,
}
64
/// Request fingerprinter and per-client behavior tracker.
///
/// `analyze` is stateless and scores a single request's headers; the
/// `record_*` / `behavioral_score` / `prune_stale` methods maintain and query
/// the per-client history stored in `behaviors`.
pub struct Fingerprinter {
    /// Behavioral history keyed by the client identifier passed to
    /// `record_request` (named `client_ip` there). Guarded by a read/write lock
    /// so scoring takes a read lock while recording and pruning take a write lock.
    behaviors: RwLock<HashMap<String, ClientBehavior>>,
}
68
69impl Fingerprinter {
70    pub fn new() -> Self {
71        Self {
72            behaviors: RwLock::new(HashMap::new()),
73        }
74    }
75
76    /// Analyze request headers and produce a fingerprint with anomaly score.
77    pub fn analyze(&self, headers: &HeaderMap) -> RequestFingerprint {
78        let signals = extract_signals(headers);
79        let anomaly_score = calculate_anomaly_score(&signals);
80        let hash = compute_fingerprint_hash(&signals);
81
82        RequestFingerprint {
83            hash,
84            signals,
85            anomaly_score,
86        }
87    }
88
89    /// Record a request for behavioral tracking. Call after each request.
90    pub fn record_request(&self, client_ip: &str) {
91        let mut behaviors = self.behaviors.write();
92        let behavior = behaviors
93            .entry(client_ip.to_string())
94            .or_insert_with(|| ClientBehavior {
95                request_count: 0,
96                first_seen: Instant::now(),
97                last_seen: Instant::now(),
98                distinct_endpoints: 1,
99                error_count: 0,
100                distinct_source_types: 0,
101            });
102        behavior.request_count += 1;
103        behavior.last_seen = Instant::now();
104    }
105
106    /// Record an error response for behavioral tracking.
107    pub fn record_error(&self, client_ip: &str) {
108        let mut behaviors = self.behaviors.write();
109        if let Some(behavior) = behaviors.get_mut(client_ip) {
110            behavior.error_count += 1;
111        }
112    }
113
114    /// Get the behavioral anomaly score for a client IP.
115    /// Returns 0.0 for unknown clients (benefit of the doubt).
116    pub fn behavioral_score(&self, client_ip: &str) -> f64 {
117        let behaviors = self.behaviors.read();
118        let behavior = match behaviors.get(client_ip) {
119            Some(b) => b,
120            None => return 0.0,
121        };
122
123        let mut score: f64 = 0.0;
124
125        // High request rate
126        let duration = behavior.last_seen.duration_since(behavior.first_seen).as_secs_f64();
127        if duration > 0.0 {
128            let rps = behavior.request_count as f64 / duration;
129            if rps > 20.0 {
130                score += 0.3;
131            }
132            if rps > 100.0 {
133                score += 0.3;
134            }
135        }
136
137        // High error rate
138        if behavior.request_count > 5 {
139            let error_rate = behavior.error_count as f64 / behavior.request_count as f64;
140            if error_rate > 0.5 {
141                score += 0.3;
142            }
143        }
144
145        // Many requests in very short time (burst)
146        if behavior.request_count > 50 && duration < 5.0 {
147            score += 0.4;
148        }
149
150        // Rapid endpoint scanning (many distinct endpoints in short time)
151        if behavior.distinct_endpoints > 20 && duration < 30.0 {
152            score += 0.2;
153        }
154
155        // Source type enumeration (trying many different source types)
156        if behavior.distinct_source_types > 5 {
157            score += 0.2;
158        }
159
160        score.min(1.0)
161    }
162
163    /// Prune behavioral data for clients not seen recently.
164    pub fn prune_stale(&self, max_age_secs: u64) {
165        let mut behaviors = self.behaviors.write();
166        behaviors.retain(|_, b| b.last_seen.elapsed().as_secs() < max_age_secs);
167    }
168}
169
170fn extract_signals(headers: &HeaderMap) -> FingerprintSignals {
171    let user_agent = headers
172        .get("user-agent")
173        .and_then(|v| v.to_str().ok())
174        .unwrap_or("")
175        .chars()
176        .take(200)
177        .collect::<String>();
178
179    // Hash the header names in order
180    let header_names: Vec<String> = headers.keys().map(|k| k.as_str().to_lowercase()).collect();
181    let order_input = header_names.join("|");
182    let header_order_hash = hex::encode(Sha256::digest(order_input.as_bytes()))[..16].to_string();
183
184    FingerprintSignals {
185        has_user_agent: headers.contains_key("user-agent"),
186        has_accept: headers.contains_key("accept"),
187        has_accept_language: headers.contains_key("accept-language"),
188        has_accept_encoding: headers.contains_key("accept-encoding"),
189        has_referer: headers.contains_key("referer"),
190        header_count: headers.len(),
191        header_order_hash,
192        user_agent,
193    }
194}
195
196fn calculate_anomaly_score(signals: &FingerprintSignals) -> f64 {
197    let mut score: f64 = 0.0;
198
199    // Missing standard headers suggests automated tool
200    if !signals.has_user_agent {
201        score += 0.3;
202    }
203    if !signals.has_accept {
204        score += 0.1;
205    }
206    if !signals.has_accept_language {
207        score += 0.1;
208    }
209    if !signals.has_accept_encoding {
210        score += 0.05;
211    }
212
213    // Very few headers = likely curl, httpie, or attack tool
214    if signals.header_count < 3 {
215        score += 0.25;
216    }
217
218    // Very many headers = possible proxy chain or header stuffing
219    if signals.header_count > 30 {
220        score += 0.15;
221    }
222
223    // Check for known attack tool user agents
224    let ua_lower = signals.user_agent.to_lowercase();
225    let attack_tools = [
226        "sqlmap", "nikto", "nmap", "masscan", "zgrab", "gobuster",
227        "dirbuster", "wfuzz", "ffuf", "nuclei", "httpx",
228        "python-requests", "go-http-client", "java/",
229    ];
230    for tool in &attack_tools {
231        if ua_lower.contains(tool) {
232            score += 0.4;
233            break;
234        }
235    }
236
237    // Empty user agent is suspicious
238    if signals.has_user_agent && signals.user_agent.is_empty() {
239        score += 0.2;
240    }
241
242    score.min(1.0)
243}
244
245fn compute_fingerprint_hash(signals: &FingerprintSignals) -> String {
246    let input = format!(
247        "ua:{}|hdr_count:{}|order:{}|accept:{}|lang:{}",
248        signals.user_agent,
249        signals.header_count,
250        signals.header_order_hash,
251        signals.has_accept,
252        signals.has_accept_language,
253    );
254    hex::encode(Sha256::digest(input.as_bytes()))[..32].to_string()
255}
256
#[cfg(test)]
mod tests {
    //! Unit tests for header fingerprinting and per-client behavior tracking.
    //!
    //! Behavioral tests avoid asserting on request-rate rules because those
    //! depend on wall-clock timing; they use the error-rate rule instead, which
    //! is deterministic (it needs more than 5 requests and more than 50% errors
    //! and contributes 0.3).

    use super::*;
    use axum::http::HeaderValue;

    /// Headers resembling a regular browser: all four standard headers present.
    fn make_normal_headers() -> HeaderMap {
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"));
        h.insert("accept", HeaderValue::from_static("text/html,application/json"));
        h.insert("accept-language", HeaderValue::from_static("en-US,en;q=0.9"));
        h.insert("accept-encoding", HeaderValue::from_static("gzip, deflate, br"));
        h
    }

    /// Headers resembling a scanner: a lone, well-known tool User-Agent.
    fn make_bot_headers() -> HeaderMap {
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("sqlmap/1.7"));
        h
    }

    /// Drive a client into the deterministic "high error rate" state:
    /// 10 requests and 8 errors (error rate 0.8).
    fn make_error_prone(fp: &Fingerprinter, client: &str) {
        for _ in 0..10 {
            fp.record_request(client);
        }
        for _ in 0..8 {
            fp.record_error(client);
        }
    }

    #[test]
    fn normal_browser_low_anomaly() {
        let fp = Fingerprinter::new();
        let result = fp.analyze(&make_normal_headers());
        assert!(result.anomaly_score < 0.2, "Normal browser score should be low: {}", result.anomaly_score);
    }

    #[test]
    fn attack_tool_high_anomaly() {
        let fp = Fingerprinter::new();
        let result = fp.analyze(&make_bot_headers());
        assert!(result.anomaly_score > 0.5, "Attack tool score should be high: {}", result.anomaly_score);
    }

    #[test]
    fn empty_headers_suspicious() {
        let fp = Fingerprinter::new();
        let result = fp.analyze(&HeaderMap::new());
        assert!(result.anomaly_score > 0.4, "Empty headers should be suspicious: {}", result.anomaly_score);
    }

    #[test]
    fn fingerprint_is_stable() {
        let fp = Fingerprinter::new();
        let h = make_normal_headers();
        let r1 = fp.analyze(&h);
        let r2 = fp.analyze(&h);
        assert_eq!(r1.hash, r2.hash);
    }

    // ── Fingerprinter::new() ───────────────────────────────

    #[test]
    fn fingerprinter_new_creates_empty_behaviors() {
        let fp = Fingerprinter::new();
        // A new fingerprinter should have no behavioral data
        assert_eq!(fp.behavioral_score("1.2.3.4"), 0.0);
    }

    // ── analyze() with various header patterns ─────────────

    #[test]
    fn analyze_with_only_user_agent() {
        let fp = Fingerprinter::new();
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("Mozilla/5.0"));
        let result = fp.analyze(&h);
        // Has user-agent but missing accept, accept-language, accept-encoding
        // and low header count (1 < 3) => should have moderate score
        assert!(result.anomaly_score > 0.2, "Single header should be suspicious: {}", result.anomaly_score);
    }

    #[test]
    fn analyze_python_requests_user_agent() {
        let fp = Fingerprinter::new();
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("python-requests/2.28.1"));
        h.insert("accept", HeaderValue::from_static("*/*"));
        h.insert("accept-encoding", HeaderValue::from_static("gzip"));
        let result = fp.analyze(&h);
        // python-requests is in the attack tools list
        assert!(result.anomaly_score >= 0.4, "python-requests UA should be flagged: {}", result.anomaly_score);
    }

    #[test]
    fn analyze_go_http_client() {
        let fp = Fingerprinter::new();
        let mut h = HeaderMap::new();
        // Mixed case on purpose: tool matching is case-insensitive.
        h.insert("user-agent", HeaderValue::from_static("Go-http-client/1.1"));
        let result = fp.analyze(&h);
        assert!(result.anomaly_score >= 0.4, "Go http client should be flagged: {}", result.anomaly_score);
    }

    #[test]
    fn analyze_nikto_scanner() {
        let fp = Fingerprinter::new();
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("Nikto/2.1.6"));
        let result = fp.analyze(&h);
        assert!(result.anomaly_score >= 0.4, "Nikto should be flagged: {}", result.anomaly_score);
    }

    #[test]
    fn analyze_nuclei_scanner() {
        let fp = Fingerprinter::new();
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("Nuclei - Open-source project"));
        h.insert("accept", HeaderValue::from_static("*/*"));
        let result = fp.analyze(&h);
        assert!(result.anomaly_score >= 0.4, "Nuclei should be flagged: {}", result.anomaly_score);
    }

    #[test]
    fn analyze_many_headers_suspicious() {
        let fp = Fingerprinter::new();
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_static("Mozilla/5.0"));
        h.insert("accept", HeaderValue::from_static("*/*"));
        h.insert("accept-language", HeaderValue::from_static("en"));
        h.insert("accept-encoding", HeaderValue::from_static("gzip"));
        // Add many custom headers to exceed 30
        for i in 0..30 {
            let name = format!("x-custom-header-{}", i);
            h.insert(
                axum::http::HeaderName::from_bytes(name.as_bytes()).unwrap(),
                HeaderValue::from_static("value"),
            );
        }
        let result = fp.analyze(&h);
        assert_eq!(result.signals.header_count, 34);
        // All standard headers are present and the UA is benign, so the only
        // rule that can fire is the "more than 30 headers" one (weight 0.15).
        assert!(
            (result.anomaly_score - 0.15).abs() < 1e-9,
            "Only the many-headers penalty should apply: {}",
            result.anomaly_score
        );
    }

    #[test]
    fn analyze_signals_populated_correctly() {
        let fp = Fingerprinter::new();
        let h = make_normal_headers();
        let result = fp.analyze(&h);
        assert!(result.signals.has_user_agent);
        assert!(result.signals.has_accept);
        assert!(result.signals.has_accept_language);
        assert!(result.signals.has_accept_encoding);
        assert!(!result.signals.has_referer);
        assert_eq!(result.signals.header_count, 4);
    }

    #[test]
    fn analyze_with_referer() {
        let fp = Fingerprinter::new();
        let mut h = make_normal_headers();
        h.insert("referer", HeaderValue::from_static("https://example.com"));
        let result = fp.analyze(&h);
        assert!(result.signals.has_referer);
    }

    #[test]
    fn analyze_user_agent_truncated_at_200() {
        let fp = Fingerprinter::new();
        let long_ua = "A".repeat(300);
        let mut h = HeaderMap::new();
        h.insert("user-agent", HeaderValue::from_str(&long_ua).unwrap());
        let result = fp.analyze(&h);
        assert_eq!(result.signals.user_agent.len(), 200);
    }

    #[test]
    fn different_headers_produce_different_hashes() {
        let fp = Fingerprinter::new();
        let r1 = fp.analyze(&make_normal_headers());
        let r2 = fp.analyze(&make_bot_headers());
        assert_ne!(r1.hash, r2.hash);
    }

    // ── record_request() ───────────────────────────────────

    #[test]
    fn record_request_increments_count() {
        let fp = Fingerprinter::new();
        fp.record_request("10.0.0.1");
        fp.record_request("10.0.0.1");
        fp.record_request("10.0.0.1");
        // Three error-free requests are far below the burst and error-rate
        // thresholds, so they must never saturate the score.
        let score = fp.behavioral_score("10.0.0.1");
        assert!(score < 1.0, "Few requests should not max out score: {}", score);
    }

    #[test]
    fn record_request_creates_new_client() {
        let fp = Fingerprinter::new();
        // Before recording, score is 0.0
        assert_eq!(fp.behavioral_score("new_client"), 0.0);
        for _ in 0..6 {
            fp.record_request("new_client");
        }
        // `record_error` is a no-op for untracked clients, so the error-rate
        // penalty can only show up if `record_request` created the entry.
        for _ in 0..6 {
            fp.record_error("new_client");
        }
        let score = fp.behavioral_score("new_client");
        assert!(score >= 0.3, "Recorded client should be tracked: {}", score);
    }

    #[test]
    fn record_request_different_ips_independent() {
        let fp = Fingerprinter::new();
        for _ in 0..100 {
            fp.record_request("attacker_ip");
        }
        // A different IP should still have 0.0 score
        assert_eq!(fp.behavioral_score("clean_ip"), 0.0);
    }

    // ── record_error() ─────────────────────────────────────

    #[test]
    fn record_error_only_affects_known_clients() {
        let fp = Fingerprinter::new();
        // Recording error for unknown IP should not panic
        fp.record_error("unknown_ip");
        assert_eq!(fp.behavioral_score("unknown_ip"), 0.0);
    }

    #[test]
    fn record_error_after_requests_increases_score() {
        let fp = Fingerprinter::new();
        // 10 requests pass the request_count > 5 threshold; 8 errors give
        // error_rate = 0.8 > 0.5 => +0.3
        make_error_prone(&fp, "error_client");
        let score = fp.behavioral_score("error_client");
        assert!(score >= 0.3, "High error rate should increase behavioral score: {}", score);
    }

    // ── behavioral_score() thresholds ──────────────────────

    #[test]
    fn behavioral_score_unknown_client_is_zero() {
        let fp = Fingerprinter::new();
        assert_eq!(fp.behavioral_score("nonexistent"), 0.0);
    }

    #[test]
    fn behavioral_score_capped_at_one() {
        let fp = Fingerprinter::new();
        // Create extreme conditions: many requests, many errors
        for _ in 0..200 {
            fp.record_request("maxed_out");
        }
        for _ in 0..200 {
            fp.record_error("maxed_out");
        }
        let score = fp.behavioral_score("maxed_out");
        assert!(score <= 1.0, "Score should never exceed 1.0: {}", score);
    }

    // ── prune_stale() ──────────────────────────────────────

    #[test]
    fn prune_stale_removes_old_entries() {
        let fp = Fingerprinter::new();
        make_error_prone(&fp, "stale_client");
        // Sanity check: the client is tracked and has a non-zero score, so a
        // later 0.0 can only mean the entry was removed.
        assert!(fp.behavioral_score("stale_client") >= 0.3);
        // Prune with 0 seconds max age — everything should be stale
        fp.prune_stale(0);
        // After pruning, the client should be gone
        assert_eq!(fp.behavioral_score("stale_client"), 0.0);
    }

    #[test]
    fn prune_stale_keeps_recent_entries() {
        let fp = Fingerprinter::new();
        for _ in 0..10 {
            fp.record_request("recent_client");
        }
        // Prune with large max age — nothing should be removed
        fp.prune_stale(3600);
        // Had the client been pruned, these errors would be ignored
        // (`record_error` is a no-op for unknown clients) and the score
        // would be 0.0.
        for _ in 0..8 {
            fp.record_error("recent_client");
        }
        let score = fp.behavioral_score("recent_client");
        assert!(score >= 0.3, "Recent client should survive pruning: {}", score);
    }

    #[test]
    fn prune_stale_on_empty_is_noop() {
        let fp = Fingerprinter::new();
        fp.prune_stale(0); // should not panic
        assert_eq!(fp.behavioral_score("anyone"), 0.0);
    }
}