Skip to main content

synapse_pingora/trends/
signal_extractor.rs

1//! Signal extraction from HTTP requests.
2
3use sha2::{Digest, Sha256};
4
5use super::types::{
6    AuthTokenMetadata, BehavioralMetadata, DeviceMetadata, JwtClaims, NetworkMetadata, Signal,
7    SignalCategory, SignalMetadata, SignalType,
8};
9
10/// Signal extractor for HTTP requests.
11pub struct SignalExtractor;
12
13impl SignalExtractor {
14    /// Extract all signals from request context.
15    pub fn extract(
16        entity_id: &str,
17        session_id: Option<&str>,
18        user_agent: Option<&str>,
19        authorization: Option<&str>,
20        client_ip: Option<&str>,
21        ja4: Option<&str>,
22        ja4h: Option<&str>,
23        last_request_time: Option<i64>,
24    ) -> Vec<Signal> {
25        let mut signals = Vec::new();
26        let now = chrono::Utc::now().timestamp_millis();
27
28        // Network signals
29        if let Some(ip) = client_ip {
30            signals.push(Signal {
31                id: uuid::Uuid::new_v4().to_string(),
32                timestamp: now,
33                category: SignalCategory::Network,
34                signal_type: SignalType::Ip,
35                value: ip.to_string(),
36                entity_id: entity_id.to_string(),
37                session_id: session_id.map(String::from),
38                metadata: SignalMetadata::Network(NetworkMetadata {
39                    ip: ip.to_string(),
40                    ja4: ja4.map(String::from),
41                    ja4h: ja4h.map(String::from),
42                    ..Default::default()
43                }),
44            });
45        }
46
47        // JA4 fingerprint signal
48        if let Some(ja4_fp) = ja4 {
49            signals.push(Signal {
50                id: uuid::Uuid::new_v4().to_string(),
51                timestamp: now,
52                category: SignalCategory::Network,
53                signal_type: SignalType::Ja4,
54                value: ja4_fp.to_string(),
55                entity_id: entity_id.to_string(),
56                session_id: session_id.map(String::from),
57                metadata: SignalMetadata::Network(NetworkMetadata {
58                    ip: client_ip.unwrap_or("").to_string(),
59                    ja4: Some(ja4_fp.to_string()),
60                    ja4h: ja4h.map(String::from),
61                    ..Default::default()
62                }),
63            });
64        }
65
66        // JA4H fingerprint signal
67        if let Some(ja4h_fp) = ja4h {
68            signals.push(Signal {
69                id: uuid::Uuid::new_v4().to_string(),
70                timestamp: now,
71                category: SignalCategory::Network,
72                signal_type: SignalType::Ja4h,
73                value: ja4h_fp.to_string(),
74                entity_id: entity_id.to_string(),
75                session_id: session_id.map(String::from),
76                metadata: SignalMetadata::Network(NetworkMetadata {
77                    ip: client_ip.unwrap_or("").to_string(),
78                    ja4: ja4.map(String::from),
79                    ja4h: Some(ja4h_fp.to_string()),
80                    ..Default::default()
81                }),
82            });
83        }
84
85        // Device signals from User-Agent
86        if let Some(ua) = user_agent {
87            signals.push(Signal {
88                id: uuid::Uuid::new_v4().to_string(),
89                timestamp: now,
90                category: SignalCategory::Device,
91                signal_type: SignalType::HttpFingerprint,
92                value: Self::hash_value(ua),
93                entity_id: entity_id.to_string(),
94                session_id: session_id.map(String::from),
95                metadata: SignalMetadata::Device(DeviceMetadata {
96                    user_agent: ua.to_string(),
97                    ..Default::default()
98                }),
99            });
100        }
101
102        // Auth token signals
103        if let Some(auth) = authorization {
104            if let Some(signal) = Self::extract_auth_signal(auth, entity_id, session_id, now) {
105                signals.push(signal);
106            }
107        }
108
109        // Behavioral signals
110        if let Some(last_time) = last_request_time {
111            let time_delta = now - last_time;
112            signals.push(Signal {
113                id: uuid::Uuid::new_v4().to_string(),
114                timestamp: now,
115                category: SignalCategory::Behavioral,
116                signal_type: SignalType::Timing,
117                value: format!("delta_{}", time_delta),
118                entity_id: entity_id.to_string(),
119                session_id: session_id.map(String::from),
120                metadata: SignalMetadata::Behavioral(BehavioralMetadata {
121                    time_since_last_request: Some(time_delta),
122                    ..Default::default()
123                }),
124            });
125        }
126
127        signals
128    }
129
130    /// Extract auth token signal from Authorization header.
131    fn extract_auth_signal(
132        auth: &str,
133        entity_id: &str,
134        session_id: Option<&str>,
135        timestamp: i64,
136    ) -> Option<Signal> {
137        let (token_type, token) = if auth.starts_with("Bearer ") {
138            (SignalType::Bearer, &auth[7..])
139        } else if auth.starts_with("Basic ") {
140            (SignalType::Basic, &auth[6..])
141        } else {
142            (SignalType::CustomAuth, auth)
143        };
144
145        // Check if it looks like a JWT
146        let (signal_type, jwt_claims) = if token.matches('.').count() == 2 {
147            // Likely a JWT
148            let claims = Self::parse_jwt_claims(token);
149            (SignalType::Jwt, claims)
150        } else {
151            (token_type, None)
152        };
153
154        let token_hash = Self::hash_value(token);
155
156        Some(Signal {
157            id: uuid::Uuid::new_v4().to_string(),
158            timestamp,
159            category: SignalCategory::AuthToken,
160            signal_type,
161            value: token_hash.clone(),
162            entity_id: entity_id.to_string(),
163            session_id: session_id.map(String::from),
164            metadata: SignalMetadata::AuthToken(AuthTokenMetadata {
165                header_name: "Authorization".to_string(),
166                token_prefix: Some(auth.split_whitespace().next().unwrap_or("").to_string()),
167                token_hash,
168                jwt_claims,
169            }),
170        })
171    }
172
173    /// Parse JWT claims (basic parsing, no verification).
174    fn parse_jwt_claims(token: &str) -> Option<JwtClaims> {
175        let parts: Vec<&str> = token.split('.').collect();
176        if parts.len() != 3 {
177            return None;
178        }
179
180        // Decode payload (second part)
181        let payload = match base64::Engine::decode(
182            &base64::engine::general_purpose::URL_SAFE_NO_PAD,
183            parts[1],
184        ) {
185            Ok(bytes) => bytes,
186            Err(_) => return None,
187        };
188
189        let json: serde_json::Value = match serde_json::from_slice(&payload) {
190            Ok(v) => v,
191            Err(_) => return None,
192        };
193
194        Some(JwtClaims {
195            sub: json.get("sub").and_then(|v| v.as_str()).map(String::from),
196            iss: json.get("iss").and_then(|v| v.as_str()).map(String::from),
197            exp: json.get("exp").and_then(|v| v.as_i64()),
198            iat: json.get("iat").and_then(|v| v.as_i64()),
199            aud: json.get("aud").and_then(|v| v.as_str()).map(String::from),
200        })
201    }
202
203    /// Hash a value using SHA-256.
204    fn hash_value(value: &str) -> String {
205        let mut hasher = Sha256::new();
206        hasher.update(value.as_bytes());
207        hex::encode(hasher.finalize())
208    }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Entity id shared by every test request.
    const ENTITY: &str = "entity-1";
    /// Client IP used by the network-signal tests.
    const CLIENT_IP: &str = "192.168.1.100";

    /// A request carrying only a client IP yields an IP signal whose value is
    /// the address itself.
    #[test]
    fn test_extract_ip_signal() {
        let signals =
            SignalExtractor::extract(ENTITY, None, None, None, Some(CLIENT_IP), None, None, None);

        assert!(!signals.is_empty());

        let ip_value = signals
            .iter()
            .find(|signal| signal.signal_type == SignalType::Ip)
            .map(|signal| signal.value.as_str());
        assert_eq!(ip_value, Some("192.168.1.100"));
    }

    /// A JA4 fingerprint is surfaced verbatim as its own signal.
    #[test]
    fn test_extract_ja4_signal() {
        let fingerprint = "t13d1516h2_abc123";
        let signals = SignalExtractor::extract(
            ENTITY,
            None,
            None,
            None,
            Some(CLIENT_IP),
            Some(fingerprint),
            None,
            None,
        );

        let ja4_value = signals
            .iter()
            .find(|signal| signal.signal_type == SignalType::Ja4)
            .map(|signal| signal.value.as_str());
        assert_eq!(ja4_value, Some("t13d1516h2_abc123"));
    }

    /// A bearer token is classified as `Bearer` and never stored in clear
    /// text in the signal value.
    #[test]
    fn test_extract_bearer_token() {
        let header = "Bearer my-secret-token";
        let signals =
            SignalExtractor::extract(ENTITY, None, None, Some(header), None, None, None, None);

        let auth_signal = signals
            .iter()
            .find(|signal| signal.category == SignalCategory::AuthToken)
            .expect("an auth-token signal should be extracted");
        assert_eq!(auth_signal.signal_type, SignalType::Bearer);

        // Value should be a hash, not the raw token
        assert!(!auth_signal.value.contains("my-secret-token"));
    }

    /// Supplying the previous request time produces a timing signal whose
    /// value carries the `delta_` prefix.
    #[test]
    fn test_extract_timing_signal() {
        // 5 seconds ago
        let previous_request = chrono::Utc::now().timestamp_millis() - 5000;
        let signals = SignalExtractor::extract(
            ENTITY,
            None,
            None,
            None,
            None,
            None,
            None,
            Some(previous_request),
        );

        let timing_signal = signals
            .iter()
            .find(|signal| signal.signal_type == SignalType::Timing)
            .expect("a timing signal should be extracted");
        assert!(timing_signal.value.starts_with("delta_"));
    }
}