rama_net/fingerprint/ja4/
http.rs

1//! Ja4H implementation for Rama (in Rust).
2//!
//! JA4H is part of the JA4+ suite, which is copyrighted
//! and licensed by FoxIO. See license information below:
5//!
6//! > Copyright 2023 AOL Inc. All rights reserved.
7//! > Portions Copyright 2023 FoxIO
8//! >
9//! > SPDX-License-Identifier: FoxIO License 1.1
10//! >
11//! > This software requires a license to use. See
12//! > - <https://github.com/FoxIO-LLC/ja4#licensing>
13//! > - <https://github.com/FoxIO-LLC/ja4/blob/main/License%20FAQ.md>
14
15use itertools::Itertools as _;
16use std::{
17    borrow::Cow,
18    fmt::{self, Write},
19};
20
21use rama_http_types::{
22    Method, Version,
23    header::{ACCEPT_LANGUAGE, COOKIE, REFERER},
24};
25
26use crate::fingerprint::{HttpRequestInput, HttpRequestProvider};
27
28#[derive(Clone)]
29/// Input data for a "ja4h" hash.
30/// or displaying it.
31///
32/// Computed using [`Ja4H::compute`].
33pub struct Ja4H {
34    req_method: HttpRequestMethod,
35    version: HttpVersion,
36    has_cookie_header: bool,
37    has_referer_header: bool,
38    language: Option<String>,
39    headers: Vec<String>,
40    cookie_pairs: Option<Vec<(String, Option<String>)>>,
41}
42
43impl Ja4H {
44    /// Compute the [`Ja4H`] (hash).
45    ///
46    /// As specified by <https://blog.foxio.io/ja4%2B-network-fingerprinting>
47    /// and reference implementations found at <https://github.com/FoxIO-LLC/ja4>.
48    pub fn compute(req: impl HttpRequestProvider) -> Result<Self, Ja4HComputeError> {
49        let HttpRequestInput {
50            header_map,
51            http_method,
52            version,
53        } = req.http_request_input();
54
55        let req_method = HttpRequestMethod::from(http_method);
56        let version: HttpVersion = version.try_into()?;
57
58        let mut has_cookie_header = false;
59        let mut has_referer_header = false;
60        let mut language = None;
61
62        let mut cookie_pairs = None;
63
64        let headers: Vec<_> = header_map
65            .into_iter()
66            .filter_map(|(name, value)| match *name.header_name() {
67                ACCEPT_LANGUAGE => {
68                    language = std::str::from_utf8(value.as_bytes())
69                        .ok()
70                        .and_then(|s| s.split(',').next())
71                        .and_then(|s| s.split(';').next())
72                        .map(|s| {
73                            s.trim()
74                                .chars()
75                                .filter(|c| c.is_alphabetic())
76                                .take(4)
77                                .map(|c| c.to_ascii_lowercase())
78                                .collect()
79                        });
80                    Some(name.as_str().to_owned())
81                }
82                COOKIE => {
83                    has_cookie_header = true;
84                    // split on ; and then trim to handle different spacing, fixing the sorting issue
85                    if let Ok(s) = std::str::from_utf8(value.as_bytes()) {
86                        let pairs = cookie_pairs.get_or_insert_with(Vec::default);
87                        pairs.extend(s.split(';').map(|cookie| {
88                            let cookie = cookie.trim();
89                            match cookie.split_once('=') {
90                                None => (cookie.to_owned(), None),
91                                Some((name, value)) => (name.to_owned(), Some(value.to_owned())),
92                            }
93                        }));
94                        pairs.sort_unstable();
95                    }
96                    None
97                }
98                REFERER => {
99                    has_referer_header = true;
100                    None
101                }
102                _ => Some(name.as_str().to_owned()),
103            })
104            .collect();
105        if headers.is_empty() {
106            return Err(Ja4HComputeError::MissingHeaders);
107        }
108
109        Ok(Ja4H {
110            req_method,
111            version,
112            has_cookie_header,
113            has_referer_header,
114            language,
115            headers,
116            cookie_pairs,
117        })
118    }
119
120    #[inline]
121    pub fn to_human_string(&self) -> String {
122        format!("{self:?}")
123    }
124
125    fn fmt_as(&self, f: &mut fmt::Formatter<'_>, hash_chunks: bool) -> fmt::Result {
126        let req_method = &self.req_method;
127        let version = self.version;
128        let cookie_marker = if self.has_cookie_header { 'c' } else { 'n' };
129        let referer_marker = if self.has_referer_header { 'r' } else { 'n' };
130        let nr_headers = 99.min(self.headers.len());
131
132        // application fingerprint: part I
133        write!(
134            f,
135            "{req_method}{version}{cookie_marker}{referer_marker}{nr_headers:02}"
136        )?;
137        match self.language.as_deref() {
138            Some(s) => format_str_truncate(4, s, f)?,
139            None => write!(f, "0000")?,
140        }
141
142        // application fingerprint: part II
143        debug_assert!(
144            !self.headers.is_empty(),
145            "validated in Ja4H::compute constructor"
146        );
147        let headers = self.headers.iter().join(",");
148
149        // website cookie fingerprint
150        let cookie_names = joined_cookie_names(self.cookie_pairs.iter().flatten());
151
152        // user cookie fingerprint
153        let cookie_pairs = joined_cookie_pairs(self.cookie_pairs.iter().flatten());
154
155        if hash_chunks {
156            write!(
157                f,
158                "_{}_{}_{}",
159                hash12(headers),
160                hash12(cookie_names),
161                hash12(cookie_pairs),
162            )
163        } else {
164            write!(f, "_{}_{}_{}", headers, cookie_names, cookie_pairs,)
165        }
166    }
167}
168
169impl fmt::Display for Ja4H {
170    #[inline]
171    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172        self.fmt_as(f, true)
173    }
174}
175
176impl fmt::Debug for Ja4H {
177    #[inline]
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        self.fmt_as(f, false)
180    }
181}
182
/// Writes exactly `n` characters to `f`: the first `n` characters of `s`,
/// right-padded with `'0'` when `s` is shorter than `n`.
///
/// Truncation and padding are both counted in characters, not bytes, so
/// multi-byte input can never be cut in the middle of a character.
fn format_str_truncate(n: usize, s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    let mut written = 0;
    for c in s.chars().take(n) {
        f.write_char(c)?;
        written += 1;
    }
    // pad whatever is still missing to reach the fixed width
    for _ in written..n {
        f.write_char('0')?;
    }
    Ok(())
}
195
/// Joins the names of the given cookie pairs with `,`, keeping their order.
///
/// Returns an empty string when there are no pairs. Names are borrowed
/// rather than cloned; only the joined result is allocated.
fn joined_cookie_names<'a, I>(cookie_pairs: I) -> String
where
    I: IntoIterator<Item = &'a (String, Option<String>)>,
{
    let names: Vec<&str> = cookie_pairs
        .into_iter()
        .map(|(name, _)| {
            debug_assert!(!name.is_empty());
            name.as_str()
        })
        .collect();
    names.join(",")
}
208
/// Joins the given cookie pairs with `,`, keeping their order.
///
/// A pair with a value is rendered as `name=value`, a pair without one as
/// just `name`. Returns an empty string when there are no pairs. Everything
/// is written into a single buffer, with no temporary string per pair.
fn joined_cookie_pairs<'a, I>(cookie_pairs: I) -> String
where
    I: IntoIterator<Item = &'a (String, Option<String>)>,
{
    let mut joined = String::new();
    for (idx, (name, value)) in cookie_pairs.into_iter().enumerate() {
        debug_assert!(!name.is_empty());
        if idx > 0 {
            joined.push(',');
        }
        joined.push_str(name);
        if let Some(value) = value {
            joined.push('=');
            joined.push_str(value);
        }
    }
    joined
}
224
/// Reasons why [`Ja4H::compute`] can fail.
#[derive(Debug, Clone)]
pub enum Ja4HComputeError {
    /// The HTTP version of the request is not one that is recognised.
    InvalidHttpVersion,
    /// The request has no headers that count towards the fingerprint.
    MissingHeaders,
}
233
234impl fmt::Display for Ja4HComputeError {
235    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236        match self {
237            Ja4HComputeError::InvalidHttpVersion => {
238                write!(f, "Ja4H Compute Error: unexpected http request version")
239            }
240            Ja4HComputeError::MissingHeaders => {
241                write!(f, "Ja4H Compute Error: missing http headers")
242            }
243        }
244    }
245}
246
// Marker impl: relies on the `Debug` and `Display` implementations of
// `Ja4HComputeError` and keeps the trait's default methods.
impl std::error::Error for Ja4HComputeError {}
248
249fn hash12(s: impl AsRef<str>) -> Cow<'static, str> {
250    use sha2::{Digest as _, Sha256};
251
252    let s = s.as_ref();
253    if s.is_empty() {
254        "000000000000".into()
255    } else {
256        let sha256 = Sha256::digest(s);
257        hex::encode(&sha256.as_slice()[..6]).into()
258    }
259}
260
261#[derive(Debug, Clone, PartialEq)]
262struct HttpRequestMethod(Method);
263
264impl fmt::Display for HttpRequestMethod {
265    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266        let code = match self.0 {
267            Method::CONNECT => "co",
268            Method::DELETE => "de",
269            Method::GET => "ge",
270            Method::HEAD => "he",
271            Method::OPTIONS => "op",
272            Method::PATCH => "pa",
273            Method::POST => "po",
274            Method::PUT => "pu",
275            Method::TRACE => "tr",
276            _ => {
277                let mut c = self.0.as_str().chars();
278                return write!(
279                    f,
280                    "{}{}",
281                    c.next().map(|c| c.to_ascii_lowercase()).unwrap_or('0'),
282                    c.next().map(|c| c.to_ascii_lowercase()).unwrap_or('0'),
283                );
284            }
285        };
286        f.write_str(code)
287    }
288}
289
290impl From<Method> for HttpRequestMethod {
291    #[inline]
292    fn from(value: Method) -> Self {
293        Self(value)
294    }
295}
296
/// HTTP versions that can be part of a fingerprint, ordered from oldest to newest.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
enum HttpVersion {
    /// HTTP/1.0
    Http1_0,
    /// HTTP/1.1
    Http1_1,
    /// HTTP/2
    Http2,
    /// HTTP/3
    Http3,
}
304
305impl TryFrom<Version> for HttpVersion {
306    type Error = Ja4HComputeError;
307
308    fn try_from(value: Version) -> Result<Self, Self::Error> {
309        match value {
310            Version::HTTP_10 => Ok(HttpVersion::Http1_0),
311            Version::HTTP_11 => Ok(HttpVersion::Http1_1),
312            Version::HTTP_2 => Ok(HttpVersion::Http2),
313            Version::HTTP_3 => Ok(HttpVersion::Http3),
314            _ => Err(Ja4HComputeError::InvalidHttpVersion),
315        }
316    }
317}
318
319impl fmt::Display for HttpVersion {
320    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321        let code = match self {
322            Self::Http1_0 => "10",
323            Self::Http1_1 => "11",
324            Self::Http2 => "20",
325            Self::Http3 => "30",
326        };
327        f.write_str(code)
328    }
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use rama_http_types::{Request, proto::h1::Http1HeaderMap};

    /// A request together with the two fingerprint renderings expected for it.
    #[derive(Debug)]
    struct TestCase {
        description: &'static str,
        expected_ja4h_str_debug: &'static str,
        expected_ja4h_str_hash: &'static str,
        req: Request<()>,
    }

    /// Builds a [`TestCase`] whose request has the given version, method and
    /// headers, with the headers appended in the order they are listed.
    macro_rules! test_case {
        (
            description: $description:literal,
            debug_str: $expected_ja4h_str_debug:literal,
            hash_str: $expected_ja4h_str_hash:literal,
            version: $version:expr,
            method: $method:expr,
            headers: {$(
                $header_name:literal: $header_value:literal,
            )+}
            $(,)?
        ) => {
            {
                let mut h1_headers = Http1HeaderMap::default();
                $(
                    h1_headers.try_append(
                        $header_name,
                        rama_http_types::HeaderValue::from_str($header_value).unwrap()
                    ).unwrap();
                )+

                let mut extensions = rama_http_types::dep::http::Extensions::default();
                let headers = h1_headers.consume(&mut extensions);

                let (mut parts, body) = Request::new(()).into_parts();
                parts.method = $method;
                parts.version = $version;
                parts.uri = "/".parse::<rama_http_types::Uri>().unwrap();
                parts.headers = headers;
                parts.extensions = extensions;

                TestCase {
                    description: $description,
                    expected_ja4h_str_debug: $expected_ja4h_str_debug,
                    expected_ja4h_str_hash: $expected_ja4h_str_hash,
                    req: Request::from_parts(parts, body),
                }
            }
        };
    }

    #[test]
    fn test_ja4h_compute() {
        let test_cases = [
            test_case!(
                description: "rust_ja4_http_test_http_stats_into_out",
                debug_str: "ge11cr11enus_Host,Sec-Ch-Ua,Sec-Ch-Ua-Mobile,User-Agent,Sec-Ch-Ua-Platform,Accept,Sec-Fetch-Site,Sec-Fetch-Mode,Sec-Fetch-Dest,Accept-Encoding,Accept-Language_FastAB,_dd_s,countryCode,geoData,sato,stateCode,umto,usprivacy_FastAB=0=6859,1=8174,2=4183,3=3319,4=3917,5=2557,6=4259,7=6070,8=0804,9=6453,10=1942,11=4435,12=4143,13=9445,14=6957,15=8682,16=1885,17=1825,18=3760,19=0929,_dd_s=logs=1&id=b5c2d770-eaba-4847-8202-390c4552ff9a&created=1686159462724&expire=1686160422726,countryCode=US,geoData=purcellville|VA|20132|US|NA|-400|broadband|39.160|-77.700|511,sato=1,stateCode=VA,umto=1,usprivacy=1---",
                hash_str: "ge11cr11enus_974ebe531c03_0f2659b474bf_161698816dab",
                version: Version::HTTP_11,
                method: Method::GET,
                headers: {
                    "Host": "www.cnn.com",
                    "Cookie": "FastAB=0=6859,1=8174,2=4183,3=3319,4=3917,5=2557,6=4259,7=6070,8=0804,9=6453,10=1942,11=4435,12=4143,13=9445,14=6957,15=8682,16=1885,17=1825,18=3760,19=0929; sato=1; countryCode=US; stateCode=VA; geoData=purcellville|VA|20132|US|NA|-400|broadband|39.160|-77.700|511; usprivacy=1---; umto=1; _dd_s=logs=1&id=b5c2d770-eaba-4847-8202-390c4552ff9a&created=1686159462724&expire=1686160422726",
                    "Sec-Ch-Ua": "",
                    "Sec-Ch-Ua-Mobile": "?0",
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.110 Safari/537.36",
                    "Sec-Ch-Ua-Platform": "\"\"",
                    "Accept": "*/*",
                    "Sec-Fetch-Site": "same-origin",
                    "Sec-Fetch-Mode": "cors",
                    "Sec-Fetch-Dest": "empty", // should not have duplicated headers
                    "Referer": "https://www.cnn.com/",
                    "Accept-Encoding": "gzip, deflate",
                    "Accept-Language": "en-US,en;q=0.9",
                },
            ),
            test_case!(
                description: "wireshark_ja4_firefox_133_macos_fp.ramaproxy.org_http11_plain",
                debug_str: "ge11cr09enus_Host,User-Agent,Accept,Accept-Language,Accept-Encoding,Connection,DNT,Sec-GPC,Priority_rama-fp_rama-fp=ready",
                hash_str: "ge11cr09enus_df50b14dec48_d733b88e2d70_774e52af4cfe",
                version: Version::HTTP_11,
                method: Method::GET,
                headers: {
                    "Host": "h1.fp.ramaproxy.org",
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
                    "Accept": "text/css,*/*;q=0.1",
                    "Accept-Language": "en-US,en;q=0.5",
                    "Accept-Encoding": "gzip, deflate",
                    "Connection": "keep-alive",
                    "Referer": "http://h1.fp.ramaproxy.org/consent",
                    "Cookie": "rama-fp=ready",
                    "DNT": "1",
                    "Sec-GPC": "1",
                    "Priority": "u=2",
                },
            ),
            test_case!(
                description: "curl_ja4h_http2_cookies_different_order",
                debug_str: "ge20cn030000_authorization,user-agent,accept_alpha,sierra,zulu_alpha=bravo,sierra=echo,zulu=tango",
                hash_str: "ge20cn030000_a8ea46949477_7efd8825dc5a_f0c5f5a36bc1",
                version: Version::HTTP_2,
                method: Method::GET,
                headers: {
                    "authorization": "Basic d29yZDp3b3Jk",
                    "user-agent": "curl/7.81.0",
                    "accept": "*/*",
                    "cookie": "sierra=echo;alpha=bravo;zulu=tango",
                },
            ),
        ];

        for TestCase {
            description,
            expected_ja4h_str_debug,
            expected_ja4h_str_hash,
            req,
        } in test_cases
        {
            let ja4h = Ja4H::compute(&req).expect(description);

            // raw rendering
            assert_eq!(expected_ja4h_str_debug, format!("{ja4h:?}"), "{description}");
            // hashed rendering
            assert_eq!(expected_ja4h_str_hash, ja4h.to_string(), "{description}");
        }
    }
}