1use itertools::Itertools as _;
16use std::{
17 borrow::Cow,
18 fmt::{self, Write},
19};
20
21use rama_http_types::{
22 Method, Version,
23 header::{ACCEPT_LANGUAGE, COOKIE, REFERER},
24};
25
26use crate::fingerprint::{HttpRequestInput, HttpRequestProvider};
27
28#[derive(Clone)]
29pub struct Ja4H {
34 req_method: HttpRequestMethod,
35 version: HttpVersion,
36 has_cookie_header: bool,
37 has_referer_header: bool,
38 language: Option<String>,
39 headers: Vec<String>,
40 cookie_pairs: Option<Vec<(String, Option<String>)>>,
41}
42
43impl Ja4H {
44 pub fn compute(req: impl HttpRequestProvider) -> Result<Self, Ja4HComputeError> {
49 let HttpRequestInput {
50 header_map,
51 http_method,
52 version,
53 } = req.http_request_input();
54
55 let req_method = HttpRequestMethod::from(http_method);
56 let version: HttpVersion = version.try_into()?;
57
58 let mut has_cookie_header = false;
59 let mut has_referer_header = false;
60 let mut language = None;
61
62 let mut cookie_pairs = None;
63
64 let headers: Vec<_> = header_map
65 .into_iter()
66 .filter_map(|(name, value)| match *name.header_name() {
67 ACCEPT_LANGUAGE => {
68 language = std::str::from_utf8(value.as_bytes())
69 .ok()
70 .and_then(|s| s.split(',').next())
71 .and_then(|s| s.split(';').next())
72 .map(|s| {
73 s.trim()
74 .chars()
75 .filter(|c| c.is_alphabetic())
76 .take(4)
77 .map(|c| c.to_ascii_lowercase())
78 .collect()
79 });
80 Some(name.as_str().to_owned())
81 }
82 COOKIE => {
83 has_cookie_header = true;
84 if let Ok(s) = std::str::from_utf8(value.as_bytes()) {
86 let pairs = cookie_pairs.get_or_insert_with(Vec::default);
87 pairs.extend(s.split(';').map(|cookie| {
88 let cookie = cookie.trim();
89 match cookie.split_once('=') {
90 None => (cookie.to_owned(), None),
91 Some((name, value)) => (name.to_owned(), Some(value.to_owned())),
92 }
93 }));
94 pairs.sort_unstable();
95 }
96 None
97 }
98 REFERER => {
99 has_referer_header = true;
100 None
101 }
102 _ => Some(name.as_str().to_owned()),
103 })
104 .collect();
105 if headers.is_empty() {
106 return Err(Ja4HComputeError::MissingHeaders);
107 }
108
109 Ok(Ja4H {
110 req_method,
111 version,
112 has_cookie_header,
113 has_referer_header,
114 language,
115 headers,
116 cookie_pairs,
117 })
118 }
119
120 #[inline]
121 pub fn to_human_string(&self) -> String {
122 format!("{self:?}")
123 }
124
125 fn fmt_as(&self, f: &mut fmt::Formatter<'_>, hash_chunks: bool) -> fmt::Result {
126 let req_method = &self.req_method;
127 let version = self.version;
128 let cookie_marker = if self.has_cookie_header { 'c' } else { 'n' };
129 let referer_marker = if self.has_referer_header { 'r' } else { 'n' };
130 let nr_headers = 99.min(self.headers.len());
131
132 write!(
134 f,
135 "{req_method}{version}{cookie_marker}{referer_marker}{nr_headers:02}"
136 )?;
137 match self.language.as_deref() {
138 Some(s) => format_str_truncate(4, s, f)?,
139 None => write!(f, "0000")?,
140 }
141
142 debug_assert!(
144 !self.headers.is_empty(),
145 "validated in Ja4H::compute constructor"
146 );
147 let headers = self.headers.iter().join(",");
148
149 let cookie_names = joined_cookie_names(self.cookie_pairs.iter().flatten());
151
152 let cookie_pairs = joined_cookie_pairs(self.cookie_pairs.iter().flatten());
154
155 if hash_chunks {
156 write!(
157 f,
158 "_{}_{}_{}",
159 hash12(headers),
160 hash12(cookie_names),
161 hash12(cookie_pairs),
162 )
163 } else {
164 write!(f, "_{}_{}_{}", headers, cookie_names, cookie_pairs,)
165 }
166 }
167}
168
169impl fmt::Display for Ja4H {
170 #[inline]
171 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172 self.fmt_as(f, true)
173 }
174}
175
176impl fmt::Debug for Ja4H {
177 #[inline]
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 self.fmt_as(f, false)
180 }
181}
182
/// Writes `s` to `f` as exactly `n` characters.
///
/// Strings longer than `n` characters are cut after their `n`-th character;
/// shorter ones are right-padded with `'0'`.
///
/// The cut position is derived from `char_indices`, so it always falls on a
/// UTF-8 character boundary. Slicing by `n` bytes instead would panic or
/// miscount as soon as `s` contains a multi-byte character.
fn format_str_truncate(n: usize, s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    match s.char_indices().nth(n) {
        // A character at index `n` exists, so `s` is too long: `cut` is the
        // byte offset at which the first `n` characters end.
        Some((cut, _)) => f.write_str(&s[..cut]),
        None => {
            f.write_str(s)?;
            for _ in s.chars().count()..n {
                f.write_char('0')?;
            }
            Ok(())
        }
    }
}
195
/// Joins the names of the given cookie pairs with `,`, in iteration order.
///
/// The names are appended to a single output buffer, so no intermediate
/// `String` is cloned per cookie.
fn joined_cookie_names<'a, I>(cookie_pairs: I) -> String
where
    I: IntoIterator<Item = &'a (String, Option<String>)>,
{
    let mut joined = String::new();
    for (idx, (name, _)) in cookie_pairs.into_iter().enumerate() {
        debug_assert!(!name.is_empty());
        if idx > 0 {
            joined.push(',');
        }
        joined.push_str(name);
    }
    joined
}
208
/// Joins the given cookie pairs with `,`, in iteration order.
///
/// A pair with a value is rendered as `name=value`, a pair without one as
/// just `name`. Everything is appended to a single output buffer instead of
/// allocating a temporary `String` per pair.
fn joined_cookie_pairs<'a, I>(cookie_pairs: I) -> String
where
    I: IntoIterator<Item = &'a (String, Option<String>)>,
{
    let mut joined = String::new();
    for (idx, (name, value)) in cookie_pairs.into_iter().enumerate() {
        debug_assert!(!name.is_empty());
        if idx > 0 {
            joined.push(',');
        }
        joined.push_str(name);
        if let Some(value) = value {
            joined.push('=');
            joined.push_str(value);
        }
    }
    joined
}
224
/// Reasons why [`Ja4H::compute`] can fail.
#[derive(Debug, Clone)]
pub enum Ja4HComputeError {
    /// The request's HTTP version is not one the fingerprint can encode.
    InvalidHttpVersion,
    /// The request has no headers left to fingerprint.
    MissingHeaders,
}
233
234impl fmt::Display for Ja4HComputeError {
235 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236 match self {
237 Ja4HComputeError::InvalidHttpVersion => {
238 write!(f, "Ja4H Compute Error: unexpected http request version")
239 }
240 Ja4HComputeError::MissingHeaders => {
241 write!(f, "Ja4H Compute Error: missing http headers")
242 }
243 }
244 }
245}
246
247impl std::error::Error for Ja4HComputeError {}
248
249fn hash12(s: impl AsRef<str>) -> Cow<'static, str> {
250 use sha2::{Digest as _, Sha256};
251
252 let s = s.as_ref();
253 if s.is_empty() {
254 "000000000000".into()
255 } else {
256 let sha256 = Sha256::digest(s);
257 hex::encode(&sha256.as_slice()[..6]).into()
258 }
259}
260
261#[derive(Debug, Clone, PartialEq)]
262struct HttpRequestMethod(Method);
263
264impl fmt::Display for HttpRequestMethod {
265 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266 let code = match self.0 {
267 Method::CONNECT => "co",
268 Method::DELETE => "de",
269 Method::GET => "ge",
270 Method::HEAD => "he",
271 Method::OPTIONS => "op",
272 Method::PATCH => "pa",
273 Method::POST => "po",
274 Method::PUT => "pu",
275 Method::TRACE => "tr",
276 _ => {
277 let mut c = self.0.as_str().chars();
278 return write!(
279 f,
280 "{}{}",
281 c.next().map(|c| c.to_ascii_lowercase()).unwrap_or('0'),
282 c.next().map(|c| c.to_ascii_lowercase()).unwrap_or('0'),
283 );
284 }
285 };
286 f.write_str(code)
287 }
288}
289
290impl From<Method> for HttpRequestMethod {
291 #[inline]
292 fn from(value: Method) -> Self {
293 Self(value)
294 }
295}
296
/// HTTP protocol versions that can be encoded in the fingerprint.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
enum HttpVersion {
    /// HTTP/1.0, displayed as `10`.
    Http1_0,
    /// HTTP/1.1, displayed as `11`.
    Http1_1,
    /// HTTP/2, displayed as `20`.
    Http2,
    /// HTTP/3, displayed as `30`.
    Http3,
}
304
305impl TryFrom<Version> for HttpVersion {
306 type Error = Ja4HComputeError;
307
308 fn try_from(value: Version) -> Result<Self, Self::Error> {
309 match value {
310 Version::HTTP_10 => Ok(HttpVersion::Http1_0),
311 Version::HTTP_11 => Ok(HttpVersion::Http1_1),
312 Version::HTTP_2 => Ok(HttpVersion::Http2),
313 Version::HTTP_3 => Ok(HttpVersion::Http3),
314 _ => Err(Ja4HComputeError::InvalidHttpVersion),
315 }
316 }
317}
318
319impl fmt::Display for HttpVersion {
320 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321 let code = match self {
322 Self::Http1_0 => "10",
323 Self::Http1_1 => "11",
324 Self::Http2 => "20",
325 Self::Http3 => "30",
326 };
327 f.write_str(code)
328 }
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use rama_http_types::{Request, proto::h1::Http1HeaderMap};

    /// A request together with the fingerprint renderings expected for it.
    #[derive(Debug)]
    struct TestCase {
        description: &'static str,
        expected_ja4h_str_debug: &'static str,
        expected_ja4h_str_hash: &'static str,
        req: Request<()>,
    }

    /// Builds a [`TestCase`] whose request carries the listed headers, appended
    /// in the order in which they are written.
    macro_rules! test_case {
        (
            description: $description:literal,
            debug_str: $expected_ja4h_str_debug:literal,
            hash_str: $expected_ja4h_str_hash:literal,
            version: $version:expr,
            method: $method:expr,
            headers: {$(
                $header_name:literal: $header_value:literal,
            )+}
            $(,)?
        ) => {{
            let mut extensions = rama_http_types::dep::http::Extensions::default();
            let headers = {
                let mut ordered = Http1HeaderMap::default();
                $(
                    ordered
                        .try_append(
                            $header_name,
                            rama_http_types::HeaderValue::from_str($header_value).unwrap(),
                        )
                        .unwrap();
                )+
                ordered.consume(&mut extensions)
            };

            let (mut parts, body) = Request::new(()).into_parts();
            parts.method = $method;
            parts.version = $version;
            parts.uri = "/".parse::<rama_http_types::Uri>().unwrap();
            parts.headers = headers;
            parts.extensions = extensions;

            TestCase {
                description: $description,
                expected_ja4h_str_debug: $expected_ja4h_str_debug,
                expected_ja4h_str_hash: $expected_ja4h_str_hash,
                req: Request::from_parts(parts, body),
            }
        }};
    }

    #[test]
    fn test_ja4h_compute() {
        let test_cases = [
            test_case!(
                description: "rust_ja4_http_test_http_stats_into_out",
                debug_str: "ge11cr11enus_Host,Sec-Ch-Ua,Sec-Ch-Ua-Mobile,User-Agent,Sec-Ch-Ua-Platform,Accept,Sec-Fetch-Site,Sec-Fetch-Mode,Sec-Fetch-Dest,Accept-Encoding,Accept-Language_FastAB,_dd_s,countryCode,geoData,sato,stateCode,umto,usprivacy_FastAB=0=6859,1=8174,2=4183,3=3319,4=3917,5=2557,6=4259,7=6070,8=0804,9=6453,10=1942,11=4435,12=4143,13=9445,14=6957,15=8682,16=1885,17=1825,18=3760,19=0929,_dd_s=logs=1&id=b5c2d770-eaba-4847-8202-390c4552ff9a&created=1686159462724&expire=1686160422726,countryCode=US,geoData=purcellville|VA|20132|US|NA|-400|broadband|39.160|-77.700|511,sato=1,stateCode=VA,umto=1,usprivacy=1---",
                hash_str: "ge11cr11enus_974ebe531c03_0f2659b474bf_161698816dab",
                version: Version::HTTP_11,
                method: Method::GET,
                headers: {
                    "Host": "www.cnn.com",
                    "Cookie": "FastAB=0=6859,1=8174,2=4183,3=3319,4=3917,5=2557,6=4259,7=6070,8=0804,9=6453,10=1942,11=4435,12=4143,13=9445,14=6957,15=8682,16=1885,17=1825,18=3760,19=0929; sato=1; countryCode=US; stateCode=VA; geoData=purcellville|VA|20132|US|NA|-400|broadband|39.160|-77.700|511; usprivacy=1---; umto=1; _dd_s=logs=1&id=b5c2d770-eaba-4847-8202-390c4552ff9a&created=1686159462724&expire=1686160422726",
                    "Sec-Ch-Ua": "",
                    "Sec-Ch-Ua-Mobile": "?0",
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.110 Safari/537.36",
                    "Sec-Ch-Ua-Platform": "\"\"",
                    "Accept": "*/*",
                    "Sec-Fetch-Site": "same-origin",
                    "Sec-Fetch-Mode": "cors",
                    "Sec-Fetch-Dest": "empty",
                    "Referer": "https://www.cnn.com/",
                    "Accept-Encoding": "gzip, deflate",
                    "Accept-Language": "en-US,en;q=0.9",
                },
            ),
            test_case!(
                description: "wireshark_ja4_firefox_133_macos_fp.ramaproxy.org_http11_plain",
                debug_str: "ge11cr09enus_Host,User-Agent,Accept,Accept-Language,Accept-Encoding,Connection,DNT,Sec-GPC,Priority_rama-fp_rama-fp=ready",
                hash_str: "ge11cr09enus_df50b14dec48_d733b88e2d70_774e52af4cfe",
                version: Version::HTTP_11,
                method: Method::GET,
                headers: {
                    "Host": "h1.fp.ramaproxy.org",
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
                    "Accept": "text/css,*/*;q=0.1",
                    "Accept-Language": "en-US,en;q=0.5",
                    "Accept-Encoding": "gzip, deflate",
                    "Connection": "keep-alive",
                    "Referer": "http://h1.fp.ramaproxy.org/consent",
                    "Cookie": "rama-fp=ready",
                    "DNT": "1",
                    "Sec-GPC": "1",
                    "Priority": "u=2",
                },
            ),
            test_case!(
                description: "curl_ja4h_http2_cookies_different_order",
                debug_str: "ge20cn030000_authorization,user-agent,accept_alpha,sierra,zulu_alpha=bravo,sierra=echo,zulu=tango",
                hash_str: "ge20cn030000_a8ea46949477_7efd8825dc5a_f0c5f5a36bc1",
                version: Version::HTTP_2,
                method: Method::GET,
                headers: {
                    "authorization": "Basic d29yZDp3b3Jk",
                    "user-agent": "curl/7.81.0",
                    "accept": "*/*",
                    "cookie": "sierra=echo;alpha=bravo;zulu=tango",
                },
            ),
        ];

        for TestCase {
            description,
            expected_ja4h_str_debug,
            expected_ja4h_str_hash,
            req,
        } in test_cases
        {
            let ja4h = Ja4H::compute(&req).expect(description);

            // Verbatim (Debug) rendering.
            let rendered_debug = ja4h.to_human_string();
            assert_eq!(expected_ja4h_str_debug, rendered_debug, "{}", description);

            // Hashed (Display) rendering.
            let rendered_hash = ja4h.to_string();
            assert_eq!(expected_ja4h_str_hash, rendered_hash, "{}", description);
        }
    }
}