1use std::collections::HashMap;
18use std::fmt::Debug;
19use std::rc::Rc;
20
21use async_trait::async_trait;
22use hitbox::{Extractor, KeyPart, KeyParts};
23use hyper::body::Body as HttpBody;
24use jaq_core::box_iter::box_once;
25use jaq_core::load::{Arena, File, Loader};
26use jaq_core::{Bind, Ctx, Exn, Filter, Native, RcIter};
27use jaq_json::Val;
28use regex::Regex;
29use serde_json::Value;
30use tracing::warn;
31
32pub use super::transform::Transform;
33use super::transform::{apply_hash, apply_transform_chain};
34use crate::CacheableHttpRequest;
35
36#[derive(Debug, Clone)]
44pub enum BodyExtraction {
45 Hash,
47 Jq(JqExtraction),
49 Regex(RegexExtraction),
51}
52
53#[derive(Clone)]
69pub struct JqExtraction {
70 filter: Filter<Native<Val>>,
71}
72
73impl Debug for JqExtraction {
74 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75 f.debug_struct("JqExtraction").finish_non_exhaustive()
76 }
77}
78
79type JqResult = Result<Val, jaq_core::Error<Val>>;
81
82fn custom_jq_funs() -> impl Iterator<Item = (&'static str, Box<[Bind]>, Native<Val>)> {
84 let v0: Box<[Bind]> = Box::new([]);
85
86 [
87 (
89 "hash",
90 v0,
91 Native::new(|_, cv| {
92 let val = cv.1;
93 let result: JqResult = match &val {
94 Val::Str(s) => {
95 let hash = apply_hash(s);
96 Ok(Val::Str(Rc::new(hash)))
97 }
98 Val::Int(n) => {
99 let hash = apply_hash(&n.to_string());
100 Ok(Val::Str(Rc::new(hash)))
101 }
102 Val::Float(f) => {
103 let hash = apply_hash(&f.to_string());
104 Ok(Val::Str(Rc::new(hash)))
105 }
106 Val::Bool(b) => {
107 let hash = apply_hash(&b.to_string());
108 Ok(Val::Str(Rc::new(hash)))
109 }
110 Val::Null => {
111 let hash = apply_hash("null");
112 Ok(Val::Str(Rc::new(hash)))
113 }
114 Val::Num(n) => {
115 let hash = apply_hash(n);
116 Ok(Val::Str(Rc::new(hash)))
117 }
118 Val::Arr(_) | Val::Obj(_) => {
119 let json: Value = val.clone().into();
121 let hash = apply_hash(&json.to_string());
122 Ok(Val::Str(Rc::new(hash)))
123 }
124 };
125 box_once(result.map_err(Exn::from))
126 }),
127 ),
128 ]
129 .into_iter()
130}
131
132impl JqExtraction {
133 pub fn compile(expression: &str) -> Result<Self, String> {
145 let program = File {
146 code: expression,
147 path: (),
148 };
149 let loader = Loader::new(jaq_std::defs().chain(jaq_json::defs()));
150 let arena = Arena::default();
151 let modules = loader
152 .load(&arena, program)
153 .map_err(|e| format!("jq parse error: {:?}", e))?;
154 let filter = jaq_core::Compiler::default()
155 .with_funs(
156 jaq_std::funs()
157 .chain(jaq_json::funs())
158 .chain(custom_jq_funs()),
159 )
160 .compile(modules)
161 .map_err(|e| format!("jq compile error: {:?}", e))?;
162 Ok(Self { filter })
163 }
164
165 fn apply(&self, input: Value) -> Vec<Value> {
166 let inputs = RcIter::new(core::iter::empty());
167 let out = self.filter.run((Ctx::new([], &inputs), Val::from(input)));
168 out.filter_map(|r| r.ok()).map(|v| v.into()).collect()
169 }
170}
171
172#[derive(Debug, Clone)]
192pub struct RegexExtraction {
193 pub regex: Regex,
195 pub key: Option<String>,
197 pub global: bool,
199 pub transforms: Transforms,
201}
202
203#[derive(Debug, Clone, Default)]
208pub enum Transforms {
209 #[default]
211 None,
212 FullBody(Vec<Transform>),
214 PerKey(HashMap<String, Vec<Transform>>),
216}
217
218#[derive(Debug)]
229pub struct Body<E> {
230 inner: E,
231 extraction: BodyExtraction,
232}
233
234impl<S> Body<super::NeutralExtractor<S>> {
235 pub fn new(extraction: BodyExtraction) -> Self {
237 Self {
238 inner: super::NeutralExtractor::new(),
239 extraction,
240 }
241 }
242}
243
244pub trait BodyExtractor: Sized {
262 fn body(self, extraction: BodyExtraction) -> Body<Self>;
264}
265
266impl<E> BodyExtractor for E
267where
268 E: hitbox::Extractor,
269{
270 fn body(self, extraction: BodyExtraction) -> Body<Self> {
271 Body {
272 inner: self,
273 extraction,
274 }
275 }
276}
277
278fn extract_jq_parts(values: Vec<Value>) -> Vec<KeyPart> {
280 let mut parts = Vec::new();
281
282 for value in values {
283 match value {
284 Value::Object(map) => {
285 for (key, val) in map {
286 let value_str = value_to_string(&val);
287 parts.push(KeyPart::new(key, value_str));
288 }
289 }
290 Value::Array(arr) => {
291 for item in arr {
292 match item {
293 Value::Object(map) => {
294 for (key, val) in map {
295 let value_str = value_to_string(&val);
296 parts.push(KeyPart::new(key, value_str));
297 }
298 }
299 other => {
300 let value_str = value_to_string(&other);
301 parts.push(KeyPart::new("body", value_str));
302 }
303 }
304 }
305 }
306 other => {
307 let value_str = value_to_string(&other);
308 parts.push(KeyPart::new("body", value_str));
309 }
310 }
311 }
312
313 parts
314}
315
316fn value_to_string(value: &Value) -> Option<String> {
318 match value {
319 Value::Null => None,
320 Value::String(s) => Some(s.clone()),
321 Value::Number(n) => Some(n.to_string()),
322 Value::Bool(b) => Some(b.to_string()),
323 other => Some(other.to_string()),
324 }
325}
326
327fn extract_regex_parts(
329 body: &str,
330 regex: &Regex,
331 key: &Option<String>,
332 global: bool,
333 transforms: &Transforms,
334) -> Vec<KeyPart> {
335 let mut parts = Vec::new();
336 let capture_names: Vec<_> = regex.capture_names().flatten().collect();
337 let has_named_groups = !capture_names.is_empty();
338
339 let apply_transforms = |key_name: &str, value: String| -> String {
340 match transforms {
341 Transforms::None => value,
342 Transforms::FullBody(chain) => apply_transform_chain(value, chain),
343 Transforms::PerKey(map) => {
344 if let Some(chain) = map.get(key_name) {
345 apply_transform_chain(value, chain)
346 } else {
347 value
348 }
349 }
350 }
351 };
352
353 if global {
354 for caps in regex.captures_iter(body) {
355 if has_named_groups {
356 for name in &capture_names {
357 if let Some(m) = caps.name(name) {
358 let value = apply_transforms(name, m.as_str().to_string());
359 parts.push(KeyPart::new(*name, Some(value)));
360 }
361 }
362 } else if let Some(m) = caps.get(1).or_else(|| caps.get(0)) {
363 let key_name = key.as_deref().unwrap_or("body");
364 let value = apply_transforms(key_name, m.as_str().to_string());
365 parts.push(KeyPart::new(key_name, Some(value)));
366 }
367 }
368 } else if let Some(caps) = regex.captures(body) {
369 if has_named_groups {
370 for name in &capture_names {
371 if let Some(m) = caps.name(name) {
372 let value = apply_transforms(name, m.as_str().to_string());
373 parts.push(KeyPart::new(*name, Some(value)));
374 }
375 }
376 } else if let Some(m) = caps.get(1).or_else(|| caps.get(0)) {
377 let key_name = key.as_deref().unwrap_or("body");
378 let value = apply_transforms(key_name, m.as_str().to_string());
379 parts.push(KeyPart::new(key_name, Some(value)));
380 }
381 }
382
383 parts
384}
385
386#[async_trait]
387impl<ReqBody, E> Extractor for Body<E>
388where
389 ReqBody: HttpBody + Send + 'static,
390 ReqBody::Error: Send,
391 ReqBody::Data: Send,
392 E: Extractor<Subject = CacheableHttpRequest<ReqBody>> + Send + Sync,
393{
394 type Subject = E::Subject;
395
396 async fn get(&self, subject: Self::Subject) -> KeyParts<Self::Subject> {
397 let (parts, body) = subject.into_parts();
398
399 let payload = match body.collect().await {
401 Ok(bytes) => bytes,
402 Err(error_body) => {
403 let request = CacheableHttpRequest::from_request(http::Request::from_parts(
404 parts, error_body,
405 ));
406 let mut key_parts = self.inner.get(request).await;
407 key_parts.push(KeyPart::new("body", None::<String>));
408 return key_parts;
409 }
410 };
411
412 let body_bytes = payload.to_vec();
413 let body_str = String::from_utf8_lossy(&body_bytes);
414
415 let extracted_parts = match &self.extraction {
416 BodyExtraction::Hash => {
417 let hash = apply_hash(&body_str);
418 vec![KeyPart::new("body", Some(hash))]
419 }
420 BodyExtraction::Jq(jq) => match serde_json::from_str(&body_str) {
421 Ok(json_value) => {
422 let results = jq.apply(json_value);
423 extract_jq_parts(results)
424 }
425 Err(err) => {
426 warn!(%err, "Jq body extraction failed: invalid JSON, falling back to body hash");
427 let hash = apply_hash(&body_str);
428 vec![KeyPart::new("body", Some(hash))]
429 }
430 },
431 BodyExtraction::Regex(regex_ext) => extract_regex_parts(
432 &body_str,
433 ®ex_ext.regex,
434 ®ex_ext.key,
435 regex_ext.global,
436 ®ex_ext.transforms,
437 ),
438 };
439
440 let body = crate::BufferedBody::Complete(Some(payload));
441 let request = CacheableHttpRequest::from_request(http::Request::from_parts(parts, body));
442
443 let mut key_parts = self.inner.get(request).await;
444 for part in extracted_parts {
445 key_parts.push(part);
446 }
447 key_parts
448 }
449}