1use std::collections::HashMap;
18use std::fmt::Debug;
19use std::rc::Rc;
20
21use async_trait::async_trait;
22use hitbox::{Extractor, KeyPart, KeyParts};
23use hyper::body::Body as HttpBody;
24use jaq_core::box_iter::box_once;
25use jaq_core::load::{Arena, File, Loader};
26use jaq_core::{Bind, Ctx, Exn, Filter, Native, RcIter};
27use jaq_json::Val;
28use regex::Regex;
29use serde_json::Value;
30
31pub use super::transform::Transform;
32use super::transform::{apply_hash, apply_transform_chain};
33use crate::CacheableHttpRequest;
34
35#[derive(Debug, Clone)]
43pub enum BodyExtraction {
44 Hash,
46 Jq(JqExtraction),
48 Regex(RegexExtraction),
50}
51
52#[derive(Clone)]
68pub struct JqExtraction {
69 filter: Filter<Native<Val>>,
70}
71
72impl Debug for JqExtraction {
73 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74 f.debug_struct("JqExtraction").finish_non_exhaustive()
75 }
76}
77
78type JqResult = Result<Val, jaq_core::Error<Val>>;
80
81fn custom_jq_funs() -> impl Iterator<Item = (&'static str, Box<[Bind]>, Native<Val>)> {
83 let v0: Box<[Bind]> = Box::new([]);
84
85 [
86 (
88 "hash",
89 v0,
90 Native::new(|_, cv| {
91 let val = cv.1;
92 let result: JqResult = match &val {
93 Val::Str(s) => {
94 let hash = apply_hash(s);
95 Ok(Val::Str(Rc::new(hash)))
96 }
97 Val::Int(n) => {
98 let hash = apply_hash(&n.to_string());
99 Ok(Val::Str(Rc::new(hash)))
100 }
101 Val::Float(f) => {
102 let hash = apply_hash(&f.to_string());
103 Ok(Val::Str(Rc::new(hash)))
104 }
105 Val::Bool(b) => {
106 let hash = apply_hash(&b.to_string());
107 Ok(Val::Str(Rc::new(hash)))
108 }
109 Val::Null => {
110 let hash = apply_hash("null");
111 Ok(Val::Str(Rc::new(hash)))
112 }
113 Val::Num(n) => {
114 let hash = apply_hash(n);
115 Ok(Val::Str(Rc::new(hash)))
116 }
117 Val::Arr(_) | Val::Obj(_) => {
118 let json: Value = val.clone().into();
120 let hash = apply_hash(&json.to_string());
121 Ok(Val::Str(Rc::new(hash)))
122 }
123 };
124 box_once(result.map_err(Exn::from))
125 }),
126 ),
127 ]
128 .into_iter()
129}
130
131impl JqExtraction {
132 pub fn compile(expression: &str) -> Result<Self, String> {
144 let program = File {
145 code: expression,
146 path: (),
147 };
148 let loader = Loader::new(jaq_std::defs().chain(jaq_json::defs()));
149 let arena = Arena::default();
150 let modules = loader
151 .load(&arena, program)
152 .map_err(|e| format!("jq parse error: {:?}", e))?;
153 let filter = jaq_core::Compiler::default()
154 .with_funs(
155 jaq_std::funs()
156 .chain(jaq_json::funs())
157 .chain(custom_jq_funs()),
158 )
159 .compile(modules)
160 .map_err(|e| format!("jq compile error: {:?}", e))?;
161 Ok(Self { filter })
162 }
163
164 fn apply(&self, input: Value) -> Vec<Value> {
165 let inputs = RcIter::new(core::iter::empty());
166 let out = self.filter.run((Ctx::new([], &inputs), Val::from(input)));
167 out.filter_map(|r| r.ok()).map(|v| v.into()).collect()
168 }
169}
170
171#[derive(Debug, Clone)]
191pub struct RegexExtraction {
192 pub regex: Regex,
194 pub key: Option<String>,
196 pub global: bool,
198 pub transforms: Transforms,
200}
201
202#[derive(Debug, Clone, Default)]
207pub enum Transforms {
208 #[default]
210 None,
211 FullBody(Vec<Transform>),
213 PerKey(HashMap<String, Vec<Transform>>),
215}
216
217#[derive(Debug)]
228pub struct Body<E> {
229 inner: E,
230 extraction: BodyExtraction,
231}
232
233impl<S> Body<super::NeutralExtractor<S>> {
234 pub fn new(extraction: BodyExtraction) -> Self {
236 Self {
237 inner: super::NeutralExtractor::new(),
238 extraction,
239 }
240 }
241}
242
243pub trait BodyExtractor: Sized {
261 fn body(self, extraction: BodyExtraction) -> Body<Self>;
263}
264
265impl<E> BodyExtractor for E
266where
267 E: hitbox::Extractor,
268{
269 fn body(self, extraction: BodyExtraction) -> Body<Self> {
270 Body {
271 inner: self,
272 extraction,
273 }
274 }
275}
276
277fn extract_jq_parts(values: Vec<Value>) -> Vec<KeyPart> {
279 let mut parts = Vec::new();
280
281 for value in values {
282 match value {
283 Value::Object(map) => {
284 for (key, val) in map {
285 let value_str = value_to_string(&val);
286 parts.push(KeyPart::new(key, value_str));
287 }
288 }
289 Value::Array(arr) => {
290 for item in arr {
291 match item {
292 Value::Object(map) => {
293 for (key, val) in map {
294 let value_str = value_to_string(&val);
295 parts.push(KeyPart::new(key, value_str));
296 }
297 }
298 other => {
299 let value_str = value_to_string(&other);
300 parts.push(KeyPart::new("body", value_str));
301 }
302 }
303 }
304 }
305 other => {
306 let value_str = value_to_string(&other);
307 parts.push(KeyPart::new("body", value_str));
308 }
309 }
310 }
311
312 parts
313}
314
315fn value_to_string(value: &Value) -> Option<String> {
317 match value {
318 Value::Null => None,
319 Value::String(s) => Some(s.clone()),
320 Value::Number(n) => Some(n.to_string()),
321 Value::Bool(b) => Some(b.to_string()),
322 other => Some(other.to_string()),
323 }
324}
325
326fn extract_regex_parts(
328 body: &str,
329 regex: &Regex,
330 key: &Option<String>,
331 global: bool,
332 transforms: &Transforms,
333) -> Vec<KeyPart> {
334 let mut parts = Vec::new();
335 let capture_names: Vec<_> = regex.capture_names().flatten().collect();
336 let has_named_groups = !capture_names.is_empty();
337
338 let apply_transforms = |key_name: &str, value: String| -> String {
339 match transforms {
340 Transforms::None => value,
341 Transforms::FullBody(chain) => apply_transform_chain(value, chain),
342 Transforms::PerKey(map) => {
343 if let Some(chain) = map.get(key_name) {
344 apply_transform_chain(value, chain)
345 } else {
346 value
347 }
348 }
349 }
350 };
351
352 if global {
353 for caps in regex.captures_iter(body) {
354 if has_named_groups {
355 for name in &capture_names {
356 if let Some(m) = caps.name(name) {
357 let value = apply_transforms(name, m.as_str().to_string());
358 parts.push(KeyPart::new(*name, Some(value)));
359 }
360 }
361 } else if let Some(m) = caps.get(1).or_else(|| caps.get(0)) {
362 let key_name = key.as_deref().unwrap_or("body");
363 let value = apply_transforms(key_name, m.as_str().to_string());
364 parts.push(KeyPart::new(key_name, Some(value)));
365 }
366 }
367 } else if let Some(caps) = regex.captures(body) {
368 if has_named_groups {
369 for name in &capture_names {
370 if let Some(m) = caps.name(name) {
371 let value = apply_transforms(name, m.as_str().to_string());
372 parts.push(KeyPart::new(*name, Some(value)));
373 }
374 }
375 } else if let Some(m) = caps.get(1).or_else(|| caps.get(0)) {
376 let key_name = key.as_deref().unwrap_or("body");
377 let value = apply_transforms(key_name, m.as_str().to_string());
378 parts.push(KeyPart::new(key_name, Some(value)));
379 }
380 }
381
382 parts
383}
384
385#[async_trait]
386impl<ReqBody, E> Extractor for Body<E>
387where
388 ReqBody: HttpBody + Send + 'static,
389 ReqBody::Error: Send,
390 ReqBody::Data: Send,
391 E: Extractor<Subject = CacheableHttpRequest<ReqBody>> + Send + Sync,
392{
393 type Subject = E::Subject;
394
395 async fn get(&self, subject: Self::Subject) -> KeyParts<Self::Subject> {
396 let (parts, body) = subject.into_parts();
397
398 let payload = match body.collect().await {
400 Ok(bytes) => bytes,
401 Err(error_body) => {
402 let request = CacheableHttpRequest::from_request(http::Request::from_parts(
403 parts, error_body,
404 ));
405 let mut key_parts = self.inner.get(request).await;
406 key_parts.push(KeyPart::new("body", None::<String>));
407 return key_parts;
408 }
409 };
410
411 let body_bytes = payload.to_vec();
412 let body_str = String::from_utf8_lossy(&body_bytes);
413
414 let extracted_parts = match &self.extraction {
415 BodyExtraction::Hash => {
416 let hash = apply_hash(&body_str);
417 vec![KeyPart::new("body", Some(hash))]
418 }
419 BodyExtraction::Jq(jq) => {
420 let json_value = serde_json::from_str(&body_str).unwrap_or(Value::Null);
421 let results = jq.apply(json_value);
422 extract_jq_parts(results)
423 }
424 BodyExtraction::Regex(regex_ext) => extract_regex_parts(
425 &body_str,
426 ®ex_ext.regex,
427 ®ex_ext.key,
428 regex_ext.global,
429 ®ex_ext.transforms,
430 ),
431 };
432
433 let body = crate::BufferedBody::Complete(Some(payload));
434 let request = CacheableHttpRequest::from_request(http::Request::from_parts(parts, body));
435
436 let mut key_parts = self.inner.get(request).await;
437 for part in extracted_parts {
438 key_parts.push(part);
439 }
440 key_parts
441 }
442}