hitbox_http/extractors/header.rs
1//! Header extraction for cache keys.
2//!
3//! Provides [`Header`] extractor with support for name selection, value extraction,
4//! and transformation.
5//!
6//! # Examples
7//!
8//! Extract a single header:
9//!
10//! ```
11//! use hitbox_http::extractors::{Method, header::HeaderExtractor};
12//!
13//! # use bytes::Bytes;
14//! # use http_body_util::Empty;
15//! # use hitbox_http::extractors::{NeutralExtractor, header::Header};
16//! let extractor = Method::new()
17//! .header("x-api-key".to_string());
18//! # let _: &Header<Method<NeutralExtractor<Empty<Bytes>>>> = &extractor;
19//! ```
20
21use async_trait::async_trait;
22use hitbox::{Extractor, KeyPart, KeyParts};
23use http::HeaderValue;
24use regex::Regex;
25
26use super::NeutralExtractor;
27pub use super::transform::Transform;
28use super::transform::apply_transform_chain;
29use crate::CacheableHttpRequest;
30
/// Strategy for choosing which request headers contribute to the cache key.
#[derive(Debug, Clone)]
pub enum NameSelector {
    /// Pick exactly one header, identified by its name.
    Exact(String),
    /// Pick every header whose name begins with the given prefix.
    ///
    /// The resulting key parts are ordered by header name so that the
    /// generated cache key is deterministic.
    Starts(String),
}
41
42/// Extracts values from header content.
43#[derive(Debug, Clone)]
44pub enum ValueExtractor {
45 /// Use the full header value.
46 Full,
47 /// Extract using regex (returns first capture group, or full match if no groups).
48 Regex(Regex),
49}
50
51/// Extracts header values as cache key parts.
52///
53/// Supports flexible header selection, value extraction, and transformation.
54///
55/// # Key Parts Generated
56///
57/// For each matched header, generates a `KeyPart` with:
58/// - Key: the header name
59/// - Value: the extracted (and optionally transformed) value
60#[derive(Debug)]
61pub struct Header<E> {
62 inner: E,
63 name_selector: NameSelector,
64 value_extractor: ValueExtractor,
65 transforms: Vec<Transform>,
66}
67
68impl<S> Header<NeutralExtractor<S>> {
69 /// Creates a header extractor for a single header by exact name.
70 ///
71 /// The header value becomes a cache key part with the header name
72 /// as key. For more complex extraction (prefix matching, regex, transforms),
73 /// use [`Header::new_with`].
74 ///
75 /// Chain onto existing extractors using [`HeaderExtractor::header`] instead
76 /// if you already have an extractor chain.
77 pub fn new(name: String) -> Self {
78 Self {
79 inner: NeutralExtractor::new(),
80 name_selector: NameSelector::Exact(name),
81 value_extractor: ValueExtractor::Full,
82 transforms: Vec::new(),
83 }
84 }
85}
86
87impl<E> Header<E> {
88 /// Creates a header extractor with full configuration options.
89 ///
90 /// This constructor provides complete control over header extraction:
91 /// - Select headers by exact name or prefix pattern
92 /// - Extract full values or use regex capture groups
93 /// - Apply transformations (hash, lowercase, uppercase)
94 ///
95 /// For simple exact-name extraction without transforms, use [`Header::new`]
96 /// or [`HeaderExtractor::header`] instead.
97 pub fn new_with(
98 inner: E,
99 name_selector: NameSelector,
100 value_extractor: ValueExtractor,
101 transforms: Vec<Transform>,
102 ) -> Self {
103 Self {
104 inner,
105 name_selector,
106 value_extractor,
107 transforms,
108 }
109 }
110}
111
112/// Extension trait for adding header extraction to an extractor chain.
113///
114/// # For Callers
115///
116/// Chain this to extract HTTP header values as cache key parts. The header
117/// name becomes the key part name, and the header value becomes the key part value.
118///
119/// # For Implementors
120///
121/// This trait is automatically implemented for all [`Extractor`]
122/// types. You don't need to implement it manually.
123pub trait HeaderExtractor: Sized {
124 /// Adds extraction for a single header by exact name.
125 fn header(self, name: String) -> Header<Self>;
126}
127
128impl<E> HeaderExtractor for E
129where
130 E: Extractor,
131{
132 fn header(self, name: String) -> Header<Self> {
133 Header {
134 inner: self,
135 name_selector: NameSelector::Exact(name),
136 value_extractor: ValueExtractor::Full,
137 transforms: Vec::new(),
138 }
139 }
140}
141
142/// Extract value from header using the value extractor.
143fn extract_value(value: &HeaderValue, extractor: &ValueExtractor) -> Option<String> {
144 let value_str = value.to_str().ok()?;
145
146 match extractor {
147 ValueExtractor::Full => Some(value_str.to_string()),
148 ValueExtractor::Regex(regex) => {
149 regex.captures(value_str).and_then(|caps| {
150 // Return first capture group if exists, otherwise full match
151 caps.get(1)
152 .or_else(|| caps.get(0))
153 .map(|m| m.as_str().to_string())
154 })
155 }
156 }
157}
158
159#[async_trait]
160impl<ReqBody, E> Extractor for Header<E>
161where
162 ReqBody: hyper::body::Body + Send + 'static,
163 ReqBody::Error: Send,
164 E: Extractor<Subject = CacheableHttpRequest<ReqBody>> + Send + Sync,
165{
166 type Subject = E::Subject;
167
168 async fn get(&self, subject: Self::Subject) -> KeyParts<Self::Subject> {
169 let headers = &subject.parts().headers;
170 let mut extracted_parts = Vec::new();
171
172 match &self.name_selector {
173 NameSelector::Exact(name) => {
174 let value = headers
175 .get(name.as_str())
176 .and_then(|v| extract_value(v, &self.value_extractor))
177 .map(|v| apply_transform_chain(v, &self.transforms));
178
179 extracted_parts.push(KeyPart::new(name.clone(), value));
180 }
181 NameSelector::Starts(prefix) => {
182 for (name, value) in headers.iter() {
183 let name_str = name.as_str();
184 if name_str.starts_with(prefix.as_str()) {
185 let extracted = extract_value(value, &self.value_extractor)
186 .map(|v| apply_transform_chain(v, &self.transforms));
187
188 extracted_parts.push(KeyPart::new(name_str, extracted));
189 }
190 }
191 // Sort by header name for deterministic cache keys
192 extracted_parts.sort_by(|a, b| a.key().cmp(b.key()));
193 }
194 }
195
196 let mut parts = self.inner.get(subject).await;
197 parts.append(&mut extracted_parts);
198 parts
199 }
200}