hitbox_http/predicates/body/plain.rs
1//! Plain-text body matching operations.
2//!
3//! Provides [`PlainOperation`] for matching raw body bytes using equality,
4//! prefix/suffix checks, substring search, and regular expressions.
5
6use bytes::{Buf, Bytes, BytesMut};
7use hitbox::predicate::PredicateResult;
8use http_body_util::BodyExt;
9use hyper::body::Body as HttpBody;
10
11use crate::{BufferedBody, PartialBufferedBody, Remaining};
12
13/// Searches for a pattern in a body stream without collecting the entire body.
14///
15/// Optimizes the search by stopping early when the pattern is found and handling
16/// patterns that span chunk boundaries.
17async fn streaming_search<B>(body: BufferedBody<B>, pattern: &[u8]) -> (bool, BufferedBody<B>)
18where
19 B: HttpBody + Unpin,
20 B::Data: Send,
21{
22 if pattern.is_empty() {
23 return (true, body);
24 }
25
26 match body {
27 // Already complete - just search
28 BufferedBody::Complete(Some(bytes)) => {
29 let found = bytes.windows(pattern.len()).any(|w| w == pattern);
30 (found, BufferedBody::Complete(Some(bytes)))
31 }
32 BufferedBody::Complete(None) => (false, BufferedBody::Complete(None)),
33
34 // Partial - extract parts and search through prefix + remaining
35 BufferedBody::Partial(partial) => {
36 let (prefix, remaining) = partial.into_parts();
37 match remaining {
38 Remaining::Body(body) => streaming_search_body(prefix, body, pattern).await,
39 Remaining::Error(error) => {
40 // Already have an error, just search in the prefix we have
41 let found = prefix
42 .as_ref()
43 .map(|b| b.windows(pattern.len()).any(|w| w == pattern))
44 .unwrap_or(false);
45 (
46 found,
47 BufferedBody::Partial(PartialBufferedBody::new(
48 prefix,
49 Remaining::Error(error),
50 )),
51 )
52 }
53 }
54 }
55
56 // Passthrough - stream through it
57 BufferedBody::Passthrough(stream) => streaming_search_body(None, stream, pattern).await,
58 }
59}
60
61/// Helper function that performs streaming search on any Body implementation.
62/// Optionally starts with an initial prefix buffer.
63async fn streaming_search_body<B>(
64 initial_prefix: Option<Bytes>,
65 mut body: B,
66 pattern: &[u8],
67) -> (bool, BufferedBody<B>)
68where
69 B: HttpBody + Unpin,
70 B::Data: Send,
71{
72 let mut buffer = BytesMut::new();
73
74 // Initialize with prefix if provided
75 if let Some(prefix_bytes) = initial_prefix {
76 buffer.extend_from_slice(&prefix_bytes);
77 }
78
79 // Keep last (pattern.len() - 1) bytes to handle pattern spanning chunks
80 let overlap_size = pattern.len().saturating_sub(1);
81
82 loop {
83 match body.frame().await {
84 Some(Ok(frame)) => {
85 if let Ok(mut data) = frame.into_data() {
86 // Search in: [overlap from previous] + [current chunk]
87 let search_start = buffer.len().saturating_sub(overlap_size);
88 buffer.extend_from_slice(&data.copy_to_bytes(data.remaining()));
89
90 // Search in the new region (overlap + new data)
91 if buffer[search_start..]
92 .windows(pattern.len())
93 .any(|w| w == pattern)
94 {
95 // Found! Return complete body with all buffered data
96 return (true, BufferedBody::Complete(Some(buffer.freeze())));
97 }
98 }
99 }
100 Some(Err(error)) => {
101 // Error occurred - save buffered data + error in Partial body
102 let buffered = if buffer.is_empty() {
103 None
104 } else {
105 Some(buffer.freeze())
106 };
107
108 // Check if pattern was in buffered data before error
109 let found = buffered
110 .as_ref()
111 .map(|b| b.windows(pattern.len()).any(|w| w == pattern))
112 .unwrap_or(false);
113
114 let result_body = BufferedBody::Partial(PartialBufferedBody::new(
115 buffered,
116 Remaining::Error(Some(error)),
117 ));
118 return (found, result_body);
119 }
120 None => {
121 // End of stream
122 let combined = if buffer.is_empty() {
123 None
124 } else {
125 Some(buffer.freeze())
126 };
127
128 let found = combined
129 .as_ref()
130 .map(|b| b.windows(pattern.len()).any(|w| w == pattern))
131 .unwrap_or(false);
132
133 return (found, BufferedBody::Complete(combined));
134 }
135 }
136 }
137}
138
139/// Operations for matching raw body bytes.
140///
141/// Each operation checks the body content against a pattern and returns
142/// [`Cacheable`](PredicateResult::Cacheable) on match or
143/// [`NonCacheable`](PredicateResult::NonCacheable) otherwise.
144///
145/// # Performance
146///
147/// - [`Starts`](Self::Starts): Reads only the prefix bytes needed for comparison
148/// - [`Contains`](Self::Contains): Uses streaming search, stops early on match
149/// - [`Eq`](Self::Eq), [`Ends`](Self::Ends), [`RegExp`](Self::RegExp): Collect the entire body
150///
151/// # Caveats
152///
153/// All operations consume the body stream. The body is returned as
154/// [`BufferedBody::Complete`] or [`BufferedBody::Partial`] after checking.
155#[derive(Debug)]
156pub enum PlainOperation {
157 /// Use when the entire body must match exactly.
158 ///
159 /// Best for known static responses or signatures.
160 Eq(Bytes),
161 /// Use when a marker or pattern appears anywhere in the body.
162 ///
163 /// Best for success markers, error messages, or content indicators.
164 /// Uses streaming search for efficiency.
165 Contains(Bytes),
166 /// Use when the body should begin with a specific prefix.
167 ///
168 /// Best for magic numbers, file signatures, or protocol headers.
169 /// Only reads the prefix bytes needed.
170 Starts(Bytes),
171 /// Use when the body should end with a specific suffix.
172 ///
173 /// Best for file extensions embedded in content or trailing markers.
174 /// Requires reading the entire body.
175 Ends(Bytes),
176 /// Use when matching complex patterns in body content.
177 ///
178 /// Best for structured text, log formats, or flexible content matching.
179 /// Requires reading the entire body.
180 RegExp(regex::bytes::Regex),
181}
182
183impl PlainOperation {
184 /// Checks if this operation matches the body.
185 ///
186 /// Returns [`Cacheable`](PredicateResult::Cacheable) when the body matches,
187 /// [`NonCacheable`](PredicateResult::NonCacheable) otherwise.
188 ///
189 /// The returned body is always in a buffered state suitable for further
190 /// processing or caching.
191 pub async fn check<B>(&self, body: BufferedBody<B>) -> PredicateResult<BufferedBody<B>>
192 where
193 B: HttpBody + Unpin,
194 B::Data: Send,
195 {
196 match self {
197 PlainOperation::Starts(prefix) => {
198 // Empty prefix always matches
199 if prefix.is_empty() {
200 return PredicateResult::Cacheable(body);
201 }
202
203 // Use collect_exact to read exactly prefix.len() bytes
204 use crate::CollectExactResult;
205
206 let result = body.collect_exact(prefix.len()).await;
207
208 // Check if body starts with prefix
209 let matches = match &result {
210 CollectExactResult::AtLeast { buffered, .. } => buffered.starts_with(prefix),
211 CollectExactResult::Incomplete { .. } => false, // Not enough bytes
212 };
213
214 // Reconstruct body
215 let result_body = result.into_buffered_body();
216
217 if matches {
218 PredicateResult::Cacheable(result_body)
219 } else {
220 PredicateResult::NonCacheable(result_body)
221 }
222 }
223
224 PlainOperation::Eq(expected) => body
225 .collect()
226 .await
227 .map(|body_bytes| {
228 let matches = body_bytes.as_ref() == expected.as_ref();
229 let result_body = BufferedBody::Complete(Some(body_bytes));
230 if matches {
231 PredicateResult::Cacheable(result_body)
232 } else {
233 PredicateResult::NonCacheable(result_body)
234 }
235 })
236 .unwrap_or_else(PredicateResult::NonCacheable),
237
238 PlainOperation::Contains(sequence) => {
239 let (found, result_body) = streaming_search(body, sequence.as_ref()).await;
240 if found {
241 PredicateResult::Cacheable(result_body)
242 } else {
243 PredicateResult::NonCacheable(result_body)
244 }
245 }
246
247 PlainOperation::Ends(suffix) => body
248 .collect()
249 .await
250 .map(|body_bytes| {
251 let matches = body_bytes.ends_with(suffix);
252 let result_body = BufferedBody::Complete(Some(body_bytes));
253 if matches {
254 PredicateResult::Cacheable(result_body)
255 } else {
256 PredicateResult::NonCacheable(result_body)
257 }
258 })
259 .unwrap_or_else(PredicateResult::NonCacheable),
260
261 PlainOperation::RegExp(regex) => body
262 .collect()
263 .await
264 .map(|body_bytes| {
265 let matches = regex.is_match(body_bytes.as_ref());
266 let result_body = BufferedBody::Complete(Some(body_bytes));
267 if matches {
268 PredicateResult::Cacheable(result_body)
269 } else {
270 PredicateResult::NonCacheable(result_body)
271 }
272 })
273 .unwrap_or_else(PredicateResult::NonCacheable),
274 }
275 }
276}