Skip to main content

hitbox_http/predicates/body/
plain.rs

1//! Plain-text body matching operations.
2//!
3//! Provides [`PlainOperation`] for matching raw body bytes using equality,
4//! prefix/suffix checks, substring search, and regular expressions.
5
6use bytes::{Buf, Bytes, BytesMut};
7use hitbox::predicate::PredicateResult;
8use http_body_util::BodyExt;
9use hyper::body::Body as HttpBody;
10
11use crate::{BufferedBody, PartialBufferedBody, Remaining};
12
13/// Searches for a pattern in a body stream without collecting the entire body.
14///
15/// Optimizes the search by stopping early when the pattern is found and handling
16/// patterns that span chunk boundaries.
17async fn streaming_search<B>(body: BufferedBody<B>, pattern: &[u8]) -> (bool, BufferedBody<B>)
18where
19    B: HttpBody + Unpin,
20    B::Data: Send,
21{
22    if pattern.is_empty() {
23        return (true, body);
24    }
25
26    match body {
27        // Already complete - just search
28        BufferedBody::Complete(Some(bytes)) => {
29            let found = bytes.windows(pattern.len()).any(|w| w == pattern);
30            (found, BufferedBody::Complete(Some(bytes)))
31        }
32        BufferedBody::Complete(None) => (false, BufferedBody::Complete(None)),
33
34        // Partial - extract parts and search through prefix + remaining
35        BufferedBody::Partial(partial) => {
36            let (prefix, remaining) = partial.into_parts();
37            match remaining {
38                Remaining::Body(body) => streaming_search_body(prefix, body, pattern).await,
39                Remaining::Error(error) => {
40                    // Already have an error, just search in the prefix we have
41                    let found = prefix
42                        .as_ref()
43                        .map(|b| b.windows(pattern.len()).any(|w| w == pattern))
44                        .unwrap_or(false);
45                    (
46                        found,
47                        BufferedBody::Partial(PartialBufferedBody::new(
48                            prefix,
49                            Remaining::Error(error),
50                        )),
51                    )
52                }
53            }
54        }
55
56        // Passthrough - stream through it
57        BufferedBody::Passthrough(stream) => streaming_search_body(None, stream, pattern).await,
58    }
59}
60
61/// Helper function that performs streaming search on any Body implementation.
62/// Optionally starts with an initial prefix buffer.
63async fn streaming_search_body<B>(
64    initial_prefix: Option<Bytes>,
65    mut body: B,
66    pattern: &[u8],
67) -> (bool, BufferedBody<B>)
68where
69    B: HttpBody + Unpin,
70    B::Data: Send,
71{
72    let mut buffer = BytesMut::new();
73
74    // Initialize with prefix if provided
75    if let Some(prefix_bytes) = initial_prefix {
76        buffer.extend_from_slice(&prefix_bytes);
77    }
78
79    // Keep last (pattern.len() - 1) bytes to handle pattern spanning chunks
80    let overlap_size = pattern.len().saturating_sub(1);
81
82    loop {
83        match body.frame().await {
84            Some(Ok(frame)) => {
85                if let Ok(mut data) = frame.into_data() {
86                    // Search in: [overlap from previous] + [current chunk]
87                    let search_start = buffer.len().saturating_sub(overlap_size);
88                    buffer.extend_from_slice(&data.copy_to_bytes(data.remaining()));
89
90                    // Search in the new region (overlap + new data)
91                    if buffer[search_start..]
92                        .windows(pattern.len())
93                        .any(|w| w == pattern)
94                    {
95                        // Found! Return complete body with all buffered data
96                        return (true, BufferedBody::Complete(Some(buffer.freeze())));
97                    }
98                }
99            }
100            Some(Err(error)) => {
101                // Error occurred - save buffered data + error in Partial body
102                let buffered = if buffer.is_empty() {
103                    None
104                } else {
105                    Some(buffer.freeze())
106                };
107
108                // Check if pattern was in buffered data before error
109                let found = buffered
110                    .as_ref()
111                    .map(|b| b.windows(pattern.len()).any(|w| w == pattern))
112                    .unwrap_or(false);
113
114                let result_body = BufferedBody::Partial(PartialBufferedBody::new(
115                    buffered,
116                    Remaining::Error(Some(error)),
117                ));
118                return (found, result_body);
119            }
120            None => {
121                // End of stream
122                let combined = if buffer.is_empty() {
123                    None
124                } else {
125                    Some(buffer.freeze())
126                };
127
128                let found = combined
129                    .as_ref()
130                    .map(|b| b.windows(pattern.len()).any(|w| w == pattern))
131                    .unwrap_or(false);
132
133                return (found, BufferedBody::Complete(combined));
134            }
135        }
136    }
137}
138
/// Operations for matching raw body bytes.
///
/// Each operation checks the body content against a pattern and returns
/// [`Cacheable`](PredicateResult::Cacheable) on match or
/// [`NonCacheable`](PredicateResult::NonCacheable) otherwise.
///
/// # Performance
///
/// - [`Starts`](Self::Starts): Requests only `prefix.len()` bytes for the comparison
/// - [`Contains`](Self::Contains): Uses streaming search, stops reading on match
/// - [`Eq`](Self::Eq), [`Ends`](Self::Ends), [`RegExp`](Self::RegExp): Collect the entire body
///
/// # Caveats
///
/// Operations read from the body stream as far as they need to. The bytes that
/// were read are handed back inside the returned [`BufferedBody`].
/// An empty [`Starts`](Self::Starts) prefix or empty
/// [`Contains`](Self::Contains) pattern matches without reading, and the body
/// is returned unchanged.
///
/// For [`Eq`](Self::Eq), [`Ends`](Self::Ends) and [`RegExp`](Self::RegExp), a
/// body that fails to collect is reported as
/// [`NonCacheable`](PredicateResult::NonCacheable).
#[derive(Debug)]
pub enum PlainOperation {
    /// Use when the entire body must match exactly.
    ///
    /// Best for known static responses or signatures.
    /// Requires reading the entire body; compares it byte-for-byte.
    Eq(Bytes),
    /// Use when a marker or pattern appears anywhere in the body.
    ///
    /// Best for success markers, error messages, or content indicators.
    /// Uses streaming search for efficiency. An empty pattern always matches.
    Contains(Bytes),
    /// Use when the body should begin with a specific prefix.
    ///
    /// Best for magic numbers, file signatures, or protocol headers.
    /// Only requests the prefix bytes needed. An empty prefix always matches;
    /// a body shorter than the prefix never does.
    Starts(Bytes),
    /// Use when the body should end with a specific suffix.
    ///
    /// Best for trailing markers or terminators.
    /// Requires reading the entire body.
    Ends(Bytes),
    /// Use when matching complex patterns in body content.
    ///
    /// Best for structured text, log formats, or flexible content matching.
    /// Requires reading the entire body; the collected bytes are tested with
    /// [`regex::bytes::Regex::is_match`].
    RegExp(regex::bytes::Regex),
}
182
183impl PlainOperation {
184    /// Checks if this operation matches the body.
185    ///
186    /// Returns [`Cacheable`](PredicateResult::Cacheable) when the body matches,
187    /// [`NonCacheable`](PredicateResult::NonCacheable) otherwise.
188    ///
189    /// The returned body is always in a buffered state suitable for further
190    /// processing or caching.
191    pub async fn check<B>(&self, body: BufferedBody<B>) -> PredicateResult<BufferedBody<B>>
192    where
193        B: HttpBody + Unpin,
194        B::Data: Send,
195    {
196        match self {
197            PlainOperation::Starts(prefix) => {
198                // Empty prefix always matches
199                if prefix.is_empty() {
200                    return PredicateResult::Cacheable(body);
201                }
202
203                // Use collect_exact to read exactly prefix.len() bytes
204                use crate::CollectExactResult;
205
206                let result = body.collect_exact(prefix.len()).await;
207
208                // Check if body starts with prefix
209                let matches = match &result {
210                    CollectExactResult::AtLeast { buffered, .. } => buffered.starts_with(prefix),
211                    CollectExactResult::Incomplete { .. } => false, // Not enough bytes
212                };
213
214                // Reconstruct body
215                let result_body = result.into_buffered_body();
216
217                if matches {
218                    PredicateResult::Cacheable(result_body)
219                } else {
220                    PredicateResult::NonCacheable(result_body)
221                }
222            }
223
224            PlainOperation::Eq(expected) => body
225                .collect()
226                .await
227                .map(|body_bytes| {
228                    let matches = body_bytes.as_ref() == expected.as_ref();
229                    let result_body = BufferedBody::Complete(Some(body_bytes));
230                    if matches {
231                        PredicateResult::Cacheable(result_body)
232                    } else {
233                        PredicateResult::NonCacheable(result_body)
234                    }
235                })
236                .unwrap_or_else(PredicateResult::NonCacheable),
237
238            PlainOperation::Contains(sequence) => {
239                let (found, result_body) = streaming_search(body, sequence.as_ref()).await;
240                if found {
241                    PredicateResult::Cacheable(result_body)
242                } else {
243                    PredicateResult::NonCacheable(result_body)
244                }
245            }
246
247            PlainOperation::Ends(suffix) => body
248                .collect()
249                .await
250                .map(|body_bytes| {
251                    let matches = body_bytes.ends_with(suffix);
252                    let result_body = BufferedBody::Complete(Some(body_bytes));
253                    if matches {
254                        PredicateResult::Cacheable(result_body)
255                    } else {
256                        PredicateResult::NonCacheable(result_body)
257                    }
258                })
259                .unwrap_or_else(PredicateResult::NonCacheable),
260
261            PlainOperation::RegExp(regex) => body
262                .collect()
263                .await
264                .map(|body_bytes| {
265                    let matches = regex.is_match(body_bytes.as_ref());
266                    let result_body = BufferedBody::Complete(Some(body_bytes));
267                    if matches {
268                        PredicateResult::Cacheable(result_body)
269                    } else {
270                        PredicateResult::NonCacheable(result_body)
271                    }
272                })
273                .unwrap_or_else(PredicateResult::NonCacheable),
274        }
275    }
276}