hyperscan/chimera/
runtime.rs

1use std::fmt;
2use std::mem::{self, MaybeUninit};
3use std::ops::Range;
4use std::ptr;
5use std::slice;
6
7use derive_more::{Deref, From, Into};
8use foreign_types::{foreign_type, ForeignType, ForeignTypeRef};
9
10use crate::{
11    chimera::{error::AsResult, ffi, DatabaseRef},
12    Result,
13};
14
15foreign_type! {
16    /// A large enough region of scratch space to support a given database.
17    pub unsafe type Scratch: Send {
18        type CType = ffi::ch_scratch_t;
19
20        fn drop = free_scratch;
21        fn clone = clone_scratch;
22    }
23}
24
25/// Free a scratch block previously allocated by `ch_alloc_scratch()` or `ch_clone_scratch()`.
26unsafe fn free_scratch(s: *mut ffi::ch_scratch_t) {
27    ffi::ch_free_scratch(s).expect("free scratch");
28}
29
30/// Allocate a scratch space that is a clone of an existing scratch space.
31unsafe fn clone_scratch(s: *mut ffi::ch_scratch_t) -> *mut ffi::ch_scratch_t {
32    let mut p = MaybeUninit::uninit();
33    ffi::ch_clone_scratch(s, p.as_mut_ptr()).expect("clone scratch");
34    p.assume_init()
35}
36
37impl ScratchRef {
38    /// Provides the size of the given scratch space.
39    pub fn size(&self) -> Result<usize> {
40        let mut size = MaybeUninit::uninit();
41
42        unsafe { ffi::ch_scratch_size(self.as_ptr(), size.as_mut_ptr()).map(|_| size.assume_init()) }
43    }
44}
45
46impl DatabaseRef {
47    /// Allocate a `scratch` space for use by Chimera.
48    ///
49    /// This is required for runtime use, and one scratch space per thread,
50    /// or concurrent caller, is required.
51    pub fn alloc_scratch(&self) -> Result<Scratch> {
52        let mut s = MaybeUninit::zeroed();
53
54        unsafe { ffi::ch_alloc_scratch(self.as_ptr(), s.as_mut_ptr()).map(|_| Scratch::from_ptr(s.assume_init())) }
55    }
56
57    /// Reallocate a `scratch` space for use by Chimera.
58    pub fn realloc_scratch(&self, s: &mut Scratch) -> Result<&ScratchRef> {
59        let mut p = s.as_ptr();
60
61        unsafe {
62            ffi::ch_alloc_scratch(self.as_ptr(), &mut p).map(|_| {
63                s.0 = ptr::NonNull::new_unchecked(p);
64
65                ScratchRef::from_ptr(p)
66            })
67        }
68    }
69}
70
71/// Callback return value used to tell the Chimera matcher what to do after processing this match.
72#[repr(u32)]
73#[derive(Clone, Copy, Debug, PartialEq, Eq)]
74pub enum Matching {
75    /// Continue matching.
76    Continue = ffi::CH_CALLBACK_CONTINUE,
77    /// Terminate matching.
78    Terminate = ffi::CH_CALLBACK_TERMINATE,
79    /// Skip remaining matches for this ID and continue.
80    Skip = ffi::CH_CALLBACK_SKIP_PATTERN,
81}
82
83impl Default for Matching {
84    fn default() -> Self {
85        Matching::Continue
86    }
87}
88
89/// The type of error event that occurred.
90#[repr(u32)]
91#[derive(Clone, Copy, Debug, From, PartialEq, Eq)]
92pub enum Error {
93    /// PCRE hits its match limit.
94    MatchLimit = ffi::CH_ERROR_MATCHLIMIT,
95    /// PCRE hits its recursion limit.
96    RecursionLimit = ffi::CH_ERROR_RECURSIONLIMIT,
97}
98
99/// Structure representing a captured subexpression within a match.
100#[repr(transparent)]
101#[derive(Clone, Copy, From, Into, Deref, PartialEq, Eq)]
102pub struct Capture(ffi::ch_capture);
103
104impl fmt::Debug for Capture {
105    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106        f.debug_struct("Capture")
107            .field("is_active", &self.is_active())
108            .field("from", &self.from)
109            .field("to", &self.to)
110            .finish()
111    }
112}
113
114impl From<Capture> for Range<usize> {
115    fn from(capture: Capture) -> Self {
116        capture.range()
117    }
118}
119
120impl Capture {
121    /// Indicating that a particular capture group is active
122    pub fn is_active(&self) -> bool {
123        self.flags == ffi::CH_CAPTURE_FLAG_ACTIVE
124    }
125
126    /// Returns the range of capture group
127    pub fn range(&self) -> Range<usize> {
128        self.from as usize..self.to as usize
129    }
130}
131
132/// Definition of the match event callback function type.
133///
134/// A callback function matching the defined type must be provided by the
135/// application calling the `DatabaseRef::scan`
136///
137/// This callback function will be invoked whenever a match is located in the
138/// target data during the execution of a scan. The details of the match are
139/// passed in as parameters to the callback function, and the callback function
140/// should return a value indicating whether or not matching should continue on
141/// the target data. If no callbacks are desired from a scan call, NULL may be
142/// provided in order to suppress match production.
143pub trait MatchEventHandler<'a> {
144    /// Split the match event handler to callback and userdata.
145    ///
146    /// # Safety
147    ///
148    /// The returned function can only be called with the returned pointer, or a pointer to another C closure.
149    unsafe fn split(&mut self) -> (ffi::ch_match_event_handler, *mut libc::c_void);
150}
151
152impl MatchEventHandler<'_> for () {
153    unsafe fn split(&mut self) -> (ffi::ch_match_event_handler, *mut libc::c_void) {
154        (None, ptr::null_mut())
155    }
156}
157
158impl MatchEventHandler<'_> for Matching {
159    unsafe fn split(&mut self) -> (ffi::ch_match_event_handler, *mut libc::c_void) {
160        unsafe extern "C" fn trampoline(
161            _id: u32,
162            _from: u64,
163            _to: u64,
164            _flags: u32,
165            _size: u32,
166            _captured: *const ffi::ch_capture_t,
167            ctx: *mut ::libc::c_void,
168        ) -> ::libc::c_int {
169            *(*(ctx as *mut (&mut Matching, *mut ()))).0 as _
170        }
171
172        (Some(trampoline), self as *mut _ as *mut _)
173    }
174}
175
176impl<'a, F> MatchEventHandler<'a> for F
177where
178    F: FnMut(u32, u64, u64, u32, Option<&'a [Capture]>) -> Matching,
179{
180    unsafe fn split(&mut self) -> (ffi::ch_match_event_handler, *mut libc::c_void) {
181        (Some(on_match_trampoline::<'a, F>), self as *mut _ as *mut _)
182    }
183}
184
185unsafe extern "C" fn on_match_trampoline<'a, F>(
186    id: u32,
187    from: u64,
188    to: u64,
189    flags: u32,
190    size: u32,
191    captured: *const ffi::ch_capture_t,
192    ctx: *mut ::libc::c_void,
193) -> ffi::ch_callback_t
194where
195    F: FnMut(u32, u64, u64, u32, Option<&'a [Capture]>) -> Matching,
196{
197    let &mut (ref mut callback, _) = &mut *(ctx as *mut (&mut F, *mut ()));
198
199    callback(
200        id,
201        from,
202        to,
203        flags,
204        if captured.is_null() || size == 0 {
205            None
206        } else {
207            Some(slice::from_raw_parts(captured as *const _, size as usize))
208        },
209    ) as i32
210}
211
212/// Definition of the Chimera error event callback function type.
213///
214/// A callback function matching the defined type may be provided by the
215/// application calling the @ref ch_scan function. This callback function
216/// will be invoked when an error event occurs during matching; this indicates
217/// that some matches for a given expression may not be reported.
218pub trait ErrorEventHandler {
219    /// Split the match event handler to callback and userdata.
220    ///
221    /// # Safety
222    ///
223    /// The returned function can only be called with the returned pointer, or a pointer to another C closure.
224    unsafe fn split(&mut self) -> (ffi::ch_error_event_handler, *mut libc::c_void);
225}
226
227impl ErrorEventHandler for () {
228    unsafe fn split(&mut self) -> (ffi::ch_error_event_handler, *mut libc::c_void) {
229        (None, ptr::null_mut())
230    }
231}
232impl ErrorEventHandler for Matching {
233    unsafe fn split(&mut self) -> (ffi::ch_error_event_handler, *mut libc::c_void) {
234        unsafe extern "C" fn trampoline(
235            _error_type: ffi::ch_error_event_t,
236            _id: u32,
237            _info: *mut ::libc::c_void,
238            ctx: *mut ::libc::c_void,
239        ) -> ffi::ch_callback_t {
240            *(*(ctx as *mut (*mut (), &mut Matching))).1 as _
241        }
242
243        (Some(trampoline), self as *mut _ as *mut _)
244    }
245}
246
247impl<F> ErrorEventHandler for F
248where
249    F: FnMut(Error, u32) -> Matching,
250{
251    unsafe fn split(&mut self) -> (ffi::ch_error_event_handler, *mut libc::c_void) {
252        (Some(on_error_trampoline::<F>), self as *mut _ as *mut _)
253    }
254}
255
256unsafe extern "C" fn on_error_trampoline<F>(
257    error_type: ffi::ch_error_event_t,
258    id: u32,
259    _info: *mut ::libc::c_void,
260    ctx: *mut ::libc::c_void,
261) -> ffi::ch_callback_t
262where
263    F: FnMut(Error, u32) -> Matching,
264{
265    let &mut (_, ref mut callback) = &mut *(ctx as *mut (*mut (), &mut F));
266
267    callback(mem::transmute(error_type), id) as i32
268}
269
270impl DatabaseRef {
271    /// The block regular expression scanner.
272    ///
273    /// ## Handling Matches
274    ///
275    /// `scan` will call a user-supplied callback when a match is found.
276    ///
277    /// This closure has the following signature:
278    ///
279    /// ```rust,no_run
280    /// # use hyperscan::chimera::{Capture, Matching};
281    /// fn on_match_event(id: u32, from: u64, to: u64, flags: u32, captured: Option<&[Capture]>) -> Matching {
282    ///     Matching::Continue
283    /// }
284    /// ```
285    ///
286    /// ### Parameters
287    ///
288    /// - `id`: The ID number of the expression that matched.
289    /// - `from`: The offset of the first byte that matches the expression.
290    /// - `to`: The offset after the last byte that matches the expression.
291    /// - `flags`: This is provided for future use and is unused at present.
292    /// - `captured`: An array of `Capture` structures that contain the start and end offsets of entire pattern match and each captured subexpression.
293    ///
294    /// ### Return
295    ///
296    /// The callback can return `Matching::Terminate` to stop matching.
297    /// Otherwise, a return value of `Matching::Continue` will continue,
298    /// with the current pattern if configured to produce multiple matches per pattern,
299    /// while a return value of `Matching::Skip` will cease matching this pattern but continue matching the next pattern.
300    ///
301    /// ## Handling Runtime Errors
302    ///
303    /// `scan` will call a user-supplied callback when a runtime error occurs in libpcre.
304    ///
305    /// This closure has the following signature:
306    ///
307    /// ```rust,no_run
308    /// # use hyperscan::chimera::{Error, Matching};
309    /// fn on_error_event(error_type: Error, id: u32) -> Matching {
310    ///     Matching::Continue
311    /// }
312    /// ```
313    ///
314    /// The `id` argument will be set to the identifier for the matching expression provided at compile time.
315    ///
316    /// The match callback has the capability to either halt scanning or continue scanning for the next pattern.
317    ///
318    /// ### Return
319    ///
320    /// The callback can return `Matching::Skip` to cease matching this pattern but continue matching the next pattern.
321    /// Otherwise, we stop matching for all patterns with `Matching::Terminate`.
322    pub fn scan<'a, T, F, E>(
323        &self,
324        data: T,
325        scratch: &'a ScratchRef,
326        mut on_match_event: F,
327        mut on_error_event: E,
328    ) -> Result<()>
329    where
330        T: AsRef<[u8]>,
331        F: MatchEventHandler<'a>,
332        E: ErrorEventHandler,
333    {
334        let data = data.as_ref();
335        unsafe {
336            let (on_match_callback, on_match_data) = on_match_event.split();
337            let (on_error_callback, on_error_data) = on_error_event.split();
338
339            let mut userdata = (on_match_data, on_error_data);
340
341            ffi::ch_scan(
342                self.as_ptr(),
343                data.as_ptr() as *const _,
344                data.len() as _,
345                0,
346                scratch.as_ptr(),
347                on_match_callback,
348                on_error_callback,
349                &mut userdata as *mut _ as *mut _,
350            )
351            .ok()
352        }
353    }
354}
355
#[cfg(test)]
pub mod tests {
    use std::ptr;

    use foreign_types::ForeignType;

    use crate::chimera::prelude::*;

    /// Lower bound every scratch size in this test is compared against.
    const SCRATCH_SIZE: usize = 2000;

    /// Allocates, clones and reallocates scratch space, checking pointers and sizes.
    #[test]
    fn test_scratch() {
        let first_db: Database = "test".parse().unwrap();
        let original = first_db.alloc_scratch().unwrap();

        assert!(original.size().unwrap() > SCRATCH_SIZE);

        // A clone must live at a different address and still be large enough.
        let mut copy = original.clone();

        assert!(!ptr::eq(original.as_ptr(), copy.as_ptr()));
        assert!(copy.size().unwrap() > SCRATCH_SIZE);

        // Reallocate the clone for a second database.
        let second_db: Database = "foobar".parse().unwrap();

        second_db.realloc_scratch(&mut copy).unwrap();

        assert!(!ptr::eq(original.as_ptr(), copy.as_ptr()));
        assert!(copy.size().unwrap() >= original.size().unwrap());
    }
}