vectorscan/
lib.rs

1/* Copyright 2022-2023 Danny McClanahan */
2/* SPDX-License-Identifier: BSD-3-Clause */
3
4//! Wrapper for the vectorscan C regex library.
5//!
6//! # Quirks
7//! The [vectorscan] library (originally [hyperscan], from Intel) supports
8//! high-performance pattern matching using a subset of PCRE syntax. It was
9//! originally written for extremely low-latency network traffic monitoring, so
10//! it has some interface quirks that may be unfamiliar:
11//! - **[Vectorscan Callback API]:** Matches are "returned" to the user when
12//!   vectorscan executes a user-provided C ABI method call, so overlapping
13//!   matches and other interactive feedback with the matching engine are much
14//!   easier to support compared to a synchronous method call.
//! - **[Highly Expressive Pattern Set Matching]:**
//!   [`expression::ExpressionSet`] supports the full range of searching and
//!   matching operations available to individual [`expression::Expression`]
//!   instances. This is rare: most other regex engines, for example, do not
//!   support finding match offsets, and instead only report which expressions
//!   in a set matched.
20//! - **[Mutable State and String Searching]:** Vectorscan requires the user to
21//!   explicitly provide a "scratch" space with [`state::Scratch`] to each
22//!   search method. This state is not very large, but most other regex engines
23//!   attempt to present an interface without any mutable state, even if
24//!   internally they use constructions like lazy DFAs.
25//!
26//! [vectorscan]: https://github.com/VectorCamp/vectorscan
27//! [hyperscan]: https://github.com/intel/hyperscan
28//! [Vectorscan Callback API]: crate::matchers#vectorscan-callback-api
29//! [Highly Expressive Pattern Set Matching]: crate::expression
30//! [Mutable State and String Searching]: crate::state#mutable-state-and-string-searching
31//!
32//! # Feature Flags
33//! This library uses [`spack-rs`](https://docs.rs/spack-rs) to configure the build of the
34//! vectorscan codebase using [`spack`](https://spack.io), so it can be precise about which native
35//! dependencies it brings in:
36//! - **`"static"` (default):** link against vectorscan statically. Conflicts
37//!   with `"dynamic"`.
38//! - **`"dynamic"`:** link against vectorscan dynamically. Conflicts with
39//!   `"static"`, `"chimera"`, and `"alloc"`. Because of `spack`'s caching and
40//!   RPATH rewriting, the same dynamic library can be shared by every
41//!   dependency of this crate.
//! - **`"compiler"` (default):** whether to bring in the entire `libhs`
//!   library, or just `libhs_runtime`, which is unable to [compile patterns]
//!   but can [deserialize them]. Disabling this feature significantly reduces
//!   the size of the code added to the binary.
46//! - **`"chimera"`:** whether to link against PCRE and add in extra vectorscan
47//!   code to provide the chimera PCRE compatible search library. Conflicts with
48//!   `"dynamic"` and requires `"compiler"`.
49//!
50//! [compile patterns]: crate::database::Database::compile
51//! [deserialize them]: crate::database::SerializedDb::deserialize_db
52//!
53//! Feature flags are also used to gate certain functionality to minimize
54//! external dependencies when not in use:
55//! - **`"alloc"`:** hook into vectorscan's dynamic memory allocation with
56//!   [`crate::alloc`]. Requires `"static"` due to modifying process-global
57//!   hooks.
58//! - **`"stream"` (default):** supports stream parsing with [`crate::stream`].
59//! - **`"vectored"` (default):** supports vectored mode parsing with
60//!   [`Mode::VECTORED`].
61//! - **`"catch-unwind"` (default):** catches Rust panics in the match callback
62//!   before they bubble back up to vectorscan to produce undefined behavior.
63//! - **`"async"`:** provides an `async` interface over vectorscan's quirky
64//!   callback API using [`tokio`] as described in [Asynchronous String
65//!   Scanning].
66//! - **`"tokio-impls"`:** implements [`tokio::io::AsyncWrite`] for stream
67//!   parsers in [`crate::stream::channel::AsyncStreamWriter`].
68//!
69//! [Asynchronous String Scanning]: crate::state::Scratch#asynchronous-string-scanning
70//! [`Mode::VECTORED`]: crate::flags::Mode::VECTORED
71
72/* Warn for missing docs in general, and hard require crate-level docs. */
73#![warn(missing_docs)]
74#![deny(rustdoc::missing_crate_level_docs)]
75/* Make all doctests fail if they produce any warnings. */
76#![doc(test(attr(deny(warnings))))]
77/* Generate docs.rs info for feature switches. */
78#![cfg_attr(docsrs, feature(doc_cfg))]
79
80pub(crate) use vectorscan_sys::hs;
81
82#[cfg(feature = "alloc")]
83#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
84pub mod alloc;
85pub mod database;
86pub mod error;
87#[cfg(feature = "compiler")]
88#[cfg_attr(docsrs, doc(cfg(feature = "compiler")))]
89pub mod expression;
90#[cfg(feature = "compiler")]
91#[cfg_attr(docsrs, doc(cfg(feature = "compiler")))]
92pub mod flags;
93pub mod matchers;
94pub mod sources;
95pub mod state;
96#[cfg(feature = "stream")]
97#[cfg_attr(docsrs, doc(cfg(feature = "stream")))]
98pub mod stream;
99
100unsafe fn free_misc(p: *mut u8) {
101  let p = p as *mut std::os::raw::c_void;
102  cfg_if::cfg_if! {
103    if #[cfg(feature = "alloc")] {
104      alloc::misc_free_func(p);
105    } else {
106      libc::free(p);
107    }
108  }
109}
110
/* Chimera counterpart of `free_misc`: release a raw allocation through
 * `alloc::chimera::chimera_misc_free_func` when the "alloc" feature is
 * enabled, and through `libc::free` otherwise.
 *
 * # Safety
 * NOTE(review): presumably `p` must have come from the allocator paired with
 * the selected deallocator and must not be used after this call -- confirm
 * against callers. */
#[cfg(feature = "chimera")]
unsafe fn free_misc_chimera(p: *mut u8) {
  let untyped: *mut std::os::raw::c_void = p.cast();
  /* Exactly one of the two statements below survives conditional compilation. */
  #[cfg(feature = "alloc")]
  alloc::chimera::chimera_misc_free_func(untyped);
  #[cfg(not(feature = "alloc"))]
  libc::free(untyped);
}
122
/// Check whether the current system architecture can run vectorscan.
///
/// Vectorscan requires the Supplemental Streaming SIMD Extensions 3 (SSSE3)
/// instruction set. This function may be called on any x86 platform to find
/// out whether that instruction set is available.
///
/// Only the baseline requirement is checked: if vectorscan was built for a
/// more specific architecture (for example, one with the AVX2 instruction
/// set), those more advanced features are *not* tested here.
///
/// # Errors
/// Returns [`ArchError`](error::VectorscanRuntimeError::ArchError) if the
/// system does not support vectorscan.
///
/// # Dependency on `"compiler"` Feature
/// The underlying native method is not exported by the `hs_runtime` library,
/// so this function is currently only provided when the `"compiler"` feature
/// is enabled.
///
/// # Examples
/// ```
/// # fn main() -> Result<(), vectorscan::error::VectorscanRuntimeError> {
/// vectorscan::check_valid_platform()?;
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "compiler")]
#[cfg_attr(docsrs, doc(cfg(feature = "compiler")))]
pub fn check_valid_platform() -> Result<(), error::VectorscanRuntimeError> {
  /* SAFETY: the native call receives no arguments from us, so there are no
   * pointers or lifetimes to keep valid across the FFI boundary. */
  let status = unsafe { hs::hs_valid_platform() };
  error::VectorscanRuntimeError::from_native(status)
}
151
152/// Utility function for identifying this release version.
153///
154/// Returns a string containing the version number of this release build and the
155/// date of the build. It is allocated statically, so it does not need to
156/// be freed by the caller.
157///
158///```
159/// let v = vectorscan::vectorscan_version().to_str().unwrap();
160/// assert!(v.starts_with("5.4.11 "));
161/// ```
162pub fn vectorscan_version() -> &'static std::ffi::CStr {
163  unsafe { std::ffi::CStr::from_ptr(hs::hs_version()) }
164}
165
/// Identify the chimera release this crate was built against.
///
/// The returned string holds the version number of this release build along
/// with the date of the build. It is allocated statically, so the caller does
/// not need to (and must not) free it.
///
/// # Examples
/// ```
/// let v = vectorscan::chimera_version().to_str().unwrap();
/// assert!(v.starts_with("5.4.11 "));
/// ```
#[cfg(feature = "chimera")]
#[cfg_attr(docsrs, doc(cfg(feature = "chimera")))]
pub fn chimera_version() -> &'static std::ffi::CStr {
  /* SAFETY: the native call receives no arguments from us. */
  let raw_version = unsafe { hs::ch_version() };
  /* SAFETY: relies on the native library returning a statically allocated,
   * NUL-terminated string, as described in the doc comment above. */
  unsafe { std::ffi::CStr::from_ptr(raw_version) }
}
181
#[cfg(feature = "async")]
mod async_utils {
  use std::{
    pin::Pin,
    task::{Context, Poll},
  };

  use futures_core::stream::Stream;
  use tokio::sync::mpsc;

  /// Adapter that exposes an unbounded tokio channel receiver as a [`Stream`].
  ///
  /// Reimplementation of `tokio_stream::wrappers::UnboundedReceiverStream`.
  #[derive(Debug)]
  #[repr(transparent)]
  pub struct UnboundedReceiverStream<T>(pub mpsc::UnboundedReceiver<T>);

  impl<T> Stream for UnboundedReceiverStream<T> {
    type Item = T;

    /// Forward the poll directly to the wrapped receiver's `poll_recv()`.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<T>> {
      let receiver = &mut self.get_mut().0;
      receiver.poll_recv(cx)
    }
  }
}