vectorscan/lib.rs
1/* Copyright 2022-2023 Danny McClanahan */
2/* SPDX-License-Identifier: BSD-3-Clause */
3
4//! Wrapper for the vectorscan C regex library.
5//!
6//! # Quirks
7//! The [vectorscan] library (originally [hyperscan], from Intel) supports
8//! high-performance pattern matching using a subset of PCRE syntax. It was
9//! originally written for extremely low-latency network traffic monitoring, so
10//! it has some interface quirks that may be unfamiliar:
11//! - **[Vectorscan Callback API]:** Matches are "returned" to the user when
12//! vectorscan executes a user-provided C ABI method call, so overlapping
13//! matches and other interactive feedback with the matching engine are much
14//! easier to support compared to a synchronous method call.
//! - **[Highly Expressive Pattern Set Matching]:** [`expression::ExpressionSet`]
//!   supports the full range of searching and matching operations available to
//!   individual [`expression::Expression`] instances. This is rare: most other
//!   regex engines do not report match offsets for pattern sets, but instead
//!   only report which expressions in the set matched.
20//! - **[Mutable State and String Searching]:** Vectorscan requires the user to
21//! explicitly provide a "scratch" space with [`state::Scratch`] to each
22//! search method. This state is not very large, but most other regex engines
23//! attempt to present an interface without any mutable state, even if
24//! internally they use constructions like lazy DFAs.
25//!
26//! [vectorscan]: https://github.com/VectorCamp/vectorscan
27//! [hyperscan]: https://github.com/intel/hyperscan
28//! [Vectorscan Callback API]: crate::matchers#vectorscan-callback-api
29//! [Highly Expressive Pattern Set Matching]: crate::expression
30//! [Mutable State and String Searching]: crate::state#mutable-state-and-string-searching
31//!
32//! # Feature Flags
33//! This library uses [`spack-rs`](https://docs.rs/spack-rs) to configure the build of the
34//! vectorscan codebase using [`spack`](https://spack.io), so it can be precise about which native
35//! dependencies it brings in:
36//! - **`"static"` (default):** link against vectorscan statically. Conflicts
37//! with `"dynamic"`.
38//! - **`"dynamic"`:** link against vectorscan dynamically. Conflicts with
39//! `"static"`, `"chimera"`, and `"alloc"`. Because of `spack`'s caching and
40//! RPATH rewriting, the same dynamic library can be shared by every
41//! dependency of this crate.
//! - **`"compiler"` (default):** whether to bring in the entire `libhs`
//!   library, or just `libhs_runtime`, which is unable to [compile patterns]
//!   but can [deserialize them]. Disabling this feature significantly reduces
//!   the size of the code added to the binary.
46//! - **`"chimera"`:** whether to link against PCRE and add in extra vectorscan
47//! code to provide the chimera PCRE compatible search library. Conflicts with
48//! `"dynamic"` and requires `"compiler"`.
49//!
50//! [compile patterns]: crate::database::Database::compile
51//! [deserialize them]: crate::database::SerializedDb::deserialize_db
52//!
53//! Feature flags are also used to gate certain functionality to minimize
54//! external dependencies when not in use:
55//! - **`"alloc"`:** hook into vectorscan's dynamic memory allocation with
56//! [`crate::alloc`]. Requires `"static"` due to modifying process-global
57//! hooks.
58//! - **`"stream"` (default):** supports stream parsing with [`crate::stream`].
59//! - **`"vectored"` (default):** supports vectored mode parsing with
60//! [`Mode::VECTORED`].
61//! - **`"catch-unwind"` (default):** catches Rust panics in the match callback
62//! before they bubble back up to vectorscan to produce undefined behavior.
63//! - **`"async"`:** provides an `async` interface over vectorscan's quirky
64//! callback API using [`tokio`] as described in [Asynchronous String
65//! Scanning].
66//! - **`"tokio-impls"`:** implements [`tokio::io::AsyncWrite`] for stream
67//! parsers in [`crate::stream::channel::AsyncStreamWriter`].
68//!
69//! [Asynchronous String Scanning]: crate::state::Scratch#asynchronous-string-scanning
70//! [`Mode::VECTORED`]: crate::flags::Mode::VECTORED
71
72/* Warn for missing docs in general, and hard require crate-level docs. */
73#![warn(missing_docs)]
74#![deny(rustdoc::missing_crate_level_docs)]
75/* Make all doctests fail if they produce any warnings. */
76#![doc(test(attr(deny(warnings))))]
77/* Generate docs.rs info for feature switches. */
78#![cfg_attr(docsrs, feature(doc_cfg))]
79
80pub(crate) use vectorscan_sys::hs;
81
82#[cfg(feature = "alloc")]
83#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
84pub mod alloc;
85pub mod database;
86pub mod error;
87#[cfg(feature = "compiler")]
88#[cfg_attr(docsrs, doc(cfg(feature = "compiler")))]
89pub mod expression;
90#[cfg(feature = "compiler")]
91#[cfg_attr(docsrs, doc(cfg(feature = "compiler")))]
92pub mod flags;
93pub mod matchers;
94pub mod sources;
95pub mod state;
96#[cfg(feature = "stream")]
97#[cfg_attr(docsrs, doc(cfg(feature = "stream")))]
98pub mod stream;
99
100unsafe fn free_misc(p: *mut u8) {
101 let p = p as *mut std::os::raw::c_void;
102 cfg_if::cfg_if! {
103 if #[cfg(feature = "alloc")] {
104 alloc::misc_free_func(p);
105 } else {
106 libc::free(p);
107 }
108 }
109}
110
/// Free the allocation at `p` with whichever chimera deallocator this build
/// selects.
///
/// When the `"alloc"` feature is enabled the pointer is forwarded to
/// `alloc::chimera::chimera_misc_free_func`; otherwise it is handed to
/// `libc::free`. Only compiled when the `"chimera"` feature is enabled.
///
/// # Safety
/// `p` is passed to the deallocator without any checks, so the caller is
/// responsible for it being something that deallocator may free (presumably a
/// live allocation produced by the matching allocator -- confirm against call
/// sites).
#[cfg(feature = "chimera")]
unsafe fn free_misc_chimera(p: *mut u8) {
  /* Both deallocators take an untyped pointer, so erase the element type. */
  let untyped = p.cast::<std::ffi::c_void>();

  /* Exactly one of the two statements below survives conditional compilation. */
  #[cfg(feature = "alloc")]
  alloc::chimera::chimera_misc_free_func(untyped);

  #[cfg(not(feature = "alloc"))]
  libc::free(untyped);
}
122
/// Check whether the current system is able to run vectorscan.
///
/// Vectorscan requires the Supplemental Streaming SIMD Extensions 3
/// instruction set. This function may be called on any x86 platform to find
/// out whether the system provides it.
///
/// More advanced instruction sets (for example AVX2) are *not* tested for,
/// even if vectorscan has been built for a more specific architecture.
///
/// # Errors
/// Returns [`ArchError`](error::VectorscanRuntimeError::ArchError) if the
/// system does not support vectorscan.
///
/// # Dependency on `"compiler"` Feature
/// The underlying native method is not available in the `hs_runtime` library,
/// so this function currently cannot be provided without enabling the
/// `"compiler"` feature.
///
/// ```
/// # fn main() -> Result<(), vectorscan::error::VectorscanRuntimeError> {
/// vectorscan::check_valid_platform()?;
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "compiler")]
#[cfg_attr(docsrs, doc(cfg(feature = "compiler")))]
pub fn check_valid_platform() -> Result<(), error::VectorscanRuntimeError> {
  /* The native call takes no arguments; only its status code is inspected. */
  let status = unsafe { hs::hs_valid_platform() };
  error::VectorscanRuntimeError::from_native(status)
}
151
152/// Utility function for identifying this release version.
153///
154/// Returns a string containing the version number of this release build and the
155/// date of the build. It is allocated statically, so it does not need to
156/// be freed by the caller.
157///
158///```
159/// let v = vectorscan::vectorscan_version().to_str().unwrap();
160/// assert!(v.starts_with("5.4.11 "));
161/// ```
162pub fn vectorscan_version() -> &'static std::ffi::CStr {
163 unsafe { std::ffi::CStr::from_ptr(hs::hs_version()) }
164}
165
/// Report the version of the chimera library this crate was built against.
///
/// The returned string contains the version number of this release build
/// followed by the date of the build. It is allocated statically, so the
/// caller does not need to free it.
///
/// ```
/// let v = vectorscan::chimera_version().to_str().unwrap();
/// assert!(v.starts_with("5.4.11 "));
/// ```
#[cfg(feature = "chimera")]
#[cfg_attr(docsrs, doc(cfg(feature = "chimera")))]
pub fn chimera_version() -> &'static std::ffi::CStr {
  /* SAFETY: relies on the native library's contract that the returned pointer
   * refers to a statically allocated C string, which is what justifies the
   * 'static lifetime of the result. */
  unsafe {
    let version_ptr = hs::ch_version();
    std::ffi::CStr::from_ptr(version_ptr)
  }
}
181
/* Helpers that are only needed when the async interface is compiled in. */
#[cfg(feature = "async")]
mod async_utils {
  use std::pin::Pin;
  use std::task::{Context, Poll};

  use futures_core::stream::Stream;
  use tokio::sync::mpsc;

  /* Reimplementation of tokio_stream::wrappers::UnboundedReceiverStream:
   * a thin adapter exposing an unbounded mpsc receiver as a `Stream`. */
  #[derive(Debug)]
  #[repr(transparent)]
  pub struct UnboundedReceiverStream<T>(pub mpsc::UnboundedReceiver<T>);

  impl<T> Stream for UnboundedReceiverStream<T> {
    type Item = T;

    /* Polling the stream is exactly polling the wrapped receiver. */
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
      self.get_mut().0.poll_recv(cx)
    }
  }
}