hyperscan_tokio/lib.rs
1//! # hyperscan-tokio
2//!
3//! The most complete high-performance async regular expression matching library for Rust,
4//! featuring both Hyperscan/VectorScan and Chimera (PCRE with capture groups) support.
5//!
6//! ## Core Features
7//!
8//! - **Dual Engine Support**: Both Hyperscan and Chimera in one library
9//! - **PCRE Pattern Support**: Full PCRE syntax with capture groups via Chimera
10//! - **Async-first design**: Built for Tokio with async/await support
11//! - **High performance**: Leverages Hyperscan/VectorScan's SIMD acceleration
12//! - **Multiple scanning modes**: Block, streaming, and vectored scanning
13//! - **Thread-safe**: Databases can be shared across threads
14//! - **Memory efficient**: Scratch space pooling and zero-copy operations
15//!
16//! ## Why hyperscan-tokio?
17//!
18//! This is the only Rust library that provides both:
19//! 1. **Hyperscan**: For blazing-fast multi-pattern matching
20//! 2. **Chimera**: For PCRE-compatible patterns with capture group support
21//!
22//! Whether you need simple pattern matching or complex PCRE expressions with
23//! capture groups, this library has you covered.
24//!
25//! ## Optional Features
26//!
27//! - `chimera`: Enable PCRE-compatible patterns with capture groups
28//! - `jemalloc`/`mimalloc`: Alternative memory allocators
29//! - `arrow`: Apache Arrow integration for batch processing
//! - `simd-accel`: SIMD acceleration features
//! - `metrics`: Metrics and observability support
31//!
32//! ## Quick Start
33//!
34//! ### Hyperscan - Fast Multi-Pattern Matching
35//!
36//! ```rust,no_run
37//! use hyperscan_tokio::prelude::*;
38//!
39//! # async fn example() -> Result<()> {
40//! // Build a database with multiple patterns
41//! let db = DatabaseBuilder::new()
42//! .add(Pattern::new(r"\d+").id(1).build()?)
43//! .add(Pattern::new(r"[a-z]+").id(2).build()?)
44//! .build()?;
45//!
46//! // Create a scanner
47//! let scanner = Scanner::new(db)?;
48//!
49//! // Scan data - finds all patterns simultaneously
50//! let matches = scanner.scan("abc 123 def 456").await?;
51//! # Ok(())
52//! # }
53//! ```
54//!
55//! ### Chimera - PCRE Patterns with Capture Groups
56//!
57//! To use Chimera, enable the `chimera` feature in your `Cargo.toml`:
58//!
59//! ```toml
60//! [dependencies]
61//! hyperscan-tokio = { version = "0.1", features = ["chimera"] }
62//! ```
63//!
64//! ```rust,no_run
65//! #[cfg(feature = "chimera")]
66//! use hyperscan_tokio::prelude::*;
67//!
68//! #[cfg(feature = "chimera")]
69//! # fn example() -> Result<()> {
70//! // Compile a PCRE pattern with named capture groups
71//! let chimera = Chimera::compile(
72//! r"(?P<user>\w+)@(?P<domain>\w+\.\w+)",
73//! Flags::empty(),
74//! Mode::BLOCK
75//! )?;
76//!
77//! // Scan and extract capture groups
78//! chimera.scan(b"Contact: alice@example.com", |m| {
79//! if let Some(user) = m.group_by_name("user") {
80//! println!("User: {:?}", user.as_str(m.data));
81//! }
82//! if let Some(domain) = m.group_by_name("domain") {
83//! println!("Domain: {:?}", domain.as_str(m.data));
84//! }
85//! MatchControl::Continue
86//! })?;
87//! # Ok(())
88//! # }
89//! # #[cfg(not(feature = "chimera"))]
90//! # fn example() {}
91//! ```
92//!
93//! ## Performance
94//!
95//! This library achieves scanning speeds of 20+ GB/s on modern hardware:
96//!
97//! ```text
98//! scanning_throughput/1048576 time: [45.2 µs 45.8 µs 46.4 µs]
99//! thrpt: [21.5 GiB/s 21.9 GiB/s 22.1 GiB/s]
100//! ```
101//!
102//! ## Working with Patterns
103//!
104//! ### Pattern Building
105//!
106//! ```rust,no_run
107//! use hyperscan_tokio::{Pattern, Flags};
108//!
109//! // Simple pattern
110//! let p1 = Pattern::new(r"\d+").id(1).build()?;
111//!
112//! // Pattern with flags
113//! let p2 = Pattern::new("test")
114//! .id(2)
115//! .flags(Flags::CASELESS | Flags::MULTILINE)
116//! .build()?;
117//!
118//! // Extended pattern with constraints
119//! let p3 = Pattern::new(r"secret")
120//! .id(3)
//! .min_offset(100) // Match must end at or after byte 100
//! .max_offset(500) // Match must end at or before byte 500
123//! .min_length(10) // Match must be at least 10 bytes
124//! .build()?;
125//!
126//! // Pattern with edit distance
127//! let p4 = Pattern::new("password")
128//! .id(4)
129//! .edit_distance(2) // Allow up to 2 character edits
130//! .build()?;
131//! # Ok::<(), hyperscan_tokio::Error>(())
132//! ```
133//!
134//! ### Database Compilation
135//!
136//! ```rust,no_run
137//! use hyperscan_tokio::{DatabaseBuilder, Mode};
138//!
139//! // Block mode (for complete data)
140//! let block_db = DatabaseBuilder::new()
141//! .add_pattern(pattern1)
142//! .add_pattern(pattern2)
143//! .mode(Mode::BLOCK)
144//! .build()?;
145//!
146//! // Stream mode (for data streams)
147//! let stream_db = DatabaseBuilder::new()
148//! .add_pattern(pattern)
149//! .mode(Mode::STREAM)
150//! .build()?;
151//!
152//! // Vectored mode (for scattered data)
153//! let vectored_db = DatabaseBuilder::new()
154//! .add_pattern(pattern)
155//! .mode(Mode::VECTORED)
156//! .build()?;
157//! # Ok::<(), hyperscan_tokio::Error>(())
158//! ```
159//!
160//! ## Scanning Modes
161//!
162//! ### Block Scanning
163//!
164//! For scanning complete data blocks:
165//!
166//! ```rust,no_run
167//! # use hyperscan_tokio::prelude::*;
168//! # async fn example() -> Result<()> {
169//! let scanner = Scanner::new(database)?;
170//!
171//! // Scan string data
172//! let matches = scanner.scan("text to scan").await?;
173//!
174//! // Scan bytes
175//! let matches = scanner.scan_bytes(b"binary data").await?;
176//!
177//! // Zero-copy with Bytes
178//! let data = bytes::Bytes::from_static(b"zero-copy scan");
179//! let matches = scanner.scan_bytes(data).await?;
180//! # Ok(())
181//! # }
182//! ```
183//!
184//! ### Stream Scanning
185//!
186//! For scanning data streams:
187//!
188//! ```rust,no_run
189//! # use hyperscan_tokio::prelude::*;
190//! # async fn example() -> Result<()> {
191//! let scanner = StreamScanner::new(database)?;
192//!
193//! // Scan a Tokio stream
194//! let stream = tokio::fs::File::open("large_file.txt").await?;
195//! let mut match_stream = scanner.scan_stream(stream).await?;
196//!
197//! while let Some(m) = match_stream.next().await {
198//! let match_result = m?;
199//! println!("Found match: {:?}", match_result);
200//! }
201//! # Ok(())
202//! # }
203//! ```
204//!
205//! ### Vectored Scanning
206//!
207//! For scanning multiple non-contiguous buffers:
208//!
209//! ```rust,no_run
210//! # use hyperscan_tokio::prelude::*;
211//! # async fn example() -> Result<()> {
212//! let scanner = VectoredScanner::new(database)?;
213//!
//! let buffers = vec![
//! &b"first buffer"[..],
//! &b"second buffer"[..],
//! &b"third buffer"[..],
//! ];
219//!
220//! let matches = scanner.scan_vectored(&buffers).await?;
221//! # Ok(())
222//! # }
223//! ```
224//!
225//! ## Advanced Features
226//!
227//! ### Worker Pool for Parallel Processing
228//!
229//! ```rust,no_run
230//! # use hyperscan_tokio::prelude::*;
231//! # async fn example() -> Result<()> {
232//! let pool = WorkerPoolBuilder::default()
233//! .num_workers(8)
234//! .queue_size(10_000)
235//! .build(database)?;
236//!
237//! // Process many items in parallel
238//! let jobs: Vec<ScanJob> = data_items.into_iter()
239//! .map(|data| ScanJob { id: generate_id(), data })
240//! .collect();
241//!
242//! let results = pool.scan_batch(jobs).await?;
243//! # Ok(())
244//! # }
245//! ```
246//!
247//! ### Hot-Reloadable Patterns
248//!
249//! ```rust,no_run
250//! # use hyperscan_tokio::prelude::*;
251//! # async fn example() -> Result<()> {
252//! let reloadable = ReloadableDatabase::new(initial_database);
253//!
254//! // In another task, reload patterns without stopping
255//! tokio::spawn(async move {
256//! let new_db = load_new_patterns().await?;
257//! reloadable.reload(new_db).await?;
258//! });
259//!
260//! // Scanning continues with new patterns automatically
261//! let scanner = Scanner::new(reloadable.current())?;
262//! # Ok(())
263//! # }
264//! ```
265//!
266//! ### Database Caching
267//!
268//! ```rust,no_run
269//! # use hyperscan_tokio::prelude::*;
270//! # async fn example() -> Result<()> {
271//! let cache = DatabaseCache::builder()
272//! .max_size(100)
273//! .ttl(Duration::from_secs(3600))
274//! .build();
275//!
276//! // Patterns are compiled only once and cached
277//! let db = cache.get_or_compile(patterns, || {
278//! DatabaseBuilder::new()
279//! .patterns(patterns)
280//! .build()
281//! }).await?;
282//! # Ok(())
283//! # }
284//! ```
285//!
286//! ## Error Handling
287//!
288//! All operations return `Result<T, Error>` with detailed error information:
289//!
290//! ```rust,no_run
291//! # use hyperscan_tokio::prelude::*;
292//! match Pattern::new("[invalid").build() {
293//! Ok(pattern) => { /* use pattern */ },
294//! Err(Error::Compile { message, pattern_id, position }) => {
295//! println!("Compile error in pattern {}: {} at position {:?}",
296//! pattern_id.unwrap_or(0), message, position);
297//! },
298//! Err(e) => println!("Other error: {}", e),
299//! }
300//! ```
301//!
302//! ## Thread Safety
303//!
304//! - `Database`: `Send + Sync` - can be shared across threads
305//! - `Scanner`: `Send + Clone` - can be cloned for each thread
306//! - `Scratch`: Thread-local - each thread needs its own
307//!
308//! ## Memory Management
309//!
310//! The library supports custom allocators for optimal performance:
311//!
312//! ```toml
313//! # Use jemalloc (default)
314//! hyperscan-tokio = { version = "0.1", features = ["jemalloc"] }
315//!
316//! # Or use mimalloc
317//! hyperscan-tokio = { version = "0.1", features = ["mimalloc"] }
318//! ```
319
320#![warn(missing_docs)]
321#![warn(rustdoc::broken_intra_doc_links)]
322#![cfg_attr(docsrs, feature(doc_cfg))]
323
324pub mod allocator;
325pub mod builder;
326pub mod cache;
327
328/// Chimera - PCRE-compatible pattern matching with capture groups
329#[cfg(feature = "chimera")]
330#[cfg_attr(docsrs, doc(cfg(feature = "chimera")))]
331pub mod chimera;
332
333#[cfg(feature = "chimera")]
334mod chimera_analyzer;
335#[cfg(feature = "chimera")]
336mod chimera_builder;
337#[cfg(feature = "chimera")]
338mod chimera_scanner;
339#[cfg(feature = "chimera")]
340mod chimera_scratch;
341
342pub mod database;
343pub mod error;
344pub mod expression;
345pub mod features;
346pub mod literal;
347pub mod pattern;
348pub mod scanner;
349pub mod scratch_pool;
350pub mod stream;
351pub mod vectored;
352pub mod worker_pool;
353pub mod zero_copy;
354
355/// Metrics and observability support
356#[cfg(feature = "metrics")]
357#[cfg_attr(docsrs, doc(cfg(feature = "metrics")))]
358pub mod metrics;
359
360// Public exports
361pub use builder::{DatabaseBuilder, Pattern, PatternBuilder};
362pub use cache::{DatabaseCache, DatabaseCacheBuilder, CacheKey, CacheStats};
363#[cfg(feature = "chimera")]
364pub use chimera::{Chimera, Builder as ChimeraBuilder, Match as ChimeraMatch, ErrorEventType, MatchControl};
365pub use pattern::{CaptureGroup, PatternInfo};
366pub use database::{Database, DatabaseInfo, ExpressionInfo, ReloadableDatabase};
367pub use error::{Error, Result};
368pub use expression::{ExpressionContext, ExpressionContextBuilder};
369pub use literal::{Literal, LiteralBuilder, LiteralFlags, LiteralDatabaseBuilder};
370pub use scanner::{Match, Scanner, BlockScanner};
371pub use scratch_pool::{Scratch, ScratchPool};
372pub use stream::{StreamScanner, StreamState, MatchStream};
373pub use vectored::VectoredScanner;
374pub use worker_pool::{WorkerPool, WorkerPoolBuilder, ScanJob, ScanResult};
375pub use zero_copy::ScanInput;
376
377#[cfg(feature = "metrics")]
378pub use metrics::{Metrics, MetricsCollector};
379
380// Re-export commonly used types from sys crate
381pub use hyperscan_tokio_sys::{Flags, Mode, Platform};
382
383// Re-export feature detection
384pub use features::{supported_features, SupportedFeatures, CpuArchitecture};
385
386/// Prelude module for convenient imports
387///
388/// # Example
389///
390/// ```rust
391/// use hyperscan_tokio::prelude::*;
392/// ```
393pub mod prelude {
394 pub use crate::{
395 DatabaseBuilder, Pattern, PatternBuilder,
396 Database, DatabaseInfo, ExpressionInfo, ReloadableDatabase,
397 Scanner, BlockScanner, Match,
398 StreamScanner, VectoredScanner,
399 WorkerPool, WorkerPoolBuilder,
400 Literal, LiteralBuilder, LiteralDatabaseBuilder,
401 Scratch, ScratchPool,
402 CaptureGroup,
403 ExpressionContext, ExpressionContextBuilder,
404 Error, Result,
405 Flags, Mode, Platform,
406 supported_features,
407 };
408
409 #[cfg(feature = "chimera")]
410 pub use crate::{
411 Chimera, ChimeraBuilder, ChimeraMatch,
412 ErrorEventType, MatchControl,
413 };
414
415 #[cfg(feature = "metrics")]
416 pub use crate::{Metrics, MetricsCollector};
417}