Skip to main content

keyhog_scanner/
simd.rs

//! Vectorscan/Hyperscan SIMD regex backend for high-throughput scanning.
//!
//! When the `simd` feature is enabled, this replaces the AC+fallback approach
//! with Hyperscan's simultaneous multi-pattern matching using SIMD instructions.
//! This gives a 3-5x throughput improvement. Accuracy is identical: the same
//! patterns run on a faster engine.
6
7#[cfg(feature = "simd")]
8pub(crate) mod backend {
9    use hyperscan::{
10        Block as BlockMode, BlockDatabase, Builder, Matching, Pattern, PatternFlags, Patterns,
11        Scratch,
12    };
13    use std::path::PathBuf;
14    use std::sync::atomic::{AtomicU64, Ordering};
15
16    /// Target number of patterns per compile shard. The cold compile is a
17    /// single serial C-side NFA/DFA build whose wall-clock scales ~linearly
18    /// with the pattern count, so the shard COUNT is sized to keep each shard
19    /// near this many patterns: `shards = ceil(n / TARGET_PATTERNS_PER_SHARD)`,
20    /// capped at the core count. Sizing by patterns-per-shard (rather than a
21    /// fixed shard count) is what flattens the build's scaling: as the corpus
22    /// grows, the number of shards grows while each shard's serial build stays
23    /// ~constant, so on a many-core box "double the patterns" is absorbed by
24    /// spinning up more parallel shards instead of doubling each shard's work.
25    /// ~80 was chosen empirically: on the ~900-detector corpus (~1.7k compiled
26    /// patterns) it lands the full set at ~21 shards and a half set at ~11, so
27    /// BOTH sit comfortably under a 32-core box's one-wave budget AND carry a
28    /// near-equal per-shard pattern count - which flattens the full/half
29    /// cold-compile ratio to ~1.3x (vs ~1.9-2.0x serial). A smaller target
30    /// (e.g. 40) pushes the full set up against the core count, making its
31    /// per-shard size diverge from the half set's and the ratio worse; a much
32    /// larger target gives up parallelism. Per-shard builds stay ~150-190ms
33    /// (vs ~1600ms for the serial all-patterns build). Each shard is
34    /// disk-cached independently (keyed by the SHA-256 of its own pattern
35    /// list), so the warm path stays a deserialize-only load. Overridable at
36    /// runtime via `KEYHOG_SHARD_TARGET` for hardware-specific tuning.
37    const TARGET_PATTERNS_PER_SHARD: usize = 80;
38
39    /// Hard ceiling on shard count, so a pathologically large detector set on a
40    /// 128-core box cannot spawn an unbounded number of databases (each costs a
41    /// scan-time dispatch). At this cap the per-shard size grows again, but the
42    /// real corpus (~900 patterns) sits at ~23 shards, well under it.
43    const MAX_COMPILE_SHARDS: usize = 64;
44
45    /// Monotonic per-process id source so each `HsScanner` instance gets a
46    /// distinct key for its thread-local scratch cache (below). Multiple
47    /// scanners in one process must not hand each other a scratch allocated
48    /// against a different database.
49    static SCANNER_ID_SEQ: AtomicU64 = AtomicU64::new(0);
50
51    /// One compiled shard: its database plus a Mutex-guarded scratch pool. Each
52    /// `Scratch` is tied to exactly one `BlockDatabase`, so the pools are
53    /// per-shard.
54    struct Shard {
55        db: BlockDatabase,
56        scratch_pool: parking_lot::Mutex<Vec<Scratch>>,
57    }
58
59    /// Compiled Hyperscan databases for all detector patterns, sharded across
60    /// cores at compile time.
61    ///
62    /// Thread-safe: every database is immutable after compilation and the
63    /// scratch pools are Mutex-guarded. The public scan/lookup surface is
64    /// unchanged from the single-database version - `pattern_info`/
65    /// `pattern_count` still index a single global `pattern_map` keyed by the
66    /// HS pattern id, because each shard's patterns carry their ORIGINAL global
67    /// id, so a match from any shard maps back through the same table and the
68    /// scan output is the union of all shards in original-byte space.
69    ///
70    /// # Examples
71    ///
72    /// ```rust,ignore
73    /// use keyhog_scanner::simd::backend::HsScanner;
74    ///
75    /// let _scanner = HsScanner::compile(&[(0, 0, "demo_[A-Z0-9]{8}", false)])?;
76    /// ```
77    pub struct HsScanner {
78        /// Independently-compiled shard databases. Their union over a scan is
79        /// exactly the set of matches a single all-patterns database would
80        /// produce (Hyperscan match ids are the global pattern ids, which are
81        /// disjoint across shards).
82        shards: Vec<Shard>,
83        /// Map from HS pattern ID to (detector_index, pattern_index, has_group).
84        /// Global and shared across shards - unchanged from the single-db build.
85        pattern_map: Vec<(usize, usize, bool)>,
86        /// Distinct id for this scanner instance, used to key the thread-local
87        /// per-shard scratch cache so two scanners never share scratches.
88        scanner_id: u64,
89    }
90
91    // SAFETY: BlockDatabase is immutable after compilation and safe to share.
92    // Scratch pools are Mutex-guarded. Individual Scratch objects are only used
93    // by one thread at a time (taken from pool/thread-local, returned after use).
94    unsafe impl Send for HsScanner {}
95    unsafe impl Sync for HsScanner {}
96
97    impl HsScanner {
98        /// Compile patterns into a Hyperscan database.
99        ///
100        /// # Examples
101        ///
102        /// ```rust,ignore
103        /// use keyhog_scanner::simd::backend::HsScanner;
104        ///
105        /// let _scanner = HsScanner::compile(&[(0, 0, "demo_[A-Z0-9]{8}", false)])?;
106        /// ```
107        pub fn compile(
108            patterns: &[(usize, usize, &str, bool)],
109        ) -> Result<(Self, Vec<usize>), String> {
110            let mut hs_pats = Vec::new();
111            let mut pattern_map = Vec::new();
112            let mut unsupported = Vec::new();
113
114            for (i, &(det_idx, pat_idx, regex, has_group)) in patterns.iter().enumerate() {
115                // Skip patterns that are too long for Hyperscan (>500 chars)
116                if regex.len() > 500 {
117                    unsupported.push(i);
118                    continue;
119                }
120                // CASELESS only. No SOM_LEFTMOST - it causes "Pattern too large"
121                // on complex regexes. Match positions extracted by regex crate.
122                let flags = PatternFlags::CASELESS;
123                match Pattern::with_flags(regex, flags) {
124                    Ok(mut p) => {
125                        p.id = Some(pattern_map.len());
126                        hs_pats.push(p);
127                        pattern_map.push((det_idx, pat_idx, has_group));
128                    }
129                    Err(_) => {
130                        unsupported.push(i);
131                    }
132                }
133            }
134
135            if hs_pats.is_empty() {
136                return Err("no patterns compiled".into());
137            }
138
139            // Task 1c: Cache directory validation
140            let cache_dir = {
141                let dir = if let Ok(custom) = std::env::var("KEYHOG_CACHE_DIR") {
142                    let path = PathBuf::from(custom);
143                    let home = dirs::home_dir().ok_or("Fix: Could not determine HOME directory")?;
144                    // SAFETY: geteuid() is a trivial syscall with no memory
145                    // safety preconditions and always succeeds on Linux/macOS.
146                    let uid = unsafe { libc::geteuid() };
147                    let tmp_user_dir = PathBuf::from(format!("/tmp/keyhog-cache-{}", uid));
148
149                    if !path.starts_with(&home) && !path.starts_with(&tmp_user_dir) {
150                        return Err(format!(
151                            "Fix: KEYHOG_CACHE_DIR must be under {} or {}",
152                            home.display(),
153                            tmp_user_dir.display()
154                        ));
155                    }
156                    path
157                } else {
158                    // Persistent per-user cache so the ~1.7 s Hyperscan compile
159                    // is paid once per (machine, pattern-set, hyperscan version,
160                    // CPU features) - NOT once per reboot. The previous default
161                    // lived under /tmp, which most distros mount on tmpfs or
162                    // sweep on boot, so every reboot discarded the compiled DB
163                    // and the next scan ate the full cold-start again.
164                    // ~/.cache/keyhog (XDG_CACHE_HOME) survives reboots. Falls
165                    // back to the /tmp dir only when no home/cache directory is
166                    // resolvable (minimal containers, locked-down sandboxes).
167                    // SAFETY: see geteuid() above - trivial syscall.
168                    let uid = unsafe { libc::geteuid() };
169                    match dirs::cache_dir() {
170                        Some(cache) => cache.join("keyhog"),
171                        None => PathBuf::from(format!("/tmp/keyhog-cache-{}", uid)),
172                    }
173                };
174
175                if dir.exists() {
176                    let meta = std::fs::symlink_metadata(&dir)
177                        .map_err(|e| format!("Fix: Could not read cache dir metadata: {}", e))?;
178                    if meta.is_symlink() {
179                        return Err("Fix: KEYHOG_CACHE_DIR cannot be a symlink".into());
180                    }
181                    #[cfg(unix)]
182                    {
183                        use std::os::unix::fs::{MetadataExt, PermissionsExt};
184                        // SAFETY: `geteuid` is a thread-safe read-only
185                        // syscall that takes no arguments and cannot
186                        // fail. The Rust binding is `unsafe` only
187                        // because it crosses an FFI boundary.
188                        let uid = unsafe { libc::geteuid() };
189                        if meta.uid() != uid {
190                            return Err(
191                                "Fix: Cache directory is not owned by the current user".into()
192                            );
193                        }
194                        if meta.permissions().mode() & 0o777 != 0o700 {
195                            std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700))
196                                .map_err(|e| {
197                                    format!("Fix: Could not set cache dir permissions: {}", e)
198                                })?;
199                        }
200                    }
201                } else {
202                    std::fs::create_dir_all(&dir)
203                        .map_err(|e| format!("Fix: Could not create cache dir: {}", e))?;
204                    #[cfg(unix)]
205                    {
206                        use std::os::unix::fs::PermissionsExt;
207                        std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700))
208                            .map_err(|e| {
209                                format!("Fix: Could not set cache dir permissions: {}", e)
210                            })?;
211                    }
212                }
213                dir
214            };
215
216            // Cache key: SHA-256 of all pattern strings + environment metadata.
217            let cache_key = {
218                use sha2::{Digest, Sha256};
219                let mut h = Sha256::new();
220                for p in &hs_pats {
221                    h.update(p.expression.as_bytes());
222                    h.update([0]);
223                }
224
225                // Task 1a: include hyperscan library version, CPU features, target arch
226                h.update(hyperscan::version().to_string().as_bytes());
227                h.update(b"0.3.2"); // Pin hyperscan crate version
228
229                #[cfg(target_arch = "x86_64")]
230                {
231                    if is_x86_feature_detected!("avx512f") {
232                        h.update(b"avx512f");
233                    }
234                    if is_x86_feature_detected!("avx2") {
235                        h.update(b"avx2");
236                    }
237                    if is_x86_feature_detected!("sse4.2") {
238                        h.update(b"sse4.2");
239                    }
240                }
241                #[cfg(target_arch = "aarch64")]
242                {
243                    h.update(b"neon");
244                }
245                h.update(std::env::consts::ARCH.as_bytes());
246
247                hex::encode(h.finalize())
248            };
249            // ── Shard the pattern set and compile each shard in parallel ──
250            //
251            // The single serial `Builder::build` over the whole pattern set is
252            // the entire cold-compile cost (~99.7% of it; the rayon regex-
253            // validate phase upstream is ~5ms) and it scales ~linearly with the
254            // pattern count while every core but one idles. Splitting the
255            // patterns into K independent shards and building them on the rayon
256            // pool lets the idle cores absorb the work, so doubling the pattern
257            // count is bounded by the largest shard, not the sum. Each
258            // `Builder::build` is fully independent and CPU-bound; the match ids
259            // are the GLOBAL pattern ids (set on `Pattern.id` above), so a match
260            // from any shard maps back through the same `pattern_map` and the
261            // union of all shards' matches is exactly what a single all-patterns
262            // database would have produced - no recall change, only WHERE each
263            // pattern compiles.
264            let cores = std::thread::available_parallelism()
265                .map(|c| c.get())
266                .unwrap_or(1);
267            // Shard count: aim for ~TARGET patterns per shard, but cap at the
268            // core count so every shard runs in a SINGLE parallel wave. Two
269            // boundary behaviours matter for the full/half cold-compile ratio:
270            //
271            //   * Below the cap (small/medium corpus): shard COUNT scales with n
272            //     at a fixed per-shard size, so each `Builder::build` costs the
273            //     same and doubling the corpus is fully absorbed by spinning up
274            //     more parallel shards (flat).
275            //   * At the cap (large corpus, n > cores*TARGET): per-shard size
276            //     grows as n/cores. With TARGET tuned so the full corpus sits
277            //     right at the cap, a half-size corpus lands just below it at a
278            //     similar per-shard size, so the two stay within a small factor
279            //     and the ratio tracks the (sub-linear) per-shard build growth
280            //     rather than the pattern count.
281            //
282            // Letting shards exceed cores was measured to be WORSE: the build
283            // then runs ceil(shards/cores) work-stealing waves and the wall-clock
284            // quantizes at the core boundary (a corpus needing 42 shards on 32
285            // cores pays ~1.7x vs a half needing 21), so we stay within one wave.
286            let target = std::env::var("KEYHOG_SHARD_TARGET")
287                .ok()
288                .and_then(|v| v.parse::<usize>().ok())
289                .filter(|&v| v >= 1)
290                .unwrap_or(TARGET_PATTERNS_PER_SHARD);
291            let cap = cores.min(MAX_COMPILE_SHARDS).max(1);
292            let shard_count = hs_pats
293                .len()
294                .div_ceil(target)
295                .clamp(1, cap)
296                .min(hs_pats.len())
297                .max(1);
298
299            // LPT (longest-processing-time-first) bin-packing partition. The
300            // Hyperscan build time of a shard is dominated by its heaviest
301            // regexes (DFA state blow-up is super-linear in pattern length), so a
302            // naive round-robin that happens to land several long regexes in one
303            // shard makes that shard the wall-clock-determining straggler. Sort
304            // patterns by a cost proxy (expression length) descending and place
305            // each on the currently-lightest shard. This minimizes the MAX shard
306            // cost, so wall-clock ~ mean shard cost, which scales smoothly with
307            // patterns-per-shard - the property the full/half ratio test checks.
308            // Each shard keeps the patterns' original GLOBAL ids (set on
309            // `Pattern.id` above), so the union semantics are unchanged.
310            let mut order: Vec<usize> = (0..hs_pats.len()).collect();
311            order.sort_unstable_by_key(|&i| std::cmp::Reverse(hs_pats[i].expression.len()));
312            let mut shard_pats: Vec<Vec<Pattern>> = (0..shard_count).map(|_| Vec::new()).collect();
313            let mut shard_cost: Vec<u64> = vec![0; shard_count];
314            for &i in &order {
315                // Index of the lightest shard so far. `shard_count` is
316                // `.clamp(1, cap).max(1)` above, so `shard_cost` is never empty
317                // and `min_by_key` always yields `Some`; `unwrap_or(0)` keeps the
318                // path panic-free (shard 0 always exists) without a production
319                // `.expect`.
320                let lightest = shard_cost
321                    .iter()
322                    .enumerate()
323                    .min_by_key(|(_, &c)| c)
324                    .map(|(idx, _)| idx)
325                    .unwrap_or(0);
326                // Cost proxy: length plus a fixed per-pattern overhead so a shard
327                // with many short patterns is not treated as free.
328                shard_cost[lightest] += hs_pats[i].expression.len() as u64 + 16;
329                shard_pats[lightest].push(hs_pats[i].clone());
330            }
331
332            const CACHE_MAGIC: &[u8; 4] = b"KHHS";
333            const CACHE_VERSION: u32 = 1;
334
335            // Compile (or cache-load) every shard concurrently. Returns the
336            // built database and the global ids the shard had to drop (over-long
337            // / unsupported constructs) for the keyword-fallback reroute.
338            use rayon::prelude::*;
339            let shard_results: Vec<Result<(BlockDatabase, Vec<usize>), String>> = shard_pats
340                .into_par_iter()
341                .enumerate()
342                .map(|(shard_idx, pats)| {
343                    // Per-shard cache key: the shared env-metadata digest plus
344                    // this shard's own pattern strings, so each shard file is
345                    // independent and the warm path is a deserialize-only load.
346                    // The partition is deterministic for a given (pattern set,
347                    // shard_count), so the keys are stable across runs; a host
348                    // with a different shard_count simply produces different
349                    // per-shard keys (no stale-read collision).
350                    let shard_key = {
351                        use sha2::{Digest, Sha256};
352                        let mut h = Sha256::new();
353                        h.update(cache_key.as_bytes());
354                        h.update((shard_count as u64).to_le_bytes());
355                        h.update((shard_idx as u64).to_le_bytes());
356                        for p in &pats {
357                            h.update(p.expression.as_bytes());
358                            h.update([0]);
359                        }
360                        hex::encode(h.finalize())
361                    };
362                    let cache_path = cache_dir.join(format!("hs-{shard_key}.db"));
363
364                    // Try the per-shard disk cache first.
365                    if let Ok(bytes) = std::fs::read(&cache_path) {
366                        if bytes.len() > 8
367                            && &bytes[0..4] == CACHE_MAGIC
368                            && bytes[4..8].try_into().map(u32::from_le_bytes).unwrap_or(0)
369                                == CACHE_VERSION
370                        {
371                            use hyperscan::Serialized;
372                            let payload: &[u8] = &bytes[8..];
373                            if let Ok(db) = payload.deserialize::<BlockMode>() {
374                                tracing::info!(
375                                    cache = %cache_path.display(),
376                                    shard = shard_idx,
377                                    patterns = pats.len(),
378                                    "HS shard loaded from cache"
379                                );
380                                return Ok((db, Vec::new()));
381                            }
382                        }
383                    }
384
385                    // Cold: build this shard, then atomically persist it.
386                    let (db, dropped) = Self::compile_hs_db(&pats)?;
387                    if let Ok(ser) = db.serialize() {
388                        let mut data = Vec::with_capacity(ser.as_ref().len() + 8);
389                        data.extend_from_slice(CACHE_MAGIC);
390                        data.extend_from_slice(&CACHE_VERSION.to_le_bytes());
391                        data.extend_from_slice(ser.as_ref());
392                        let parent = cache_path
393                            .parent()
394                            .unwrap_or_else(|| std::path::Path::new("."));
395                        if let Ok(mut tmp) = tempfile::NamedTempFile::new_in(parent) {
396                            if std::io::Write::write_all(&mut tmp, &data).is_ok()
397                                && tmp.as_file().sync_all().is_ok()
398                            {
399                                if let Err(error) = tmp.persist(&cache_path) {
400                                    tracing::debug!(
401                                        cache = %cache_path.display(),
402                                        error = %error,
403                                        "HS shard cache persist failed; next run will recompile"
404                                    );
405                                }
406                            }
407                        }
408                        tracing::info!(cache = %cache_path.display(), shard = shard_idx, "HS shard cached");
409                    }
410                    Ok((db, dropped))
411                })
412                .collect();
413
414            // Assemble the shards; any single shard's compile error fails the
415            // whole build (matches the previous all-or-nothing contract).
416            let mut shards = Vec::with_capacity(shard_count);
417            for result in shard_results {
418                let (db, dropped) = result?;
419                unsupported.extend(dropped);
420                // Verify scratch allocation works once per shard; further
421                // scratches are allocated lazily per-thread on first scan.
422                let test_scratch = db
423                    .alloc_scratch()
424                    .map_err(|e| format!("hyperscan scratch: {e}"))?;
425                shards.push(Shard {
426                    db,
427                    scratch_pool: parking_lot::Mutex::new(vec![test_scratch]),
428                });
429            }
430
431            // The caller (`build_simd_scanner`) already logs
432            // `unsupported.len()` via tracing::info!, and consumers that
433            // need the count get the Vec returned alongside. No need to
434            // store a redundant copy on the scanner itself.
435            Ok((
436                Self {
437                    shards,
438                    pattern_map,
439                    scanner_id: SCANNER_ID_SEQ.fetch_add(1, Ordering::Relaxed),
440                },
441                unsupported,
442            ))
443        }
444
445        /// Build one shard's `BlockDatabase`, returning the database and the
446        /// GLOBAL pattern ids it had to drop (over-long or an unsupported
447        /// construct Hyperscan rejects only at build time). The dropped ids are
448        /// rerouted into the keyword fallback by the caller so the pattern is
449        /// never silently lost. Because sharding makes each shard far smaller
450        /// than the old single combined database, the size-limit retry below
451        /// almost never fires now - which strictly REDUCES the set of patterns
452        /// dropped for "Pattern too large", improving recall, never hurting it.
453        fn compile_hs_db(hs_pats: &[Pattern]) -> Result<(BlockDatabase, Vec<usize>), String> {
454            let mut attempts = hs_pats.to_vec();
455            let mut dropped: Vec<usize> = Vec::new();
456            let started = std::time::Instant::now();
457            let db: BlockDatabase = loop {
458                let patterns_obj = Patterns(std::mem::take(&mut attempts));
459                match Builder::build::<BlockMode>(&patterns_obj) {
460                    Ok(db) => break db,
461                    Err(_) if patterns_obj.0.len() > 100 => {
462                        // Reclaim ownership for the next attempt.
463                        attempts = patterns_obj.0;
464                        attempts.sort_by_key(|p| std::cmp::Reverse(p.expression.len()));
465                        let remove_count = attempts.len() / 10;
466                        for _ in 0..remove_count {
467                            if let Some(removed) = attempts.pop() {
468                                dropped.push(removed.id.unwrap_or(0));
469                            }
470                        }
471                        attempts.sort_by_key(|p| p.id.unwrap_or(0));
472                    }
473                    Err(e) => return Err(format!("hyperscan compile: {e}")),
474                }
475            };
476            tracing::info!(
477                patterns = hs_pats.len() - dropped.len(),
478                compile_ms = started.elapsed().as_millis(),
479                "HS shard compiled"
480            );
481            Ok((db, dropped))
482        }
483
484        /// Scan text and return `(hs_pattern_id, match_start, match_end)`.
485        /// Uses a scratch pool for thread-safety without per-call allocation.
486        ///
487        /// # Examples
488        ///
489        /// ```rust,ignore
490        /// use keyhog_scanner::simd::backend::HsScanner;
491        ///
492        /// let (scanner, _) = HsScanner::compile(&[(0, 0, "demo_[A-Z0-9]{8}", false)])?;
493        /// let _matches = scanner.scan(b"demo_ABC12345");
494        /// ```
495        pub fn scan(&self, text: &[u8]) -> Vec<(usize, usize, usize)> {
496            // Thread-local per-(scanner, shard) scratch: zero mutex contention
497            // on parallel scans. Each rayon thread keeps one scratch per shard,
498            // reused across all files it processes. Keyed by `scanner_id` so two
499            // scanners in one process never hand each other a scratch allocated
500            // against a different database; keyed by shard so each shard's
501            // immutable database gets its own. No lock, no allocation after
502            // first touch.
503            thread_local! {
504                static TLS: std::cell::RefCell<
505                    std::collections::HashMap<(u64, usize), Scratch>,
506                > = std::cell::RefCell::new(std::collections::HashMap::new());
507            }
508
509            // The match callback pushes the GLOBAL pattern id (set on
510            // `Pattern.id` at compile), so the union over shards is identical
511            // to a single all-patterns database's output - offsets are in the
512            // original byte space, no remapping. Reserve once for the common
513            // case; the union is typically small.
514            let mut matches = Vec::with_capacity(32);
515
516            for (shard_idx, shard) in self.shards.iter().enumerate() {
517                let key = (self.scanner_id, shard_idx);
518                // Take this thread's scratch for the shard (or allocate one):
519                // pool first to reuse the compile-time test scratch, else a
520                // fresh alloc tied to the shard's db.
521                let scratch = TLS
522                    .with(|tls| tls.borrow_mut().remove(&key))
523                    .or_else(|| shard.scratch_pool.lock().pop())
524                    .or_else(|| shard.db.alloc_scratch().ok());
525
526                let Some(scratch) = scratch else {
527                    continue;
528                };
529
530                let _ = shard.db.scan(text, &scratch, |id, from, to, _flags| {
531                    matches.push((id as usize, from as usize, to as usize));
532                    Matching::Continue
533                });
534
535                TLS.with(|tls| {
536                    tls.borrow_mut().insert(key, scratch);
537                });
538            }
539            matches
540        }
541
542        /// Look up detector and pattern metadata for a Hyperscan pattern id.
543        ///
544        /// # Examples
545        ///
546        /// ```rust,ignore
547        /// use keyhog_scanner::simd::backend::HsScanner;
548        ///
549        /// let (scanner, _) = HsScanner::compile(&[(0, 0, "demo_[A-Z0-9]{8}", false)])?;
550        /// assert!(scanner.pattern_info(0).is_some());
551        /// ```
552        pub fn pattern_info(&self, hs_id: usize) -> Option<(usize, usize, bool)> {
553            self.pattern_map.get(hs_id).copied()
554        }
555
556        /// Return the number of patterns compiled into the SIMD database.
557        ///
558        /// # Examples
559        ///
560        /// ```rust,ignore
561        /// use keyhog_scanner::simd::backend::HsScanner;
562        ///
563        /// let (scanner, _) = HsScanner::compile(&[(0, 0, "demo_[A-Z0-9]{8}", false)])?;
564        /// assert_eq!(scanner.pattern_count(), 1);
565        /// ```
566        pub fn pattern_count(&self) -> usize {
567            self.pattern_map.len()
568        }
569    }
570}