hyperscan_tokio_sys/
lib.rs

1//! Low-level FFI bindings to VectorScan/Hyperscan
2//! 
3//! This crate provides raw bindings and safe wrappers around the VectorScan C API.
4
5#![allow(non_upper_case_globals)]
6#![allow(non_camel_case_types)]
7#![allow(non_snake_case)]
8#![allow(dead_code)]
9
10use std::ffi::{CStr, CString};
11use std::fmt;
12use std::os::raw::{c_char, c_int, c_uint, c_void, c_ulonglong};
13use std::ptr;
14use std::slice;
15
16// Include generated bindings
17include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
18
19// Re-export key types
20pub use hs_database_t as RawDatabase;
21pub use hs_stream_t as RawStream;
22pub use hs_scratch_t as RawScratch;
23pub use hs_compile_error_t as RawCompileError;
24
25// Chimera types - only available when has_chimera is set
26#[cfg(has_chimera)]
27pub use ch_database_t as RawChimeraDatabase;
28#[cfg(has_chimera)]
29pub use ch_scratch_t as RawChimeraScratch;
30#[cfg(has_chimera)]
31pub use ch_compile_error_t as RawChimeraCompileError;
32
/// Thin wrapper around a raw `hs_database_t` pointer.
///
/// `#[repr(transparent)]` gives the wrapper the same layout as the raw
/// pointer; the public field exposes that pointer directly.
///
/// SAFETY INVARIANTS:
/// - Inner pointer is always valid or null
/// - Pointer must be freed exactly once. NOTE(review): no `Drop` impl is
///   visible next to this definition; release appears to go through
///   `VectorScan::free_database` — confirm before relying on automatic cleanup.
/// - Send + Sync safe because VectorScan databases are thread-safe
#[repr(transparent)]
pub struct DatabasePtr(pub *mut hs_database_t);

// SAFETY: VectorScan databases are thread-safe for read operations
unsafe impl Send for DatabasePtr {}
// SAFETY: VectorScan databases can be shared across threads for scanning
unsafe impl Sync for DatabasePtr {}
46
/// Thin wrapper around a raw `hs_stream_t` pointer.
///
/// SAFETY INVARIANTS:
/// - Stream pointers are NOT thread-safe (no Sync)
/// - Can be sent between threads but not used concurrently
/// - Must be properly closed before dropping
///
/// The wrapper is `Copy`, so several copies can refer to the same stream.
/// `VectorScan::close_stream` consumes only the copy it is given; any other
/// copy still holds the same raw pointer afterwards and must not be used.
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct StreamPtr(pub *mut hs_stream_t);

// SAFETY: Streams can be moved between threads but not shared
unsafe impl Send for StreamPtr {}
59
/// Thin wrapper around a raw `hs_scratch_t` pointer.
///
/// SAFETY INVARIANTS:
/// - A scratch space must never be used by two threads at the same time
///   (the type is deliberately not `Sync`)
/// - It may be moved to another thread (`Send` is implemented below)
/// - Each concurrently scanning thread must have its own scratch space
/// - Scratch must be allocated for specific database
///
/// The wrapper is `Copy`, so copies alias the same allocation; freeing or
/// reallocating through one copy leaves the others holding a stale pointer.
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct ScratchPtr(pub *mut hs_scratch_t);

// SAFETY: Scratch spaces can be sent between threads
// This is safe because the scratch itself doesn't contain thread-local state
unsafe impl Send for ScratchPtr {}
73
/// Thin wrapper around a raw `ch_database_t` pointer.
///
/// Only compiled when the `has_chimera` cfg is set.
#[cfg(has_chimera)]
#[repr(transparent)]
pub struct ChimeraDatabasePtr(pub *mut ch_database_t);

// SAFETY: NOTE(review) — presumably justified the same way as `DatabasePtr`
// (compiled databases are read-only while scanning); confirm for Chimera.
#[cfg(has_chimera)]
unsafe impl Send for ChimeraDatabasePtr {}
// SAFETY: NOTE(review) — see the `Send` impl above; confirm for Chimera.
#[cfg(has_chimera)]
unsafe impl Sync for ChimeraDatabasePtr {}
83
/// Thin wrapper around a raw `ch_scratch_t` pointer.
///
/// Only compiled when the `has_chimera` cfg is set. The type is `Send` but
/// not `Sync`, mirroring `ScratchPtr`.
#[cfg(has_chimera)]
#[repr(transparent)]
pub struct ChimeraScratchPtr(pub *mut ch_scratch_t);

// SAFETY: NOTE(review) — presumably the scratch holds no thread-local state,
// as stated for `ScratchPtr`; confirm for Chimera.
#[cfg(has_chimera)]
unsafe impl Send for ChimeraScratchPtr {}
91
92/// Compile flags
93bitflags::bitflags! {
94    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
95    pub struct Flags: u32 {
96        const CASELESS = HS_FLAG_CASELESS;
97        const DOTALL = HS_FLAG_DOTALL;
98        const MULTILINE = HS_FLAG_MULTILINE;
99        const SINGLEMATCH = HS_FLAG_SINGLEMATCH;
100        const ALLOWEMPTY = HS_FLAG_ALLOWEMPTY;
101        const UTF8 = HS_FLAG_UTF8;
102        const UCP = HS_FLAG_UCP;
103        const PREFILTER = HS_FLAG_PREFILTER;
104        const SOM_LEFTMOST = HS_FLAG_SOM_LEFTMOST;
105    }
106}
107
108/// Compile mode flags
109bitflags::bitflags! {
110    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
111    pub struct Mode: u32 {
112        const BLOCK = HS_MODE_BLOCK;
113        const STREAM = HS_MODE_STREAM;
114        const VECTORED = HS_MODE_VECTORED;
115        const SOM_HORIZON_LARGE = HS_MODE_SOM_HORIZON_LARGE;
116        const SOM_HORIZON_MEDIUM = HS_MODE_SOM_HORIZON_MEDIUM;
117        const SOM_HORIZON_SMALL = HS_MODE_SOM_HORIZON_SMALL;
118    }
119}
120
121/// Platform information
122#[derive(Debug, Clone)]
123pub struct Platform {
124    pub tune: u32,
125    pub cpu_features: u64,
126}
127
128impl Platform {
129    /// Detect current platform automatically
130    pub fn detect() -> Result<Self, String> {
131        let mut info = hs_platform_info {
132            tune: 0,
133            cpu_features: 0,
134            reserved1: 0,
135            reserved2: 0,
136        };
137        
138        let ret = unsafe {
139            hs_populate_platform(&mut info)
140        };
141        
142        if ret != HS_SUCCESS as i32 {
143            return Err(format!("Failed to detect platform: {}", ret));
144        }
145        
146        Ok(Self {
147            tune: info.tune,
148            cpu_features: info.cpu_features,
149        })
150    }
151    
152    /// Create platform for current CPU (alias for detect)
153    pub fn native() -> Self {
154        let mut info = hs_platform_info {
155            tune: 0,
156            cpu_features: 0,
157            reserved1: 0,
158            reserved2: 0,
159        };
160        
161        unsafe {
162            hs_populate_platform(&mut info);
163        }
164        
165        Self {
166            tune: info.tune,
167            cpu_features: info.cpu_features,
168        }
169    }
170}
171
/// Extended per-expression parameters.
///
/// `compile_extended` copies every field verbatim into an `hs_expr_ext_t`
/// before calling `hs_compile_ext_multi`.
#[derive(Debug, Clone, Default)]
pub struct ExpressionExt {
    /// Bitmask of the `FLAG_*` constants below; presumably selects which of
    /// the remaining fields the library honours — confirm against the C API.
    pub flags: u64,
    /// Forwarded as `hs_expr_ext_t::min_offset`.
    pub min_offset: u64,
    /// Forwarded as `hs_expr_ext_t::max_offset`.
    pub max_offset: u64,
    /// Forwarded as `hs_expr_ext_t::min_length`.
    pub min_length: u64,
    /// Forwarded as `hs_expr_ext_t::edit_distance`.
    pub edit_distance: u32,
    /// Forwarded as `hs_expr_ext_t::hamming_distance`.
    pub hamming_distance: u32,
}

impl ExpressionExt {
    /// Bit 0: marks `min_offset`.
    pub const FLAG_MIN_OFFSET: u64 = 1 << 0;
    /// Bit 1: marks `max_offset`.
    pub const FLAG_MAX_OFFSET: u64 = 1 << 1;
    /// Bit 2: marks `min_length`.
    pub const FLAG_MIN_LENGTH: u64 = 1 << 2;
    /// Bit 3: marks `edit_distance`.
    pub const FLAG_EDIT_DISTANCE: u64 = 1 << 3;
    /// Bit 4: marks `hamming_distance`.
    pub const FLAG_HAMMING_DISTANCE: u64 = 1 << 4;
}
190
191/// Compile multiple patterns with extended parameters
192pub fn compile_extended(
193    patterns: &[&str],
194    flags: &[Flags],
195    ids: &[u32],
196    ext: &[ExpressionExt],
197    mode: Mode,
198    platform: Option<&Platform>,
199) -> Result<DatabasePtr, CompileError> {
200    assert_eq!(patterns.len(), flags.len());
201    assert_eq!(patterns.len(), ids.len());
202    assert_eq!(patterns.len(), ext.len());
203    
204    // SAFETY: Converting Rust strings to C strings
205    // - Each CString is heap-allocated and valid until explicitly freed
206    // - CString::new ensures no null bytes in patterns
207    // - into_raw transfers ownership, we must free later
208    let pattern_ptrs: Vec<*const c_char> = patterns
209        .iter()
210        .map(|p| CString::new(*p).unwrap().into_raw() as *const c_char)
211        .collect();
212    
213    let flag_values: Vec<c_uint> = flags.iter().map(|f| f.bits()).collect();
214    
215    // SAFETY: Convert to hyperscan ext structs
216    // - Each Box::new allocates on heap
217    // - Box::into_raw transfers ownership, we must free later
218    // - The structs are POD types safe for FFI
219    let ext_ptrs: Vec<*const hs_expr_ext_t> = ext
220        .iter()
221        .map(|e| {
222            Box::into_raw(Box::new(hs_expr_ext_t {
223                flags: e.flags,
224                min_offset: e.min_offset,
225                max_offset: e.max_offset,
226                min_length: e.min_length,
227                edit_distance: e.edit_distance,
228                hamming_distance: e.hamming_distance,
229            })) as *const hs_expr_ext_t
230        })
231        .collect();
232    
233    let mut database: *mut hs_database_t = ptr::null_mut();
234    let mut error: *mut hs_compile_error_t = ptr::null_mut();
235    
236    let platform_ptr = platform
237        .map(|p| &hs_platform_info {
238            tune: p.tune,
239            cpu_features: p.cpu_features,
240            reserved1: 0,
241            reserved2: 0,
242        } as *const _)
243        .unwrap_or(ptr::null());
244    
245    let ret = unsafe {
246        // SAFETY: FFI call requirements
247        // - All pointers in pattern_ptrs are valid CStrings
248        // - All pointers in ext_ptrs are valid heap allocations
249        // - flag_values and ids are valid slices with matching length
250        // - database and error pointers are stack-allocated
251        // - platform_ptr is either null or points to valid stack data
252        hs_compile_ext_multi(
253            pattern_ptrs.as_ptr(),
254            flag_values.as_ptr(),
255            ids.as_ptr(),
256            ext_ptrs.as_ptr(),
257            patterns.len() as c_uint,
258            mode.bits(),
259            platform_ptr,
260            &mut database,
261            &mut error,
262        )
263    };
264    
265    // SAFETY: Clean up all allocated memory
266    // - Each pattern_ptr was created with CString::into_raw
267    // - Must use CString::from_raw to properly deallocate
268    for ptr in pattern_ptrs {
269        unsafe { CString::from_raw(ptr as *mut c_char); }
270    }
271    // SAFETY: Clean up all allocated ext structs
272    // - Each ext_ptr was created with Box::into_raw
273    // - Must use Box::from_raw to properly deallocate
274    for ptr in ext_ptrs {
275        unsafe { Box::from_raw(ptr as *mut hs_expr_ext_t); }
276    }
277    
278    if ret != HS_SUCCESS as i32 {
279        let err = unsafe { VectorScan::extract_compile_error(error) };
280        return Err(err);
281    }
282    
283    Ok(DatabasePtr(database))
284}
285
/// Details of a failed pattern compilation.
#[derive(Debug)]
pub struct CompileError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Expression number the error refers to.
    pub expression: i32,
    /// Offset inside the pattern, when one could be determined.
    pub position: Option<usize>,
}

/// Details of a failed pattern validation.
pub struct ValidationError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Offset inside the pattern, when one could be determined.
    pub position: Option<usize>,
}

impl fmt::Display for CompileError {
    /// Renders as `Compile error in expression <n>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            message,
            expression,
            ..
        } = self;
        f.write_fmt(format_args!(
            "Compile error in expression {}: {}",
            expression, message
        ))
    }
}

impl std::error::Error for CompileError {}
307
/// Value a match callback returns to steer the scan.
///
/// `#[repr(i32)]` fixes the discriminants to the integers shown below;
/// presumably they are handed back to the C callback interface unchanged —
/// the trampoline that does this is not defined next to this enum.
#[repr(i32)]
pub enum Matching {
    /// Keep scanning (discriminant 0).
    Continue = 0,
    /// Stop scanning (discriminant 1).
    Terminate = 1,
}
314
315/// Match flags
316bitflags::bitflags! {
317    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
318    pub struct MatchFlags: u32 {
319        /// This match has a valid SOM value
320        const SOM_VALID = 1 << 0;
321    }
322}
323
/// Capture group information
///
/// Passed as a slice to `ChimeraMatchCallback`.
#[derive(Debug, Clone)]
pub struct CaptureGroup {
    /// Whether this group took part in the match (presumably `from`/`to`
    /// are meaningless when this is `false` — confirm against the producer).
    pub active: bool,
    /// Start offset of the captured span (assumed to be a byte offset — TODO confirm).
    pub from: i64,
    /// End offset of the captured span (assumed to be a byte offset — TODO confirm).
    pub to: i64,
}
331
/// Boxed Chimera match callback.
///
/// Receives four integers plus the capture groups of the match and returns a
/// [`Matching`] decision. NOTE(review): the integers are presumably
/// (pattern id, from, to, flags) — confirm against the Chimera trampoline.
#[cfg(has_chimera)]
pub type ChimeraMatchCallback = Box<dyn FnMut(u32, u64, u64, u32, &[CaptureGroup]) -> Matching + Send>;

/// Boxed, type-erased match callback with the same argument shape as the
/// closures accepted by `VectorScan::scan` (presumably pattern id, from, to —
/// TODO confirm).
pub type MatchCallback = Box<dyn FnMut(u32, u64, u64) -> Matching + Send>;

/// C-ABI allocation function accepted by `VectorScan::set_allocator`.
pub type AllocFunc = unsafe extern "C" fn(size: usize) -> *mut c_void;
/// C-ABI deallocation function accepted by `VectorScan::set_allocator`.
pub type FreeFunc = unsafe extern "C" fn(ptr: *mut c_void);
343
/// Namespace type for the wrappers around the VectorScan API.
///
/// It carries no state; every wrapper is an associated function. The wrappers
/// take the raw pointer wrapper types by reference or value and do not verify
/// that the pointers are valid.
pub struct VectorScan;
346
347impl VectorScan {
348    /// Set custom memory allocator
349    pub fn set_allocator(alloc_fn: AllocFunc, free_fn: FreeFunc) -> Result<(), String> {
350        let ret = unsafe {
351            hs_set_allocator(Some(alloc_fn), Some(free_fn))
352        };
353        
354        if ret != HS_SUCCESS as i32 {
355            return Err(format!("Failed to set allocator: {}", ret));
356        }
357        
358        Ok(())
359    }
360    
361    /// Clear custom memory allocator (use defaults)
362    pub fn clear_allocator() -> Result<(), String> {
363        let ret = unsafe {
364            hs_set_allocator(None, None)
365        };
366        
367        if ret != HS_SUCCESS as i32 {
368            return Err(format!("Failed to clear allocator: {}", ret));
369        }
370        
371        Ok(())
372    }
373    /// Get version string
374    pub fn version() -> &'static str {
375        unsafe {
376            CStr::from_ptr(hs_version())
377                .to_str()
378                .unwrap_or("unknown")
379        }
380    }
381    
382    /// Check if platform is valid
383    pub fn valid_platform() -> Result<(), String> {
384        let ret = unsafe { hs_valid_platform() };
385        if ret == HS_SUCCESS as i32 {
386            Ok(())
387        } else {
388            Err(format!("Platform not supported: {}", ret))
389        }
390    }
391    
392    /// Compile a single pattern
393    pub fn compile(
394        pattern: &str,
395        flags: Flags,
396        mode: Mode,
397        platform: Option<&Platform>,
398    ) -> Result<DatabasePtr, CompileError> {
399        let pattern_cstr = CString::new(pattern).map_err(|_| CompileError {
400            message: "Pattern contains null byte".to_string(),
401            expression: 0,
402            position: None,
403        })?;
404        
405        let mut database: *mut hs_database_t = ptr::null_mut();
406        let mut error: *mut hs_compile_error_t = ptr::null_mut();
407        
408        let platform_ptr = platform
409            .map(|p| &hs_platform_info {
410                tune: p.tune,
411                cpu_features: p.cpu_features,
412                reserved1: 0,
413                reserved2: 0,
414            } as *const _)
415            .unwrap_or(ptr::null());
416        
417        let ret = unsafe {
418            hs_compile(
419                pattern_cstr.as_ptr(),
420                flags.bits(),
421                mode.bits(),
422                platform_ptr,
423                &mut database,
424                &mut error,
425            )
426        };
427        
428        if ret != HS_SUCCESS as i32 {
429            let err = unsafe { Self::extract_compile_error(error) };
430            return Err(err);
431        }
432        
433        Ok(DatabasePtr(database))
434    }
435    
436    /// Compile literal patterns
437    pub fn compile_lit_multi(
438        literals: &[&[u8]],
439        flags: &[Flags],
440        ids: &[u32],
441        mode: Mode,
442        platform: Option<&Platform>,
443    ) -> Result<DatabasePtr, CompileError> {
444        assert_eq!(literals.len(), flags.len());
445        assert_eq!(literals.len(), ids.len());
446        
447        let literal_ptrs: Vec<*const c_char> = literals
448            .iter()
449            .map(|lit| lit.as_ptr() as *const c_char)
450            .collect();
451        
452        let literal_lens: Vec<usize> = literals
453            .iter()
454            .map(|lit| lit.len())
455            .collect();
456        
457        let flag_values: Vec<c_uint> = flags.iter().map(|f| f.bits()).collect();
458        
459        let mut database: *mut hs_database_t = ptr::null_mut();
460        let mut error: *mut hs_compile_error_t = ptr::null_mut();
461        
462        let platform_ptr = platform
463            .map(|p| &hs_platform_info {
464                tune: p.tune,
465                cpu_features: p.cpu_features,
466                reserved1: 0,
467                reserved2: 0,
468            } as *const _)
469            .unwrap_or(ptr::null());
470        
471        let ret = unsafe {
472            hs_compile_lit_multi(
473                literal_ptrs.as_ptr(),
474                flag_values.as_ptr(),
475                ids.as_ptr(),
476                literal_lens.as_ptr(),
477                literals.len() as c_uint,
478                mode.bits(),
479                platform_ptr,
480                &mut database,
481                &mut error,
482            )
483        };
484        
485        if ret != HS_SUCCESS as i32 {
486            let err = unsafe { Self::extract_compile_error(error) };
487            return Err(err);
488        }
489        
490        Ok(DatabasePtr(database))
491    }
492    
493    /// Compile multiple patterns
494    pub fn compile_multi(
495        patterns: &[&str],
496        flags: &[Flags],
497        ids: &[u32],
498        mode: Mode,
499        platform: Option<&Platform>,
500    ) -> Result<DatabasePtr, CompileError> {
501        assert_eq!(patterns.len(), flags.len());
502        assert_eq!(patterns.len(), ids.len());
503        
504        let pattern_ptrs: Vec<*const c_char> = patterns
505            .iter()
506            .map(|p| CString::new(*p).unwrap().into_raw() as *const c_char)
507            .collect();
508        
509        let flag_values: Vec<c_uint> = flags.iter().map(|f| f.bits()).collect();
510        
511        let mut database: *mut hs_database_t = ptr::null_mut();
512        let mut error: *mut hs_compile_error_t = ptr::null_mut();
513        
514        let platform_ptr = platform
515            .map(|p| &hs_platform_info {
516                tune: p.tune,
517                cpu_features: p.cpu_features,
518                reserved1: 0,
519                reserved2: 0,
520            } as *const _)
521            .unwrap_or(ptr::null());
522        
523        let ret = unsafe {
524            hs_compile_multi(
525                pattern_ptrs.as_ptr(),
526                flag_values.as_ptr(),
527                ids.as_ptr(),
528                patterns.len() as c_uint,
529                mode.bits(),
530                platform_ptr,
531                &mut database,
532                &mut error,
533            )
534        };
535        
536        // Clean up CStrings
537        for ptr in pattern_ptrs {
538            unsafe { CString::from_raw(ptr as *mut c_char); }
539        }
540        
541        if ret != HS_SUCCESS as i32 {
542            let err = unsafe { Self::extract_compile_error(error) };
543            return Err(err);
544        }
545        
546        Ok(DatabasePtr(database))
547    }
548    
549    /// Allocate scratch space
550    pub fn alloc_scratch(database: &DatabasePtr) -> Result<ScratchPtr, String> {
551        let mut scratch: *mut hs_scratch_t = ptr::null_mut();
552        
553        let ret = unsafe {
554            hs_alloc_scratch(database.0, &mut scratch)
555        };
556        
557        if ret != HS_SUCCESS as i32 {
558            return Err(format!("Failed to allocate scratch: {}", ret));
559        }
560        
561        Ok(ScratchPtr(scratch))
562    }
563    
564    /// Get scratch size
565    pub fn scratch_size(scratch: &ScratchPtr) -> Result<usize, String> {
566        let mut size: usize = 0;
567        
568        let ret = unsafe {
569            hs_scratch_size(scratch.0, &mut size)
570        };
571        
572        if ret != HS_SUCCESS as i32 {
573            return Err(format!("Failed to get scratch size: {}", ret));
574        }
575        
576        Ok(size)
577    }
578    
579    /// Reallocate scratch for a database
580    pub fn scratch_realloc(scratch: &mut ScratchPtr, database: &DatabasePtr) -> Result<(), String> {
581        let ret = unsafe {
582            hs_alloc_scratch(database.0, &mut scratch.0)
583        };
584        
585        if ret != HS_SUCCESS as i32 {
586            return Err(format!("Failed to reallocate scratch: {}", ret));
587        }
588        
589        Ok(())
590    }
591    
592    /// Clone scratch space
593    pub fn clone_scratch(src: &ScratchPtr) -> Result<ScratchPtr, String> {
594        let mut scratch: *mut hs_scratch_t = ptr::null_mut();
595        
596        let ret = unsafe {
597            hs_clone_scratch(src.0, &mut scratch)
598        };
599        
600        if ret != HS_SUCCESS as i32 {
601            return Err(format!("Failed to clone scratch: {}", ret));
602        }
603        
604        Ok(ScratchPtr(scratch))
605    }
606    
607    /// Scan data in block mode
608    pub fn scan<F>(
609        database: &DatabasePtr,
610        data: &[u8],
611        scratch: &mut ScratchPtr,
612        mut on_match: F,
613    ) -> Result<(), String>
614    where
615        F: FnMut(u32, u64, u64) -> Matching,
616    {
617        // SAFETY: Creating context pointer for FFI callback
618        // - on_match lives on our stack frame for the entire FFI call
619        // - We pass it as *mut c_void but it's actually &mut F
620        // - The callback will cast it back to &mut F safely
621        let context = &mut on_match as *mut _ as *mut c_void;
622        
623        let ret = unsafe {
624            // SAFETY: FFI call requirements
625            // - database.0 is a valid database pointer
626            // - data is a valid byte slice with correct length
627            // - scratch.0 is a valid scratch pointer for this database
628            // - match_handler is a valid C function pointer
629            // - context remains valid for the duration of this call
630            hs_scan(
631                database.0,
632                data.as_ptr() as *const c_char,
633                data.len() as c_uint,
634                0, // flags
635                scratch.0,
636                Some(match_handler::<F>),
637                context,
638            )
639        };
640        
641        match ret {
642            x if x == HS_SUCCESS as i32 => Ok(()),
643            x if x == HS_SCAN_TERMINATED as i32 => Ok(()),
644            _ => Err(format!("Scan failed: {}", ret)),
645        }
646    }
647    
648    /// Serialize database
649    pub fn serialize_database(database: &DatabasePtr) -> Result<Vec<u8>, String> {
650        let mut bytes: *mut c_char = ptr::null_mut();
651        let mut length: usize = 0;
652        
653        let ret = unsafe {
654            // SAFETY: FFI call requirements
655            // - database.0 is a valid database pointer
656            // - bytes and length are stack-allocated and their addresses are valid
657            // - hs_serialize_database will allocate memory and store pointer in bytes
658            hs_serialize_database(
659                database.0,
660                &mut bytes,
661                &mut length,
662            )
663        };
664        
665        if ret != HS_SUCCESS as i32 {
666            return Err(format!("Failed to serialize database: {}", ret));
667        }
668        
669        // SAFETY: Memory management
670        // - bytes now points to memory allocated by VectorScan
671        // - length contains the valid size of that allocation
672        // - We must copy the data and free the original allocation
673        let data = unsafe {
674            slice::from_raw_parts(bytes as *const u8, length).to_vec()
675        };
676        
677        // SAFETY: Freeing VectorScan-allocated memory
678        // - bytes was allocated by VectorScan's allocator
679        // - Must be freed with libc::free (or custom allocator if set)
680        // - No other references to this memory exist after copying to Vec
681        unsafe {
682            libc::free(bytes as *mut c_void);
683        }
684        
685        Ok(data)
686    }
687    
688    /// Deserialize database
689    pub fn deserialize_database(data: &[u8]) -> Result<DatabasePtr, String> {
690        let mut database: *mut hs_database_t = ptr::null_mut();
691        
692        let ret = unsafe {
693            hs_deserialize_database(
694                data.as_ptr() as *const c_char,
695                data.len(),
696                &mut database,
697            )
698        };
699        
700        if ret != HS_SUCCESS as i32 {
701            return Err(format!("Failed to deserialize database: {}", ret));
702        }
703        
704        Ok(DatabasePtr(database))
705    }
706    
707    /// Get database size
708    pub fn database_size(database: &DatabasePtr) -> Result<usize, String> {
709        let mut size: usize = 0;
710        
711        let ret = unsafe {
712            hs_database_size(database.0, &mut size)
713        };
714        
715        if ret != HS_SUCCESS as i32 {
716            return Err(format!("Failed to get database size: {}", ret));
717        }
718        
719        Ok(size)
720    }
721    
722    /// Get stream size
723    pub fn stream_size(database: &DatabasePtr) -> Result<usize, String> {
724        let mut size: usize = 0;
725        
726        let ret = unsafe {
727            hs_stream_size(database.0, &mut size)
728        };
729        
730        if ret != HS_SUCCESS as i32 {
731            return Err(format!("Failed to get stream size: {}", ret));
732        }
733        
734        Ok(size)
735    }
736    
737    /// Get database info
738    pub fn database_info(database: &DatabasePtr) -> Result<String, String> {
739        let mut info: *mut c_char = ptr::null_mut();
740        
741        let ret = unsafe {
742            hs_database_info(database.0, &mut info)
743        };
744        
745        if ret != HS_SUCCESS as i32 {
746            return Err(format!("Failed to get database info: {}", ret));
747        }
748        
749        let info_str = unsafe {
750            CStr::from_ptr(info).to_string_lossy().to_string()
751        };
752        
753        unsafe {
754            libc::free(info as *mut c_void);
755        }
756        
757        Ok(info_str)
758    }
759    
760    /// Open a stream
761    pub fn open_stream(database: &DatabasePtr) -> Result<StreamPtr, String> {
762        let mut stream: *mut hs_stream_t = ptr::null_mut();
763        
764        let ret = unsafe {
765            hs_open_stream(database.0, 0, &mut stream)
766        };
767        
768        if ret != HS_SUCCESS as i32 {
769            return Err(format!("Failed to open stream: {}", ret));
770        }
771        
772        Ok(StreamPtr(stream))
773    }
774    
775    /// Scan stream
776    pub fn scan_stream<F>(
777        stream: &mut StreamPtr,
778        data: &[u8],
779        scratch: &mut ScratchPtr,
780        mut on_match: F,
781    ) -> Result<(), String>
782    where
783        F: FnMut(u32, u64, u64) -> Matching,
784    {
785        let context = &mut on_match as *mut _ as *mut c_void;
786        
787        let ret = unsafe {
788            hs_scan_stream(
789                stream.0,
790                data.as_ptr() as *const c_char,
791                data.len() as c_uint,
792                0, // flags
793                scratch.0,
794                Some(match_handler::<F>),
795                context,
796            )
797        };
798        
799        match ret {
800            x if x == HS_SUCCESS as i32 => Ok(()),
801            x if x == HS_SCAN_TERMINATED as i32 => Ok(()),
802            _ => Err(format!("Stream scan failed: {}", ret)),
803        }
804    }
805    
806    /// Close stream
807    pub fn close_stream<F>(
808        stream: StreamPtr,
809        scratch: &mut ScratchPtr,
810        mut on_match: F,
811    ) -> Result<(), String>
812    where
813        F: FnMut(u32, u64, u64) -> Matching,
814    {
815        let context = &mut on_match as *mut _ as *mut c_void;
816        
817        let ret = unsafe {
818            hs_close_stream(
819                stream.0,
820                scratch.0,
821                Some(match_handler::<F>),
822                context,
823            )
824        };
825        
826        match ret {
827            x if x == HS_SUCCESS as i32 => Ok(()),
828            x if x == HS_SCAN_TERMINATED as i32 => Ok(()),
829            _ => Err(format!("Failed to close stream: {}", ret)),
830        }
831    }
832    
833    /// Free database
834    pub fn free_database(database: DatabasePtr) {
835        unsafe {
836            hs_free_database(database.0);
837        }
838    }
839    
840    /// Free scratch
841    pub fn free_scratch(scratch: ScratchPtr) {
842        unsafe {
843            hs_free_scratch(scratch.0);
844        }
845    }
846    
847    /// Validate a pattern expression
848    pub fn validate_expression(pattern: &str, flags: Flags) -> Result<(), ValidationError> {
849        let mut info: *mut hs_expr_info_t = ptr::null_mut();
850        let mut error: *mut hs_compile_error_t = ptr::null_mut();
851        
852        let pattern_cstr = CString::new(pattern).map_err(|_| ValidationError {
853            message: "Pattern contains null byte".to_string(),
854            position: None,
855        })?;
856        
857        let ret = unsafe {
858            hs_expression_info(
859                pattern_cstr.as_ptr(),
860                flags.bits(),
861                &mut info,
862                &mut error,
863            )
864        };
865        
866        if ret != HS_SUCCESS as i32 {
867            let err = unsafe { 
868                let msg = CStr::from_ptr((*error).message)
869                    .to_string_lossy()
870                    .to_string();
871                // Try to extract position from error message
872                let position = Self::extract_position_from_message(&msg);
873                hs_free_compile_error(error);
874                ValidationError {
875                    message: msg,
876                    position,
877                }
878            };
879            return Err(err);
880        }
881        
882        // Free the info structure
883        unsafe {
884            libc::free(info as *mut c_void);
885        }
886        
887        Ok(())
888    }
889    
890    /// Extract position from error message (e.g., "Parse error at position 10: ...")
891    fn extract_position_from_message(msg: &str) -> Option<usize> {
892        if let Some(pos_str) = msg.find("position ") {
893            let start = pos_str + 9;
894            let end = msg[start..].find(|c: char| !c.is_numeric())
895                .map(|i| start + i)
896                .unwrap_or(msg.len());
897            msg[start..end].parse().ok()
898        } else {
899            None
900        }
901    }
902    
903    /// Extract compile error details
904    unsafe fn extract_compile_error(error: *mut hs_compile_error_t) -> CompileError {
905        let message = CStr::from_ptr((*error).message)
906            .to_string_lossy()
907            .to_string();
908        let position = Self::extract_position_from_message(&message);
909        
910        let err = CompileError {
911            message,
912            expression: (*error).expression,
913            position,
914        };
915        hs_free_compile_error(error);
916        err
917    }
918}
919
/// Namespace type for the wrappers around the Chimera API.
///
/// Only compiled when the `has_chimera` cfg is set. It carries no state;
/// every wrapper is an associated function.
#[cfg(has_chimera)]
pub struct Chimera;
923
924#[cfg(has_chimera)]
925impl Chimera {
926    /// Compile a Chimera pattern with capture group support
927    pub fn compile(
928        pattern: &str,
929        flags: Flags,
930        mode: Mode,
931        platform: Option<&Platform>,
932    ) -> Result<ChimeraDatabasePtr, CompileError> {
933        let pattern_cstr = CString::new(pattern).map_err(|_| CompileError {
934            message: "Pattern contains null byte".to_string(),
935            expression: 0,
936            position: None,
937        })?;
938        
939        let mut database: *mut ch_database_t = ptr::null_mut();
940        let mut error: *mut ch_compile_error_t = ptr::null_mut();
941        
942        let platform_ptr = platform
943            .map(|p| &hs_platform_info {
944                tune: p.tune,
945                cpu_features: p.cpu_features,
946                reserved1: 0,
947                reserved2: 0,
948            } as *const _)
949            .unwrap_or(ptr::null());
950        
951        let ret = unsafe {
952            ch_compile(
953                pattern_cstr.as_ptr(),
954                flags.bits(),
955                mode.bits(),
956                platform_ptr,
957                &mut database,
958                &mut error,
959            )
960        };
961        
962        if ret != CH_SUCCESS as i32 {
963            let err = unsafe { Self::extract_compile_error(error) };
964            return Err(err);
965        }
966        
967        Ok(ChimeraDatabasePtr(database))
968    }
969    
970    /// Compile multiple Chimera patterns
971    pub fn compile_multi(
972        patterns: &[&str],
973        flags: &[Flags],
974        ids: &[u32],
975        mode: Mode,
976        platform: Option<&Platform>,
977    ) -> Result<ChimeraDatabasePtr, CompileError> {
978        assert_eq!(patterns.len(), flags.len());
979        assert_eq!(patterns.len(), ids.len());
980        
981        let pattern_ptrs: Vec<*const c_char> = patterns
982            .iter()
983            .map(|p| CString::new(*p).unwrap().into_raw() as *const c_char)
984            .collect();
985        
986        let flag_values: Vec<c_uint> = flags.iter().map(|f| f.bits()).collect();
987        
988        let mut database: *mut ch_database_t = ptr::null_mut();
989        let mut error: *mut ch_compile_error_t = ptr::null_mut();
990        
991        let platform_ptr = platform
992            .map(|p| &hs_platform_info {
993                tune: p.tune,
994                cpu_features: p.cpu_features,
995                reserved1: 0,
996                reserved2: 0,
997            } as *const _)
998            .unwrap_or(ptr::null());
999        
1000        let ret = unsafe {
1001            ch_compile_multi(
1002                pattern_ptrs.as_ptr(),
1003                flag_values.as_ptr(),
1004                ids.as_ptr(),
1005                patterns.len() as c_uint,
1006                mode.bits(),
1007                platform_ptr,
1008                &mut database,
1009                &mut error,
1010            )
1011        };
1012        
1013        // Clean up CStrings
1014        for ptr in pattern_ptrs {
1015            unsafe { CString::from_raw(ptr as *mut c_char); }
1016        }
1017        
1018        if ret != CH_SUCCESS as i32 {
1019            let err = unsafe { Self::extract_compile_error(error) };
1020            return Err(err);
1021        }
1022        
1023        Ok(ChimeraDatabasePtr(database))
1024    }
1025    
1026    /// Allocate scratch for Chimera
1027    pub fn alloc_scratch(database: &ChimeraDatabasePtr) -> Result<ChimeraScratchPtr, String> {
1028        let mut scratch: *mut ch_scratch_t = ptr::null_mut();
1029        
1030        let ret = unsafe {
1031            ch_alloc_scratch(database.0, &mut scratch)
1032        };
1033        
1034        if ret != CH_SUCCESS as i32 {
1035            return Err(format!("Failed to allocate Chimera scratch: {}", ret));
1036        }
1037        
1038        Ok(ChimeraScratchPtr(scratch))
1039    }
1040    
1041    /// Scan with Chimera (captures supported)
1042    pub fn scan<F>(
1043        database: &ChimeraDatabasePtr,
1044        data: &[u8],
1045        scratch: &mut ChimeraScratchPtr,
1046        mut on_match: F,
1047    ) -> Result<(), String>
1048    where
1049        F: FnMut(u32, u64, u64, u32, &[CaptureGroup]) -> Matching,
1050    {
1051        let context = &mut on_match as *mut _ as *mut c_void;
1052        
1053        let ret = unsafe {
1054            ch_scan(
1055                database.0,
1056                data.as_ptr() as *const c_char,
1057                data.len() as c_uint,
1058                0, // flags
1059                scratch.0,
1060                None, // error event handler
1061                Some(chimera_match_handler::<F>),
1062                context,
1063            )
1064        };
1065        
1066        match ret {
1067            x if x == CH_SUCCESS as i32 => Ok(()),
1068            x if x == CH_SCAN_TERMINATED as i32 => Ok(()),
1069            _ => Err(format!("Chimera scan failed: {}", ret)),
1070        }
1071    }
1072    
1073    /// Free Chimera database
1074    pub fn free_database(database: ChimeraDatabasePtr) {
1075        unsafe {
1076            ch_free_database(database.0);
1077        }
1078    }
1079    
1080    /// Free Chimera scratch
1081    pub fn free_scratch(scratch: ChimeraScratchPtr) {
1082        unsafe {
1083            ch_free_scratch(scratch.0);
1084        }
1085    }
1086    
1087    /// Extract compile error
1088    unsafe fn extract_compile_error(error: *mut ch_compile_error_t) -> CompileError {
1089        let message = CStr::from_ptr((*error).message)
1090            .to_string_lossy()
1091            .to_string();
1092        let position = VectorScan::extract_position_from_message(&message);
1093        
1094        let err = CompileError {
1095            message,
1096            expression: (*error).expression,
1097            position,
1098        };
1099        ch_free_compile_error(error);
1100        err
1101    }
1102}
1103
/// Chimera match handler
///
/// Trampoline passed to `ch_scan`: recovers the user closure from `context`,
/// converts the raw capture array into `CaptureGroup` values and maps the
/// closure's `Matching` result to the integer the C API expects
/// (`Continue` -> 0, `Terminate` -> 1).
///
/// SAFETY: FFI Callback Safety for Chimera with capture groups
/// - Context pointer validity: `context` must point at a live `F`; the
///   caller that registers this handler is responsible for that
/// - No unwinding: a panic raised by the closure is caught here and turned
///   into a process abort, so it can never unwind into the C frames
/// - Capture group handling: `groups` is only read when it is non-null and
///   `captured > 0`, and only for `captured` elements
#[cfg(has_chimera)]
extern "C" fn chimera_match_handler<F>(
    id: c_uint,
    from: c_ulonglong,
    to: c_ulonglong,
    _flags: c_uint,
    captured: c_uint,
    groups: *const ch_capture_t,
    context: *mut c_void,
) -> c_int
where
    F: FnMut(u32, u64, u64, u32, &[CaptureGroup]) -> Matching,
{
    // SAFETY: context points to a valid F that outlives this callback
    let callback = unsafe { &mut *(context as *mut F) };

    let capture_groups: Vec<CaptureGroup> = if captured > 0 && !groups.is_null() {
        // SAFETY: `groups` is non-null and, per the callback contract, refers
        // to `captured` elements that stay valid while this callback runs.
        let raw_groups = unsafe { slice::from_raw_parts(groups, captured as usize) };
        raw_groups
            .iter()
            .map(|g| CaptureGroup {
                active: g.flags & CH_CAPTURE_FLAG_ACTIVE != 0,
                from: g.from as i64,
                to: g.to as i64,
            })
            .collect()
    } else {
        Vec::new()
    };

    // Run the user closure behind `catch_unwind`: letting a panic propagate
    // out of an `extern "C"` function into C code must never happen.
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        callback(id, from, to, captured, &capture_groups)
    }));

    match outcome {
        Ok(Matching::Continue) => 0,
        Ok(Matching::Terminate) => 1,
        // The panic hook has already reported the failure; abort rather than
        // resume scanning with a closure in an unknown state.
        Err(_) => std::process::abort(),
    }
}
1149
1150/// Match event handler with SOM support
1151/// 
1152/// SAFETY: FFI Callback Safety
1153/// - Context pointer validity: Guaranteed by caller's lifetime
1154/// - No unwinding: Callback cannot panic across FFI boundary
1155/// - Memory safety: All accessed memory is valid for callback duration
1156/// - Thread safety: Callback may be called from any thread
1157extern "C" fn match_handler_som<F>(
1158    id: c_uint,
1159    from: c_ulonglong,
1160    to: c_ulonglong,
1161    flags: c_uint,
1162    context: *mut c_void,
1163) -> c_int
1164where
1165    F: FnMut(u32, u64, u64, u32) -> Matching,
1166{
1167    // SAFETY: context points to a valid F that outlives this callback
1168    // The callback was passed as &mut F where F lives on the caller's stack
1169    let callback = unsafe { &mut *(context as *mut F) };
1170    match callback(id, from, to, flags) {
1171        Matching::Continue => 0,
1172        Matching::Terminate => 1,
1173    }
1174}
1175
1176/// Match event handler
1177/// 
1178/// SAFETY: FFI Callback Safety
1179/// - Context pointer validity: Guaranteed by caller's lifetime
1180/// - No unwinding: Callback cannot panic across FFI boundary
1181/// - Memory safety: All accessed memory is valid for callback duration
1182/// - Thread safety: Callback may be called from any thread
1183extern "C" fn match_handler<F>(
1184    id: c_uint,
1185    from: c_ulonglong,
1186    to: c_ulonglong,
1187    _flags: c_uint,
1188    context: *mut c_void,
1189) -> c_int
1190where
1191    F: FnMut(u32, u64, u64) -> Matching,
1192{
1193    // SAFETY: context points to a valid F that outlives this callback
1194    // The callback was passed as &mut F where F lives on the caller's stack
1195    let callback = unsafe { &mut *(context as *mut F) };
1196    match callback(id, from, to) {
1197        Matching::Continue => 0,
1198        Matching::Terminate => 1,
1199    }
1200}
1201
1202// Ensure bitflags is available
1203pub use bitflags;
1204
#[cfg(test)]
mod tests {
    use super::*;

    /// `VectorScan::version` must yield a non-empty value; it is printed for
    /// the test log.
    #[test]
    fn test_version() {
        let reported = VectorScan::version();
        assert!(!reported.is_empty());
        println!("VectorScan version: {}", reported);
    }

    /// `VectorScan::valid_platform` must return `Ok` on the machine running
    /// the tests.
    #[test]
    fn test_platform() {
        let verdict = VectorScan::valid_platform();
        assert!(verdict.is_ok());
    }
}
1221
// Tests that are only built when running under Miri.
//
// NOTE(review): Miri cannot execute foreign functions, so the tests below
// that call into `VectorScan` presumably need the native calls shimmed or
// skipped to pass under Miri; confirm how this suite is meant to be run.
#[cfg(all(test, miri))]
mod miri_tests {
    use super::*;

    /// Compile-time proof that `T` is both `Send` and `Sync`.
    fn assert_send_sync<T: Send + Sync>() {}

    #[test]
    fn test_database_ptr_safety() {
        // DatabasePtr should be Send + Sync; checked statically so the
        // `Sync` half of the claim is actually exercised.
        assert_send_sync::<DatabasePtr>();

        // Test that DatabasePtr properly manages memory
        let pattern = "test";
        let db = VectorScan::compile(pattern, Flags::empty(), Mode::BLOCK, None).unwrap();

        // Moving the database into another thread requires `Send`.
        std::thread::spawn(move || {
            let _db2 = db;
        })
        .join()
        .unwrap();
    }

    #[test]
    fn test_scratch_ptr_safety() {
        // Test scratch allocation and deallocation
        let pattern = "test";
        let db = VectorScan::compile(pattern, Flags::empty(), Mode::BLOCK, None).unwrap();
        let scratch = VectorScan::alloc_scratch(&db).unwrap();

        // Scratch should be Send
        std::thread::spawn(move || {
            let _scratch2 = scratch;
        })
        .join()
        .unwrap();
    }

    #[test]
    fn test_callback_pointer_safety() {
        // Test that callbacks don't cause UB
        let pattern = "test";
        let db = VectorScan::compile(pattern, Flags::empty(), Mode::BLOCK, None).unwrap();
        let mut scratch = VectorScan::alloc_scratch(&db).unwrap();

        let mut called = false;
        VectorScan::scan(&db, b"test", &mut scratch, |_, _, _| {
            called = true;
            Matching::Continue
        })
        .unwrap();

        assert!(called);
    }

    #[test]
    fn test_cstring_safety() {
        // Test CString conversions
        let patterns = vec!["test1", "test2"];
        let pattern_ptrs: Vec<*const c_char> = patterns
            .iter()
            .map(|p| CString::new(*p).unwrap().into_raw() as *const c_char)
            .collect();

        // Clean up properly: rebuild each CString from its raw pointer and
        // drop it explicitly, so the `#[must_use]` result is not discarded.
        for ptr in pattern_ptrs {
            // SAFETY: every pointer came from `CString::into_raw` above and
            // is reclaimed exactly once.
            drop(unsafe { CString::from_raw(ptr as *mut c_char) });
        }
    }

    #[test]
    fn test_slice_from_raw_parts_safety() {
        // Test that slice creation is safe
        let data = vec![1u8, 2, 3, 4, 5];
        let ptr = data.as_ptr();
        let len = data.len();

        // SAFETY: `ptr` and `len` describe `data`, which is alive for the
        // whole lifetime of `slice`.
        let slice = unsafe { slice::from_raw_parts(ptr, len) };

        assert_eq!(slice, &data[..]);
    }
}