matchy 2.0.1

Fast database for IP address and pattern matching with rich data storage
Documentation
//! Matchy - Fast Database for IP Address and Pattern Matching
//!
//! Matchy is a high-performance database library for querying IP addresses, CIDR ranges,
//! and glob patterns with rich associated data. Perfect for threat intelligence, GeoIP,
//! domain categorization, and network security applications.
//!
//! # Quick Start - Unified Database
//!
//! ```rust
//! use matchy::{Database, DatabaseBuilder, MatchMode, DataValue};
//! use std::collections::HashMap;
//!
//! // Build a database with both IP and pattern entries
//! let mut builder = DatabaseBuilder::new(MatchMode::CaseSensitive);
//!
//! // Add IP address
//! let mut data = HashMap::new();
//! data.insert("threat_level".to_string(), DataValue::String("high".to_string()));
//! builder.add_entry("1.2.3.4", data)?;
//!
//! // Add pattern
//! let mut data = HashMap::new();
//! data.insert("category".to_string(), DataValue::String("malware".to_string()));
//! builder.add_entry("*.evil.com", data)?;
//!
//! // Build and save
//! let db_bytes = builder.build()?;
//! # let tmp_path = std::env::temp_dir().join("matchy_doctest_threats.db");
//! # std::fs::write(&tmp_path, db_bytes)?;
//!
//! // Query the database
//! # let db = Database::from(tmp_path.to_str().unwrap()).open()?;
//! # // Cleanup
//! # let _ = std::fs::remove_file(&tmp_path);
//! #
//! # // For documentation purposes, show it as:
//! # /*
//! let db = Database::from("threats.db").open()?;
//!
//! // Automatic IP detection
//! if let Some(result) = db.lookup("1.2.3.4")? {
//!     println!("Found: {:?}", result);
//! }
//!
//! // Automatic pattern matching
//! if let Some(result) = db.lookup("malware.evil.com")? {
//!     println!("Matches pattern: {:?}", result);
//! }
//! # */
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! # Key Features
//!
//! - **Unified Queries**: Automatically detects IP addresses vs patterns
//! - **Rich Data**: Store JSON-like structured data with each entry
//! - **Zero-Copy Loading**: Memory-mapped files load instantly (~1ms)
//! - **MMDB Compatible**: Drop-in replacement for libmaxminddb
//! - **Shared Memory**: Multiple processes share physical RAM
//! - **C/C++ API**: Stable FFI for any language
//! - **Fast Lookups**: O(log n) for IPs, O(n) for patterns
//!
//! # Architecture
//!
//! Matchy uses a hybrid binary format combining IP tree structures with
//! pattern matching automata:
//!
//! ```text
//! ┌──────────────────────────────────────┐
//! │  Database File Format                │
//! ├──────────────────────────────────────┤
//! │  1. IP Search Tree (binary trie)     │
//! │  2. Data Section (deduplicated)      │
//! │  3. Pattern Matcher (Aho-Corasick)   │
//! │  4. Metadata                         │
//! └──────────────────────────────────────┘
//!          ↓ mmap() syscall (~1ms)
//! ┌──────────────────────────────────────┐
//! │  Memory (read-only, shared)          │
//! │  Ready for queries immediately!      │
//! └──────────────────────────────────────┘
//! ```

#![warn(missing_docs)]
#![warn(clippy::all)]

// Module declarations

// Public modules (documented API)
/// Unified database API
pub mod database;
/// Error types for Paraglob operations
pub mod error;
/// Fast extraction of structured patterns (domains, IPs, emails) from text
///
/// Re-exported from matchy-extractor crate for convenience.
pub use matchy_extractor as extractor;
/// File reading utilities with automatic gzip decompression
pub mod file_reader;
/// MISP JSON threat intelligence importer
pub mod misp_importer;

// Internal format imports (not re-exported to users)
use matchy_format::mmdb;

/// Extension traits for DatabaseBuilder with schema support
mod builder_ext;
/// Batch processing infrastructure for efficient file analysis
///
/// General-purpose building blocks for sequential or parallel line-oriented processing:
/// - `LineFileReader` - Chunks files with gzip support
/// - `Worker` - Processes batches with extraction + matching  
/// - `LineBatch`, `MatchResult`, `LineMatch` - Data structures
pub mod processing;
/// Schema validation for yield values
///
/// Validates that yield values conform to a schema during database building.
pub mod schema_validation;
/// Built-in database schemas for yield value validation
///
/// Matchy includes schemas that define the structure of yield values
/// for common database types like ThreatDB.
pub mod schemas;
/// SIMD-accelerated utilities for pattern matching
///
/// Provides optimized implementations of common operations using SIMD instructions:
/// - ASCII lowercase conversion (4-8x faster than iterator chains)
/// - Byte searching and comparison
///
/// Re-exported from `matchy_paraglob` for convenience.
pub use matchy_paraglob::simd_utils;
/// Database validation for untrusted files
///
/// Provides comprehensive validation of `.mxy` database files including:
/// - **Standard**: All offsets, UTF-8 validation, basic structure
/// - **Strict**: Deep graph analysis, cycles, redundancy checks
pub mod validation;

/// Live database with automatic file watching and optional network updates (native only)
#[cfg(not(target_family = "wasm"))]
pub mod updater;

// Public C API (native platforms only - FFI not available on WASM)
#[cfg(not(target_family = "wasm"))]
pub mod c_api;

// Re-exports for Rust consumers

/// Unified database for IP and pattern lookups
pub use crate::database::{
    Database, DatabaseError, DatabaseOpener, DatabaseOptions, DatabaseStats, LookupRef, QueryResult,
};

#[cfg(not(target_family = "wasm"))]
pub use crate::database::{ReloadEvent, ReloadSource};

/// Data value type for database entries
pub use matchy_data_format::DataValue;

/// Main error type for matchy operations
pub use crate::error::{MatchyError, Result};
/// Match mode for text operations (case sensitive/insensitive)
pub use matchy_match_mode::MatchMode;

// Re-export component error types for advanced users
pub use crate::error::{FormatError, ParaglobError};

/// Unified database builder for creating databases with IP addresses and patterns
///
/// This is the primary API for building databases. It automatically detects whether
/// entries are IP addresses (including CIDRs) or glob patterns and handles them appropriately.
///
/// # Example
/// ```rust,no_run
/// use matchy::{DatabaseBuilder, MatchMode};
/// use std::collections::HashMap;
/// use matchy::DataValue;
///
/// let mut builder = DatabaseBuilder::new(MatchMode::CaseSensitive);
///
/// // Add IP entries
/// let mut data = HashMap::new();
/// data.insert("threat_level".to_string(), DataValue::String("high".to_string()));
/// builder.add_entry("1.2.3.4", data)?;
///
/// // Add pattern entries
/// let mut data = HashMap::new();
/// data.insert("category".to_string(), DataValue::String("malware".to_string()));
/// builder.add_entry("*.evil.com", data)?;
///
/// // Build and save
/// let db_bytes = builder.build()?;
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub use matchy_format::DatabaseBuilder;

/// Entry type classification for database builder
///
/// Represents whether an entry should be treated as an IP address, literal string,
/// or glob pattern. Used with [`DatabaseBuilder::detect_entry_type`] for explicit
/// type control.
pub use matchy_format::EntryType;

/// Trait for custom entry validation
///
/// Implement this trait to provide custom validation logic for entries
/// being added to a [`DatabaseBuilder`]. Use with
/// [`DatabaseBuilder::with_validator`](matchy_format::DatabaseBuilder::with_validator).
///
/// For schema-based validation, use [`DatabaseBuilderExt::with_schema`] instead,
/// which uses the built-in [`SchemaValidator`].
pub use matchy_format::EntryValidator;

// Schema validation re-exports
/// Schema validator for validating yield values against JSON schemas
///
/// Use this to validate data before adding entries to a database with a known schema.
///
/// # Example
/// ```rust,no_run
/// use matchy::SchemaValidator;
/// use matchy::DataValue;
/// use std::collections::HashMap;
///
/// // Create validator for ThreatDB schema
/// let validator = SchemaValidator::new("threatdb")?;
///
/// // Validate a yield value
/// let mut data = HashMap::new();
/// data.insert("threat_level".to_string(), DataValue::String("high".to_string()));
/// data.insert("category".to_string(), DataValue::String("malware".to_string()));
/// data.insert("source".to_string(), DataValue::String("abuse.ch".to_string()));
///
/// validator.validate(&data)?; // Ok - valid ThreatDB entry
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub use crate::schema_validation::{
    SchemaError, SchemaValidationError, SchemaValidator, ValidationErrorDetail,
};

/// Extension trait for adding schema validation to DatabaseBuilder
///
/// Import this trait to use the [`with_schema`](DatabaseBuilderExt::with_schema) method
/// on [`DatabaseBuilder`].
///
/// # Example
///
/// ```rust,ignore
/// use matchy::{DatabaseBuilder, DatabaseBuilderExt, MatchMode};
///
/// let mut builder = DatabaseBuilder::new(MatchMode::CaseInsensitive)
///     .with_schema("threatdb")?;
///
/// // Entries are now validated against ThreatDB schema
/// ```
pub use crate::builder_ext::DatabaseBuilderExt;

// Version information
/// Library version string
pub const MATCHY_VERSION: &str = env!("CARGO_PKG_VERSION");

/// Library major version
pub const MATCHY_VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");

/// Library minor version
pub const MATCHY_VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");

/// Library patch version
pub const MATCHY_VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_version() {
        // Verify version components parse as valid numbers
        assert!(MATCHY_VERSION_MAJOR.parse::<u32>().is_ok());
        assert!(MATCHY_VERSION_MINOR.parse::<u32>().is_ok());
        assert!(MATCHY_VERSION_PATCH.parse::<u32>().is_ok());

        // Verify full version matches format
        let expected =
            format!("{MATCHY_VERSION_MAJOR}.{MATCHY_VERSION_MINOR}.{MATCHY_VERSION_PATCH}");
        assert_eq!(MATCHY_VERSION, expected);
    }
}