1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
//! Matchy - Fast Database for IP Address and Pattern Matching
//!
//! Matchy is a high-performance database library for querying IP addresses, CIDR ranges,
//! and glob patterns with rich associated data. Perfect for threat intelligence, GeoIP,
//! domain categorization, and network security applications.
//!
//! # Quick Start - Unified Database
//!
//! ```rust
//! use matchy::{Database, DatabaseBuilder, MatchMode, DataValue};
//! use std::collections::HashMap;
//!
//! // Build a database with both IP and pattern entries
//! let mut builder = DatabaseBuilder::new(MatchMode::CaseSensitive);
//!
//! // Add IP address
//! let mut data = HashMap::new();
//! data.insert("threat_level".to_string(), DataValue::String("high".to_string()));
//! builder.add_entry("1.2.3.4", data)?;
//!
//! // Add pattern
//! let mut data = HashMap::new();
//! data.insert("category".to_string(), DataValue::String("malware".to_string()));
//! builder.add_entry("*.evil.com", data)?;
//!
//! // Build and save
//! let db_bytes = builder.build()?;
//! # let tmp_path = std::env::temp_dir().join("matchy_doctest_threats.db");
//! # std::fs::write(&tmp_path, db_bytes)?;
//!
//! // Query the database
//! # let db = Database::from(tmp_path.to_str().unwrap()).open()?;
//! # // Cleanup
//! # let _ = std::fs::remove_file(&tmp_path);
//! #
//! # // For documentation purposes, show it as:
//! # /*
//! let db = Database::from("threats.db").open()?;
//!
//! // Automatic IP detection
//! if let Some(result) = db.lookup("1.2.3.4")? {
//!     println!("Found: {:?}", result);
//! }
//!
//! // Automatic pattern matching
//! if let Some(result) = db.lookup("malware.evil.com")? {
//!     println!("Matches pattern: {:?}", result);
//! }
//! # */
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! # Key Features
//!
//! - **Unified Queries**: Automatically detects IP addresses vs patterns
//! - **Rich Data**: Store JSON-like structured data with each entry
//! - **Zero-Copy Loading**: Memory-mapped files load instantly (~1ms)
//! - **MMDB Compatible**: Drop-in replacement for libmaxminddb
//! - **Shared Memory**: Multiple processes share physical RAM
//! - **C/C++ API**: Stable FFI for any language
//! - **Fast Lookups**: O(log n) for IPs, O(n) for patterns
//!
//! # Architecture
//!
//! Matchy uses a hybrid binary format combining IP tree structures with
//! pattern matching automata:
//!
//! ```text
//! ┌──────────────────────────────────────┐
//! │  Database File Format                │
//! ├──────────────────────────────────────┤
//! │  1. IP Search Tree (binary trie)     │
//! │  2. Data Section (deduplicated)      │
//! │  3. Pattern Matcher (Aho-Corasick)   │
//! │  4. Metadata                         │
//! └──────────────────────────────────────┘
//!          ↓ mmap() syscall (~1ms)
//! ┌──────────────────────────────────────┐
//! │  Memory (read-only, shared)          │
//! │  Ready for queries immediately!      │
//! └──────────────────────────────────────┘
//! ```
// Module declarations
// Public modules (documented API)
/// Unified database API
/// Error types for Paraglob operations
/// Fast extraction of structured patterns (domains, IPs, emails) from text
///
/// Re-exported from matchy-extractor crate for convenience.
pub use matchy_extractor as extractor;
/// File reading utilities with automatic gzip decompression
/// MISP JSON threat intelligence importer
// Internal format imports (not re-exported to users)
use mmdb;
/// Extension traits for DatabaseBuilder with schema support
/// Batch processing infrastructure for efficient file analysis
///
/// General-purpose building blocks for sequential or parallel line-oriented processing:
/// - `LineFileReader` - Chunks files with gzip support
/// - `Worker` - Processes batches with extraction + matching
/// - `LineBatch`, `MatchResult`, `LineMatch` - Data structures
/// Schema validation for yield values
///
/// Validates that yield values conform to a schema during database building.
/// Built-in database schemas for yield value validation
///
/// Matchy includes schemas that define the structure of yield values
/// for common database types like ThreatDB.
/// SIMD-accelerated utilities for pattern matching
///
/// Provides optimized implementations of common operations using SIMD instructions:
/// - ASCII lowercase conversion (4-8x faster than iterator chains)
/// - Byte searching and comparison
///
/// Re-exported from `matchy_paraglob` for convenience.
pub use simd_utils;
/// Database validation for untrusted files
///
/// Provides comprehensive validation of `.mxy` database files including:
/// - **Standard**: All offsets, UTF-8 validation, basic structure
/// - **Strict**: Deep graph analysis, cycles, redundancy checks
/// Live database with automatic file watching and optional network updates (native only)
// Public C API (native platforms only - FFI not available on WASM)
// Re-exports for Rust consumers
/// Unified database for IP and pattern lookups
pub use crate;
pub use crate;
/// Data value type for database entries
pub use DataValue;
/// Main error type for matchy operations
pub use crate;
/// Match mode for text operations (case sensitive/insensitive)
pub use MatchMode;
// Re-export component error types for advanced users
pub use crate;
/// Unified database builder for creating databases with IP addresses and patterns
///
/// This is the primary API for building databases. It automatically detects whether
/// entries are IP addresses (including CIDRs) or glob patterns and handles them appropriately.
///
/// # Example
/// ```rust,no_run
/// use matchy::{DatabaseBuilder, MatchMode};
/// use std::collections::HashMap;
/// use matchy::DataValue;
///
/// let mut builder = DatabaseBuilder::new(MatchMode::CaseSensitive);
///
/// // Add IP entries
/// let mut data = HashMap::new();
/// data.insert("threat_level".to_string(), DataValue::String("high".to_string()));
/// builder.add_entry("1.2.3.4", data)?;
///
/// // Add pattern entries
/// let mut data = HashMap::new();
/// data.insert("category".to_string(), DataValue::String("malware".to_string()));
/// builder.add_entry("*.evil.com", data)?;
///
/// // Build and save
/// let db_bytes = builder.build()?;
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub use DatabaseBuilder;
/// Entry type classification for database builder
///
/// Represents whether an entry should be treated as an IP address, literal string,
/// or glob pattern. Used with [`DatabaseBuilder::detect_entry_type`] for explicit
/// type control.
pub use EntryType;
/// Trait for custom entry validation
///
/// Implement this trait to provide custom validation logic for entries
/// being added to a [`DatabaseBuilder`]. Use with
/// [`DatabaseBuilder::with_validator`](matchy_format::DatabaseBuilder::with_validator).
///
/// For schema-based validation, use [`DatabaseBuilderExt::with_schema`] instead,
/// which uses the built-in [`SchemaValidator`].
pub use EntryValidator;
// Schema validation re-exports
/// Schema validator for validating yield values against JSON schemas
///
/// Use this to validate data before adding entries to a database with a known schema.
///
/// # Example
/// ```rust,no_run
/// use matchy::SchemaValidator;
/// use matchy::DataValue;
/// use std::collections::HashMap;
///
/// // Create validator for ThreatDB schema
/// let validator = SchemaValidator::new("threatdb")?;
///
/// // Validate a yield value
/// let mut data = HashMap::new();
/// data.insert("threat_level".to_string(), DataValue::String("high".to_string()));
/// data.insert("category".to_string(), DataValue::String("malware".to_string()));
/// data.insert("source".to_string(), DataValue::String("abuse.ch".to_string()));
///
/// validator.validate(&data)?; // Ok - valid ThreatDB entry
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
pub use crate;
/// Extension trait for adding schema validation to DatabaseBuilder
///
/// Import this trait to use the [`with_schema`](DatabaseBuilderExt::with_schema) method
/// on [`DatabaseBuilder`].
///
/// # Example
///
/// ```rust,ignore
/// use matchy::{DatabaseBuilder, DatabaseBuilderExt, MatchMode};
///
/// let mut builder = DatabaseBuilder::new(MatchMode::CaseInsensitive)
///     .with_schema("threatdb")?;
///
/// // Entries are now validated against ThreatDB schema
/// ```
pub use crateDatabaseBuilderExt;
// Version information
//
// Four public string constants exposing the library version as a whole and
// split into its major, minor and patch components.
//
// NOTE(review): every `env!` below is invoked without an argument list, which
// the macro does not accept (it needs the name of a compile-time environment
// variable). The arguments appear to have been lost from this copy of the
// file. Presumably they were Cargo's `CARGO_PKG_VERSION`,
// `CARGO_PKG_VERSION_MAJOR`, `CARGO_PKG_VERSION_MINOR` and
// `CARGO_PKG_VERSION_PATCH` — TODO confirm against the upstream source before
// restoring them.
/// Library version string
pub const MATCHY_VERSION: &str = env!;
/// Library major version
pub const MATCHY_VERSION_MAJOR: &str = env!;
/// Library minor version
pub const MATCHY_VERSION_MINOR: &str = env!;
/// Library patch version
pub const MATCHY_VERSION_PATCH: &str = env!;