Skip to main content

sanitize_engine/
lib.rs

1//! # sanitize-engine
2//!
3//! Deterministic, one-way data sanitization engine.
4//!
5//! This crate provides the core infrastructure for replacing
6//! sensitive values with category-aware, deterministic substitutes.
7//! Replacements are **one-way only** — there is no key file, mapping
8//! table, or restore mode. It is the foundation layer consumed by
9//! higher-level streaming and CLI components.
10//!
11//! ## Key Components
12//!
13//! - [`category::Category`] — Classification of sensitive values (email,
14//!   IP, name, etc.) that determines replacement format.
15//! - [`generator::ReplacementGenerator`] — Trait abstracting replacement
16//!   strategy (HMAC-deterministic or CSPRNG-random).
17//! - [`strategy::Strategy`] — Pluggable replacement strategies that can
18//!   be called **directly** without any mapping table.
19//! - [`store::MappingStore`] — Optional thread-safe per-run dedup cache
20//!   ensuring the same input always maps to the same output within a run.
21//! - [`scanner::StreamScanner`] — Streaming regex scanner with chunk +
22//!   overlap for bounded-memory processing.
23//!
24//! ## Concurrency Model
25//!
26//! The `MappingStore` uses `DashMap` (shard-level locking) for the forward
27//! dedup cache. All types are `Send + Sync`.
28//!
29//! ## Stability
30//!
31//! This crate is pre-1.0. The core guarantees — one-way replacement,
32//! deterministic mode, and length preservation — are stable. Processor
33//! heuristics, default limits, and report schema may evolve across minor
34//! versions.
35//!
36//! ## Example: Store-Level Replacement
37//!
38//! ```rust
39//! use sanitize_engine::category::Category;
40//! use sanitize_engine::generator::HmacGenerator;
41//! use sanitize_engine::store::MappingStore;
42//! use std::sync::Arc;
43//!
44//! // Create a deterministic generator with a fixed seed.
45//! let generator = Arc::new(HmacGenerator::new([42u8; 32]));
46//!
47//! // Create the replacement store (optional capacity limit).
48//! let store = MappingStore::new(generator, None);
49//!
50//! // Sanitize a value (one-way).
51//! let sanitized = store.get_or_insert(&Category::Email, "alice@corp.com").unwrap();
52//! assert!(sanitized.contains("@corp.com"));
53//! assert_eq!(sanitized.len(), "alice@corp.com".len());
54//!
55//! // Same input → same output (per-run consistency).
56//! let again = store.get_or_insert(&Category::Email, "alice@corp.com").unwrap();
57//! assert_eq!(sanitized, again);
58//! ```
59//!
60//! ## Example: Streaming Scanner
61//!
62//! ```rust
63//! use sanitize_engine::category::Category;
64//! use sanitize_engine::generator::HmacGenerator;
65//! use sanitize_engine::scanner::{ScanConfig, ScanPattern, StreamScanner};
66//! use sanitize_engine::store::MappingStore;
67//! use std::sync::Arc;
68//!
69//! // Build patterns.
70//! let patterns = vec![
71//!     ScanPattern::from_regex(r"alice@corp\.com", Category::Email, "alice_email").unwrap(),
72//! ];
73//!
74//! // Store with deterministic generator.
75//! let generator = Arc::new(HmacGenerator::new([42u8; 32]));
76//! let store = Arc::new(MappingStore::new(generator, Some(1_000_000)));
77//!
78//! // Scanner with an explicitly specified chunk configuration.
79//! let config = ScanConfig::new(1_048_576, 4096);
80//! let scanner = StreamScanner::new(patterns, store, config).unwrap();
81//!
82//! // Scan bytes in-memory.
83//! let input = b"Contact alice@corp.com for details.";
84//! let (output, stats) = scanner.scan_bytes(input).unwrap();
85//!
86//! assert_eq!(stats.replacements_applied, 1);
87//! assert_eq!(output.len(), input.len());
88//! ```
89
90// Crate-level lint configuration.
91#![forbid(unsafe_code)] // `forbid` (unlike `deny`) cannot be relaxed by a nested `allow`.
92#![warn(clippy::all, clippy::pedantic)]
93// Allow specific pedantic lints that are too noisy for this crate.
94#![allow(
95    clippy::module_name_repetitions, // Flags item names that repeat the enclosing module's name.
96    clippy::missing_panics_doc, // Flags public fns that may panic but lack a `# Panics` doc section.
97    clippy::must_use_candidate, // We add #[must_use] manually on key APIs.
98    clippy::uninlined_format_args, // Flags `format!("{}", x)` where `{x}` could be used.
99    clippy::redundant_closure_for_method_calls, // Flags `|x| x.method()` closures replaceable by a method path.
100    clippy::doc_markdown, // Flags identifiers in doc comments that are not wrapped in backticks.
101    clippy::similar_names // Flags bindings whose names differ only slightly.
102)]
103
104pub mod atomic;
105pub mod category;
106pub mod error;
107pub mod generator;
108pub mod processor;
109pub mod report;
110pub mod scanner;
111pub mod secrets;
112pub mod store;
113pub mod strategy;
114
115// Re-exports for convenience.
116pub use atomic::{atomic_write, AtomicFileWriter};
117pub use category::Category;
118pub use error::{Result, SanitizeError};
119pub use generator::{HmacGenerator, RandomGenerator, ReplacementGenerator};
120pub use processor::archive::{
121    ArchiveFilter, ArchiveFormat, ArchiveProcessor, ArchiveProgress, ArchiveStats,
122    DEFAULT_MAX_ARCHIVE_DEPTH,
123};
124pub use processor::{FieldRule, FileTypeProfile, Processor, ProcessorRegistry};
125pub use report::{FileReport, ReportBuilder, ReportMetadata, SanitizeReport};
126pub use scanner::{ScanConfig, ScanPattern, ScanProgress, ScanStats, StreamScanner};
127pub use secrets::{
128    decrypt_secrets, encrypt_secrets, load_secrets_auto, looks_encrypted, SecretEntry,
129    SecretsFormat,
130};
131pub use store::MappingStore;
132pub use strategy::{
133    EntropyMode, FakeIp, HmacHash, PreserveLength, RandomString, RandomUuid, Strategy,
134    StrategyGenerator,
135};