pub struct ArchiveProcessor { /* private fields */ }
Expand description
Processes archives by sanitizing each contained file and rebuilding the archive with the same format and preserved metadata.
§Usage
use sanitize_engine::processor::archive::{ArchiveProcessor, ArchiveFormat};
use sanitize_engine::processor::registry::ProcessorRegistry;
use sanitize_engine::scanner::{StreamScanner, ScanPattern, ScanConfig};
use sanitize_engine::generator::HmacGenerator;
use sanitize_engine::store::MappingStore;
use sanitize_engine::category::Category;
use std::sync::Arc;
let gen = Arc::new(HmacGenerator::new([42u8; 32]));
let store = Arc::new(MappingStore::new(gen, None));
let patterns = vec![
ScanPattern::from_regex(r"secret\w+", Category::Custom("secret".into()), "secrets").unwrap(),
];
let scanner = Arc::new(
StreamScanner::new(patterns, Arc::clone(&store), ScanConfig::default()).unwrap(),
);
let registry = Arc::new(ProcessorRegistry::with_builtins());
let archive_proc = ArchiveProcessor::new(registry, scanner, store, vec![]);
Implementations§
Source§impl ArchiveProcessor
impl ArchiveProcessor
Sourcepub fn new(
registry: Arc<ProcessorRegistry>,
scanner: Arc<StreamScanner>,
store: Arc<MappingStore>,
profiles: Vec<FileTypeProfile>,
) -> Self
pub fn new( registry: Arc<ProcessorRegistry>, scanner: Arc<StreamScanner>, store: Arc<MappingStore>, profiles: Vec<FileTypeProfile>, ) -> Self
Create a new archive processor.
§Arguments
- registry — structured processor registry.
- scanner — streaming scanner for fallback.
- store — shared mapping store for one-way dedup replacements.
- profiles — file-type profiles for structured matching.
Sourcepub fn with_max_depth(self, depth: u32) -> Self
pub fn with_max_depth(self, depth: u32) -> Self
Override the maximum nesting depth for recursive archive processing.
The default is DEFAULT_MAX_ARCHIVE_DEPTH (3). Values above
10 are clamped to 10.
Sourcepub fn process_tar<R: Read, W: Write>(
&self,
reader: R,
writer: W,
) -> Result<ArchiveStats>
pub fn process_tar<R: Read, W: Write>( &self, reader: R, writer: W, ) -> Result<ArchiveStats>
Process a .tar archive, sanitizing each file entry and
rebuilding the archive with preserved metadata.
Entries that are not regular files (directories, symlinks, etc.) are copied through unchanged.
§Errors
Returns SanitizeError::ArchiveError on I/O failures, or
SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.
Sourcepub fn process_tar_gz<R: Read, W: Write>(
&self,
reader: R,
writer: W,
) -> Result<ArchiveStats>
pub fn process_tar_gz<R: Read, W: Write>( &self, reader: R, writer: W, ) -> Result<ArchiveStats>
Process a .tar.gz archive (gzip-compressed tar).
Decompresses on the fly, processes each entry, and recompresses the output.
§Errors
Returns SanitizeError::ArchiveError on I/O failures, or
SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.
Sourcepub fn process_zip<R: Read + Seek, W: Write + Seek>(
&self,
reader: R,
writer: W,
) -> Result<ArchiveStats>
pub fn process_zip<R: Read + Seek, W: Write + Seek>( &self, reader: R, writer: W, ) -> Result<ArchiveStats>
Process a .zip archive, sanitizing each file entry and
rebuilding the archive with preserved metadata.
§Type Bounds
Zip requires seekable I/O for both reading and writing.
§Errors
Returns SanitizeError::ArchiveError on I/O failures, or
SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.
Sourcepub fn process<R: Read + Seek, W: Write + Seek>(
&self,
reader: R,
writer: W,
format: ArchiveFormat,
) -> Result<ArchiveStats>
pub fn process<R: Read + Seek, W: Write + Seek>( &self, reader: R, writer: W, format: ArchiveFormat, ) -> Result<ArchiveStats>
Process an archive in the format given by the format argument, using the matching format-specific processing.
Zip archives require both the reader and the writer to implement Seek,
so this method requires Read + Seek and Write + Seek for every format.
Tar and tar.gz do not need seeking, but the bound is imposed
to provide a single entry point.
§Errors
Returns SanitizeError::ArchiveError on I/O failures, or
SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.