Skip to main content

ArchiveProcessor

Struct ArchiveProcessor 

Source
pub struct ArchiveProcessor { /* private fields */ }
Expand description

Processes archives by sanitizing each contained file and rebuilding the archive with the same format and preserved metadata.

§Usage

use sanitize_engine::processor::archive::{ArchiveProcessor, ArchiveFormat};
use sanitize_engine::processor::registry::ProcessorRegistry;
use sanitize_engine::scanner::{StreamScanner, ScanPattern, ScanConfig};
use sanitize_engine::generator::HmacGenerator;
use sanitize_engine::store::MappingStore;
use sanitize_engine::category::Category;
use std::sync::Arc;

let gen = Arc::new(HmacGenerator::new([42u8; 32]));
let store = Arc::new(MappingStore::new(gen, None));
let patterns = vec![
    ScanPattern::from_regex(r"secret\w+", Category::Custom("secret".into()), "secrets").unwrap(),
];
let scanner = Arc::new(
    StreamScanner::new(patterns, Arc::clone(&store), ScanConfig::default()).unwrap(),
);
let registry = Arc::new(ProcessorRegistry::with_builtins());

let archive_proc = ArchiveProcessor::new(registry, scanner, store, vec![]);

Implementations§

Source§

impl ArchiveProcessor

Source

pub fn new( registry: Arc<ProcessorRegistry>, scanner: Arc<StreamScanner>, store: Arc<MappingStore>, profiles: Vec<FileTypeProfile>, ) -> Self

Create a new archive processor.

§Arguments
  • registry — structured processor registry.
  • scanner — streaming scanner for fallback.
  • store — shared mapping store for one-way dedup replacements.
  • profiles — file-type profiles for structured matching.
Source

pub fn with_max_depth(self, depth: u32) -> Self

Override the maximum nesting depth for recursive archive processing.

The default is DEFAULT_MAX_ARCHIVE_DEPTH (3). Values above 10 are clamped to 10.

Source

pub fn process_tar<R: Read, W: Write>( &self, reader: R, writer: W, ) -> Result<ArchiveStats>

Process a .tar archive, sanitizing each file entry and rebuilding the archive with preserved metadata.

Entries that are not regular files (directories, symlinks, etc.) are copied through unchanged.

§Errors

Returns SanitizeError::ArchiveError on I/O failures, or SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.

Source

pub fn process_tar_gz<R: Read, W: Write>( &self, reader: R, writer: W, ) -> Result<ArchiveStats>

Process a .tar.gz archive (gzip-compressed tar).

Decompresses on the fly, processes each entry, and recompresses the output.

§Errors

Returns SanitizeError::ArchiveError on I/O failures, or SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.

Source

pub fn process_zip<R: Read + Seek, W: Write + Seek>( &self, reader: R, writer: W, ) -> Result<ArchiveStats>

Process a .zip archive, sanitizing each file entry and rebuilding the archive with preserved metadata.

§Type Bounds

Zip requires seekable I/O for both reading and writing.

§Errors

Returns SanitizeError::ArchiveError on I/O failures, or SanitizeError::RecursionDepthExceeded when nested archives exceed the maximum nesting depth.

Source

pub fn process<R: Read + Seek, W: Write + Seek>( &self, reader: R, writer: W, format: ArchiveFormat, ) -> Result<ArchiveStats>

Process an archive of the given format, dispatching to the matching format-specific method (tar, tar.gz, or zip) according to the format argument.

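Zip archives require both the reader and the writer to implement Seek. This method therefore requires Read + Seek and Write + Seek for every format, so that a single entry point can cover all of them. Tar and tar.gz do not themselves need seeking; callers with non-seekable streams can call process_tar or process_tar_gz directly.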

§Errors

Returns SanitizeError::ArchiveError on I/O failures or SanitizeError::RecursionDepthExceeded for nested archives.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V