// ---- Crate-wide lint policy -------------------------------------------------
// Warn on undocumented public items and turn on clippy's pedantic lint group.
#![warn(missing_docs, clippy::pedantic)]
// Do not complain about lint names the current toolchain does not recognize.
#![allow(unknown_lints)]
// Outside of test builds, `unwrap`/`expect` are denied by clippy ...
#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
// ... while test builds are explicitly allowed to use them.
#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
// `unsafe` code is forbidden throughout the crate.
#![forbid(unsafe_code)]
// ---- Modules ----------------------------------------------------------------
// `bloom`, `builder`, `detect`, `extract` and `index` are always compiled.
// `gzip`, `lz4`, `snappy` and `zstd` are each compiled only when the Cargo
// feature of the same name is enabled.
pub mod bloom;
pub mod builder;
pub mod detect;
pub mod extract;
#[cfg(feature = "gzip")]
pub mod gzip;
pub mod index;
#[cfg(feature = "lz4")]
pub mod lz4;
#[cfg(feature = "snappy")]
pub mod snappy;
#[cfg(feature = "zstd")]
pub mod zstd;
// ---- Crate-root re-exports --------------------------------------------------
// Selected items from `builder`, `extract` and `index` are re-exported so that
// callers can name them directly from the crate root.
pub use builder::{CompressedIndexBuilder, StreamingIndexBuilder};
pub use extract::extract_from_bytes;
// `scan_tarball_literals` is only re-exported when the `gzip` feature is on.
#[cfg(feature = "gzip")]
pub use extract::scan_tarball_literals;
pub use extract::CompressedBlock;
// NOTE(review): the `NgramBloom` re-export from the external `flashsieve` crate
// is gated on `gzip`, matching the gate on `bloom_from_literals` in this file;
// confirm that tying it to this particular feature is intended.
#[cfg(feature = "gzip")]
pub use flashsieve::NgramBloom;
pub use index::{BloomStats, CompressedIndex};
/// Identifies a compression format.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum CompressionFormat {
/// LZ4.
Lz4,
/// Snappy.
Snappy,
/// Gzip.
Gzip,
/// Zstandard.
Zstd,
}
/// Error type of this crate.
///
/// The `Display` text of each variant is produced by its `#[error(...)]`
/// format string. The enum is `#[non_exhaustive]`, so code outside this crate
/// must include a wildcard arm when matching on it.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ZiftError {
/// The given compression format is not supported.
#[error("unsupported compression format: {0}")]
UnsupportedFormat(CompressionFormat),
/// The compressed input was found to be invalid.
#[error("invalid compressed data at offset {offset}: {reason}")]
InvalidData {
/// Offset at which the invalid data was encountered.
offset: usize,
/// Explanation of what was wrong with the data.
reason: String,
},
/// Support for the requested format was not enabled for this build.
#[error("format {format} not enabled, compile with --features {feature}")]
FeatureNotEnabled {
/// The format that was requested.
format: CompressionFormat,
/// Name of the Cargo feature the message tells the user to enable.
feature: &'static str,
},
/// A block's size exceeds the permitted maximum.
#[error("block size {size} exceeds maximum {max}")]
BlockTooLarge {
/// The offending block size.
size: usize,
/// The maximum size that was exceeded.
max: usize,
},
/// An underlying I/O error; `#[from]` lets `?` convert a
/// [`std::io::Error`] into this variant.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
}
/// Builds an [`NgramBloom`] from the literal bytes of `blocks`.
///
/// Every pair of adjacent bytes inside a single block's `literals` is inserted
/// with `insert_ngram`. Pairs are never formed across two different blocks,
/// and a block holding fewer than two literal bytes contributes nothing.
///
/// `num_bits` is forwarded unchanged to [`NgramBloom::new`].
///
/// # Errors
///
/// Propagates any error returned by [`NgramBloom::new`].
#[cfg(feature = "gzip")]
pub fn bloom_from_literals(
    blocks: &[CompressedBlock],
    num_bits: usize,
) -> flashsieve::Result<NgramBloom> {
    let mut filter = NgramBloom::new(num_bits)?;
    blocks
        .iter()
        // `windows(2)` is taken per block, so no pair straddles a block boundary.
        .flat_map(|block| block.literals.windows(2))
        .for_each(|pair| {
            filter.insert_ngram(pair[0], pair[1]);
        });
    Ok(filter)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The LZ4 variant renders as `"LZ4"` through `to_string`.
    #[test]
    fn test_compression_format_display() {
        let rendered = CompressionFormat::Lz4.to_string();
        assert_eq!(rendered, "LZ4");
    }

    /// `verify_contains` finds needles present in the literals, rejects an
    /// absent one, and accepts the empty needle.
    #[test]
    fn test_compressed_block_verify() {
        let sample = CompressedBlock {
            literals: b"hello world ERROR message".to_vec(),
            uncompressed_len: Some(200),
            compressed_len: 100,
            compressed_offset: 0,
        };

        let found_error = sample.verify_contains(b"ERROR");
        assert!(found_error);
        let found_hello = sample.verify_contains(b"hello");
        assert!(found_hello);
        let found_fatal = sample.verify_contains(b"FATAL");
        assert!(!found_fatal);
        let found_empty = sample.verify_contains(b"");
        assert!(found_empty);
    }

    /// With no literals, only the empty needle is reported as contained.
    #[test]
    fn test_compressed_block_empty_literals() {
        let empty = CompressedBlock {
            literals: Vec::new(),
            uncompressed_len: Some(0),
            compressed_len: 0,
            compressed_offset: 0,
        };

        let found_anything = empty.verify_contains(b"anything");
        assert!(!found_anything);
        let found_empty = empty.verify_contains(b"");
        assert!(found_empty);
    }

    /// The `FeatureNotEnabled` message mentions the feature name.
    #[test]
    fn test_zift_error_feature_not_enabled() {
        let message = ZiftError::FeatureNotEnabled {
            format: CompressionFormat::Lz4,
            feature: "lz4",
        }
        .to_string();
        assert!(message.contains("lz4"));
    }

    /// Chained builder setters leave `expected_items` at the configured value.
    #[test]
    fn test_builder_pattern() {
        let initial = CompressedIndexBuilder::new(CompressionFormat::Lz4)
            .expected_items(1000)
            .false_positive_rate(0.01);

        // Same setter sequence as before, just bound to a name before the check.
        let configured = initial
            .expected_items(1000)
            .expected_items(1000)
            .bloom_bits(1024)
            .bloom_hashes(3);

        assert_eq!(configured.expected_items, Some(1000));
    }
}