use super::{ArchiveEntry, ArchiveMetadata};
use crate::error::{KreuzbergError, Result};
use crate::extractors::security::SecurityLimits;
use flate2::read::GzDecoder;
use std::collections::HashMap;
use std::io::Read;
/// Reports whether `data` carries the `ustar` magic bytes at offset 257.
///
/// Buffers of 262 bytes or fewer are rejected before any slicing happens, so the
/// index expression below cannot go out of bounds.
fn is_tar_archive(data: &[u8]) -> bool {
    const MAGIC: &[u8] = b"ustar";
    const MAGIC_OFFSET: usize = 257;

    if data.len() <= MAGIC_OFFSET + MAGIC.len() {
        return false;
    }
    data[MAGIC_OFFSET..].starts_with(MAGIC)
}
/// Inflates a gzip stream into memory, refusing to return more than `max_size` bytes.
///
/// The decoder is read through a `take` adapter capped at one byte past the limit, so an
/// oversized stream is detected without buffering its entire output.
///
/// # Errors
///
/// Returns a parsing error when the gzip stream cannot be decoded, and a validation error
/// when the decompressed data is larger than `max_size`.
fn decompress_gzip_limited(bytes: &[u8], max_size: u64) -> Result<Vec<u8>> {
    let decoder = GzDecoder::new(bytes);
    // Read one byte beyond the limit so "exactly at the limit" can be told apart from
    // "over the limit". `saturating_add` keeps a limit of `u64::MAX` from overflowing:
    // a plain `+ 1` would panic in debug builds and wrap to a zero-byte cap (silently
    // returning empty output) in release builds.
    let mut limited = decoder.take(max_size.saturating_add(1));
    let mut decompressed = Vec::new();
    limited
        .read_to_end(&mut decompressed)
        .map_err(|e| KreuzbergError::parsing(format!("Failed to decompress gzip: {}", e)))?;
    if decompressed.len() as u64 > max_size {
        return Err(KreuzbergError::validation(format!(
            "Gzip decompressed size exceeds {} byte limit",
            max_size
        )));
    }
    Ok(decompressed)
}
/// Decompresses gzip data, capping the output at `limits.max_archive_size` bytes.
///
/// # Errors
///
/// Propagates whatever error `decompress_gzip_limited` reports: a parsing error for an
/// undecodable stream, or a validation error when the output exceeds the cap.
pub fn decompress_gzip(bytes: &[u8], limits: &SecurityLimits) -> Result<Vec<u8>> {
    let byte_budget = limits.max_archive_size as u64;
    decompress_gzip_limited(bytes, byte_budget)
}
/// Extracts metadata and text content from gzip-compressed data.
///
/// When the decompressed payload looks like a tar archive, the work is delegated to the
/// `super::tar` helpers and the reported format is `"GZIP+TAR"`. Otherwise the payload is
/// treated as a single file named after the gzip header's filename field, falling back to
/// `"compressed_content"` when that field is absent or not valid UTF-8.
///
/// The returned map holds the payload text only when it is valid UTF-8; for any other
/// payload the map is empty while the metadata is still populated.
///
/// # Errors
///
/// Returns an error when decompression fails or exceeds `limits.max_archive_size`, or when
/// one of the tar helpers fails.
pub fn extract_gzip(bytes: &[u8], limits: &SecurityLimits) -> Result<(ArchiveMetadata, HashMap<String, String>)> {
    let payload = decompress_gzip_limited(bytes, limits.max_archive_size as u64)?;

    if is_tar_archive(&payload) {
        let mut tar_metadata = super::tar::extract_tar_metadata(&payload, limits)?;
        tar_metadata.format = "GZIP+TAR".to_string();
        let tar_contents = super::tar::extract_tar_text_content(&payload, limits)?;
        return Ok((tar_metadata, tar_contents));
    }

    // A second decoder is used only to look at the gzip header. One byte is pulled through
    // it first (result ignored on purpose) — presumably so the header has been parsed by
    // the time `header()` is queried.
    let mut header_reader = GzDecoder::new(bytes);
    let mut probe = [0u8; 1];
    let _ = header_reader.read(&mut probe);
    let entry_name: String = header_reader
        .header()
        .and_then(|header| header.filename())
        .and_then(|raw| std::str::from_utf8(raw).ok())
        .map_or_else(|| String::from("compressed_content"), str::to_owned);

    let byte_len = payload.len() as u64;
    let entry = ArchiveEntry {
        path: entry_name.clone(),
        size: byte_len,
        is_dir: false,
    };
    let metadata = ArchiveMetadata {
        format: "GZIP".to_string(),
        file_list: vec![entry],
        file_count: 1,
        total_size: byte_len,
    };

    // Zero or one entry: the text is kept only when the payload decodes as UTF-8.
    let contents: HashMap<String, String> = String::from_utf8(payload)
        .ok()
        .map(|text| (entry_name, text))
        .into_iter()
        .collect();

    Ok((metadata, contents))
}
/// Builds archive metadata for gzip-compressed data without returning its content.
///
/// A payload that looks like a tar archive is described by `super::tar::extract_tar_metadata`
/// with the format overridden to `"GZIP+TAR"`. Any other payload is reported as a single
/// `"GZIP"` entry whose path is the gzip header's filename field, or `"compressed_content"`
/// when that field is absent or not valid UTF-8, and whose size is the decompressed length.
///
/// # Errors
///
/// Returns an error when decompression fails or exceeds `limits.max_archive_size`, or when
/// the tar metadata helper fails.
pub fn extract_gzip_metadata(bytes: &[u8], limits: &SecurityLimits) -> Result<ArchiveMetadata> {
    let inflated = decompress_gzip_limited(bytes, limits.max_archive_size as u64)?;

    if !is_tar_archive(&inflated) {
        // Separate decoder used purely for header inspection; a single byte is read first
        // (outcome ignored) — presumably to make sure the header has been parsed.
        let mut header_probe = GzDecoder::new(bytes);
        let mut one_byte = [0u8; 1];
        let _ = header_probe.read(&mut one_byte);

        let stored_name = header_probe
            .header()
            .and_then(|header| header.filename())
            .and_then(|raw| std::str::from_utf8(raw).ok());
        let member_name = match stored_name {
            Some(name) => name.to_string(),
            None => "compressed_content".to_string(),
        };

        let inflated_len = inflated.len() as u64;
        return Ok(ArchiveMetadata {
            format: "GZIP".to_string(),
            file_list: vec![ArchiveEntry {
                path: member_name,
                size: inflated_len,
                is_dir: false,
            }],
            file_count: 1,
            total_size: inflated_len,
        });
    }

    let mut tar_metadata = super::tar::extract_tar_metadata(&inflated, limits)?;
    tar_metadata.format = "GZIP+TAR".to_string();
    Ok(tar_metadata)
}
/// Returns the text content of gzip-compressed data, keyed by entry name.
///
/// A payload that looks like a tar archive is handed to
/// `super::tar::extract_tar_text_content`. Any other payload produces at most one entry:
/// the key is the gzip header's filename field (or `"compressed_content"` when that field
/// is absent or not valid UTF-8) and the value is the payload as a string. If the payload
/// is not valid UTF-8 the returned map is empty.
///
/// # Errors
///
/// Returns an error when decompression fails or exceeds `limits.max_archive_size`, or when
/// the tar helper fails.
pub fn extract_gzip_text_content(bytes: &[u8], limits: &SecurityLimits) -> Result<HashMap<String, String>> {
    let raw = decompress_gzip_limited(bytes, limits.max_archive_size as u64)?;
    if is_tar_archive(&raw) {
        return super::tar::extract_tar_text_content(&raw, limits);
    }

    // Fresh decoder just for the header; one byte is read and its result discarded —
    // presumably so that `header()` has something to report.
    let mut gz = GzDecoder::new(bytes);
    let mut scratch = [0u8; 1];
    let _ = gz.read(&mut scratch);

    let stored_name = gz
        .header()
        .and_then(|header| header.filename())
        .and_then(|name| std::str::from_utf8(name).ok());
    let key = stored_name.unwrap_or("compressed_content").to_string();

    // Non-UTF-8 payloads fall through to an empty map rather than an error.
    let text_by_name = String::from_utf8(raw)
        .map(|text| HashMap::from([(key, text)]))
        .unwrap_or_default();
    Ok(text_by_name)
}
/// Return value of [`extract_gzip_with_bytes`]: archive metadata, a map of text content,
/// and a map of raw file bytes.
type GzipWithBytesResult = (ArchiveMetadata, HashMap<String, String>, HashMap<String, Vec<u8>>);

/// Extracts metadata, text content, and raw bytes from gzip-compressed data.
///
/// When the decompressed payload looks like a tar archive, all three results come from the
/// `super::tar` helpers and the reported format is `"GZIP+TAR"`. Otherwise the payload is
/// treated as a single file named after the gzip header's filename field, falling back to
/// `"compressed_content"` when that field is absent or not valid UTF-8. In that case the
/// bytes map always holds the payload, and the text map holds it only when it is valid
/// UTF-8.
///
/// # Errors
///
/// Returns an error when decompression fails or exceeds `limits.max_archive_size`, or when
/// one of the tar helpers fails.
pub fn extract_gzip_with_bytes(bytes: &[u8], limits: &SecurityLimits) -> Result<GzipWithBytesResult> {
    let decompressed = decompress_gzip_limited(bytes, limits.max_archive_size as u64)?;
    if is_tar_archive(&decompressed) {
        let mut metadata = super::tar::extract_tar_metadata(&decompressed, limits)?;
        metadata.format = "GZIP+TAR".to_string();
        let contents = super::tar::extract_tar_text_content(&decompressed, limits)?;
        let file_bytes = super::tar::extract_tar_file_bytes(&decompressed, limits)?;
        return Ok((metadata, contents, file_bytes));
    }

    // A second decoder is used only to inspect the gzip header. One byte is read through
    // it first (result deliberately ignored) — presumably so the header has been parsed
    // before `header()` is queried.
    let mut decoder = GzDecoder::new(bytes);
    let mut discard = [0u8; 1];
    let _ = decoder.read(&mut discard);
    let filename = decoder
        .header()
        .and_then(|h| h.filename())
        .and_then(|f| std::str::from_utf8(f).ok())
        .unwrap_or("compressed_content")
        .to_string();

    let size = decompressed.len() as u64;
    let metadata = ArchiveMetadata {
        format: "GZIP".to_string(),
        file_list: vec![ArchiveEntry {
            path: filename.clone(),
            size,
            is_dir: false,
        }],
        file_count: 1,
        total_size: size,
    };

    // Validate UTF-8 on a borrow and copy only when a `String` is actually needed; the
    // original buffer is then moved into the bytes map. This avoids duplicating the whole
    // payload for binary content, where the text copy would just be thrown away.
    let text = std::str::from_utf8(&decompressed).ok().map(str::to_owned);

    let mut file_bytes = HashMap::new();
    file_bytes.insert(filename.clone(), decompressed);

    let mut contents = HashMap::new();
    if let Some(text) = text {
        contents.insert(filename, text);
    }

    Ok((metadata, contents, file_bytes))
}