use std::fs::File;
use std::io::{BufWriter, Cursor, Seek, Write};
use std::path::Path;
use zip::write::FileOptions;
use zip::ZipWriter;
use crate::{Manifest, Result};
use super::{validate_path, PHANTOMS_PATH, ZIP_COMMENT};
/// Compression applied to an individual archive entry.
///
/// [`CompressionMethod::Deflate`] is the value produced by `Default`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Store the entry bytes without compressing them.
    Stored,
    /// Compress the entry with DEFLATE (the default).
    #[default]
    Deflate,
    /// Compress the entry with Zstandard; only compiled in when the `zstd`
    /// feature is enabled.
    #[cfg(feature = "zstd")]
    Zstd,
}
impl CompressionMethod {
    /// Translates this crate-level compression choice into the matching
    /// `zip` crate compression method.
    fn to_zip_method(self) -> zip::CompressionMethod {
        // Local alias keeps the match arms short.
        type Target = zip::CompressionMethod;

        match self {
            Self::Stored => Target::Stored,
            Self::Deflate => Target::Deflated,
            #[cfg(feature = "zstd")]
            Self::Zstd => Target::Zstd,
        }
    }
}
/// Archive writer that wraps a [`ZipWriter`] and tracks what has been
/// written so far.
pub struct CdxWriter<W>
where
    W: Write + Seek,
{
    /// Underlying ZIP writer that receives every entry.
    zip: ZipWriter<W>,
    /// Set to `true` once the manifest entry has been written successfully.
    manifest_written: bool,
    /// Paths of the file entries written so far, in write order.
    files_written: Vec<String>,
}
impl CdxWriter<BufWriter<File>> {
pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = File::create(path.as_ref()).map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
crate::Error::FileNotFound {
path: path.as_ref().to_path_buf(),
}
} else {
crate::Error::Io(e)
}
})?;
let writer = BufWriter::new(file);
Self::new(writer)
}
}
impl CdxWriter<Cursor<Vec<u8>>> {
    /// Returns a writer that accumulates the archive in an initially empty
    /// in-memory buffer.
    ///
    /// # Panics
    ///
    /// Panics if [`CdxWriter::new`] returns an error.
    #[must_use]
    pub fn in_memory() -> Self {
        Self::new(Cursor::new(Vec::new())).expect("in-memory writer should not fail")
    }
}
impl<W: Write + Seek> CdxWriter<W> {
    /// Wraps `writer` in a ZIP writer and sets the archive comment to
    /// `ZIP_COMMENT`.
    ///
    /// No manifest or file entry is written by this call.
    ///
    /// # Errors
    ///
    /// The body shown here always returns `Ok`.
    pub fn new(writer: W) -> Result<Self> {
        let mut zip = ZipWriter::new(writer);
        zip.set_comment(ZIP_COMMENT);
        Ok(Self {
            zip,
            manifest_written: false,
            files_written: Vec::new(),
        })
    }

    /// Serializes `manifest` as pretty-printed JSON and writes it, with
    /// Deflate compression, as the manifest entry of the archive.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidManifest` if a manifest was already written or
    /// if any file entry has already been recorded. Serialization failures
    /// and failures while writing the entry are propagated.
    pub fn write_manifest(&mut self, manifest: &Manifest) -> Result<()> {
        if self.manifest_written {
            return Err(crate::Error::InvalidManifest {
                reason: "manifest already written".to_string(),
            });
        }
        // Defensive: the manifest has to precede every other tracked entry.
        if !self.files_written.is_empty() {
            return Err(crate::Error::InvalidManifest {
                reason: "manifest must be the first file in the archive".to_string(),
            });
        }
        let json = serde_json::to_vec_pretty(manifest)?;
        self.write_file_internal(super::MANIFEST_PATH, &json, CompressionMethod::Deflate)?;
        // Only flag the manifest as written once the entry actually succeeded.
        self.manifest_written = true;
        Ok(())
    }

    /// Writes `data` as a new entry at `path` using the given `compression`.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidManifest` if the manifest has not been written
    /// yet or if `path` was already written through this writer. Errors from
    /// `validate_path` and from the underlying ZIP writer are propagated.
    pub fn write_file(
        &mut self,
        path: &str,
        data: &[u8],
        compression: CompressionMethod,
    ) -> Result<()> {
        if !self.manifest_written {
            return Err(crate::Error::InvalidManifest {
                reason: "manifest must be written before other files".to_string(),
            });
        }
        validate_path(path)?;
        // Compare against the borrowed path directly; no temporary `String`
        // is needed just to test membership.
        if self.files_written.iter().any(|existing| existing == path) {
            return Err(crate::Error::InvalidManifest {
                reason: format!("file already exists: {path}"),
            });
        }
        self.write_file_internal(path, data, compression)
    }

    /// Starts a ZIP entry at `path` (unix permissions `0o644`), writes all of
    /// `data` into it and records `path` in `files_written`.
    ///
    /// The path is recorded only after both the entry start and the data
    /// write have succeeded. No validation is performed here; callers are
    /// expected to have done it.
    fn write_file_internal(
        &mut self,
        path: &str,
        data: &[u8],
        compression: CompressionMethod,
    ) -> Result<()> {
        let options = FileOptions::<()>::default()
            .compression_method(compression.to_zip_method())
            .unix_permissions(0o644);
        self.zip.start_file(path, options)?;
        self.zip.write_all(data)?;
        self.files_written.push(path.to_string());
        Ok(())
    }

    /// Writes `data` at `path` like [`CdxWriter::write_file`] and returns the
    /// hash of `data` computed with `algorithm`.
    ///
    /// The hash is computed only after the write has succeeded, so rejected
    /// writes do not pay for hashing.
    ///
    /// # Errors
    ///
    /// Propagates every error of [`CdxWriter::write_file`].
    pub fn write_file_hashed(
        &mut self,
        path: &str,
        data: &[u8],
        compression: CompressionMethod,
        algorithm: crate::HashAlgorithm,
    ) -> Result<crate::DocumentId> {
        self.write_file(path, data, compression)?;
        Ok(crate::Hasher::hash(algorithm, data))
    }

    /// Serializes `phantoms` as pretty-printed JSON and writes it at
    /// `PHANTOMS_PATH` with Deflate compression.
    ///
    /// # Errors
    ///
    /// Propagates serialization failures and every error of
    /// [`CdxWriter::write_file`].
    pub fn write_phantoms(&mut self, phantoms: &crate::extensions::PhantomClusters) -> Result<()> {
        let json = serde_json::to_vec_pretty(phantoms)?;
        self.write_file(PHANTOMS_PATH, &json, CompressionMethod::Deflate)
    }

    /// Adds a directory entry for `path`, appending a trailing `/` when it is
    /// missing.
    ///
    /// NOTE(review): this neither requires the manifest to have been written
    /// nor records the directory in `files_written` — confirm that adding a
    /// directory ahead of the manifest is intended.
    ///
    /// # Errors
    ///
    /// Propagates errors from `validate_path` and from the underlying ZIP
    /// writer.
    pub fn add_directory(&mut self, path: &str) -> Result<()> {
        validate_path(path)?;
        let dir_path = if path.ends_with('/') {
            path.to_string()
        } else {
            format!("{path}/")
        };
        let options =
            FileOptions::<()>::default().compression_method(zip::CompressionMethod::Stored);
        self.zip.add_directory(&dir_path, options)?;
        Ok(())
    }

    /// Returns `true` once the manifest has been written successfully.
    #[must_use]
    pub fn manifest_written(&self) -> bool {
        self.manifest_written
    }

    /// Returns the paths of the file entries written so far, in write order.
    ///
    /// Directory entries added via [`CdxWriter::add_directory`] are not
    /// included.
    #[must_use]
    pub fn files_written(&self) -> &[String] {
        &self.files_written
    }

    /// Finalizes the archive and returns the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidManifest` if no manifest was written; errors
    /// from finalizing the underlying ZIP writer are propagated.
    pub fn finish(self) -> Result<W> {
        if !self.manifest_written {
            return Err(crate::Error::InvalidManifest {
                reason: "manifest must be written before finishing".to_string(),
            });
        }
        let writer = self.zip.finish()?;
        Ok(writer)
    }

    /// Finalizes the underlying ZIP writer without requiring a manifest and
    /// returns the inner writer.
    ///
    /// # Panics
    ///
    /// Panics if finalizing the underlying ZIP writer fails; the panic
    /// message includes the underlying error.
    #[must_use]
    pub fn abort(self) -> W {
        self.zip.finish().expect("abort should not fail")
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::archive::{CONTENT_PATH, DUBLIN_CORE_PATH};
    use crate::{ContentRef, DocumentId, Metadata};

    /// Builds a minimal manifest pointing at the standard content and
    /// Dublin Core paths, with a pending content hash.
    fn create_test_manifest() -> Manifest {
        Manifest::new(
            ContentRef {
                path: CONTENT_PATH.to_string(),
                hash: DocumentId::pending(),
                compression: None,
                merkle_root: None,
                block_count: None,
            },
            Metadata {
                dublin_core: DUBLIN_CORE_PATH.to_string(),
                custom: None,
            },
        )
    }

    /// Returns an in-memory writer whose manifest has already been written.
    fn writer_with_manifest() -> CdxWriter<Cursor<Vec<u8>>> {
        let mut writer = CdxWriter::in_memory();
        writer.write_manifest(&create_test_manifest()).unwrap();
        writer
    }

    /// A manifest plus two files produce a non-empty archive buffer.
    #[test]
    fn test_writer_in_memory() {
        let mut writer = writer_with_manifest();

        let content_written = writer.write_file(
            CONTENT_PATH,
            br#"{"version":"0.1","blocks":[]}"#,
            CompressionMethod::Deflate,
        );
        content_written.unwrap();

        let metadata_written = writer.write_file(
            DUBLIN_CORE_PATH,
            br#"{"title":"Test"}"#,
            CompressionMethod::Deflate,
        );
        metadata_written.unwrap();

        let archive = writer.finish().unwrap().into_inner();
        assert!(!archive.is_empty());
    }

    /// Writing a file before the manifest is rejected.
    #[test]
    fn test_writer_manifest_first() {
        let mut writer = CdxWriter::in_memory();
        assert!(writer
            .write_file(CONTENT_PATH, b"test", CompressionMethod::Deflate)
            .is_err());
    }

    /// A second manifest write is rejected.
    #[test]
    fn test_writer_manifest_once() {
        let mut writer = writer_with_manifest();
        assert!(writer.write_manifest(&create_test_manifest()).is_err());
    }

    /// Paths that climb out of the archive root are rejected.
    #[test]
    fn test_writer_path_traversal_rejected() {
        let mut writer = writer_with_manifest();
        assert!(writer
            .write_file("../secret", b"data", CompressionMethod::Deflate)
            .is_err());
    }

    /// Writing the same path twice is rejected the second time.
    #[test]
    fn test_writer_duplicate_file_rejected() {
        let mut writer = writer_with_manifest();
        writer
            .write_file(CONTENT_PATH, b"first", CompressionMethod::Deflate)
            .unwrap();
        assert!(writer
            .write_file(CONTENT_PATH, b"second", CompressionMethod::Deflate)
            .is_err());
    }

    /// Finishing without a manifest is an error.
    #[test]
    fn test_writer_finish_requires_manifest() {
        let outcome = CdxWriter::in_memory().finish();
        assert!(outcome.is_err());
    }

    /// Stored (uncompressed) entries are accepted and tracked.
    #[test]
    fn test_writer_compression_stored() {
        let mut writer = writer_with_manifest();
        writer
            .write_file(CONTENT_PATH, b"test data", CompressionMethod::Stored)
            .unwrap();
        assert!(writer.files_written().contains(&CONTENT_PATH.to_string()));
    }

    /// The hashed write returns a non-pending hash of the requested algorithm.
    #[test]
    fn test_writer_hashed() {
        let mut writer = writer_with_manifest();
        let payload = b"test content";
        let digest = writer
            .write_file_hashed(
                CONTENT_PATH,
                payload,
                CompressionMethod::Deflate,
                crate::HashAlgorithm::Sha256,
            )
            .unwrap();
        assert!(!digest.is_pending());
        assert_eq!(digest.algorithm(), crate::HashAlgorithm::Sha256);
    }
}