1use std::fs::File;
4use std::io::{BufWriter, Cursor, Seek, Write};
5use std::path::Path;
6
7use zip::write::FileOptions;
8use zip::ZipWriter;
9
10use crate::{Manifest, Result};
11
12use super::{validate_path, PHANTOMS_PATH, ZIP_COMMENT};
13
/// Compression applied to a single entry when it is written into the archive.
///
/// [`CompressionMethod::Deflate`] is the default variant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Translated to the ZIP `Stored` method.
    Stored,
    /// Translated to the ZIP `Deflated` method; chosen by `Default`.
    #[default]
    Deflate,
    /// Translated to the ZIP `Zstd` method; only compiled with the `zstd` feature.
    #[cfg(feature = "zstd")]
    Zstd,
}
26
27impl CompressionMethod {
28 fn to_zip_method(self) -> zip::CompressionMethod {
29 match self {
30 Self::Stored => zip::CompressionMethod::Stored,
31 Self::Deflate => zip::CompressionMethod::Deflated,
32 #[cfg(feature = "zstd")]
33 Self::Zstd => zip::CompressionMethod::Zstd,
34 }
35 }
36}
37
38pub struct CdxWriter<W: Write + Seek> {
57 zip: ZipWriter<W>,
58 manifest_written: bool,
59 files_written: Vec<String>,
60}
61
62impl CdxWriter<BufWriter<File>> {
63 pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
69 let file = File::create(path.as_ref()).map_err(|e| {
70 if e.kind() == std::io::ErrorKind::NotFound {
71 crate::Error::FileNotFound {
72 path: path.as_ref().to_path_buf(),
73 }
74 } else {
75 crate::Error::Io(e)
76 }
77 })?;
78 let writer = BufWriter::new(file);
79 Self::new(writer)
80 }
81}
82
83impl CdxWriter<Cursor<Vec<u8>>> {
84 #[must_use]
91 pub fn in_memory() -> Self {
92 let cursor = Cursor::new(Vec::new());
93 Self::new(cursor).expect("in-memory writer should not fail")
95 }
96}
97
98impl<W: Write + Seek> CdxWriter<W> {
99 pub fn new(writer: W) -> Result<Self> {
105 let mut zip = ZipWriter::new(writer);
106 zip.set_comment(ZIP_COMMENT);
107
108 Ok(Self {
109 zip,
110 manifest_written: false,
111 files_written: Vec::new(),
112 })
113 }
114
115 pub fn write_manifest(&mut self, manifest: &Manifest) -> Result<()> {
126 if self.manifest_written {
127 return Err(crate::Error::InvalidManifest {
128 reason: "manifest already written".to_string(),
129 });
130 }
131
132 if !self.files_written.is_empty() {
133 return Err(crate::Error::InvalidManifest {
134 reason: "manifest must be the first file in the archive".to_string(),
135 });
136 }
137
138 let json = serde_json::to_vec_pretty(manifest)?;
139 self.write_file_internal(super::MANIFEST_PATH, &json, CompressionMethod::Deflate)?;
140 self.manifest_written = true;
141
142 Ok(())
143 }
144
145 pub fn write_file(
155 &mut self,
156 path: &str,
157 data: &[u8],
158 compression: CompressionMethod,
159 ) -> Result<()> {
160 if !self.manifest_written {
161 return Err(crate::Error::InvalidManifest {
162 reason: "manifest must be written before other files".to_string(),
163 });
164 }
165
166 validate_path(path)?;
167
168 if self.files_written.contains(&path.to_string()) {
169 return Err(crate::Error::InvalidManifest {
170 reason: format!("file already exists: {path}"),
171 });
172 }
173
174 self.write_file_internal(path, data, compression)
175 }
176
177 fn write_file_internal(
179 &mut self,
180 path: &str,
181 data: &[u8],
182 compression: CompressionMethod,
183 ) -> Result<()> {
184 let options = FileOptions::<()>::default()
185 .compression_method(compression.to_zip_method())
186 .unix_permissions(0o644);
187
188 self.zip.start_file(path, options)?;
189 self.zip.write_all(data)?;
190 self.files_written.push(path.to_string());
191
192 Ok(())
193 }
194
195 pub fn write_file_hashed(
203 &mut self,
204 path: &str,
205 data: &[u8],
206 compression: CompressionMethod,
207 algorithm: crate::HashAlgorithm,
208 ) -> Result<crate::DocumentId> {
209 let hash = crate::Hasher::hash(algorithm, data);
210 self.write_file(path, data, compression)?;
211 Ok(hash)
212 }
213
214 pub fn write_phantoms(&mut self, phantoms: &crate::extensions::PhantomClusters) -> Result<()> {
224 let json = serde_json::to_vec_pretty(phantoms)?;
225 self.write_file(PHANTOMS_PATH, &json, CompressionMethod::Deflate)
226 }
227
228 pub fn add_directory(&mut self, path: &str) -> Result<()> {
237 validate_path(path)?;
238
239 let dir_path = if path.ends_with('/') {
240 path.to_string()
241 } else {
242 format!("{path}/")
243 };
244
245 let options =
246 FileOptions::<()>::default().compression_method(zip::CompressionMethod::Stored);
247
248 self.zip.add_directory(&dir_path, options)?;
249
250 Ok(())
251 }
252
253 #[must_use]
255 pub fn manifest_written(&self) -> bool {
256 self.manifest_written
257 }
258
259 #[must_use]
261 pub fn files_written(&self) -> &[String] {
262 &self.files_written
263 }
264
265 pub fn finish(self) -> Result<W> {
273 if !self.manifest_written {
274 return Err(crate::Error::InvalidManifest {
275 reason: "manifest must be written before finishing".to_string(),
276 });
277 }
278
279 let writer = self.zip.finish()?;
280 Ok(writer)
281 }
282
283 #[must_use]
292 pub fn abort(self) -> W {
293 self.zip.finish().unwrap_or_else(|_| {
294 panic!("abort should not fail")
296 })
297 }
298}
299
#[cfg(test)]
mod tests {
    use super::*;
    use crate::archive::{CONTENT_PATH, DUBLIN_CORE_PATH};
    use crate::{ContentRef, DocumentId, Metadata};

    /// Minimal manifest pointing at the standard content and Dublin Core paths.
    fn manifest_fixture() -> Manifest {
        Manifest::new(
            ContentRef {
                path: CONTENT_PATH.to_string(),
                hash: DocumentId::pending(),
                compression: None,
                merkle_root: None,
                block_count: None,
            },
            Metadata {
                dublin_core: DUBLIN_CORE_PATH.to_string(),
                custom: None,
            },
        )
    }

    /// In-memory writer whose manifest has already been written.
    fn writer_with_manifest() -> CdxWriter<std::io::Cursor<Vec<u8>>> {
        let mut writer = CdxWriter::in_memory();
        writer.write_manifest(&manifest_fixture()).unwrap();
        writer
    }

    #[test]
    fn test_writer_in_memory() {
        let mut writer = writer_with_manifest();

        let entries: [(&str, &[u8]); 2] = [
            (CONTENT_PATH, br#"{"version":"0.1","blocks":[]}"#),
            (DUBLIN_CORE_PATH, br#"{"title":"Test"}"#),
        ];
        for (path, payload) in entries {
            writer
                .write_file(path, payload, CompressionMethod::Deflate)
                .unwrap();
        }

        let archive = writer.finish().unwrap().into_inner();
        assert!(!archive.is_empty());
    }

    #[test]
    fn test_writer_manifest_first() {
        // Writing a regular file before the manifest must be rejected.
        let mut writer = CdxWriter::in_memory();

        let outcome = writer.write_file(CONTENT_PATH, b"test", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_manifest_once() {
        // A second manifest write must be rejected.
        let mut writer = writer_with_manifest();

        let outcome = writer.write_manifest(&manifest_fixture());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_path_traversal_rejected() {
        let mut writer = writer_with_manifest();

        let outcome = writer.write_file("../secret", b"data", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_duplicate_file_rejected() {
        let mut writer = writer_with_manifest();

        writer
            .write_file(CONTENT_PATH, b"first", CompressionMethod::Deflate)
            .unwrap();

        let outcome = writer.write_file(CONTENT_PATH, b"second", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_finish_requires_manifest() {
        let outcome = CdxWriter::in_memory().finish();
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_compression_stored() {
        let mut writer = writer_with_manifest();

        writer
            .write_file(CONTENT_PATH, b"test data", CompressionMethod::Stored)
            .unwrap();

        let recorded = writer.files_written().iter().any(|p| p == CONTENT_PATH);
        assert!(recorded);
    }

    #[test]
    fn test_writer_hashed() {
        let mut writer = writer_with_manifest();

        let payload = b"test content";
        let digest = writer
            .write_file_hashed(
                CONTENT_PATH,
                payload,
                CompressionMethod::Deflate,
                crate::HashAlgorithm::Sha256,
            )
            .unwrap();

        assert!(!digest.is_pending());
        assert_eq!(digest.algorithm(), crate::HashAlgorithm::Sha256);
    }
}