1use std::fs::File;
4use std::io::{BufWriter, Cursor, Seek, Write};
5use std::path::Path;
6
7use zip::write::FileOptions;
8use zip::ZipWriter;
9
10use crate::{Manifest, Result};
11
12use super::{validate_path, PHANTOMS_PATH, ZIP_COMMENT};
13
/// Compression selector for an individual archive entry.
///
/// [`CompressionMethod::Deflate`] is the default variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CompressionMethod {
    /// Translated to `zip::CompressionMethod::Stored` when writing.
    Stored,
    /// Translated to `zip::CompressionMethod::Deflated` when writing (default).
    #[default]
    Deflate,
    /// Translated to `zip::CompressionMethod::Zstd`; only compiled with the
    /// `zstd` feature enabled.
    #[cfg(feature = "zstd")]
    Zstd,
}
26
27impl CompressionMethod {
28 fn to_zip_method(self) -> zip::CompressionMethod {
29 match self {
30 Self::Stored => zip::CompressionMethod::Stored,
31 Self::Deflate => zip::CompressionMethod::Deflated,
32 #[cfg(feature = "zstd")]
33 Self::Zstd => zip::CompressionMethod::Zstd,
34 }
35 }
36}
37
38pub struct CdxWriter<W: Write + Seek> {
57 zip: ZipWriter<W>,
58 manifest_written: bool,
59 files_written: Vec<String>,
60}
61
62impl CdxWriter<BufWriter<File>> {
63 pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
69 let file = File::create(path)?;
70 let writer = BufWriter::new(file);
71 Self::new(writer)
72 }
73}
74
75impl CdxWriter<Cursor<Vec<u8>>> {
76 #[must_use]
83 pub fn in_memory() -> Self {
84 let cursor = Cursor::new(Vec::new());
85 Self::new(cursor).expect("in-memory writer should not fail")
87 }
88}
89
90impl<W: Write + Seek> CdxWriter<W> {
91 pub fn new(writer: W) -> Result<Self> {
97 let mut zip = ZipWriter::new(writer);
98 zip.set_comment(ZIP_COMMENT);
99
100 Ok(Self {
101 zip,
102 manifest_written: false,
103 files_written: Vec::new(),
104 })
105 }
106
107 pub fn write_manifest(&mut self, manifest: &Manifest) -> Result<()> {
118 if self.manifest_written {
119 return Err(crate::Error::InvalidManifest {
120 reason: "manifest already written".to_string(),
121 });
122 }
123
124 if !self.files_written.is_empty() {
125 return Err(crate::Error::InvalidManifest {
126 reason: "manifest must be the first file in the archive".to_string(),
127 });
128 }
129
130 let json = serde_json::to_vec_pretty(manifest)?;
131 self.write_file_internal(super::MANIFEST_PATH, &json, CompressionMethod::Deflate)?;
132 self.manifest_written = true;
133
134 Ok(())
135 }
136
137 pub fn write_file(
147 &mut self,
148 path: &str,
149 data: &[u8],
150 compression: CompressionMethod,
151 ) -> Result<()> {
152 if !self.manifest_written {
153 return Err(crate::Error::InvalidManifest {
154 reason: "manifest must be written before other files".to_string(),
155 });
156 }
157
158 validate_path(path)?;
159
160 if self.files_written.contains(&path.to_string()) {
161 return Err(crate::Error::InvalidManifest {
162 reason: format!("file already exists: {path}"),
163 });
164 }
165
166 self.write_file_internal(path, data, compression)
167 }
168
169 fn write_file_internal(
171 &mut self,
172 path: &str,
173 data: &[u8],
174 compression: CompressionMethod,
175 ) -> Result<()> {
176 let options = FileOptions::<()>::default()
177 .compression_method(compression.to_zip_method())
178 .unix_permissions(0o644);
179
180 self.zip.start_file(path, options)?;
181 self.zip.write_all(data)?;
182 self.files_written.push(path.to_string());
183
184 Ok(())
185 }
186
187 pub fn write_file_hashed(
195 &mut self,
196 path: &str,
197 data: &[u8],
198 compression: CompressionMethod,
199 algorithm: crate::HashAlgorithm,
200 ) -> Result<crate::DocumentId> {
201 let hash = crate::Hasher::hash(algorithm, data);
202 self.write_file(path, data, compression)?;
203 Ok(hash)
204 }
205
206 pub fn write_phantoms(&mut self, phantoms: &crate::extensions::PhantomClusters) -> Result<()> {
216 let json = serde_json::to_vec_pretty(phantoms)?;
217 self.write_file(PHANTOMS_PATH, &json, CompressionMethod::Deflate)
218 }
219
220 pub fn add_directory(&mut self, path: &str) -> Result<()> {
229 validate_path(path)?;
230
231 let dir_path = if path.ends_with('/') {
232 path.to_string()
233 } else {
234 format!("{path}/")
235 };
236
237 let options =
238 FileOptions::<()>::default().compression_method(zip::CompressionMethod::Stored);
239
240 self.zip.add_directory(&dir_path, options)?;
241
242 Ok(())
243 }
244
    /// Returns `true` once the manifest has been written to the archive.
    #[must_use]
    pub fn manifest_written(&self) -> bool {
        self.manifest_written
    }
250
251 #[must_use]
253 pub fn files_written(&self) -> &[String] {
254 &self.files_written
255 }
256
257 pub fn finish(self) -> Result<W> {
265 if !self.manifest_written {
266 return Err(crate::Error::InvalidManifest {
267 reason: "manifest must be written before finishing".to_string(),
268 });
269 }
270
271 let writer = self.zip.finish()?;
272 Ok(writer)
273 }
274
275 #[must_use]
284 pub fn abort(self) -> W {
285 self.zip.finish().unwrap_or_else(|_| {
286 panic!("abort should not fail")
288 })
289 }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use crate::archive::{CONTENT_PATH, DUBLIN_CORE_PATH};
    use crate::{ContentRef, DocumentId, Metadata};

    /// Builds a minimal manifest pointing at the standard content and Dublin
    /// Core paths, with a pending content hash.
    fn create_test_manifest() -> Manifest {
        Manifest::new(
            ContentRef {
                path: CONTENT_PATH.to_string(),
                hash: DocumentId::pending(),
                compression: None,
                merkle_root: None,
                block_count: None,
            },
            Metadata {
                dublin_core: DUBLIN_CORE_PATH.to_string(),
                custom: None,
            },
        )
    }

    /// Returns an in-memory writer whose manifest has already been written.
    fn writer_with_manifest() -> CdxWriter<Cursor<Vec<u8>>> {
        let mut writer = CdxWriter::in_memory();
        let manifest = create_test_manifest();
        writer.write_manifest(&manifest).unwrap();
        writer
    }

    /// A full write cycle yields a non-empty buffer.
    #[test]
    fn test_writer_in_memory() {
        let mut writer = writer_with_manifest();

        let entries = [
            (CONTENT_PATH, &br#"{"version":"0.1","blocks":[]}"#[..]),
            (DUBLIN_CORE_PATH, &br#"{"title":"Test"}"#[..]),
        ];
        for (path, body) in entries {
            writer
                .write_file(path, body, CompressionMethod::Deflate)
                .unwrap();
        }

        let archive_bytes = writer.finish().unwrap().into_inner();
        assert!(!archive_bytes.is_empty());
    }

    /// Writing a regular file before the manifest is rejected.
    #[test]
    fn test_writer_manifest_first() {
        let mut writer = CdxWriter::in_memory();

        let outcome = writer.write_file(CONTENT_PATH, b"test", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    /// A second manifest write is rejected.
    #[test]
    fn test_writer_manifest_once() {
        let mut writer = CdxWriter::in_memory();
        let manifest = create_test_manifest();

        writer.write_manifest(&manifest).unwrap();

        let second_attempt = writer.write_manifest(&manifest);
        assert!(second_attempt.is_err());
    }

    /// Paths that climb out of the archive root are rejected.
    #[test]
    fn test_writer_path_traversal_rejected() {
        let mut writer = writer_with_manifest();

        let outcome = writer.write_file("../secret", b"data", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    /// Writing the same path twice is rejected.
    #[test]
    fn test_writer_duplicate_file_rejected() {
        let mut writer = writer_with_manifest();

        writer
            .write_file(CONTENT_PATH, b"first", CompressionMethod::Deflate)
            .unwrap();

        let outcome = writer.write_file(CONTENT_PATH, b"second", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    /// Finishing without a manifest is an error.
    #[test]
    fn test_writer_finish_requires_manifest() {
        let outcome = CdxWriter::in_memory().finish();
        assert!(outcome.is_err());
    }

    /// Stored (uncompressed) entries are accepted and recorded.
    #[test]
    fn test_writer_compression_stored() {
        let mut writer = writer_with_manifest();

        writer
            .write_file(CONTENT_PATH, b"test data", CompressionMethod::Stored)
            .unwrap();

        let recorded = writer.files_written();
        assert!(recorded.iter().any(|path| path == CONTENT_PATH));
    }

    /// The hashed write returns a concrete hash of the requested algorithm.
    #[test]
    fn test_writer_hashed() {
        let mut writer = writer_with_manifest();

        let payload = b"test content";
        let digest = writer
            .write_file_hashed(
                CONTENT_PATH,
                payload,
                CompressionMethod::Deflate,
                crate::HashAlgorithm::Sha256,
            )
            .unwrap();

        assert!(!digest.is_pending());
        assert_eq!(digest.algorithm(), crate::HashAlgorithm::Sha256);
    }
}