Skip to main content

cdx_core/archive/
writer.rs

1//! Archive writer for Codex documents.
2
3use std::fs::File;
4use std::io::{BufWriter, Cursor, Seek, Write};
5use std::path::Path;
6
7use zip::write::FileOptions;
8use zip::ZipWriter;
9
10use crate::{Manifest, Result};
11
12use super::{validate_path, PHANTOMS_PATH, ZIP_COMMENT};
13
/// How an individual entry is compressed when it is added to the archive.
///
/// [`CompressionMethod::Deflate`] is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CompressionMethod {
    /// Keep the bytes as-is, with no compression. Intended for content that
    /// is already compressed, such as images.
    Stored,
    /// Compress with Deflate, the widely compatible method that readers are
    /// required to support.
    #[default]
    Deflate,
    /// Compress with Zstandard, which gives a better ratio but is optional
    /// for readers. Only available when the `zstd` feature is enabled.
    #[cfg(feature = "zstd")]
    Zstd,
}
26
27impl CompressionMethod {
28    fn to_zip_method(self) -> zip::CompressionMethod {
29        match self {
30            Self::Stored => zip::CompressionMethod::Stored,
31            Self::Deflate => zip::CompressionMethod::Deflated,
32            #[cfg(feature = "zstd")]
33            Self::Zstd => zip::CompressionMethod::Zstd,
34        }
35    }
36}
37
38/// Writer for creating Codex document archives.
39///
40/// `CdxWriter` creates properly formatted `.cdx` files, ensuring the manifest
41/// is written first and all required structure is maintained.
42///
43/// # Example
44///
45/// ```rust,ignore
46/// use cdx_core::archive::{CdxWriter, CompressionMethod};
47///
48/// let mut writer = CdxWriter::create("output.cdx")?;
49///
50/// writer.write_manifest(&manifest)?;
51/// writer.write_file("content/document.json", &content, CompressionMethod::Deflate)?;
52/// writer.write_file("metadata/dublin-core.json", &metadata, CompressionMethod::Deflate)?;
53///
54/// writer.finish()?;
55/// ```
56pub struct CdxWriter<W: Write + Seek> {
57    zip: ZipWriter<W>,
58    manifest_written: bool,
59    files_written: Vec<String>,
60}
61
62impl CdxWriter<BufWriter<File>> {
63    /// Create a new Codex document at the given file path.
64    ///
65    /// # Errors
66    ///
67    /// Returns an error if the file cannot be created.
68    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
69        let file = File::create(path)?;
70        let writer = BufWriter::new(file);
71        Self::new(writer)
72    }
73}
74
75impl CdxWriter<Cursor<Vec<u8>>> {
76    /// Create a new Codex document in memory.
77    ///
78    /// # Panics
79    ///
80    /// This function will not panic in practice, as initializing
81    /// a `ZipWriter` on an in-memory buffer cannot fail.
82    #[must_use]
83    pub fn in_memory() -> Self {
84        let cursor = Cursor::new(Vec::new());
85        // This cannot fail for an in-memory buffer
86        Self::new(cursor).expect("in-memory writer should not fail")
87    }
88}
89
90impl<W: Write + Seek> CdxWriter<W> {
91    /// Create a new writer wrapping any `Write + Seek` destination.
92    ///
93    /// # Errors
94    ///
95    /// Returns an error if initialization fails.
96    pub fn new(writer: W) -> Result<Self> {
97        let mut zip = ZipWriter::new(writer);
98        zip.set_comment(ZIP_COMMENT);
99
100        Ok(Self {
101            zip,
102            manifest_written: false,
103            files_written: Vec::new(),
104        })
105    }
106
107    /// Write the manifest to the archive.
108    ///
109    /// This must be called before writing any other files, as the manifest
110    /// must be the first file in the archive per the Codex specification.
111    ///
112    /// # Errors
113    ///
114    /// Returns an error if:
115    /// - Writing fails
116    /// - The manifest has already been written
117    pub fn write_manifest(&mut self, manifest: &Manifest) -> Result<()> {
118        if self.manifest_written {
119            return Err(crate::Error::InvalidManifest {
120                reason: "manifest already written".to_string(),
121            });
122        }
123
124        if !self.files_written.is_empty() {
125            return Err(crate::Error::InvalidManifest {
126                reason: "manifest must be the first file in the archive".to_string(),
127            });
128        }
129
130        let json = serde_json::to_vec_pretty(manifest)?;
131        self.write_file_internal(super::MANIFEST_PATH, &json, CompressionMethod::Deflate)?;
132        self.manifest_written = true;
133
134        Ok(())
135    }
136
137    /// Write a file to the archive.
138    ///
139    /// # Errors
140    ///
141    /// Returns an error if:
142    /// - The manifest has not been written yet
143    /// - The path contains traversal patterns (security check)
144    /// - Writing fails
145    /// - A file with the same path already exists
146    pub fn write_file(
147        &mut self,
148        path: &str,
149        data: &[u8],
150        compression: CompressionMethod,
151    ) -> Result<()> {
152        if !self.manifest_written {
153            return Err(crate::Error::InvalidManifest {
154                reason: "manifest must be written before other files".to_string(),
155            });
156        }
157
158        validate_path(path)?;
159
160        if self.files_written.contains(&path.to_string()) {
161            return Err(crate::Error::InvalidManifest {
162                reason: format!("file already exists: {path}"),
163            });
164        }
165
166        self.write_file_internal(path, data, compression)
167    }
168
169    /// Internal file writing without manifest check (for manifest itself).
170    fn write_file_internal(
171        &mut self,
172        path: &str,
173        data: &[u8],
174        compression: CompressionMethod,
175    ) -> Result<()> {
176        let options = FileOptions::<()>::default()
177            .compression_method(compression.to_zip_method())
178            .unix_permissions(0o644);
179
180        self.zip.start_file(path, options)?;
181        self.zip.write_all(data)?;
182        self.files_written.push(path.to_string());
183
184        Ok(())
185    }
186
187    /// Write a file with automatic hash computation.
188    ///
189    /// Returns the computed hash for inclusion in the manifest.
190    ///
191    /// # Errors
192    ///
193    /// Returns an error if writing fails.
194    pub fn write_file_hashed(
195        &mut self,
196        path: &str,
197        data: &[u8],
198        compression: CompressionMethod,
199        algorithm: crate::HashAlgorithm,
200    ) -> Result<crate::DocumentId> {
201        let hash = crate::Hasher::hash(algorithm, data);
202        self.write_file(path, data, compression)?;
203        Ok(hash)
204    }
205
206    /// Write phantom clusters to the archive.
207    ///
208    /// Phantom clusters are stored at `phantoms/clusters.json` and are
209    /// not included in the content hash since they exist outside the
210    /// core content boundary.
211    ///
212    /// # Errors
213    ///
214    /// Returns an error if writing fails.
215    pub fn write_phantoms(&mut self, phantoms: &crate::extensions::PhantomClusters) -> Result<()> {
216        let json = serde_json::to_vec_pretty(phantoms)?;
217        self.write_file(PHANTOMS_PATH, &json, CompressionMethod::Deflate)
218    }
219
220    /// Start a directory in the archive.
221    ///
222    /// This is optional, as ZIP archives create directories implicitly,
223    /// but can be useful for clarity.
224    ///
225    /// # Errors
226    ///
227    /// Returns an error if adding the directory fails.
228    pub fn add_directory(&mut self, path: &str) -> Result<()> {
229        validate_path(path)?;
230
231        let dir_path = if path.ends_with('/') {
232            path.to_string()
233        } else {
234            format!("{path}/")
235        };
236
237        let options =
238            FileOptions::<()>::default().compression_method(zip::CompressionMethod::Stored);
239
240        self.zip.add_directory(&dir_path, options)?;
241
242        Ok(())
243    }
244
245    /// Check if the manifest has been written.
246    #[must_use]
247    pub fn manifest_written(&self) -> bool {
248        self.manifest_written
249    }
250
251    /// Get the list of files that have been written.
252    #[must_use]
253    pub fn files_written(&self) -> &[String] {
254        &self.files_written
255    }
256
257    /// Finish writing and close the archive.
258    ///
259    /// # Errors
260    ///
261    /// Returns an error if:
262    /// - The manifest was not written
263    /// - Finalizing the archive fails
264    pub fn finish(self) -> Result<W> {
265        if !self.manifest_written {
266            return Err(crate::Error::InvalidManifest {
267                reason: "manifest must be written before finishing".to_string(),
268            });
269        }
270
271        let writer = self.zip.finish()?;
272        Ok(writer)
273    }
274
275    /// Abort writing and return the underlying writer without finalizing.
276    ///
277    /// The resulting archive will be invalid.
278    ///
279    /// # Panics
280    ///
281    /// Panics if the ZIP finalization fails, which should not happen
282    /// for valid writer implementations.
283    #[must_use]
284    pub fn abort(self) -> W {
285        self.zip.finish().unwrap_or_else(|_| {
286            // If finish fails, we've already aborted, which is fine
287            panic!("abort should not fail")
288        })
289    }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use crate::archive::{CONTENT_PATH, DUBLIN_CORE_PATH};
    use crate::{ContentRef, DocumentId, Metadata};

    /// Build a minimal manifest that points at the standard content and
    /// Dublin Core paths, with a pending content hash.
    fn create_test_manifest() -> Manifest {
        Manifest::new(
            ContentRef {
                path: CONTENT_PATH.to_string(),
                hash: DocumentId::pending(),
                compression: None,
                merkle_root: None,
                block_count: None,
            },
            Metadata {
                dublin_core: DUBLIN_CORE_PATH.to_string(),
                custom: None,
            },
        )
    }

    /// In-memory writer with the test manifest already written.
    fn writer_with_manifest() -> CdxWriter<Cursor<Vec<u8>>> {
        let mut writer = CdxWriter::in_memory();
        writer.write_manifest(&create_test_manifest()).unwrap();
        writer
    }

    #[test]
    fn test_writer_in_memory() {
        let mut writer = writer_with_manifest();

        let entries: [(&str, &[u8]); 2] = [
            (CONTENT_PATH, br#"{"version":"0.1","blocks":[]}"#),
            (DUBLIN_CORE_PATH, br#"{"title":"Test"}"#),
        ];
        for (path, bytes) in entries {
            writer
                .write_file(path, bytes, CompressionMethod::Deflate)
                .unwrap();
        }

        let archive_bytes = writer.finish().unwrap().into_inner();
        assert!(!archive_bytes.is_empty());
    }

    #[test]
    fn test_writer_manifest_first() {
        // No manifest yet, so writing any file must be rejected.
        let mut writer = CdxWriter::in_memory();

        let outcome = writer.write_file(CONTENT_PATH, b"test", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_manifest_once() {
        let mut writer = writer_with_manifest();

        // A second manifest must be rejected.
        let outcome = writer.write_manifest(&create_test_manifest());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_path_traversal_rejected() {
        let mut writer = writer_with_manifest();

        let outcome = writer.write_file("../secret", b"data", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_duplicate_file_rejected() {
        let mut writer = writer_with_manifest();

        writer
            .write_file(CONTENT_PATH, b"first", CompressionMethod::Deflate)
            .unwrap();

        // Writing the same path a second time must fail.
        let outcome = writer.write_file(CONTENT_PATH, b"second", CompressionMethod::Deflate);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_finish_requires_manifest() {
        let outcome = CdxWriter::in_memory().finish();
        assert!(outcome.is_err());
    }

    #[test]
    fn test_writer_compression_stored() {
        let mut writer = writer_with_manifest();

        writer
            .write_file(CONTENT_PATH, b"test data", CompressionMethod::Stored)
            .unwrap();

        let recorded = writer.files_written();
        assert!(recorded.contains(&CONTENT_PATH.to_string()));
    }

    #[test]
    fn test_writer_hashed() {
        let mut writer = writer_with_manifest();

        let payload = b"test content";
        let digest = writer
            .write_file_hashed(
                CONTENT_PATH,
                payload,
                CompressionMethod::Deflate,
                crate::HashAlgorithm::Sha256,
            )
            .unwrap();

        assert!(!digest.is_pending());
        assert_eq!(digest.algorithm(), crate::HashAlgorithm::Sha256);
    }
}