Skip to main content

cdx_core/document/
io.rs

1use std::fs::File;
2use std::io::{Cursor, Read, Seek, Write};
3use std::path::Path;
4
5#[cfg(feature = "encryption")]
6use crate::archive::ENCRYPTION_PATH;
7#[cfg(feature = "signatures")]
8use crate::archive::SIGNATURES_PATH;
9use crate::archive::{
10    CdxReader, CdxWriter, CompressionMethod, ACADEMIC_NUMBERING_PATH, BIBLIOGRAPHY_PATH,
11    COMMENTS_PATH, CONTENT_PATH, DUBLIN_CORE_PATH, FORMS_DATA_PATH, JSONLD_PATH, PHANTOMS_PATH,
12};
13use crate::content::Content;
14use crate::metadata::DublinCore;
15use crate::{Hasher, Result};
16
17#[cfg(any(feature = "signatures", feature = "encryption"))]
18use crate::manifest::SecurityRef;
19
20use super::Document;
21
22impl Document {
23    /// Open a document from a file path.
24    ///
25    /// # Errors
26    ///
27    /// Returns an error if:
28    /// - The file cannot be opened
29    /// - The archive is invalid
30    /// - Required files are missing or malformed
31    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
32        let mut reader = CdxReader::open(path)?;
33        Self::from_reader(&mut reader)
34    }
35
36    /// Open a document from any `Read + Seek` source.
37    ///
38    /// # Errors
39    ///
40    /// Returns an error if:
41    /// - The source is not a valid Codex archive
42    /// - Required files are missing or malformed
43    pub fn open_from_reader<R: Read + Seek>(reader: R) -> Result<Self> {
44        let mut cdx_reader = CdxReader::new(reader)?;
45        Self::from_reader(&mut cdx_reader)
46    }
47
48    /// Open a document from bytes.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error if the data is not a valid Codex document.
53    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
54        let mut reader = CdxReader::from_bytes(data)?;
55        Self::from_reader(&mut reader)
56    }
57
58    /// Read document from a `CdxReader`.
59    fn from_reader<R: Read + Seek>(reader: &mut CdxReader<R>) -> Result<Self> {
60        let manifest = reader.manifest().clone();
61
62        // Read and parse content
63        let content_data = reader.read_content()?;
64        let content: Content = serde_json::from_slice(&content_data)?;
65
66        // Read and parse Dublin Core
67        let dc_data = reader.read_dublin_core()?;
68        let dublin_core: DublinCore = serde_json::from_slice(&dc_data)?;
69
70        // Helper closure to read and parse optional JSON extension files
71        let mut read_optional_json = |path: &str| -> Result<Option<Vec<u8>>> {
72            if reader.file_exists(path)? {
73                Ok(Some(reader.read_file(path)?))
74            } else {
75                Ok(None)
76            }
77        };
78
79        // Read signatures if present (only when signatures feature is enabled)
80        #[cfg(feature = "signatures")]
81        let signature_file = if let Some(ref security) = manifest.security {
82            if let Some(ref sig_path) = security.signatures {
83                read_optional_json(sig_path)?
84                    .map(|data| serde_json::from_slice(&data))
85                    .transpose()?
86            } else {
87                None
88            }
89        } else {
90            None
91        };
92
93        // Read encryption metadata if present (only when encryption feature is enabled)
94        #[cfg(feature = "encryption")]
95        let encryption_metadata = if let Some(ref security) = manifest.security {
96            if let Some(ref enc_path) = security.encryption {
97                read_optional_json(enc_path)?
98                    .map(|data| serde_json::from_slice(&data))
99                    .transpose()?
100            } else {
101                None
102            }
103        } else {
104            None
105        };
106
107        // Read extension files using the helper closure
108        let academic_numbering = read_optional_json(ACADEMIC_NUMBERING_PATH)?
109            .map(|data| serde_json::from_slice(&data))
110            .transpose()?;
111
112        let comments = read_optional_json(COMMENTS_PATH)?
113            .map(|data| serde_json::from_slice(&data))
114            .transpose()?;
115
116        let phantom_clusters = read_optional_json(PHANTOMS_PATH)?
117            .map(|data| serde_json::from_slice(&data))
118            .transpose()?;
119
120        let form_data = read_optional_json(FORMS_DATA_PATH)?
121            .map(|data| serde_json::from_slice(&data))
122            .transpose()?;
123
124        let bibliography = read_optional_json(BIBLIOGRAPHY_PATH)?
125            .map(|data| serde_json::from_slice(&data))
126            .transpose()?;
127
128        let jsonld_metadata = read_optional_json(JSONLD_PATH)?
129            .map(|data| serde_json::from_slice(&data))
130            .transpose()?;
131
132        Ok(Self {
133            manifest,
134            content,
135            dublin_core,
136            #[cfg(feature = "signatures")]
137            signature_file,
138            #[cfg(feature = "encryption")]
139            encryption_metadata,
140            academic_numbering,
141            comments,
142            phantom_clusters,
143            form_data,
144            bibliography,
145            jsonld_metadata,
146        })
147    }
148
149    /// Save the document to a file.
150    ///
151    /// # Errors
152    ///
153    /// Returns an error if:
154    /// - The file cannot be created
155    /// - Writing fails
156    pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()> {
157        let file = File::create(path)?;
158        let writer = std::io::BufWriter::new(file);
159        self.write_to(writer)
160    }
161
162    /// Write the document to any `Write + Seek` destination.
163    ///
164    /// # Errors
165    ///
166    /// Returns an error if writing fails.
167    pub fn write_to<W: Write + Seek>(&self, writer: W) -> Result<()> {
168        let mut cdx_writer = CdxWriter::new(writer)?;
169
170        // Serialize content and dublin core
171        let content_json = serde_json::to_vec_pretty(&self.content)?;
172        let dc_json = serde_json::to_vec_pretty(&self.dublin_core)?;
173
174        // Compute hashes
175        let content_hash = Hasher::hash(self.manifest.hash_algorithm, &content_json);
176
177        // Update manifest with computed hashes
178        let mut manifest = self.manifest.clone();
179        manifest.content.hash = content_hash;
180
181        // Update security reference if we have signatures or encryption
182        #[cfg(any(feature = "signatures", feature = "encryption"))]
183        {
184            #[cfg(feature = "signatures")]
185            let has_signatures = self
186                .signature_file
187                .as_ref()
188                .is_some_and(|sf| !sf.is_empty());
189            #[cfg(not(feature = "signatures"))]
190            let has_signatures = false;
191
192            #[cfg(feature = "encryption")]
193            let has_encryption = self.encryption_metadata.is_some();
194            #[cfg(not(feature = "encryption"))]
195            let has_encryption = false;
196
197            if has_signatures || has_encryption {
198                #[cfg(feature = "signatures")]
199                let signatures_ref = if has_signatures {
200                    Some(SIGNATURES_PATH.to_string())
201                } else {
202                    None
203                };
204                #[cfg(not(feature = "signatures"))]
205                let signatures_ref = None;
206
207                #[cfg(feature = "encryption")]
208                let encryption_ref = if has_encryption {
209                    Some(ENCRYPTION_PATH.to_string())
210                } else {
211                    None
212                };
213                #[cfg(not(feature = "encryption"))]
214                let encryption_ref = None;
215
216                manifest.security = Some(SecurityRef {
217                    signatures: signatures_ref,
218                    encryption: encryption_ref,
219                });
220            }
221        }
222
223        // Write files
224        cdx_writer.write_manifest(&manifest)?;
225        cdx_writer.write_file(CONTENT_PATH, &content_json, CompressionMethod::Deflate)?;
226        cdx_writer.write_file(DUBLIN_CORE_PATH, &dc_json, CompressionMethod::Deflate)?;
227
228        // Write signatures if present
229        #[cfg(feature = "signatures")]
230        if let Some(ref sig_file) = self.signature_file {
231            if !sig_file.is_empty() {
232                let sig_json = sig_file.to_json()?;
233                cdx_writer.write_file(
234                    SIGNATURES_PATH,
235                    sig_json.as_bytes(),
236                    CompressionMethod::Deflate,
237                )?;
238            }
239        }
240
241        // Write encryption metadata if present
242        #[cfg(feature = "encryption")]
243        if let Some(ref enc_meta) = self.encryption_metadata {
244            let enc_json = serde_json::to_vec_pretty(enc_meta)?;
245            cdx_writer.write_file(ENCRYPTION_PATH, &enc_json, CompressionMethod::Deflate)?;
246        }
247
248        // Write optional extension files
249        // Using a local macro to avoid repetition while maintaining type safety
250        macro_rules! write_optional_json {
251            ($path:expr, $value:expr) => {
252                if let Some(ref v) = $value {
253                    let json = serde_json::to_vec_pretty(v)?;
254                    cdx_writer.write_file($path, &json, CompressionMethod::Deflate)?;
255                }
256            };
257        }
258
259        write_optional_json!(ACADEMIC_NUMBERING_PATH, self.academic_numbering);
260        write_optional_json!(COMMENTS_PATH, self.comments);
261        write_optional_json!(PHANTOMS_PATH, self.phantom_clusters);
262        write_optional_json!(FORMS_DATA_PATH, self.form_data);
263        write_optional_json!(BIBLIOGRAPHY_PATH, self.bibliography);
264        write_optional_json!(JSONLD_PATH, self.jsonld_metadata);
265
266        cdx_writer.finish()?;
267        Ok(())
268    }
269
270    /// Write the document to bytes.
271    ///
272    /// # Errors
273    ///
274    /// Returns an error if serialization fails.
275    pub fn to_bytes(&self) -> Result<Vec<u8>> {
276        let cursor = Cursor::new(Vec::new());
277        let mut temp = cursor;
278        self.write_to(&mut temp)?;
279        Ok(temp.into_inner())
280    }
281}