Skip to main content

cdx_core/document/
verification.rs

1use crate::content::{Block, Text};
2use crate::{DocumentId, Hasher, Result};
3
4use super::Document;
5
6impl Document {
7    /// Compute the document ID from content.
8    ///
9    /// The document ID is computed by hashing the canonicalized semantic content layer.
10    /// This covers only the content blocks and their structure, not presentation/layout
11    /// information. Presentation layers have their own hashes in the manifest.
12    ///
13    /// # Errors
14    ///
15    /// Returns an error if canonicalization fails.
16    pub fn compute_id(&self) -> Result<DocumentId> {
17        // Serialize content to canonical JSON
18        let content_json = serde_json::to_vec(&self.content)?;
19        let canonical =
20            json_canon::to_string(&serde_json::from_slice::<serde_json::Value>(&content_json)?)?;
21
22        Ok(Hasher::hash(
23            self.manifest.hash_algorithm,
24            canonical.as_bytes(),
25        ))
26    }
27
28    /// Verify the document integrity.
29    ///
30    /// This checks:
31    /// - Content hash matches manifest
32    /// - Document ID is valid (if not pending)
33    ///
34    /// # Errors
35    ///
36    /// Returns an error if verification fails.
37    pub fn verify(&self) -> Result<VerificationReport> {
38        let mut report = VerificationReport {
39            content_valid: true,
40            id_valid: true,
41            errors: Vec::new(),
42        };
43
44        // Verify content hash
45        // Note: must use to_vec_pretty to match what write_to uses
46        if !self.manifest.content.hash.is_pending() {
47            let content_json = serde_json::to_vec_pretty(&self.content)?;
48            let actual_hash = Hasher::hash(self.manifest.content.hash.algorithm(), &content_json);
49
50            if actual_hash != self.manifest.content.hash {
51                report.content_valid = false;
52                report.errors.push(format!(
53                    "Content hash mismatch: expected {}, got {}",
54                    self.manifest.content.hash, actual_hash
55                ));
56            }
57        }
58
59        // Verify document ID
60        if !self.manifest.id.is_pending() {
61            let computed_id = self.compute_id()?;
62            if computed_id != self.manifest.id {
63                report.id_valid = false;
64                report.errors.push(format!(
65                    "Document ID mismatch: expected {}, got {}",
66                    self.manifest.id, computed_id
67                ));
68            }
69        }
70
71        Ok(report)
72    }
73
74    /// Validate extension declarations.
75    ///
76    /// This checks that all extension namespaces used in the document's content
77    /// (blocks and marks) are declared in the manifest's extensions list.
78    ///
79    /// # Returns
80    ///
81    /// An `ExtensionValidationReport` containing:
82    /// - List of used extension namespaces
83    /// - List of declared extension namespaces
84    /// - List of undeclared (used but not declared) namespaces
85    /// - Warnings for any issues found
86    #[must_use]
87    pub fn validate_extensions(&self) -> ExtensionValidationReport {
88        // Collect declared namespaces
89        let declared_namespaces: Vec<String> = self
90            .manifest
91            .extensions
92            .iter()
93            .map(|e| e.namespace().to_string())
94            .collect();
95
96        // Collect used namespaces from content
97        let mut used = std::collections::HashSet::new();
98        Self::collect_extension_namespaces(&self.content.blocks, &mut used);
99
100        let mut used_namespaces: Vec<String> = used.iter().cloned().collect();
101        used_namespaces.sort();
102
103        // Find undeclared namespaces
104        let mut undeclared = Vec::new();
105        let mut warnings = Vec::new();
106        for namespace in &used_namespaces {
107            if !self.manifest.has_extension(namespace) {
108                undeclared.push(namespace.clone());
109                warnings.push(format!(
110                    "Extension namespace '{namespace}' is used but not declared in manifest"
111                ));
112            }
113        }
114
115        ExtensionValidationReport {
116            used_namespaces,
117            declared_namespaces,
118            undeclared,
119            unsupported_required: Vec::new(),
120            warnings,
121        }
122    }
123
124    /// Recursively collect extension namespaces from blocks.
125    fn collect_extension_namespaces(
126        blocks: &[Block],
127        namespaces: &mut std::collections::HashSet<String>,
128    ) {
129        for block in blocks {
130            // Check if this is an extension block
131            if let Some(ext) = block.as_extension() {
132                namespaces.insert(ext.namespace.clone());
133            }
134
135            // Recursively check children and collect marks from text nodes
136            match block {
137                Block::Paragraph { children, .. }
138                | Block::Heading { children, .. }
139                | Block::CodeBlock { children, .. }
140                | Block::DefinitionTerm { children, .. } => {
141                    Self::collect_marks_namespaces(children, namespaces);
142                }
143                Block::List { children, .. }
144                | Block::ListItem { children, .. }
145                | Block::Blockquote { children, .. }
146                | Block::Table { children, .. }
147                | Block::TableRow { children, .. }
148                | Block::DefinitionItem { children, .. }
149                | Block::DefinitionDescription { children, .. } => {
150                    Self::collect_extension_namespaces(children, namespaces);
151                }
152                Block::DefinitionList(dl) => {
153                    Self::collect_extension_namespaces(&dl.children, namespaces);
154                }
155                Block::TableCell(cell) => {
156                    Self::collect_marks_namespaces(&cell.children, namespaces);
157                }
158                Block::Figure(fig) => {
159                    Self::collect_extension_namespaces(&fig.children, namespaces);
160                }
161                Block::FigCaption(fc) => {
162                    Self::collect_marks_namespaces(&fc.children, namespaces);
163                }
164                Block::Admonition(adm) => {
165                    Self::collect_extension_namespaces(&adm.children, namespaces);
166                }
167                Block::Extension(ext) => {
168                    // Already handled above, but also check children
169                    Self::collect_extension_namespaces(&ext.children, namespaces);
170                }
171                // Leaf blocks without children
172                Block::HorizontalRule { .. }
173                | Block::Image(_)
174                | Block::Math(_)
175                | Block::Break { .. }
176                | Block::Measurement(_)
177                | Block::Signature(_)
178                | Block::Svg(_)
179                | Block::Barcode(_) => {}
180            }
181        }
182    }
183
184    /// Collect extension namespaces from text marks.
185    fn collect_marks_namespaces(
186        texts: &[Text],
187        namespaces: &mut std::collections::HashSet<String>,
188    ) {
189        for text in texts {
190            for mark in &text.marks {
191                if let Some(ext) = mark.as_extension() {
192                    namespaces.insert(ext.namespace.clone());
193                }
194            }
195        }
196    }
197}
198
/// Outcome of a document integrity check.
///
/// Each flag records one individual check; `errors` carries the human-readable
/// messages gathered along the way.
#[derive(Debug, Clone)]
pub struct VerificationReport {
    /// `true` when the content hash check passed.
    pub content_valid: bool,
    /// `true` when the document ID check passed.
    pub id_valid: bool,
    /// Messages describing each problem that was found.
    pub errors: Vec<String>,
}

impl VerificationReport {
    /// Returns `true` only when both flags are set and no error message was recorded.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        let Self {
            content_valid,
            id_valid,
            errors,
        } = self;

        *content_valid && *id_valid && errors.is_empty()
    }
}
217
/// Report from extension validation.
///
/// This report identifies which extension namespaces are used in the document
/// content but not declared in the manifest's extensions list.
#[derive(Debug, Clone, Default)]
pub struct ExtensionValidationReport {
    /// Extension namespaces used in content (from blocks and marks).
    pub used_namespaces: Vec<String>,
    /// Extension namespaces that are declared in the manifest.
    pub declared_namespaces: Vec<String>,
    /// Extension namespaces used but not declared.
    pub undeclared: Vec<String>,
    /// Extension namespaces declared as required but not supported by this reader.
    /// (Currently empty since we support all built-in extensions)
    pub unsupported_required: Vec<String>,
    /// Warning messages.
    pub warnings: Vec<String>,
}

impl ExtensionValidationReport {
    /// Check if extension validation passed.
    ///
    /// Validation passes when there are no undeclared namespaces and no
    /// unsupported required extensions. The `warnings` list is *not* consulted
    /// here; use [`ExtensionValidationReport::has_warnings`] for that.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.undeclared.is_empty() && self.unsupported_required.is_empty()
    }

    /// Check if there are any warnings.
    #[must_use]
    pub fn has_warnings(&self) -> bool {
        !self.warnings.is_empty()
    }
}