Skip to main content

cdx_core/validation/
mod.rs

1//! JSON Schema validation for Codex Document Format files.
2//!
//! This module provides validation functions for the CDX JSON file types:
//! - Manifest (`manifest.json`)
//! - Content (`content/content.json`)
//! - Dublin Core metadata (`metadata/dublin-core.json`)
//! - Block index
//! - Signatures
7//!
8//! # Feature Flag
9//!
10//! This module requires the `validation` feature:
11//!
12//! ```toml
13//! [dependencies]
14//! cdx-core = { version = "0.1", features = ["validation"] }
15//! ```
16//!
17//! # Example
18//!
19//! ```rust,ignore
20//! use cdx_core::validation::{validate_manifest, validate_content};
21//!
22//! let manifest_json = r#"{"version": "0.1", "id": "sha256:abc..."}"#;
23//! let errors = validate_manifest(manifest_json)?;
24//! if errors.is_empty() {
25//!     println!("Manifest is valid");
26//! } else {
27//!     for error in errors {
28//!         println!("Validation error: {}", error);
29//!     }
30//! }
31//! ```
32
33use std::fmt;
34
/// JSON schema validation error for manifest and metadata files.
///
/// Reports type mismatches, missing required properties, and invalid
/// enum values when validating manifest, content, Dublin Core metadata,
/// block index, and signature JSON files against their schemas.
///
/// Two errors compare equal when both their `path` and `message` match,
/// which lets callers and tests compare whole error values directly.
///
/// See also [`crate::content::ValidationError`] for content structure
/// validation (block hierarchy, unique IDs, etc.).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SchemaValidationError {
    /// JSON path to the invalid element (empty for root-level errors).
    pub path: String,
    /// Description of the validation failure.
    pub message: String,
}

impl fmt::Display for SchemaValidationError {
    /// Renders the error as `path: message`, or just `message` when the
    /// error concerns the document root (empty `path`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.path.is_empty() {
            // Root-level error: there is no path to prefix.
            f.write_str(&self.message)
        } else {
            write!(f, "{}: {}", self.path, self.message)
        }
    }
}

impl std::error::Error for SchemaValidationError {}
62
63/// Result type for validation operations.
64pub type ValidationResult = Result<Vec<SchemaValidationError>, crate::Error>;
65
66/// Validate a manifest JSON string against the CDX manifest schema.
67///
68/// # Arguments
69///
70/// * `json` - The JSON string to validate
71///
72/// # Returns
73///
74/// A vector of validation errors. An empty vector means the JSON is valid.
75///
76/// # Errors
77///
78/// Returns an error if the JSON cannot be parsed.
79pub fn validate_manifest(json: &str) -> ValidationResult {
80    validate_json(json, SchemaType::Manifest)
81}
82
83/// Validate a content JSON string against the CDX content schema.
84///
85/// # Arguments
86///
87/// * `json` - The JSON string to validate
88///
89/// # Returns
90///
91/// A vector of validation errors. An empty vector means the JSON is valid.
92///
93/// # Errors
94///
95/// Returns an error if the JSON cannot be parsed.
96pub fn validate_content(json: &str) -> ValidationResult {
97    validate_json(json, SchemaType::Content)
98}
99
100/// Validate a Dublin Core metadata JSON string against the schema.
101///
102/// # Arguments
103///
104/// * `json` - The JSON string to validate
105///
106/// # Returns
107///
108/// A vector of validation errors. An empty vector means the JSON is valid.
109///
110/// # Errors
111///
112/// Returns an error if the JSON cannot be parsed.
113pub fn validate_dublin_core(json: &str) -> ValidationResult {
114    validate_json(json, SchemaType::DublinCore)
115}
116
117/// Validate a block index JSON string against the schema.
118///
119/// # Arguments
120///
121/// * `json` - The JSON string to validate
122///
123/// # Returns
124///
125/// A vector of validation errors. An empty vector means the JSON is valid.
126///
127/// # Errors
128///
129/// Returns an error if the JSON cannot be parsed.
130pub fn validate_block_index(json: &str) -> ValidationResult {
131    validate_json(json, SchemaType::BlockIndex)
132}
133
134/// Validate a signatures JSON string against the schema.
135///
136/// # Arguments
137///
138/// * `json` - The JSON string to validate
139///
140/// # Returns
141///
142/// A vector of validation errors. An empty vector means the JSON is valid.
143///
144/// # Errors
145///
146/// Returns an error if the JSON cannot be parsed.
147pub fn validate_signatures(json: &str) -> ValidationResult {
148    validate_json(json, SchemaType::Signatures)
149}
150
/// Selects which built-in schema a document is validated against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SchemaType {
    /// Document manifest; resolved to `manifest_schema()`.
    Manifest,
    /// Content file; resolved to `content_schema()`.
    Content,
    /// Dublin Core metadata; resolved to `dublin_core_schema()`.
    DublinCore,
    /// Block index; resolved to `block_index_schema()`.
    BlockIndex,
    /// Signatures file; resolved to `signatures_schema()`.
    Signatures,
}
160
161/// Internal validation function.
162fn validate_json(json: &str, schema_type: SchemaType) -> ValidationResult {
163    // Parse the JSON
164    let value: serde_json::Value =
165        serde_json::from_str(json).map_err(|e| crate::Error::InvalidManifest {
166            reason: format!("Invalid JSON: {e}"),
167        })?;
168
169    // Get the schema for this type
170    let schema = get_schema(schema_type);
171
172    // Validate against schema
173    let mut errors = Vec::new();
174    validate_value(&value, &schema, "", &mut errors);
175
176    Ok(errors)
177}
178
179/// Get the schema definition for a given type.
180fn get_schema(schema_type: SchemaType) -> Schema {
181    match schema_type {
182        SchemaType::Manifest => manifest_schema(),
183        SchemaType::Content => content_schema(),
184        SchemaType::DublinCore => dublin_core_schema(),
185        SchemaType::BlockIndex => block_index_schema(),
186        SchemaType::Signatures => signatures_schema(),
187    }
188}
189
190/// Simple schema representation for validation.
191#[derive(Debug, Clone)]
192struct Schema {
193    /// Required properties.
194    required: Vec<&'static str>,
195    /// Property schemas.
196    properties: Vec<(&'static str, PropertySchema)>,
197}
198
/// Type rule applied to a single property value.
#[derive(Debug, Clone)]
enum PropertySchema {
    /// The value must be a JSON string.
    String,
    /// The value must be a JSON object (its contents are not inspected).
    Object,
    /// The value must be a JSON array (its elements are not inspected).
    Array,
    /// Any JSON value is accepted.
    Any,
    /// The value must be a JSON string equal to one of the listed variants.
    StringEnum(Vec<&'static str>),
}
207
208/// Validate a JSON value against a schema.
209fn validate_value(
210    value: &serde_json::Value,
211    schema: &Schema,
212    path: &str,
213    errors: &mut Vec<SchemaValidationError>,
214) {
215    let Some(obj) = value.as_object() else {
216        errors.push(SchemaValidationError {
217            path: path.to_string(),
218            message: "expected object".to_string(),
219        });
220        return;
221    };
222
223    // Check required properties
224    for required in &schema.required {
225        if !obj.contains_key(*required) {
226            errors.push(SchemaValidationError {
227                path: if path.is_empty() {
228                    (*required).to_string()
229                } else {
230                    format!("{path}.{required}")
231                },
232                message: format!("missing required property '{required}'"),
233            });
234        }
235    }
236
237    // Validate property types
238    for (prop_name, prop_schema) in &schema.properties {
239        if let Some(prop_value) = obj.get(*prop_name) {
240            let prop_path = if path.is_empty() {
241                (*prop_name).to_string()
242            } else {
243                format!("{path}.{prop_name}")
244            };
245            validate_property(prop_value, prop_schema, &prop_path, errors);
246        }
247    }
248}
249
250/// Validate a property value against its schema.
251fn validate_property(
252    value: &serde_json::Value,
253    schema: &PropertySchema,
254    path: &str,
255    errors: &mut Vec<SchemaValidationError>,
256) {
257    match schema {
258        PropertySchema::String => {
259            if !value.is_string() {
260                errors.push(SchemaValidationError {
261                    path: path.to_string(),
262                    message: format!("expected string, got {}", value_type_name(value)),
263                });
264            }
265        }
266        PropertySchema::Object => {
267            if !value.is_object() {
268                errors.push(SchemaValidationError {
269                    path: path.to_string(),
270                    message: format!("expected object, got {}", value_type_name(value)),
271                });
272            }
273        }
274        PropertySchema::Array => {
275            if !value.is_array() {
276                errors.push(SchemaValidationError {
277                    path: path.to_string(),
278                    message: format!("expected array, got {}", value_type_name(value)),
279                });
280            }
281        }
282        PropertySchema::Any => {
283            // Any type is valid
284        }
285        PropertySchema::StringEnum(variants) => {
286            if let Some(s) = value.as_str() {
287                if !variants.contains(&s) {
288                    errors.push(SchemaValidationError {
289                        path: path.to_string(),
290                        message: format!(
291                            "invalid value '{}', expected one of: {}",
292                            s,
293                            variants.join(", ")
294                        ),
295                    });
296                }
297            } else {
298                errors.push(SchemaValidationError {
299                    path: path.to_string(),
300                    message: format!("expected string, got {}", value_type_name(value)),
301                });
302            }
303        }
304    }
305}
306
307/// Get a human-readable type name for a JSON value.
308fn value_type_name(value: &serde_json::Value) -> &'static str {
309    match value {
310        serde_json::Value::Null => "null",
311        serde_json::Value::Bool(_) => "boolean",
312        serde_json::Value::Number(_) => "number",
313        serde_json::Value::String(_) => "string",
314        serde_json::Value::Array(_) => "array",
315        serde_json::Value::Object(_) => "object",
316    }
317}
318
319// Schema definitions
320
321fn manifest_schema() -> Schema {
322    Schema {
323        required: vec!["version"],
324        properties: vec![
325            ("version", PropertySchema::String),
326            ("id", PropertySchema::String),
327            (
328                "state",
329                PropertySchema::StringEnum(vec!["draft", "review", "frozen", "published"]),
330            ),
331            ("created", PropertySchema::String),
332            ("modified", PropertySchema::String),
333            ("content", PropertySchema::Object),
334            ("metadata", PropertySchema::Object),
335            ("security", PropertySchema::Object),
336            ("presentation", PropertySchema::Object),
337            ("assets", PropertySchema::Object),
338            ("lineage", PropertySchema::Object),
339        ],
340    }
341}
342
343fn content_schema() -> Schema {
344    Schema {
345        required: vec!["version", "blocks"],
346        properties: vec![
347            ("version", PropertySchema::String),
348            ("blocks", PropertySchema::Array),
349        ],
350    }
351}
352
353fn dublin_core_schema() -> Schema {
354    Schema {
355        required: vec!["version"],
356        properties: vec![
357            ("version", PropertySchema::String),
358            ("title", PropertySchema::String),
359            ("creator", PropertySchema::Any), // Can be string or array
360            ("subject", PropertySchema::Any),
361            ("description", PropertySchema::String),
362            ("publisher", PropertySchema::String),
363            ("contributor", PropertySchema::Any),
364            ("date", PropertySchema::String),
365            ("type", PropertySchema::String),
366            ("format", PropertySchema::String),
367            ("identifier", PropertySchema::String),
368            ("source", PropertySchema::String),
369            ("language", PropertySchema::String),
370            ("relation", PropertySchema::String),
371            ("coverage", PropertySchema::String),
372            ("rights", PropertySchema::String),
373        ],
374    }
375}
376
377fn block_index_schema() -> Schema {
378    Schema {
379        required: vec!["version", "algorithm", "root", "blocks"],
380        properties: vec![
381            ("version", PropertySchema::String),
382            (
383                "algorithm",
384                PropertySchema::StringEnum(vec!["sha256", "sha384", "sha512", "blake3"]),
385            ),
386            ("root", PropertySchema::String),
387            ("blocks", PropertySchema::Array),
388        ],
389    }
390}
391
392fn signatures_schema() -> Schema {
393    Schema {
394        required: vec!["version", "signatures"],
395        properties: vec![
396            ("version", PropertySchema::String),
397            ("signatures", PropertySchema::Array),
398        ],
399    }
400}
401
#[cfg(test)]
mod tests {
    //! Unit tests for the schema validators.
    //!
    //! Failure cases pin both the reported `path` and the message, so a
    //! test cannot pass merely because the message template happens to
    //! contain the searched word.

    use super::*;

    /// A manifest with only known, well-typed properties is accepted.
    #[test]
    fn test_validate_manifest_valid() {
        let json = r#"{
            "version": "0.1",
            "state": "draft",
            "created": "2024-01-01T00:00:00Z"
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    /// Omitting the required `version` yields exactly one error at `version`.
    #[test]
    fn test_validate_manifest_missing_version() {
        let json = r#"{
            "state": "draft"
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "version");
        assert_eq!(errors[0].message, "missing required property 'version'");
    }

    /// A `state` outside the allowed set is rejected and the allowed
    /// values are listed in the message.
    #[test]
    fn test_validate_manifest_invalid_state() {
        let json = r#"{
            "version": "0.1",
            "state": "invalid"
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "state");
        assert_eq!(
            errors[0].message,
            "invalid value 'invalid', expected one of: draft, review, frozen, published"
        );
    }

    /// A non-string enum value is reported as a type mismatch, not as an
    /// invalid enum value.
    #[test]
    fn test_validate_manifest_state_wrong_type() {
        let json = r#"{
            "version": "0.1",
            "state": 5
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "state");
        assert_eq!(errors[0].message, "expected string, got number");
    }

    /// A numeric `version` is present (so not "missing") but has the wrong type.
    #[test]
    fn test_validate_manifest_wrong_type() {
        let json = r#"{
            "version": 123
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "version");
        assert_eq!(errors[0].message, "expected string, got number");
    }

    /// A root value that is not an object produces a single root-level error.
    #[test]
    fn test_validate_manifest_root_not_object() {
        let errors = validate_manifest("[]").unwrap();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].path.is_empty());
        assert_eq!(errors[0].message, "expected object");
    }

    #[test]
    fn test_validate_content_valid() {
        let json = r#"{
            "version": "0.1",
            "blocks": []
        }"#;

        let errors = validate_content(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    #[test]
    fn test_validate_content_missing_blocks() {
        let json = r#"{
            "version": "0.1"
        }"#;

        let errors = validate_content(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "blocks");
        assert!(errors[0].message.contains("blocks"));
    }

    #[test]
    fn test_validate_dublin_core_valid() {
        let json = r#"{
            "version": "0.1",
            "title": "Test Document",
            "creator": "Test Author"
        }"#;

        let errors = validate_dublin_core(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    /// `creator` may also be given as an array.
    #[test]
    fn test_validate_dublin_core_array_creator() {
        let json = r#"{
            "version": "0.1",
            "title": "Test Document",
            "creator": ["Author 1", "Author 2"]
        }"#;

        let errors = validate_dublin_core(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    #[test]
    fn test_validate_block_index_valid() {
        let json = r#"{
            "version": "0.1",
            "algorithm": "sha256",
            "root": "abc123",
            "blocks": []
        }"#;

        let errors = validate_block_index(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    #[test]
    fn test_validate_block_index_invalid_algorithm() {
        let json = r#"{
            "version": "0.1",
            "algorithm": "md5",
            "root": "abc123",
            "blocks": []
        }"#;

        let errors = validate_block_index(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "algorithm");
        assert!(errors[0].message.contains("'md5'"));
    }

    #[test]
    fn test_validate_signatures_valid() {
        let json = r#"{
            "version": "0.1",
            "signatures": []
        }"#;

        let errors = validate_signatures(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    /// Input that is not JSON at all is an `Err`, not a list of violations.
    #[test]
    fn test_validate_invalid_json() {
        let json = "not valid json";

        let result = validate_manifest(json);
        assert!(result.is_err());
    }

    #[test]
    fn test_error_display() {
        let error = SchemaValidationError {
            path: "manifest.version".to_string(),
            message: "expected string".to_string(),
        };
        assert_eq!(error.to_string(), "manifest.version: expected string");
    }

    #[test]
    fn test_error_display_empty_path() {
        let error = SchemaValidationError {
            path: String::new(),
            message: "expected object".to_string(),
        };
        assert_eq!(error.to_string(), "expected object");
    }
}
561}