Skip to main content

cdx_core/validation/
mod.rs

1//! JSON Schema validation for Codex Document Format files.
2//!
//! This module provides validation functions for the CDX JSON file types:
//! - Manifest (`manifest.json`)
//! - Content (`content/content.json`)
//! - Dublin Core metadata (`metadata/dublin-core.json`)
//! - Block index
//! - Signatures
7//!
8//! # Feature Flag
9//!
10//! This module requires the `validation` feature:
11//!
12//! ```toml
13//! [dependencies]
14//! cdx-core = { version = "0.1", features = ["validation"] }
15//! ```
16//!
17//! # Example
18//!
19//! ```rust,ignore
20//! use cdx_core::validation::{validate_manifest, validate_content};
21//!
22//! let manifest_json = r#"{"version": "0.1", "id": "sha256:abc..."}"#;
23//! let errors = validate_manifest(manifest_json)?;
24//! if errors.is_empty() {
25//!     println!("Manifest is valid");
26//! } else {
27//!     for error in errors {
28//!         println!("Validation error: {}", error);
29//!     }
30//! }
31//! ```
32
33use std::fmt;
34
/// JSON schema validation error for manifest and metadata files.
///
/// Reports type mismatches, missing required properties, and invalid
/// enum values when validating manifest, content, Dublin Core metadata,
/// block index, and signature JSON files against their schemas.
///
/// Two errors compare equal when both `path` and `message` match, so
/// callers and tests can assert on whole error values.
///
/// See also [`crate::content::ValidationError`] for content structure
/// validation (block hierarchy, unique IDs, etc.).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SchemaValidationError {
    /// JSON path to the invalid element (empty for root-level errors).
    pub path: String,
    /// Description of the validation failure.
    pub message: String,
}
50
51impl fmt::Display for SchemaValidationError {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        if self.path.is_empty() {
54            write!(f, "{}", self.message)
55        } else {
56            write!(f, "{}: {}", self.path, self.message)
57        }
58    }
59}
60
// Marker impl with no overridden methods: it lets a `SchemaValidationError`
// be used wherever a `std::error::Error` is expected.
impl std::error::Error for SchemaValidationError {}
62
/// Result type for validation operations.
///
/// `Ok` carries every schema violation that was found (an empty vector means
/// the document is valid); `Err` is returned when the input could not be
/// parsed as JSON.
pub type ValidationResult = Result<Vec<SchemaValidationError>, crate::Error>;
65
66/// Validate a manifest JSON string against the CDX manifest schema.
67///
68/// # Arguments
69///
70/// * `json` - The JSON string to validate
71///
72/// # Returns
73///
74/// A vector of validation errors. An empty vector means the JSON is valid.
75///
76/// # Errors
77///
78/// Returns an error if the JSON cannot be parsed.
79pub fn validate_manifest(json: &str) -> ValidationResult {
80    validate_json(json, SchemaType::Manifest)
81}
82
83/// Validate a content JSON string against the CDX content schema.
84///
85/// # Arguments
86///
87/// * `json` - The JSON string to validate
88///
89/// # Returns
90///
91/// A vector of validation errors. An empty vector means the JSON is valid.
92///
93/// # Errors
94///
95/// Returns an error if the JSON cannot be parsed.
96pub fn validate_content(json: &str) -> ValidationResult {
97    validate_json(json, SchemaType::Content)
98}
99
100/// Validate a Dublin Core metadata JSON string against the schema.
101///
102/// # Arguments
103///
104/// * `json` - The JSON string to validate
105///
106/// # Returns
107///
108/// A vector of validation errors. An empty vector means the JSON is valid.
109///
110/// # Errors
111///
112/// Returns an error if the JSON cannot be parsed.
113pub fn validate_dublin_core(json: &str) -> ValidationResult {
114    validate_json(json, SchemaType::DublinCore)
115}
116
117/// Validate a block index JSON string against the schema.
118///
119/// # Arguments
120///
121/// * `json` - The JSON string to validate
122///
123/// # Returns
124///
125/// A vector of validation errors. An empty vector means the JSON is valid.
126///
127/// # Errors
128///
129/// Returns an error if the JSON cannot be parsed.
130pub fn validate_block_index(json: &str) -> ValidationResult {
131    validate_json(json, SchemaType::BlockIndex)
132}
133
134/// Validate a signatures JSON string against the schema.
135///
136/// # Arguments
137///
138/// * `json` - The JSON string to validate
139///
140/// # Returns
141///
142/// A vector of validation errors. An empty vector means the JSON is valid.
143///
144/// # Errors
145///
146/// Returns an error if the JSON cannot be parsed.
147pub fn validate_signatures(json: &str) -> ValidationResult {
148    validate_json(json, SchemaType::Signatures)
149}
150
/// Schema types for validation.
///
/// Each variant selects one built-in schema and the label used in
/// parse-failure messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SchemaType {
    /// Manifest document; used by `validate_manifest`.
    Manifest,
    /// Content document; used by `validate_content`.
    Content,
    /// Dublin Core metadata document; used by `validate_dublin_core`.
    DublinCore,
    /// Block index document; used by `validate_block_index`.
    BlockIndex,
    /// Signatures document; used by `validate_signatures`.
    Signatures,
}
160
161/// Internal validation function.
162fn validate_json(json: &str, schema_type: SchemaType) -> ValidationResult {
163    // Parse the JSON
164    let type_name = match schema_type {
165        SchemaType::Manifest => "manifest",
166        SchemaType::Content => "content",
167        SchemaType::DublinCore => "Dublin Core metadata",
168        SchemaType::BlockIndex => "block index",
169        SchemaType::Signatures => "signatures",
170    };
171    let value: serde_json::Value =
172        serde_json::from_str(json).map_err(|e| crate::Error::ValidationFailed {
173            reason: format!("Invalid {type_name} JSON: {e}"),
174        })?;
175
176    // Get the schema for this type
177    let schema = get_schema(schema_type);
178
179    // Validate against schema
180    let mut errors = Vec::new();
181    validate_value(&value, &schema, "", &mut errors);
182
183    Ok(errors)
184}
185
186/// Get the schema definition for a given type.
187fn get_schema(schema_type: SchemaType) -> Schema {
188    match schema_type {
189        SchemaType::Manifest => manifest_schema(),
190        SchemaType::Content => content_schema(),
191        SchemaType::DublinCore => dublin_core_schema(),
192        SchemaType::BlockIndex => block_index_schema(),
193        SchemaType::Signatures => signatures_schema(),
194    }
195}
196
/// Simple schema representation for validation.
///
/// Describes a single JSON object: which keys must exist and what type each
/// known key must have. Keys not listed in `properties` are not checked.
#[derive(Debug, Clone)]
struct Schema {
    /// Required properties (names of keys that must be present on the object).
    required: Vec<&'static str>,
    /// Property schemas, as `(property name, expected type)` pairs; a listed
    /// property is only type-checked when it is present.
    properties: Vec<(&'static str, PropertySchema)>,
}
205
/// Expected type of a single property value.
#[derive(Debug, Clone)]
enum PropertySchema {
    /// The value must be a JSON string.
    String,
    /// The value must be a JSON object (its contents are not inspected).
    Object,
    /// The value must be a JSON array (its elements are not inspected).
    Array,
    /// Any JSON value is accepted.
    Any,
    /// The value must be a JSON string equal to one of the listed variants.
    StringEnum(Vec<&'static str>),
}
214
215/// Validate a JSON value against a schema.
216fn validate_value(
217    value: &serde_json::Value,
218    schema: &Schema,
219    path: &str,
220    errors: &mut Vec<SchemaValidationError>,
221) {
222    let Some(obj) = value.as_object() else {
223        errors.push(SchemaValidationError {
224            path: path.to_string(),
225            message: "expected object".to_string(),
226        });
227        return;
228    };
229
230    // Check required properties
231    for required in &schema.required {
232        if !obj.contains_key(*required) {
233            errors.push(SchemaValidationError {
234                path: if path.is_empty() {
235                    (*required).to_string()
236                } else {
237                    format!("{path}.{required}")
238                },
239                message: format!("missing required property '{required}'"),
240            });
241        }
242    }
243
244    // Validate property types
245    for (prop_name, prop_schema) in &schema.properties {
246        if let Some(prop_value) = obj.get(*prop_name) {
247            let prop_path = if path.is_empty() {
248                (*prop_name).to_string()
249            } else {
250                format!("{path}.{prop_name}")
251            };
252            validate_property(prop_value, prop_schema, &prop_path, errors);
253        }
254    }
255}
256
257/// Validate a property value against its schema.
258fn validate_property(
259    value: &serde_json::Value,
260    schema: &PropertySchema,
261    path: &str,
262    errors: &mut Vec<SchemaValidationError>,
263) {
264    match schema {
265        PropertySchema::String => {
266            if !value.is_string() {
267                errors.push(SchemaValidationError {
268                    path: path.to_string(),
269                    message: format!("expected string, got {}", value_type_name(value)),
270                });
271            }
272        }
273        PropertySchema::Object => {
274            if !value.is_object() {
275                errors.push(SchemaValidationError {
276                    path: path.to_string(),
277                    message: format!("expected object, got {}", value_type_name(value)),
278                });
279            }
280        }
281        PropertySchema::Array => {
282            if !value.is_array() {
283                errors.push(SchemaValidationError {
284                    path: path.to_string(),
285                    message: format!("expected array, got {}", value_type_name(value)),
286                });
287            }
288        }
289        PropertySchema::Any => {
290            // Any type is valid
291        }
292        PropertySchema::StringEnum(variants) => {
293            if let Some(s) = value.as_str() {
294                if !variants.contains(&s) {
295                    errors.push(SchemaValidationError {
296                        path: path.to_string(),
297                        message: format!(
298                            "invalid value '{}', expected one of: {}",
299                            s,
300                            variants.join(", ")
301                        ),
302                    });
303                }
304            } else {
305                errors.push(SchemaValidationError {
306                    path: path.to_string(),
307                    message: format!("expected string, got {}", value_type_name(value)),
308                });
309            }
310        }
311    }
312}
313
314/// Get a human-readable type name for a JSON value.
315fn value_type_name(value: &serde_json::Value) -> &'static str {
316    match value {
317        serde_json::Value::Null => "null",
318        serde_json::Value::Bool(_) => "boolean",
319        serde_json::Value::Number(_) => "number",
320        serde_json::Value::String(_) => "string",
321        serde_json::Value::Array(_) => "array",
322        serde_json::Value::Object(_) => "object",
323    }
324}
325
326// Schema definitions
327
328fn manifest_schema() -> Schema {
329    Schema {
330        required: vec!["version"],
331        properties: vec![
332            ("version", PropertySchema::String),
333            ("id", PropertySchema::String),
334            (
335                "state",
336                PropertySchema::StringEnum(vec!["draft", "review", "frozen", "published"]),
337            ),
338            ("created", PropertySchema::String),
339            ("modified", PropertySchema::String),
340            ("content", PropertySchema::Object),
341            ("metadata", PropertySchema::Object),
342            ("security", PropertySchema::Object),
343            ("presentation", PropertySchema::Object),
344            ("assets", PropertySchema::Object),
345            ("lineage", PropertySchema::Object),
346        ],
347    }
348}
349
350fn content_schema() -> Schema {
351    Schema {
352        required: vec!["version", "blocks"],
353        properties: vec![
354            ("version", PropertySchema::String),
355            ("blocks", PropertySchema::Array),
356        ],
357    }
358}
359
360fn dublin_core_schema() -> Schema {
361    Schema {
362        required: vec!["version"],
363        properties: vec![
364            ("version", PropertySchema::String),
365            ("title", PropertySchema::String),
366            ("creator", PropertySchema::Any), // Can be string or array
367            ("subject", PropertySchema::Any),
368            ("description", PropertySchema::String),
369            ("publisher", PropertySchema::String),
370            ("contributor", PropertySchema::Any),
371            ("date", PropertySchema::String),
372            ("type", PropertySchema::String),
373            ("format", PropertySchema::String),
374            ("identifier", PropertySchema::String),
375            ("source", PropertySchema::String),
376            ("language", PropertySchema::String),
377            ("relation", PropertySchema::String),
378            ("coverage", PropertySchema::String),
379            ("rights", PropertySchema::String),
380        ],
381    }
382}
383
384fn block_index_schema() -> Schema {
385    Schema {
386        required: vec!["version", "algorithm", "root", "blocks"],
387        properties: vec![
388            ("version", PropertySchema::String),
389            (
390                "algorithm",
391                PropertySchema::StringEnum(vec!["sha256", "sha384", "sha512", "blake3"]),
392            ),
393            ("root", PropertySchema::String),
394            ("blocks", PropertySchema::Array),
395        ],
396    }
397}
398
399fn signatures_schema() -> Schema {
400    Schema {
401        required: vec!["version", "signatures"],
402        properties: vec![
403            ("version", PropertySchema::String),
404            ("signatures", PropertySchema::Array),
405        ],
406    }
407}
408
/// Unit tests for the public validators and the error type's `Display` output.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_manifest_valid() {
        let json = r#"{
            "version": "0.1",
            "state": "draft",
            "created": "2024-01-01T00:00:00Z"
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert!(errors.is_empty(), "Expected no errors: {errors:?}");
    }

    #[test]
    fn test_validate_manifest_missing_version() {
        let json = r#"{
            "state": "draft"
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "version");
        assert!(errors[0].message.contains("version"));
    }

    #[test]
    fn test_validate_manifest_invalid_state() {
        let json = r#"{
            "version": "0.1",
            "state": "invalid"
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "state");
        assert!(errors[0].message.contains("invalid"));
    }

    #[test]
    fn test_validate_manifest_state_wrong_type() {
        // A non-string value for an enum property is a type error, not an
        // "invalid value" error.
        let json = r#"{
            "version": "0.1",
            "state": 5
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "state");
        assert_eq!(errors[0].message, "expected string, got number");
    }

    #[test]
    fn test_validate_manifest_wrong_type() {
        let json = r#"{
            "version": 123
        }"#;

        let errors = validate_manifest(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("string"));
    }

    #[test]
    fn test_validate_root_not_object() {
        // Valid JSON whose root is not an object yields one root-level error.
        let errors = validate_manifest("[]").unwrap();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].path.is_empty());
        assert_eq!(errors[0].message, "expected object");
    }

    #[test]
    fn test_validate_content_valid() {
        let json = r#"{
            "version": "0.1",
            "blocks": []
        }"#;

        let errors = validate_content(json).unwrap();
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_content_missing_blocks() {
        let json = r#"{
            "version": "0.1"
        }"#;

        let errors = validate_content(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("blocks"));
    }

    #[test]
    fn test_validate_dublin_core_valid() {
        let json = r#"{
            "version": "0.1",
            "title": "Test Document",
            "creator": "Test Author"
        }"#;

        let errors = validate_dublin_core(json).unwrap();
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_dublin_core_array_creator() {
        let json = r#"{
            "version": "0.1",
            "title": "Test Document",
            "creator": ["Author 1", "Author 2"]
        }"#;

        let errors = validate_dublin_core(json).unwrap();
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_dublin_core_wrong_type() {
        let json = r#"{
            "version": "0.1",
            "title": 42
        }"#;

        let errors = validate_dublin_core(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "title");
        assert_eq!(errors[0].message, "expected string, got number");
    }

    #[test]
    fn test_validate_block_index_valid() {
        let json = r#"{
            "version": "0.1",
            "algorithm": "sha256",
            "root": "abc123",
            "blocks": []
        }"#;

        let errors = validate_block_index(json).unwrap();
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_block_index_invalid_algorithm() {
        let json = r#"{
            "version": "0.1",
            "algorithm": "md5",
            "root": "abc123",
            "blocks": []
        }"#;

        let errors = validate_block_index(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("md5"));
    }

    #[test]
    fn test_validate_signatures_valid() {
        let json = r#"{
            "version": "0.1",
            "signatures": []
        }"#;

        let errors = validate_signatures(json).unwrap();
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_signatures_missing_signatures() {
        let json = r#"{
            "version": "0.1"
        }"#;

        let errors = validate_signatures(json).unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].path, "signatures");
        assert_eq!(
            errors[0].message,
            "missing required property 'signatures'"
        );
    }

    #[test]
    fn test_validate_invalid_json() {
        let json = "not valid json";

        // Every validator shares the same parse step, so all must fail.
        assert!(validate_manifest(json).is_err());
        assert!(validate_content(json).is_err());
        assert!(validate_dublin_core(json).is_err());
        assert!(validate_block_index(json).is_err());
        assert!(validate_signatures(json).is_err());
    }

    #[test]
    fn test_error_display() {
        let error = SchemaValidationError {
            path: "manifest.version".to_string(),
            message: "expected string".to_string(),
        };
        assert_eq!(error.to_string(), "manifest.version: expected string");
    }

    #[test]
    fn test_error_display_empty_path() {
        let error = SchemaValidationError {
            path: String::new(),
            message: "expected object".to_string(),
        };
        assert_eq!(error.to_string(), "expected object");
    }
}