quillmark_core/
schema.rs

1//! Schema validation and utilities for Quillmark.
2//!
3//! This module provides utilities for converting TOML field definitions to JSON Schema
4//! and validating ParsedDocument data against schemas.
5
6use crate::{quill::FieldSchema, QuillValue, RenderError};
7use serde_json::{json, Map, Value};
8use std::collections::HashMap;
9
10/// Convert a HashMap of FieldSchema to a JSON Schema object
11pub fn build_schema_from_fields(
12    field_schemas: &HashMap<String, FieldSchema>,
13) -> Result<QuillValue, RenderError> {
14    let mut properties = Map::new();
15    let mut required_fields = Vec::new();
16
17    for (field_name, field_schema) in field_schemas {
18        // Build property schema
19        let mut property = Map::new();
20
21        // Add name
22        property.insert("name".to_string(), Value::String(field_schema.name.clone()));
23
24        // Add type if specified
25        if let Some(ref field_type) = field_schema.r#type {
26            let json_type = match field_type.as_str() {
27                "str" => "string",
28                "number" => "number",
29                "array" => "array",
30                "dict" => "object",
31                "date" => "string",
32                "datetime" => "string",
33                _ => "string", // default to string for unknown types
34            };
35            property.insert("type".to_string(), Value::String(json_type.to_string()));
36
37            // Add format for date types
38            if field_type == "date" {
39                property.insert("format".to_string(), Value::String("date".to_string()));
40            } else if field_type == "datetime" {
41                property.insert("format".to_string(), Value::String("date-time".to_string()));
42            }
43        }
44
45        // Add description
46        property.insert(
47            "description".to_string(),
48            Value::String(field_schema.description.clone()),
49        );
50
51        let mut examples_array = if let Some(ref examples) = field_schema.examples {
52            examples.as_array().cloned().unwrap_or_else(Vec::new)
53        } else {
54            Vec::new()
55        };
56
57        // Add example (singular) if specified after examples
58        if let Some(ref example) = field_schema.example {
59            examples_array.push(example.as_json().clone());
60        }
61        if !examples_array.is_empty() {
62            property.insert("examples".to_string(), Value::Array(examples_array));
63        }
64
65        // Add default if specified
66        if let Some(ref default) = field_schema.default {
67            property.insert("default".to_string(), default.as_json().clone());
68        }
69
70        properties.insert(field_name.clone(), Value::Object(property));
71
72        // Determine if field is required based on the spec:
73        // - If default is present → field is optional
74        // - If default is absent → field is required
75        if field_schema.default.is_none() {
76            required_fields.push(field_name.clone());
77        }
78    }
79
80    // Build the complete JSON Schema
81    let schema = json!({
82        "$schema": "https://json-schema.org/draft/2019-09/schema",
83        "type": "object",
84        "properties": properties,
85        "required": required_fields,
86        "additionalProperties": true
87    });
88
89    Ok(QuillValue::from_json(schema))
90}
91
92/// Extract default values from a JSON Schema
93///
94/// Parses the JSON schema's "properties" object and extracts any "default" values
95/// defined for each property. Returns a HashMap mapping field names to their default
96/// values.
97///
98/// # Arguments
99///
100/// * `schema` - A JSON Schema object (must have "properties" field)
101///
102/// # Returns
103///
104/// A HashMap of field names to their default QuillValues
105pub fn extract_defaults_from_schema(
106    schema: &QuillValue,
107) -> HashMap<String, crate::value::QuillValue> {
108    let mut defaults = HashMap::new();
109
110    // Get the properties object from the schema
111    if let Some(properties) = schema.as_json().get("properties") {
112        if let Some(properties_obj) = properties.as_object() {
113            for (field_name, field_schema) in properties_obj {
114                // Check if this field has a default value
115                if let Some(default_value) = field_schema.get("default") {
116                    defaults.insert(
117                        field_name.clone(),
118                        QuillValue::from_json(default_value.clone()),
119                    );
120                }
121            }
122        }
123    }
124
125    defaults
126}
127
128/// Extract example values from a JSON Schema
129///
130/// Parses the JSON schema's "properties" object and extracts any "examples" arrays
131/// defined for each property. Returns a HashMap mapping field names to their examples
132/// (as an array of QuillValues).
133///
134/// # Arguments
135///
136/// * `schema` - A JSON Schema object (must have "properties" field)
137///
138/// # Returns
139///
140/// A HashMap of field names to their examples (``Vec<QuillValue>``)
141pub fn extract_examples_from_schema(
142    schema: &QuillValue,
143) -> HashMap<String, Vec<crate::value::QuillValue>> {
144    let mut examples = HashMap::new();
145
146    // Get the properties object from the schema
147    if let Some(properties) = schema.as_json().get("properties") {
148        if let Some(properties_obj) = properties.as_object() {
149            for (field_name, field_schema) in properties_obj {
150                // Check if this field has examples
151                if let Some(examples_value) = field_schema.get("examples") {
152                    if let Some(examples_array) = examples_value.as_array() {
153                        let examples_vec: Vec<QuillValue> = examples_array
154                            .iter()
155                            .map(|v| QuillValue::from_json(v.clone()))
156                            .collect();
157                        if !examples_vec.is_empty() {
158                            examples.insert(field_name.clone(), examples_vec);
159                        }
160                    }
161                }
162            }
163        }
164    }
165
166    examples
167}
168
169/// Validate a document's fields against a JSON Schema
170pub fn validate_document(
171    schema: &QuillValue,
172    fields: &HashMap<String, crate::value::QuillValue>,
173) -> Result<(), Vec<String>> {
174    // Convert fields to JSON Value for validation
175    let mut doc_json = Map::new();
176    for (key, value) in fields {
177        doc_json.insert(key.clone(), value.as_json().clone());
178    }
179    let doc_value = Value::Object(doc_json);
180
181    // Compile the schema
182    let compiled = match jsonschema::Validator::new(schema.as_json()) {
183        Ok(c) => c,
184        Err(e) => return Err(vec![format!("Failed to compile schema: {}", e)]),
185    };
186
187    // Validate the document and collect errors immediately
188    let validation_result = compiled.validate(&doc_value);
189
190    match validation_result {
191        Ok(_) => Ok(()),
192        Err(error) => {
193            let path = error.instance_path.to_string();
194            let path_display = if path.is_empty() {
195                "document".to_string()
196            } else {
197                path
198            };
199            let message = format!("Validation error at {}: {}", path_display, error);
200            Err(vec![message])
201        }
202    }
203}
204
/// Unit tests for schema construction, default/example extraction and
/// document validation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::quill::FieldSchema;
    use crate::value::QuillValue;

    /// A single `str` field produces an object schema with a `string` property
    /// that carries the field's name and description.
    #[test]
    fn test_build_schema_simple() {
        let mut fields = HashMap::new();
        let mut schema = FieldSchema::new(
            "Author name".to_string(),
            "The name of the author".to_string(),
        );
        schema.r#type = Some("str".to_string());
        fields.insert("author".to_string(), schema);

        let json_schema = build_schema_from_fields(&fields).unwrap().as_json().clone();
        assert_eq!(json_schema["type"], "object");
        assert_eq!(json_schema["properties"]["author"]["type"], "string");
        assert_eq!(json_schema["properties"]["author"]["name"], "Author name");
        assert_eq!(
            json_schema["properties"]["author"]["description"],
            "The name of the author"
        );
    }

    /// A field with a default keeps that default in its property and is not
    /// listed under `required`.
    #[test]
    fn test_build_schema_with_default() {
        let mut fields = HashMap::new();
        let mut schema = FieldSchema::new(
            "Field with default".to_string(),
            "A field with a default value".to_string(),
        );
        schema.r#type = Some("str".to_string());
        schema.default = Some(QuillValue::from_json(json!("default value")));
        // When default is present, the field is optional
        fields.insert("with_default".to_string(), schema);

        let json_schema = build_schema_from_fields(&fields).unwrap().as_json().clone();
        assert_eq!(json_schema["properties"]["with_default"]["type"], "string");
        assert_eq!(
            json_schema["properties"]["with_default"]["default"],
            "default value"
        );

        let required_fields = json_schema["required"].as_array().unwrap();
        assert!(!required_fields.contains(&json!("with_default")));
    }

    /// `date` and `datetime` fields map to `string` with the matching `format`.
    #[test]
    fn test_build_schema_date_types() {
        let mut fields = HashMap::new();

        let mut date_schema =
            FieldSchema::new("Date field".to_string(), "A field for dates".to_string());
        date_schema.r#type = Some("date".to_string());
        fields.insert("date_field".to_string(), date_schema);

        let mut datetime_schema = FieldSchema::new(
            "DateTime field".to_string(),
            "A field for date and time".to_string(),
        );
        datetime_schema.r#type = Some("datetime".to_string());
        fields.insert("datetime_field".to_string(), datetime_schema);

        let json_schema = build_schema_from_fields(&fields).unwrap().as_json().clone();
        assert_eq!(json_schema["properties"]["date_field"]["type"], "string");
        assert_eq!(json_schema["properties"]["date_field"]["format"], "date");
        assert_eq!(
            json_schema["properties"]["datetime_field"]["type"],
            "string"
        );
        assert_eq!(
            json_schema["properties"]["datetime_field"]["format"],
            "date-time"
        );
    }

    /// A document that satisfies both the required and the typed properties validates.
    #[test]
    fn test_validate_document_success() {
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "count": {"type": "number"}
            },
            "required": ["title"],
            "additionalProperties": true
        });

        let mut fields = HashMap::new();
        fields.insert(
            "title".to_string(),
            QuillValue::from_json(json!("Test Title")),
        );
        fields.insert("count".to_string(), QuillValue::from_json(json!(42)));

        let result = validate_document(&QuillValue::from_json(schema), &fields);
        assert!(result.is_ok());
    }

    /// An empty document fails validation when the schema requires a field.
    #[test]
    fn test_validate_document_missing_required() {
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {"type": "string"}
            },
            "required": ["title"],
            "additionalProperties": true
        });

        let fields = HashMap::new(); // empty, missing required field

        let result = validate_document(&QuillValue::from_json(schema), &fields);
        assert!(result.is_err());
        let errors = result.unwrap_err();
        assert!(!errors.is_empty());
    }

    /// A string supplied for a `number` property fails validation.
    #[test]
    fn test_validate_document_wrong_type() {
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "count": {"type": "number"}
            },
            "additionalProperties": true
        });

        let mut fields = HashMap::new();
        fields.insert(
            "count".to_string(),
            QuillValue::from_json(json!("not a number")),
        );

        let result = validate_document(&QuillValue::from_json(schema), &fields);
        assert!(result.is_err());
    }

    /// Fields not mentioned in the schema are accepted when
    /// `additionalProperties` is `true`.
    #[test]
    fn test_validate_document_allows_extra_fields() {
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {"type": "string"}
            },
            "required": ["title"],
            "additionalProperties": true
        });

        let mut fields = HashMap::new();
        fields.insert("title".to_string(), QuillValue::from_json(json!("Test")));
        fields.insert("extra".to_string(), QuillValue::from_json(json!("allowed")));

        let result = validate_document(&QuillValue::from_json(schema), &fields);
        assert!(result.is_ok());
    }

    /// A singular `example` ends up as the first element of the property's
    /// `examples` array.
    #[test]
    fn test_build_schema_with_example() {
        let mut fields = HashMap::new();
        let mut schema = FieldSchema::new(
            "memo_for".to_string(),
            "List of recipient organization symbols".to_string(),
        );
        schema.r#type = Some("array".to_string());
        schema.example = Some(QuillValue::from_json(json!(["ORG1/SYMBOL", "ORG2/SYMBOL"])));
        fields.insert("memo_for".to_string(), schema);

        let json_schema = build_schema_from_fields(&fields).unwrap().as_json().clone();

        // Verify that the examples key is present in the schema
        assert!(json_schema["properties"]["memo_for"]
            .as_object()
            .unwrap()
            .contains_key("examples"));

        let example_value = &json_schema["properties"]["memo_for"]["examples"][0];
        assert_eq!(example_value, &json!(["ORG1/SYMBOL", "ORG2/SYMBOL"]));
    }

    /// The `default` value is emitted inside the property and the field is
    /// omitted from `required`.
    #[test]
    fn test_build_schema_includes_default_in_properties() {
        let mut fields = HashMap::new();
        let mut schema = FieldSchema::new(
            "ice_cream".to_string(),
            "favorite ice cream flavor".to_string(),
        );
        schema.r#type = Some("string".to_string());
        schema.default = Some(QuillValue::from_json(json!("taro")));
        fields.insert("ice_cream".to_string(), schema);

        let json_schema = build_schema_from_fields(&fields).unwrap().as_json().clone();

        // Verify that default field is present in the schema
        assert!(json_schema["properties"]["ice_cream"]
            .as_object()
            .unwrap()
            .contains_key("default"));

        let default_value = &json_schema["properties"]["ice_cream"]["default"];
        assert_eq!(default_value, &json!("taro"));

        // Verify that field with default is not required
        let required_fields = json_schema["required"].as_array().unwrap();
        assert!(!required_fields.contains(&json!("ice_cream")));
    }

    /// Only properties that declare a `default` are returned, with their values intact.
    #[test]
    fn test_extract_defaults_from_schema() {
        // Create a JSON schema with defaults
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {
                    "type": "string",
                    "description": "Document title"
                },
                "author": {
                    "type": "string",
                    "description": "Document author",
                    "default": "Anonymous"
                },
                "status": {
                    "type": "string",
                    "description": "Document status",
                    "default": "draft"
                },
                "count": {
                    "type": "number",
                    "default": 42
                }
            },
            "required": ["title"]
        });

        let defaults = extract_defaults_from_schema(&QuillValue::from_json(schema));

        // Verify that only fields with defaults are extracted
        assert_eq!(defaults.len(), 3);
        assert!(!defaults.contains_key("title")); // no default
        assert!(defaults.contains_key("author"));
        assert!(defaults.contains_key("status"));
        assert!(defaults.contains_key("count"));

        // Verify the default values
        assert_eq!(defaults.get("author").unwrap().as_str(), Some("Anonymous"));
        assert_eq!(defaults.get("status").unwrap().as_str(), Some("draft"));
        assert_eq!(defaults.get("count").unwrap().as_json().as_i64(), Some(42));
    }

    /// A schema whose properties declare no defaults yields an empty map.
    #[test]
    fn test_extract_defaults_from_schema_empty() {
        // Schema with no defaults
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "author": {"type": "string"}
            },
            "required": ["title"]
        });

        let defaults = extract_defaults_from_schema(&QuillValue::from_json(schema));
        assert_eq!(defaults.len(), 0);
    }

    /// A schema without a `properties` key yields an empty map.
    #[test]
    fn test_extract_defaults_from_schema_no_properties() {
        // Schema without properties field
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object"
        });

        let defaults = extract_defaults_from_schema(&QuillValue::from_json(schema));
        assert_eq!(defaults.len(), 0);
    }

    /// Only properties that declare `examples` are returned, preserving element order.
    #[test]
    fn test_extract_examples_from_schema() {
        // Create a JSON schema with examples
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {
                    "type": "string",
                    "description": "Document title"
                },
                "memo_for": {
                    "type": "array",
                    "description": "List of recipients",
                    "examples": [
                        ["ORG1/SYMBOL", "ORG2/SYMBOL"],
                        ["DEPT/OFFICE"]
                    ]
                },
                "author": {
                    "type": "string",
                    "description": "Document author",
                    "examples": ["John Doe", "Jane Smith"]
                },
                "status": {
                    "type": "string",
                    "description": "Document status"
                }
            }
        });

        let examples = extract_examples_from_schema(&QuillValue::from_json(schema));

        // Verify that only fields with examples are extracted
        assert_eq!(examples.len(), 2);
        assert!(!examples.contains_key("title")); // no examples
        assert!(examples.contains_key("memo_for"));
        assert!(examples.contains_key("author"));
        assert!(!examples.contains_key("status")); // no examples

        // Verify the example values for memo_for
        let memo_for_examples = examples.get("memo_for").unwrap();
        assert_eq!(memo_for_examples.len(), 2);
        assert_eq!(
            memo_for_examples[0].as_json(),
            &json!(["ORG1/SYMBOL", "ORG2/SYMBOL"])
        );
        assert_eq!(memo_for_examples[1].as_json(), &json!(["DEPT/OFFICE"]));

        // Verify the example values for author
        let author_examples = examples.get("author").unwrap();
        assert_eq!(author_examples.len(), 2);
        assert_eq!(author_examples[0].as_str(), Some("John Doe"));
        assert_eq!(author_examples[1].as_str(), Some("Jane Smith"));
    }

    /// A schema whose properties declare no examples yields an empty map.
    #[test]
    fn test_extract_examples_from_schema_empty() {
        // Schema with no examples
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "author": {"type": "string"}
            }
        });

        let examples = extract_examples_from_schema(&QuillValue::from_json(schema));
        assert_eq!(examples.len(), 0);
    }

    /// A schema without a `properties` key yields an empty map.
    #[test]
    fn test_extract_examples_from_schema_no_properties() {
        // Schema without properties field
        let schema = json!({
            "$schema": "https://json-schema.org/draft/2019-09/schema",
            "type": "object"
        });

        let examples = extract_examples_from_schema(&QuillValue::from_json(schema));
        assert_eq!(examples.len(), 0);
    }
}