omnivore_core/parser/
schema.rs1use crate::{Error, Result};
2use serde::{Deserialize, Serialize};
3use serde_json::Value;
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct Schema {
8 pub name: String,
9 pub version: String,
10 pub fields: Vec<Field>,
11 pub required: Vec<String>,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct Field {
16 pub name: String,
17 pub field_type: FieldType,
18 pub description: Option<String>,
19 pub default: Option<Value>,
20 pub validators: Vec<Validator>,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24#[serde(rename_all = "lowercase")]
25pub enum FieldType {
26 String,
27 Number,
28 Boolean,
29 Date,
30 Url,
31 Email,
32 Array(Box<FieldType>),
33 Object(HashMap<String, FieldType>),
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37#[serde(tag = "type")]
38pub enum Validator {
39 MinLength { value: usize },
40 MaxLength { value: usize },
41 Pattern { regex: String },
42 Min { value: f64 },
43 Max { value: f64 },
44 Enum { values: Vec<Value> },
45}
46
47impl Schema {
48 pub fn validate(&self, data: &Value) -> Result<()> {
49 let obj = data
50 .as_object()
51 .ok_or_else(|| Error::Parse("Data must be an object".to_string()))?;
52
53 for required_field in &self.required {
54 if !obj.contains_key(required_field) {
55 return Err(Error::Parse(format!(
56 "Required field '{required_field}' is missing"
57 )));
58 }
59 }
60
61 for field in &self.fields {
62 if let Some(value) = obj.get(&field.name) {
63 self.validate_field(field, value)?;
64 }
65 }
66
67 Ok(())
68 }
69
70 fn validate_field(&self, field: &Field, value: &Value) -> Result<()> {
71 self.validate_type(&field.field_type, value)?;
72
73 for validator in &field.validators {
74 self.apply_validator(validator, value)?;
75 }
76
77 Ok(())
78 }
79
80 #[allow(clippy::only_used_in_recursion)]
81 fn validate_type(&self, field_type: &FieldType, value: &Value) -> Result<()> {
82 match (field_type, value) {
83 (FieldType::String, Value::String(_)) => Ok(()),
84 (FieldType::Number, Value::Number(_)) => Ok(()),
85 (FieldType::Boolean, Value::Bool(_)) => Ok(()),
86 (FieldType::Array(inner_type), Value::Array(arr)) => {
87 for item in arr {
88 self.validate_type(inner_type, item)?;
89 }
90 Ok(())
91 }
92 (FieldType::Object(schema), Value::Object(obj)) => {
93 for (key, expected_type) in schema {
94 if let Some(val) = obj.get(key) {
95 self.validate_type(expected_type, val)?;
96 }
97 }
98 Ok(())
99 }
100 _ => Err(Error::Parse(format!(
101 "Type mismatch: expected {field_type:?}, got {value:?}"
102 ))),
103 }
104 }
105
106 fn apply_validator(&self, validator: &Validator, value: &Value) -> Result<()> {
107 match validator {
108 Validator::MinLength { value: min } => {
109 if let Value::String(s) = value {
110 if s.len() < *min {
111 return Err(Error::Parse(format!(
112 "String length {} is less than minimum {}",
113 s.len(),
114 min
115 )));
116 }
117 }
118 }
119 Validator::MaxLength { value: max } => {
120 if let Value::String(s) = value {
121 if s.len() > *max {
122 return Err(Error::Parse(format!(
123 "String length {} exceeds maximum {}",
124 s.len(),
125 max
126 )));
127 }
128 }
129 }
130 Validator::Pattern { regex } => {
131 if let Value::String(s) = value {
132 let re = regex::Regex::new(regex)
133 .map_err(|e| Error::Parse(format!("Invalid regex: {e}")))?;
134 if !re.is_match(s) {
135 return Err(Error::Parse(format!(
136 "String '{s}' does not match pattern '{regex}'"
137 )));
138 }
139 }
140 }
141 Validator::Min { value: min } => {
142 if let Value::Number(n) = value {
143 if let Some(num) = n.as_f64() {
144 if num < *min {
145 return Err(Error::Parse(format!(
146 "Number {num} is less than minimum {min}"
147 )));
148 }
149 }
150 }
151 }
152 Validator::Max { value: max } => {
153 if let Value::Number(n) = value {
154 if let Some(num) = n.as_f64() {
155 if num > *max {
156 return Err(Error::Parse(format!(
157 "Number {num} exceeds maximum {max}"
158 )));
159 }
160 }
161 }
162 }
163 Validator::Enum { values } => {
164 if !values.contains(value) {
165 return Err(Error::Parse(format!(
166 "Value {value:?} is not in allowed values {values:?}"
167 )));
168 }
169 }
170 }
171
172 Ok(())
173 }
174
175 pub fn normalize(&self, data: &mut Value) -> Result<()> {
176 if let Value::Object(obj) = data {
177 for field in &self.fields {
178 if !obj.contains_key(&field.name) {
179 if let Some(default) = &field.default {
180 obj.insert(field.name.clone(), default.clone());
181 }
182 }
183 }
184 }
185
186 Ok(())
187 }
188}