omnivore_core/parser/
schema.rs

1use crate::{Error, Result};
2use serde::{Deserialize, Serialize};
3use serde_json::Value;
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct Schema {
8    pub name: String,
9    pub version: String,
10    pub fields: Vec<Field>,
11    pub required: Vec<String>,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct Field {
16    pub name: String,
17    pub field_type: FieldType,
18    pub description: Option<String>,
19    pub default: Option<Value>,
20    pub validators: Vec<Validator>,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24#[serde(rename_all = "lowercase")]
25pub enum FieldType {
26    String,
27    Number,
28    Boolean,
29    Date,
30    Url,
31    Email,
32    Array(Box<FieldType>),
33    Object(HashMap<String, FieldType>),
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37#[serde(tag = "type")]
38pub enum Validator {
39    MinLength { value: usize },
40    MaxLength { value: usize },
41    Pattern { regex: String },
42    Min { value: f64 },
43    Max { value: f64 },
44    Enum { values: Vec<Value> },
45}
46
47impl Schema {
48    pub fn validate(&self, data: &Value) -> Result<()> {
49        let obj = data
50            .as_object()
51            .ok_or_else(|| Error::Parse("Data must be an object".to_string()))?;
52
53        for required_field in &self.required {
54            if !obj.contains_key(required_field) {
55                return Err(Error::Parse(format!(
56                    "Required field '{required_field}' is missing"
57                )));
58            }
59        }
60
61        for field in &self.fields {
62            if let Some(value) = obj.get(&field.name) {
63                self.validate_field(field, value)?;
64            }
65        }
66
67        Ok(())
68    }
69
70    fn validate_field(&self, field: &Field, value: &Value) -> Result<()> {
71        self.validate_type(&field.field_type, value)?;
72
73        for validator in &field.validators {
74            self.apply_validator(validator, value)?;
75        }
76
77        Ok(())
78    }
79
80    #[allow(clippy::only_used_in_recursion)]
81    fn validate_type(&self, field_type: &FieldType, value: &Value) -> Result<()> {
82        match (field_type, value) {
83            (FieldType::String, Value::String(_)) => Ok(()),
84            (FieldType::Number, Value::Number(_)) => Ok(()),
85            (FieldType::Boolean, Value::Bool(_)) => Ok(()),
86            (FieldType::Array(inner_type), Value::Array(arr)) => {
87                for item in arr {
88                    self.validate_type(inner_type, item)?;
89                }
90                Ok(())
91            }
92            (FieldType::Object(schema), Value::Object(obj)) => {
93                for (key, expected_type) in schema {
94                    if let Some(val) = obj.get(key) {
95                        self.validate_type(expected_type, val)?;
96                    }
97                }
98                Ok(())
99            }
100            _ => Err(Error::Parse(format!(
101                "Type mismatch: expected {field_type:?}, got {value:?}"
102            ))),
103        }
104    }
105
106    fn apply_validator(&self, validator: &Validator, value: &Value) -> Result<()> {
107        match validator {
108            Validator::MinLength { value: min } => {
109                if let Value::String(s) = value {
110                    if s.len() < *min {
111                        return Err(Error::Parse(format!(
112                            "String length {} is less than minimum {}",
113                            s.len(),
114                            min
115                        )));
116                    }
117                }
118            }
119            Validator::MaxLength { value: max } => {
120                if let Value::String(s) = value {
121                    if s.len() > *max {
122                        return Err(Error::Parse(format!(
123                            "String length {} exceeds maximum {}",
124                            s.len(),
125                            max
126                        )));
127                    }
128                }
129            }
130            Validator::Pattern { regex } => {
131                if let Value::String(s) = value {
132                    let re = regex::Regex::new(regex)
133                        .map_err(|e| Error::Parse(format!("Invalid regex: {e}")))?;
134                    if !re.is_match(s) {
135                        return Err(Error::Parse(format!(
136                            "String '{s}' does not match pattern '{regex}'"
137                        )));
138                    }
139                }
140            }
141            Validator::Min { value: min } => {
142                if let Value::Number(n) = value {
143                    if let Some(num) = n.as_f64() {
144                        if num < *min {
145                            return Err(Error::Parse(format!(
146                                "Number {num} is less than minimum {min}"
147                            )));
148                        }
149                    }
150                }
151            }
152            Validator::Max { value: max } => {
153                if let Value::Number(n) = value {
154                    if let Some(num) = n.as_f64() {
155                        if num > *max {
156                            return Err(Error::Parse(format!(
157                                "Number {num} exceeds maximum {max}"
158                            )));
159                        }
160                    }
161                }
162            }
163            Validator::Enum { values } => {
164                if !values.contains(value) {
165                    return Err(Error::Parse(format!(
166                        "Value {value:?} is not in allowed values {values:?}"
167                    )));
168                }
169            }
170        }
171
172        Ok(())
173    }
174
175    pub fn normalize(&self, data: &mut Value) -> Result<()> {
176        if let Value::Object(obj) = data {
177            for field in &self.fields {
178                if !obj.contains_key(&field.name) {
179                    if let Some(default) = &field.default {
180                        obj.insert(field.name.clone(), default.clone());
181                    }
182                }
183            }
184        }
185
186        Ok(())
187    }
188}