Skip to main content

mdmodels_core/json/
schema.rs

1/*
2 * Copyright (c) 2025 Jan Range
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24use serde::{Deserialize, Serialize};
25use serde_json::Value;
26use std::sync::atomic::{AtomicUsize, Ordering};
27use std::{
28    collections::{BTreeMap, HashMap},
29    fmt::{self, Display},
30    str::FromStr,
31};
32use variantly::Variantly;
33
34use crate::attribute;
35
/// Process-wide sequence number used to build fallback titles; the first
/// value handed out is 1.
static TITLE_COUNTER: AtomicUsize = AtomicUsize::new(1);

/// Builds a fallback title of the form `untitled_<n>`.
///
/// Each call atomically takes the next value of [`TITLE_COUNTER`], so two
/// calls never observe the same number, even from different threads.
fn generate_unique_title() -> String {
    let sequence = TITLE_COUNTER.fetch_add(1, Ordering::SeqCst);
    let mut title = String::from("untitled_");
    title.push_str(&sequence.to_string());
    title
}
43
44#[derive(Debug, Deserialize, Serialize, Clone)]
45#[serde(untagged)]
46pub enum SchemaType {
47    Object(SchemaObject),
48    Enum(EnumObject),
49}
50
51#[derive(Debug, Deserialize, Serialize, Clone)]
52pub struct SchemaObject {
53    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
54    pub schema: Option<String>,
55    #[serde(rename = "$id", skip_serializing_if = "Option::is_none")]
56    pub id: Option<String>,
57    #[serde(
58        default = "generate_unique_title",
59        deserialize_with = "deserialize_title_with_whitespace_removal"
60    )]
61    pub title: String,
62    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
63    pub dtype: Option<DataType>,
64    #[serde(skip_serializing_if = "skip_empty_string")]
65    pub description: Option<String>,
66    pub properties: BTreeMap<String, Property>,
67    #[serde(
68        rename = "$defs",
69        skip_serializing_if = "BTreeMap::is_empty",
70        alias = "definitions"
71    )]
72    #[serde(default)]
73    pub definitions: BTreeMap<String, SchemaType>,
74    #[serde(default)]
75    pub required: Vec<String>,
76    #[serde(
77        rename = "additionalProperties",
78        default = "default_false",
79        deserialize_with = "deserialize_additional_properties"
80    )]
81    pub additional_properties: bool,
82}
83
84impl SchemaObject {
85    pub fn to_value(&self) -> Result<Value, serde_json::Error> {
86        serde_json::to_value(self)
87    }
88}
89
90#[derive(Debug, Deserialize, Serialize, Clone)]
91pub struct EnumObject {
92    #[serde(default = "generate_unique_title")]
93    pub title: String,
94    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
95    pub dtype: Option<DataType>,
96    #[serde(skip_serializing_if = "skip_empty_string")]
97    pub description: Option<String>,
98    #[serde(rename = "enum")]
99    pub enum_values: Vec<String>,
100}
101
102#[derive(Debug, Deserialize, Serialize, Clone)]
103pub struct Property {
104    #[serde(alias = "name", skip_serializing_if = "Option::is_none")]
105    pub title: Option<String>,
106    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
107    pub dtype: Option<DataType>,
108    #[serde(rename = "default", skip_serializing_if = "Option::is_none")]
109    pub default: Option<PrimitiveType>,
110    #[serde(skip_serializing_if = "skip_empty_string")]
111    pub description: Option<String>,
112    #[serde(rename = "$term", skip_serializing_if = "skip_empty_string")]
113    pub term: Option<String>,
114    #[serde(rename = "$ref", skip_serializing_if = "Option::is_none")]
115    pub reference: Option<String>,
116    #[serde(flatten)]
117    pub options: HashMap<String, PrimitiveType>,
118    #[serde(skip_serializing_if = "Option::is_none")]
119    pub items: Option<Item>,
120    #[serde(rename = "oneOf", skip_serializing_if = "skip_empty")]
121    pub one_of: Option<Vec<Item>>,
122    #[serde(rename = "anyOf", skip_serializing_if = "skip_empty")]
123    pub any_of: Option<Vec<Item>>,
124    #[serde(rename = "allOf", skip_serializing_if = "skip_empty")]
125    pub all_of: Option<Vec<Item>>,
126    #[serde(skip_serializing_if = "skip_empty", rename = "enum")]
127    pub enum_values: Option<Vec<String>>,
128    #[serde(default, skip_serializing_if = "Vec::is_empty")]
129    pub examples: Vec<Value>,
130}
131
132#[derive(Debug, Deserialize, Variantly, Clone)]
133#[serde(untagged)]
134pub enum Item {
135    ReferenceItem(ReferenceItemType),
136    OneOfItem(OneOfItemType),
137    AnyOfItem(AnyOfItemType),
138    DataTypeItem(DataTypeItemType),
139    // TODO: Add PropertyItem?
140}
141
142impl Item {
143    /// Returns a vector of all the types that can be found in the item.
144    /// This is useful for getting all the types that can be found in a property.
145    pub(crate) fn get_types(&self) -> Vec<String> {
146        match self {
147            Item::ReferenceItem(ref_item) => vec![ref_item.reference.clone()],
148            Item::OneOfItem(one_of_item) => one_of_item
149                .one_of
150                .iter()
151                .flat_map(|item| item.get_types())
152                .collect(),
153            Item::AnyOfItem(any_of_item) => any_of_item
154                .any_of
155                .iter()
156                .flat_map(|item| item.get_types())
157                .collect(),
158            Item::DataTypeItem(data_type_item) => vec![data_type_item.dtype.to_string()],
159        }
160    }
161}
162
163impl Serialize for Item {
164    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
165    where
166        S: serde::Serializer,
167    {
168        match self {
169            Item::ReferenceItem(ref_item) => ref_item.serialize(serializer),
170            Item::OneOfItem(one_of_item) => one_of_item.serialize(serializer),
171            Item::AnyOfItem(any_of_item) => any_of_item.serialize(serializer),
172            Item::DataTypeItem(data_type_item) => data_type_item.serialize(serializer),
173        }
174    }
175}
176
177#[derive(Debug, Deserialize, Serialize, Clone)]
178pub struct ReferenceItemType {
179    #[serde(rename = "$ref")]
180    pub reference: String,
181}
182
183#[derive(Debug, Deserialize, Serialize, Clone)]
184pub struct OneOfItemType {
185    #[serde(rename = "oneOf")]
186    pub one_of: Vec<Item>,
187}
188
189#[derive(Debug, Deserialize, Serialize, Clone)]
190pub struct AnyOfItemType {
191    #[serde(rename = "anyOf")]
192    pub any_of: Vec<Item>,
193}
194
195#[derive(Debug, Deserialize, Serialize, Clone)]
196pub struct DataTypeItemType {
197    #[serde(rename = "type")]
198    pub dtype: DataType,
199}
200
201/// Represents various data types that can be used in a JSON schema.
202#[derive(Debug, Deserialize, Serialize, PartialEq, Variantly, Clone, Hash, Eq)]
203pub enum DataType {
204    #[serde(rename = "string")]
205    String,
206    #[serde(rename = "integer")]
207    Integer,
208    #[serde(rename = "number")]
209    Number,
210    #[serde(rename = "boolean")]
211    Boolean,
212    #[serde(rename = "object")]
213    Object,
214    #[serde(rename = "array")]
215    Array,
216    #[serde(rename = "null")]
217    Null,
218    #[serde(untagged)]
219    Multiple(Box<Vec<DataType>>),
220}
221
222impl Default for DataType {
223    /// Provides a default value for the DataType, which is `String`.
224    fn default() -> Self {
225        DataType::String
226    }
227}
228
229impl FromStr for DataType {
230    type Err = String;
231
232    /// Converts a string representation of a data type into a `DataType` enum.
233    ///
234    /// # Errors
235    ///
236    /// Returns an error if the string is empty or does not match any known data type.
237    fn from_str(s: &str) -> Result<Self, Self::Err> {
238        match s {
239            "string" => Ok(DataType::String),
240            "number" => Ok(DataType::Number),
241            "float" => Ok(DataType::Number),
242            "integer" => Ok(DataType::Integer),
243            "boolean" => Ok(DataType::Boolean),
244            "object" => Ok(DataType::Object),
245            "array" => Ok(DataType::Array),
246            _ => Err(format!("Invalid data type: {s}")),
247        }
248    }
249}
250
251impl Display for DataType {
252    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253        match self {
254            DataType::String => write!(f, "string"),
255            DataType::Number => write!(f, "number"),
256            DataType::Integer => write!(f, "integer"),
257            DataType::Boolean => write!(f, "boolean"),
258            DataType::Object => write!(f, "object"),
259            DataType::Array => write!(f, "array"),
260            DataType::Null => write!(f, "null"),
261            DataType::Multiple(types) => write!(
262                f,
263                "multiple({})",
264                types
265                    .iter()
266                    .map(|t| t.to_string())
267                    .collect::<Vec<String>>()
268                    .join(", ")
269            ),
270        }
271    }
272}
273
274impl TryFrom<&String> for DataType {
275    type Error = String;
276
277    fn try_from(s: &String) -> Result<Self, Self::Error> {
278        match s.as_str() {
279            "string" => Ok(DataType::String),
280            "number" => Ok(DataType::Number),
281            "integer" => Ok(DataType::Integer),
282            "boolean" => Ok(DataType::Boolean),
283            "array" => Ok(DataType::Array),
284            "float" => Ok(DataType::Number),
285            _ => Ok(DataType::Object),
286        }
287    }
288}
289
290#[derive(Debug, Deserialize, Serialize, Clone)]
291#[serde(untagged)]
292pub enum PrimitiveType {
293    String(String),
294    Number(f64),
295    Integer(i64),
296    Boolean(bool),
297}
298
299impl From<&String> for PrimitiveType {
300    /// Converts a string reference into a `PrimitiveType` enum.
301    ///
302    /// # Arguments
303    ///
304    /// * `s` - A reference to the string to be converted.
305    ///
306    /// # Returns
307    ///
308    /// A `PrimitiveType` enum variant corresponding to the parsed value.
309    fn from(s: &String) -> Self {
310        if let Ok(number) = s.parse::<f64>() {
311            return PrimitiveType::Number(number);
312        }
313
314        if let Ok(boolean) = s.to_lowercase().parse::<bool>() {
315            return PrimitiveType::Boolean(boolean);
316        }
317
318        if let Ok(integer) = s.parse::<i64>() {
319            return PrimitiveType::Integer(integer);
320        }
321
322        PrimitiveType::String(s.clone())
323    }
324}
325
326impl Display for PrimitiveType {
327    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
328        match self {
329            PrimitiveType::String(s) => write!(f, "{s}"),
330            PrimitiveType::Number(n) => write!(f, "{n}"),
331            PrimitiveType::Integer(i) => write!(f, "{i}"),
332            PrimitiveType::Boolean(b) => write!(f, "{b}"),
333        }
334    }
335}
336
337impl From<attribute::DataType> for PrimitiveType {
338    fn from(dtype: attribute::DataType) -> Self {
339        match dtype {
340            attribute::DataType::String(s) => {
341                PrimitiveType::String(s.trim_matches('"').to_string())
342            }
343            attribute::DataType::Integer(i) => PrimitiveType::Integer(i),
344            attribute::DataType::Float(f) => PrimitiveType::Number(f),
345            attribute::DataType::Boolean(b) => PrimitiveType::Boolean(b),
346        }
347    }
348}
349
/// Serde `skip_serializing_if` predicate for optional lists.
///
/// Returns `true` when the option is `None` or holds an empty vector.
fn skip_empty<T>(option: &Option<Vec<T>>) -> bool {
    // `None` falls back to the default (empty) slice, which is empty too.
    option.as_deref().unwrap_or_default().is_empty()
}
356
/// Serde `skip_serializing_if` predicate for optional strings.
///
/// Returns `true` when the option is `None` or holds an empty string.
fn skip_empty_string(option: &Option<String>) -> bool {
    // `None` falls back to the default `&str`, i.e. the empty string.
    option.as_deref().unwrap_or_default().is_empty()
}
363
/// Serde `default` provider that yields `false`.
fn default_false() -> bool {
    false
}
367
368fn deserialize_title_with_whitespace_removal<'de, D>(deserializer: D) -> Result<String, D::Error>
369where
370    D: serde::Deserializer<'de>,
371{
372    use serde::de::{self, Visitor};
373    use std::fmt;
374
375    struct TitleVisitor;
376
377    impl<'de> Visitor<'de> for TitleVisitor {
378        type Value = String;
379
380        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
381            formatter.write_str("a string")
382        }
383
384        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
385        where
386            E: de::Error,
387        {
388            Ok(value.chars().filter(|c| !c.is_whitespace()).collect())
389        }
390
391        fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
392        where
393            E: de::Error,
394        {
395            Ok(value.chars().filter(|c| !c.is_whitespace()).collect())
396        }
397    }
398
399    deserializer.deserialize_str(TitleVisitor)
400}
401
402fn deserialize_additional_properties<'de, D>(deserializer: D) -> Result<bool, D::Error>
403where
404    D: serde::Deserializer<'de>,
405{
406    use serde::de::{self, Visitor};
407    use serde_json::Value;
408    use std::fmt;
409
410    struct AdditionalPropertiesVisitor;
411
412    impl<'de> Visitor<'de> for AdditionalPropertiesVisitor {
413        type Value = bool;
414
415        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
416            formatter.write_str("a boolean or an object")
417        }
418
419        fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E>
420        where
421            E: de::Error,
422        {
423            Ok(value)
424        }
425
426        fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
427        where
428            M: de::MapAccess<'de>,
429        {
430            // Consume all entries in the map to avoid "trailing characters" error
431            while map.next_entry::<String, Value>()?.is_some() {
432                // Just consume and discard
433            }
434            // Any object/map means additionalProperties = true
435            Ok(true)
436        }
437    }
438
439    deserializer.deserialize_any(AdditionalPropertiesVisitor)
440}
441
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each basic type name parses to the matching `DataType`
    /// variant.
    #[test]
    fn test_from_str() {
        let expectations = [
            ("string", DataType::String),
            ("number", DataType::Number),
            ("integer", DataType::Integer),
            ("boolean", DataType::Boolean),
            ("object", DataType::Object),
            ("array", DataType::Array),
        ];

        for (name, expected) in expectations {
            assert_eq!(DataType::from_str(name).unwrap(), expected);
        }
    }

    /// Checks that deserializing a schema strips all whitespace from its
    /// title.
    #[test]
    fn test_title_whitespace_removal() {
        // Parses a schema document and hands back only its title.
        let title_of = |json: &str| -> String {
            let schema: SchemaObject = serde_json::from_str(json).unwrap();
            schema.title
        };

        // Spaces between words
        let json_with_whitespace = r#"
        {
            "title": "My Test Title",
            "type": "object",
            "properties": {}
        }
        "#;
        assert_eq!(title_of(json_with_whitespace), "MyTestTitle");

        // Leading/trailing spaces plus escaped tab, newline and carriage return
        let json_with_various_whitespace = r#"
        {
            "title": "  My\t\nTest\r Title  ",
            "type": "object",
            "properties": {}
        }
        "#;
        assert_eq!(title_of(json_with_various_whitespace), "MyTestTitle");

        // No whitespace at all: the title must come back unchanged
        let json_no_whitespace = r#"
        {
            "title": "MyTitle",
            "type": "object",
            "properties": {}
        }
        "#;
        assert_eq!(title_of(json_no_whitespace), "MyTitle");
    }
}