Skip to main content

mdmodels_core/json/
schema.rs

1/*
2 * Copyright (c) 2025 Jan Range
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 */
23
24use serde::{Deserialize, Serialize};
25use serde_json::Value;
26use std::sync::atomic::{AtomicUsize, Ordering};
27use std::{
28    collections::{BTreeMap, HashMap},
29    fmt::{self, Display},
30    str::FromStr,
31};
32use variantly::Variantly;
33
34use crate::attribute;
35
/// Process-wide counter that feeds [`generate_unique_title`].
///
/// It starts at 1 and is only ever advanced with an atomic `fetch_add`, so two
/// concurrent callers can never observe the same value.
static TITLE_COUNTER: AtomicUsize = AtomicUsize::new(1);

/// Builds a placeholder title of the form `untitled_<n>`.
///
/// Every call consumes the next value of [`TITLE_COUNTER`]; the first call in a
/// process therefore returns `untitled_1`, the second `untitled_2`, and so on.
fn generate_unique_title() -> String {
    format!(
        "untitled_{}",
        TITLE_COUNTER.fetch_add(1, Ordering::SeqCst)
    )
}
43
/// A schema definition: either an object schema or an enumeration.
///
/// The enum is `untagged`, so no discriminator is written on serialization and
/// the variants are tried in declaration order (`Object`, then `Enum`) when
/// deserializing.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(untagged)]
pub enum SchemaType {
    /// An object schema (see [`SchemaObject`]).
    Object(SchemaObject),
    /// A schema carrying a list of `enum` values (see [`EnumObject`]).
    Enum(EnumObject),
}
50
/// An object schema as read from / written to JSON by this module.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct SchemaObject {
    /// Value of the `$schema` key; left out of the output when `None`.
    #[serde(rename = "$schema", skip_serializing_if = "Option::is_none")]
    pub schema: Option<String>,
    /// Value of the `$id` key; left out of the output when `None`.
    #[serde(rename = "$id", skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Title of the schema.
    ///
    /// When the key is missing a placeholder from `generate_unique_title` is used;
    /// when it is present, all whitespace is stripped from the value.
    #[serde(
        default = "generate_unique_title",
        deserialize_with = "deserialize_title_with_whitespace_removal"
    )]
    pub title: String,
    /// Value of the `type` key; left out of the output when `None`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub dtype: Option<DataType>,
    /// Free-text description; left out of the output when `None` or empty.
    #[serde(skip_serializing_if = "skip_empty_string")]
    pub description: Option<String>,
    /// Properties keyed by name. There is no serde default on this field, so the
    /// `properties` key has to be present in the input.
    pub properties: BTreeMap<String, Property>,
    /// Nested definitions. Written as `$defs`, additionally read from
    /// `definitions`; defaults to empty and is left out of the output when empty.
    #[serde(
        rename = "$defs",
        skip_serializing_if = "BTreeMap::is_empty",
        alias = "definitions"
    )]
    #[serde(default)]
    pub definitions: BTreeMap<String, SchemaType>,
    /// Entries of the `required` key; defaults to empty and is always serialized.
    #[serde(default)]
    pub required: Vec<String>,
    /// Entries of the `optional` key; defaults to empty and is left out of the
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub optional: Vec<String>,
    /// Value of `additionalProperties`. Defaults to `false`; the input may be a
    /// boolean or an object (handled by `deserialize_additional_properties`).
    #[serde(
        rename = "additionalProperties",
        default = "default_false",
        deserialize_with = "deserialize_additional_properties"
    )]
    pub additional_properties: bool,
}
85
impl SchemaObject {
    /// Serializes this schema into a generic [`serde_json::Value`].
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `serde_json::to_value`.
    pub fn to_value(&self) -> Result<Value, serde_json::Error> {
        serde_json::to_value(self)
    }
}
91
/// An enumeration schema: a titled list of allowed string values.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct EnumObject {
    /// Title of the enumeration; a placeholder from `generate_unique_title` is
    /// used when the key is missing.
    #[serde(default = "generate_unique_title")]
    pub title: String,
    /// Value of the `type` key; left out of the output when `None`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub dtype: Option<DataType>,
    /// Free-text description; left out of the output when `None` or empty.
    #[serde(skip_serializing_if = "skip_empty_string")]
    pub description: Option<String>,
    /// The allowed values, stored under the `enum` key.
    #[serde(rename = "enum")]
    pub enum_values: Vec<String>,
}
103
/// A single property of a schema, possibly carrying a nested inline object.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct Property {
    /// Title of the property; `name` is accepted as an alternative input key.
    #[serde(alias = "name", skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    /// Value of the `type` key; left out of the output when `None`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub dtype: Option<DataType>,
    /// Value of the `default` key; left out of the output when `None`.
    #[serde(rename = "default", skip_serializing_if = "Option::is_none")]
    pub default: Option<PrimitiveType>,
    /// Free-text description; left out of the output when `None` or empty.
    #[serde(skip_serializing_if = "skip_empty_string")]
    pub description: Option<String>,
    /// Value of the `$term` key; left out of the output when `None` or empty.
    #[serde(rename = "$term", skip_serializing_if = "skip_empty_string")]
    pub term: Option<String>,
    /// Value of the `$ref` key; left out of the output when `None`.
    #[serde(rename = "$ref", skip_serializing_if = "Option::is_none")]
    pub reference: Option<String>,
    /// Nested properties of an inline object; defaults to empty and is left out
    /// of the output when empty.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub properties: BTreeMap<String, Property>,
    /// Entries of the `required` key; left out of the output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub required: Vec<String>,
    /// Entries of the `optional` key; left out of the output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub optional: Vec<String>,
    /// Value of `additionalProperties`, if given. The input may be a boolean or
    /// an object (handled by `deserialize_optional_additional_properties`).
    #[serde(
        rename = "additionalProperties",
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_optional_additional_properties"
    )]
    pub additional_properties: Option<bool>,
    /// Flattened catch-all for further keys of the property, e.g. `minItems` or
    /// `uniqueItems`, stored as primitive values.
    #[serde(flatten)]
    pub options: HashMap<String, PrimitiveType>,
    /// Value of the `items` key; left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub items: Option<Item>,
    /// Entries of `oneOf`; left out of the output when `None` or empty.
    #[serde(rename = "oneOf", skip_serializing_if = "skip_empty")]
    pub one_of: Option<Vec<Item>>,
    /// Entries of `anyOf`; left out of the output when `None` or empty.
    #[serde(rename = "anyOf", skip_serializing_if = "skip_empty")]
    pub any_of: Option<Vec<Item>>,
    /// Entries of `allOf`; left out of the output when `None` or empty.
    #[serde(rename = "allOf", skip_serializing_if = "skip_empty")]
    pub all_of: Option<Vec<Item>>,
    /// Entries of `enum`; left out of the output when `None` or empty.
    #[serde(skip_serializing_if = "skip_empty", rename = "enum")]
    pub enum_values: Option<Vec<String>>,
    /// Entries of `examples`; defaults to empty and is left out of the output
    /// when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub examples: Vec<Value>,
}
146
/// An entry used for `items`, `oneOf`, `anyOf` and `allOf`.
///
/// Deserialization is `untagged`: the variants are tried in declaration order,
/// so the inline-property variant acts as the fallback. Serialization is
/// implemented by hand further down in this file.
#[derive(Debug, Deserialize, Variantly, Clone)]
#[serde(untagged)]
pub enum Item {
    /// An object holding a `$ref`.
    ReferenceItem(ReferenceItemType),
    /// An object holding a `oneOf` list.
    OneOfItem(OneOfItemType),
    /// An object holding an `anyOf` list.
    AnyOfItem(AnyOfItemType),
    /// An inline property definition (boxed because `Property` contains `Item`).
    PropertyItem(Box<Property>),
}
155
156impl Item {
157    /// Returns a vector of all the types that can be found in the item.
158    /// This is useful for getting all the types that can be found in a property.
159    pub(crate) fn get_types(&self) -> Vec<String> {
160        match self {
161            Item::ReferenceItem(ref_item) => vec![ref_item.reference.clone()],
162            Item::OneOfItem(one_of_item) => one_of_item
163                .one_of
164                .iter()
165                .flat_map(|item| item.get_types())
166                .collect(),
167            Item::AnyOfItem(any_of_item) => any_of_item
168                .any_of
169                .iter()
170                .flat_map(|item| item.get_types())
171                .collect(),
172            Item::PropertyItem(property) => property.get_types(),
173        }
174    }
175
176    pub(crate) fn as_property(&self) -> Option<&Property> {
177        match self {
178            Item::PropertyItem(property) => Some(property),
179            _ => None,
180        }
181    }
182}
183
184impl Property {
185    pub(crate) fn get_types(&self) -> Vec<String> {
186        if let Some(reference) = &self.reference {
187            return vec![reference.clone()];
188        }
189
190        if let Some(dtype) = &self.dtype {
191            return vec![dtype.to_string()];
192        }
193
194        Vec::new()
195    }
196
197    pub(crate) fn has_inline_object(&self) -> bool {
198        !self.properties.is_empty()
199    }
200}
201
202impl Serialize for Item {
203    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
204    where
205        S: serde::Serializer,
206    {
207        match self {
208            Item::ReferenceItem(ref_item) => ref_item.serialize(serializer),
209            Item::OneOfItem(one_of_item) => one_of_item.serialize(serializer),
210            Item::AnyOfItem(any_of_item) => any_of_item.serialize(serializer),
211            Item::PropertyItem(property) => property.serialize(serializer),
212        }
213    }
214}
215
/// Payload of [`Item::ReferenceItem`]: an object with a single `$ref` key.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ReferenceItemType {
    /// The reference string stored under `$ref`.
    #[serde(rename = "$ref")]
    pub reference: String,
}
221
/// Payload of [`Item::OneOfItem`]: an object with a `oneOf` list.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct OneOfItemType {
    /// The alternatives stored under `oneOf`.
    #[serde(rename = "oneOf")]
    pub one_of: Vec<Item>,
}
227
/// Payload of [`Item::AnyOfItem`]: an object with an `anyOf` list.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct AnyOfItemType {
    /// The alternatives stored under `anyOf`.
    #[serde(rename = "anyOf")]
    pub any_of: Vec<Item>,
}
233
/// Represents various data types that can be used in a JSON schema.
///
/// The unit variants are (de)serialized as their lowercase names; `Multiple`
/// is untagged and therefore (de)serialized as a plain list of types.
#[derive(Debug, Deserialize, Serialize, PartialEq, Variantly, Clone, Hash, Eq)]
pub enum DataType {
    /// The `string` type.
    #[serde(rename = "string")]
    String,
    /// The `integer` type.
    #[serde(rename = "integer")]
    Integer,
    /// The `number` type.
    #[serde(rename = "number")]
    Number,
    /// The `boolean` type.
    #[serde(rename = "boolean")]
    Boolean,
    /// The `object` type.
    #[serde(rename = "object")]
    Object,
    /// The `array` type.
    #[serde(rename = "array")]
    Array,
    /// The `null` type.
    #[serde(rename = "null")]
    Null,
    /// Several types at once, written as a list of the types above.
    #[serde(untagged)]
    Multiple(Box<Vec<DataType>>),
}
254
255impl Default for DataType {
256    /// Provides a default value for the DataType, which is `String`.
257    fn default() -> Self {
258        DataType::String
259    }
260}
261
262impl FromStr for DataType {
263    type Err = String;
264
265    /// Converts a string representation of a data type into a `DataType` enum.
266    ///
267    /// # Errors
268    ///
269    /// Returns an error if the string is empty or does not match any known data type.
270    fn from_str(s: &str) -> Result<Self, Self::Err> {
271        match s {
272            "string" => Ok(DataType::String),
273            "number" => Ok(DataType::Number),
274            "float" => Ok(DataType::Number),
275            "integer" => Ok(DataType::Integer),
276            "boolean" => Ok(DataType::Boolean),
277            "object" => Ok(DataType::Object),
278            "array" => Ok(DataType::Array),
279            _ => Err(format!("Invalid data type: {s}")),
280        }
281    }
282}
283
284impl Display for DataType {
285    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
286        match self {
287            DataType::String => write!(f, "string"),
288            DataType::Number => write!(f, "number"),
289            DataType::Integer => write!(f, "integer"),
290            DataType::Boolean => write!(f, "boolean"),
291            DataType::Object => write!(f, "object"),
292            DataType::Array => write!(f, "array"),
293            DataType::Null => write!(f, "null"),
294            DataType::Multiple(types) => write!(
295                f,
296                "multiple({})",
297                types
298                    .iter()
299                    .map(|t| t.to_string())
300                    .collect::<Vec<String>>()
301                    .join(", ")
302            ),
303        }
304    }
305}
306
307impl TryFrom<&String> for DataType {
308    type Error = String;
309
310    fn try_from(s: &String) -> Result<Self, Self::Error> {
311        match s.as_str() {
312            "string" => Ok(DataType::String),
313            "number" => Ok(DataType::Number),
314            "integer" => Ok(DataType::Integer),
315            "boolean" => Ok(DataType::Boolean),
316            "array" => Ok(DataType::Array),
317            "float" => Ok(DataType::Number),
318            _ => Ok(DataType::Object),
319        }
320    }
321}
322
/// A primitive JSON value: string, number, integer or boolean.
///
/// The enum is `untagged`, so values are written without a discriminator and
/// the variants are tried in declaration order when deserializing. The tests
/// at the bottom of this file show that a JSON `1` deserializes as
/// `Number(1.0)`, i.e. `Number` is matched before `Integer`.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum PrimitiveType {
    /// A string value.
    String(String),
    /// A floating-point value.
    Number(f64),
    /// A signed integer value.
    Integer(i64),
    /// A boolean value.
    Boolean(bool),
}
331
332impl From<&String> for PrimitiveType {
333    /// Converts a string reference into a `PrimitiveType` enum.
334    ///
335    /// # Arguments
336    ///
337    /// * `s` - A reference to the string to be converted.
338    ///
339    /// # Returns
340    ///
341    /// A `PrimitiveType` enum variant corresponding to the parsed value.
342    fn from(s: &String) -> Self {
343        if let Ok(number) = s.parse::<f64>() {
344            return PrimitiveType::Number(number);
345        }
346
347        if let Ok(boolean) = s.to_lowercase().parse::<bool>() {
348            return PrimitiveType::Boolean(boolean);
349        }
350
351        if let Ok(integer) = s.parse::<i64>() {
352            return PrimitiveType::Integer(integer);
353        }
354
355        PrimitiveType::String(s.clone())
356    }
357}
358
359impl Display for PrimitiveType {
360    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361        match self {
362            PrimitiveType::String(s) => write!(f, "{s}"),
363            PrimitiveType::Number(n) => write!(f, "{n}"),
364            PrimitiveType::Integer(i) => write!(f, "{i}"),
365            PrimitiveType::Boolean(b) => write!(f, "{b}"),
366        }
367    }
368}
369
370impl From<attribute::DataType> for PrimitiveType {
371    fn from(dtype: attribute::DataType) -> Self {
372        match dtype {
373            attribute::DataType::String(s) => {
374                PrimitiveType::String(s.trim_matches('"').to_string())
375            }
376            attribute::DataType::Integer(i) => PrimitiveType::Integer(i),
377            attribute::DataType::Float(f) => PrimitiveType::Number(f),
378            attribute::DataType::Boolean(b) => PrimitiveType::Boolean(b),
379        }
380    }
381}
382
/// serde `skip_serializing_if` predicate for optional lists.
///
/// Returns `true` (skip the field) when the option is `None` or holds an empty
/// vector, and `false` when it holds at least one element.
fn skip_empty<T>(option: &Option<Vec<T>>) -> bool {
    // A missing list is treated like an empty slice.
    option.as_deref().unwrap_or_default().is_empty()
}
389
/// serde `skip_serializing_if` predicate for optional strings.
///
/// Returns `true` (skip the field) when the option is `None` or holds an empty
/// string, and `false` for any non-empty string (including whitespace).
fn skip_empty_string(option: &Option<String>) -> bool {
    // A missing string is treated like "".
    option.as_deref().unwrap_or_default().is_empty()
}
396
/// serde `default` provider that yields `false`.
///
/// Used for `SchemaObject::additional_properties` when the key is missing.
fn default_false() -> bool {
    false
}
400
401fn deserialize_title_with_whitespace_removal<'de, D>(deserializer: D) -> Result<String, D::Error>
402where
403    D: serde::Deserializer<'de>,
404{
405    use serde::de::{self, Visitor};
406    use std::fmt;
407
408    struct TitleVisitor;
409
410    impl<'de> Visitor<'de> for TitleVisitor {
411        type Value = String;
412
413        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
414            formatter.write_str("a string")
415        }
416
417        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
418        where
419            E: de::Error,
420        {
421            Ok(value.chars().filter(|c| !c.is_whitespace()).collect())
422        }
423
424        fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
425        where
426            E: de::Error,
427        {
428            Ok(value.chars().filter(|c| !c.is_whitespace()).collect())
429        }
430    }
431
432    deserializer.deserialize_str(TitleVisitor)
433}
434
435fn deserialize_optional_additional_properties<'de, D>(
436    deserializer: D,
437) -> Result<Option<bool>, D::Error>
438where
439    D: serde::Deserializer<'de>,
440{
441    use serde::de::{self, Visitor};
442    use serde_json::Value;
443    use std::fmt;
444
445    struct OptionalAdditionalPropertiesVisitor;
446
447    impl<'de> Visitor<'de> for OptionalAdditionalPropertiesVisitor {
448        type Value = Option<bool>;
449
450        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
451            formatter.write_str("a boolean or an object")
452        }
453
454        fn visit_none<E>(self) -> Result<Self::Value, E>
455        where
456            E: de::Error,
457        {
458            Ok(None)
459        }
460
461        fn visit_unit<E>(self) -> Result<Self::Value, E>
462        where
463            E: de::Error,
464        {
465            Ok(None)
466        }
467
468        fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E>
469        where
470            E: de::Error,
471        {
472            Ok(Some(value))
473        }
474
475        fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
476        where
477            M: de::MapAccess<'de>,
478        {
479            while map.next_entry::<String, Value>()?.is_some() {}
480            Ok(Some(true))
481        }
482    }
483
484    deserializer.deserialize_any(OptionalAdditionalPropertiesVisitor)
485}
486
487fn deserialize_additional_properties<'de, D>(deserializer: D) -> Result<bool, D::Error>
488where
489    D: serde::Deserializer<'de>,
490{
491    use serde::de::{self, Visitor};
492    use serde_json::Value;
493    use std::fmt;
494
495    struct AdditionalPropertiesVisitor;
496
497    impl<'de> Visitor<'de> for AdditionalPropertiesVisitor {
498        type Value = bool;
499
500        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
501            formatter.write_str("a boolean or an object")
502        }
503
504        fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E>
505        where
506            E: de::Error,
507        {
508            Ok(value)
509        }
510
511        fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
512        where
513            M: de::MapAccess<'de>,
514        {
515            // Consume all entries in the map to avoid "trailing characters" error
516            while map.next_entry::<String, Value>()?.is_some() {
517                // Just consume and discard
518            }
519            // Any object/map means additionalProperties = true
520            Ok(true)
521        }
522    }
523
524    deserializer.deserialize_any(AdditionalPropertiesVisitor)
525}
526
// Unit tests for the parsing and (de)serialization helpers of this module.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    /// Tests the conversion from string to DataType enum variants.
    /// Only the basic type names accepted by `from_str` are covered here.
    fn test_from_str() {
        assert_eq!(DataType::from_str("string").unwrap(), DataType::String);
        assert_eq!(DataType::from_str("number").unwrap(), DataType::Number);
        assert_eq!(DataType::from_str("integer").unwrap(), DataType::Integer);
        assert_eq!(DataType::from_str("boolean").unwrap(), DataType::Boolean);
        assert_eq!(DataType::from_str("object").unwrap(), DataType::Object);
        assert_eq!(DataType::from_str("array").unwrap(), DataType::Array);
    }

    #[test]
    /// Tests that title deserialization removes all whitespaces from the title string.
    fn test_title_whitespace_removal() {
        use serde_json;

        // Test with plain spaces between the words
        let json_with_whitespace = r#"
        {
            "title": "My Test Title",
            "type": "object",
            "properties": {}
        }
        "#;

        let schema: SchemaObject = serde_json::from_str(json_with_whitespace).unwrap();
        assert_eq!(schema.title, "MyTestTitle");

        // Test with various whitespace characters (spaces plus the JSON escapes
        // for tab, newline and carriage return)
        let json_with_various_whitespace = r#"
        {
            "title": "  My\t\nTest\r Title  ",
            "type": "object",
            "properties": {}
        }
        "#;

        let schema2: SchemaObject = serde_json::from_str(json_with_various_whitespace).unwrap();
        assert_eq!(schema2.title, "MyTestTitle");

        // Test with no whitespace (should remain unchanged)
        let json_no_whitespace = r#"
        {
            "title": "MyTitle",
            "type": "object",
            "properties": {}
        }
        "#;

        let schema3: SchemaObject = serde_json::from_str(json_no_whitespace).unwrap();
        assert_eq!(schema3.title, "MyTitle");
    }

    #[test]
    /// Tests that a property with nested inline objects deserializes, keeping
    /// the `properties`, `optional` and `required` entries of each level.
    fn test_nested_property_deserialization() {
        let property_json = json!({
            "optional": ["name"],
            "properties": {
                "display_standard_values": {
                    "default": false,
                    "type": "boolean"
                },
                "filter": {
                    "optional": ["type"],
                    "properties": {
                        "limit": {
                            "default": 1e-16,
                            "exclusiveMinimum": 0,
                            "type": "number"
                        },
                        "type": {
                            "enum": ["QR1", "QR2"],
                            "type": "string"
                        }
                    },
                    "required": ["limit"],
                    "type": "object"
                }
            },
            "required": [],
            "type": "object"
        });

        let property: Property = serde_json::from_value(property_json).unwrap();
        assert_eq!(property.properties.len(), 2);
        assert_eq!(property.optional, vec!["name"]);
        assert!(property.properties.contains_key("filter"));

        let filter = property.properties.get("filter").unwrap();
        assert_eq!(filter.properties.len(), 2);
        assert_eq!(filter.required, vec!["limit"]);
    }

    #[test]
    /// Tests that array `items` given as an inline object end up as a property
    /// item and that the remaining keys are collected in `options`.
    fn test_array_items_with_inline_object() {
        let property_json = json!({
            "type": "array",
            "items": {
                "properties": {
                    "name": { "type": "string" },
                    "solver": { "type": "string" }
                },
                "required": ["name", "solver"],
                "type": "object"
            },
            "minItems": 1,
            "uniqueItems": true
        });

        let property: Property = serde_json::from_value(property_json).unwrap();
        let items = property.items.as_ref().unwrap();
        let item_property = items.as_property().expect("expected inline object items");
        assert_eq!(item_property.properties.len(), 2);
        // The integer `1` is expected as `Number(1.0)`, not as `Integer(1)`.
        assert_eq!(
            property.options.get("minItems"),
            Some(&PrimitiveType::Number(1.0))
        );
        assert_eq!(
            property.options.get("uniqueItems"),
            Some(&PrimitiveType::Boolean(true))
        );
    }
}