schema_analysis/
helpers.rs

1//! A module for any useful helper functions.
2
3pub mod xml {
4    //! A module for xml cleaning helper functions.
5    //! Check individual functions for details.
6
7    use crate::{Field, Schema};
8
9    /// A wrapper function that applies all XML cleaning transformations.
10    ///
11    /// [clean_solitary_nested_values]
12    /// + [turn_duplicates_into_sequence_field]
13    /// + [clean_empty_structs_in_field]
14    pub fn cleanup_xml_schema(schema: &mut Schema) {
15        clean_solitary_nested_values(schema);
16        turn_duplicates_into_sequence_field(schema);
17        clean_empty_structs_in_field(schema);
18    }
19
20    /// XML documents often result in uselessly nested values because the 'content' of a tag
21    /// that isn't itself a tag is put into the `$value` field.
22    ///
23    /// This function simply finds [Schema::Struct]s with a single field named `$value` and
24    /// replaces them with the schema inside the `$value` field.
25    pub fn clean_solitary_nested_values(schema: &mut Schema) {
26        use Schema::*;
27        match schema {
28            Null(_) | Boolean(_) | Integer(_) | Float(_) | String(_) | Bytes(_) => {}
29            Sequence { field, .. } => match &mut field.schema {
30                Some(schema) => clean_solitary_nested_values(schema),
31                None => {}
32            },
33            Struct { fields, .. } => {
34                // If the only field is $value, then we 'bring it up'.
35                if fields.len() == 1 && fields.contains_key("$value") {
36                    if let Some(Field {
37                        schema: Some(inner_schema),
38                        ..
39                    }) = fields.remove("$value")
40                    {
41                        *schema = inner_schema;
42                    }
43                } else {
44                    for (_, field) in fields.iter_mut() {
45                        match &mut field.schema {
46                            Some(schema) => clean_solitary_nested_values(schema),
47                            None => {}
48                        }
49                    }
50                }
51            }
52            Union { variants } => {
53                for value in variants {
54                    clean_solitary_nested_values(value);
55                }
56            }
57        }
58    }
59
60    /// XML documents do not have proper sequences, and an 'array' or 'list' is simply
61    /// represented as a tag appearing multiple times.
62    ///
63    /// To help with this the inference software annotates duplicate fields, and this function
64    /// takes the schema in that field and places it into a [Schema::Sequence].
65    pub fn turn_duplicates_into_sequence_field(schema: &mut Schema) {
66        clean_field_recursively(schema, _inner_field_cleaning);
67
68        fn _inner_field_cleaning(field: &mut Field) {
69            match &mut field.schema {
70                Some(schema) => clean_field_recursively(schema, _inner_field_cleaning),
71                None => {}
72            }
73            // In xml, sequences are simply registered as a field appearing more than once,
74            // the parser records this but now we need to move the duplicate field into its own sequence.
75            if field.status.may_be_duplicate {
76                *field = Field {
77                    status: field.status.clone(),
78                    schema: Some(Schema::Sequence {
79                        field: Box::new(field.clone()),
80                        context: Default::default(),
81                    }),
82                };
83                field.status.may_be_duplicate = false;
84            }
85        }
86    }
87
88    /// When a tag is empty, the parser interprets it as as an empty [Schema::Struct].
89    ///
90    /// This function replaces those fields with empty [Schema::Struct] with fields of
91    /// unknown schema.
92    pub fn clean_empty_structs_in_field(schema: &mut Schema) {
93        clean_field_recursively(schema, _inner_field_cleaning);
94
95        fn _inner_field_cleaning(field: &mut Field) {
96            match &mut field.schema {
97                Some(Schema::Struct { fields, .. }) if fields.is_empty() => {
98                    field.schema = None;
99                }
100                Some(schema) => clean_field_recursively(schema, _inner_field_cleaning),
101                None => {}
102            }
103        }
104    }
105
106    fn clean_field_recursively(schema: &mut Schema, clean_field: fn(&mut Field)) {
107        use Schema::*;
108        match schema {
109            Null(_) | Boolean(_) | Integer(_) | Float(_) | String(_) | Bytes(_) => {}
110            Schema::Sequence { field, .. } => clean_field(field),
111            Schema::Struct { fields, .. } => {
112                for (_, field) in fields.iter_mut() {
113                    clean_field(field);
114                }
115            }
116            Schema::Union { variants } => {
117                for value in variants {
118                    clean_field_recursively(value, clean_field);
119                }
120            }
121        }
122    }
123}