Skip to main content

xsd_parser/models/
ident_cache.rs

1use std::borrow::Cow;
2use std::collections::{hash_set::Iter as HashSetIter, HashMap, HashSet};
3use std::iter::FusedIterator;
4use std::slice::Iter as SliceIter;
5
6use crate::models::schema::Dependency;
7use crate::{InterpreterError, Name};
8
9use super::{
10    schema::{NamespaceId, SchemaId},
11    IdentType, TypeIdent,
12};
13
14/// Cache that keeps track of different [`TypeIdent`]ifiers.
15///
16/// The [`IdentCache`] is created by the [`Interpreter`](crate::Interpreter)
17/// (see [`exec_interpreter_with_ident_cache`](crate::exec_interpreter_with_ident_cache)).
18///
19/// It contains all types that are created during the interpretation of the
20/// provided schemas. It is able to resolve half qualified types identifiers
21/// (identifiers with a missing schema or namespace ID), to the actual identifier
22/// that is used to identify a [`MetaType`](crate::models::meta::MetaType) inside
23/// the [`MetaTypes`](crate::MetaTypes) structure.
24#[derive(Default, Debug)]
25pub struct IdentCache {
26    schemas: HashMap<SchemaId, SchemaEntry>,
27    unknown_schema: HashMap<NamespaceId, SchemaEntry>,
28
29    namespaces: HashMap<NamespaceId, HashSet<SchemaId>>,
30    global_namespaces: Vec<NamespaceId>,
31}
32
33/// Iterator over all identifiers that are known to the [`IdentCache`] in the
34/// context of the specified `schema` set. Not including the global namespaces,
35/// the unknown schema and the identifiers referenced by a `xs:include`.
36#[derive(Debug)]
37pub struct SchemaSetIter<'a> {
38    cache: &'a IdentCache,
39
40    visited: HashSet<SchemaId>,
41    emitted: HashSet<(IdentType, Cow<'a, str>)>,
42
43    types_iter: Option<TypesIterTuple<'a>>,
44    dependencies_iter: Vec<SliceIter<'a, Dependency<SchemaId>>>,
45}
46
47#[derive(Debug)]
48struct SchemaEntry {
49    ns: NamespaceId,
50    schema: SchemaId,
51    types: HashSet<(IdentType, Cow<'static, str>)>,
52    dependencies: Vec<Dependency<SchemaId>>,
53}
54
55type TypesIterTuple<'a> = (
56    NamespaceId,
57    SchemaId,
58    HashSetIter<'a, (IdentType, Cow<'static, str>)>,
59);
60
61impl IdentCache {
62    /// Insert the passed `ident`ifier into the cache.
63    #[inline]
64    pub fn insert(&mut self, ident: TypeIdent) {
65        let entry = if ident.schema.is_unknown() {
66            self.unknown_schema
67                .entry(ident.ns)
68                .or_insert_with(|| SchemaEntry {
69                    ns: ident.ns,
70                    schema: SchemaId::UNKNOWN,
71                    types: HashSet::new(),
72                    dependencies: Vec::new(),
73                })
74        } else {
75            self.schemas
76                .entry(ident.schema)
77                .or_insert_with(|| SchemaEntry {
78                    ns: ident.ns,
79                    schema: ident.schema,
80                    types: HashSet::new(),
81                    dependencies: Vec::new(),
82                })
83        };
84
85        entry.types.insert((ident.type_, ident.name.into()));
86    }
87
88    /// Add a schema to the cache.
89    ///
90    /// This is required to be able to resolve identifiers that are defined in the
91    /// schema, and to be able to add dependencies to it.
92    pub fn add_schema(&mut self, ns: NamespaceId, schema: SchemaId) {
93        self.schemas.entry(schema).or_insert_with(|| SchemaEntry {
94            ns,
95            schema,
96            types: HashSet::new(),
97            dependencies: Vec::new(),
98        });
99
100        self.namespaces.entry(ns).or_default().insert(schema);
101    }
102
103    /// Add a dependency between two schemas.
104    ///
105    /// This means that when trying to resolve an identifier for `schema`, the
106    /// cache will also search for it in `dependency`.
107    ///
108    /// # Returns
109    /// Returns `true` if the dependency was added, or `false` if it already
110    /// existed or if `schema` is not known to the cache.
111    pub fn add_dependency(&mut self, schema: SchemaId, dependency: Dependency<SchemaId>) -> bool {
112        if let Some(entry) = self.schemas.get_mut(&schema) {
113            if !entry.dependencies.contains(&dependency) {
114                entry.dependencies.push(dependency);
115                return true;
116            }
117        }
118
119        false
120    }
121
122    /// Add a namespace that is always searched when trying to resolve an identifier.
123    pub fn add_global_namespace(&mut self, ns: NamespaceId) {
124        self.namespaces.entry(ns).or_default();
125        self.global_namespaces.push(ns);
126    }
127
128    /// Try to resolve the passed `ident`ifier to an actual existing identifier.
129    ///
130    /// This function will lookup the passed `ident`ifier in the global context.
131    /// This means that all schemas that are known to the cache will be searched
132    /// for a matching type, and if multiple matches are found, an
133    /// [`InterpreterError::AmbiguousType`] error will be returned.
134    ///
135    /// # Errors
136    ///
137    /// Returns a [`InterpreterError::UnknownType`] if the identifier is not known
138    /// to the cache, or [`InterpreterError::AmbiguousType`] if multiple identifiers
139    /// matches the passed one.
140    pub fn resolve(&self, ident: TypeIdent) -> Result<TypeIdent, InterpreterError> {
141        let schemas = match (ident.ns, ident.schema) {
142            (NamespaceId::UNKNOWN, SchemaId::UNKNOWN) => self.schemas.keys().copied().collect(),
143            (ns, SchemaId::UNKNOWN) => self
144                .namespaces
145                .get(&ns)
146                .into_iter()
147                .flatten()
148                .copied()
149                .collect(),
150            (_, schema) => vec![schema],
151        };
152
153        let mut ret = None;
154        for schema in schemas {
155            if let Some(entry) = self.schemas.get(&schema) {
156                if entry.matches(&ident) {
157                    if ret.is_some() {
158                        return Err(InterpreterError::AmbiguousType(ident));
159                    }
160
161                    ret = Some(entry.make_ident(ident.clone()));
162                }
163            }
164        }
165
166        if ident.schema.is_unknown() {
167            for entry in self.unknown_schema.values() {
168                if entry.matches(&ident) {
169                    if ret.is_some() {
170                        return Err(InterpreterError::AmbiguousType(ident));
171                    }
172
173                    ret = Some(entry.make_ident(ident.clone()));
174                }
175            }
176        }
177
178        if let Some(resolved_ident) = ret {
179            Ok(resolved_ident)
180        } else {
181            Err(InterpreterError::UnknownType(ident))
182        }
183    }
184
185    /// Same as [`resolve`](IdentCache::resolve), but instead of returning a
186    /// [`UnknownType`](InterpreterError::UnknownType) error for unknown
187    /// identifiers it returns the original identifier.
188    ///
189    /// # Errors
190    ///
191    /// Returns [`InterpreterError::AmbiguousType`] if multiple identifiers
192    /// matches the passed one.
193    pub fn resolve_allow_unknown(&self, ident: TypeIdent) -> Result<TypeIdent, InterpreterError> {
194        match self.resolve(ident) {
195            Ok(ident) => Ok(ident),
196            Err(InterpreterError::UnknownType(ident)) => Ok(ident),
197            Err(error) => Err(error),
198        }
199    }
200
201    /// Get an iterator over all identifiers that are known to the cache in the
202    /// context of the specified `schema` set. Not including the global namespaces,
203    /// the unknown schema and the identifiers referenced by a `xs:include`.
204    #[must_use]
205    pub fn schema_set(&self, schema: SchemaId) -> SchemaSetIter<'_> {
206        SchemaSetIter::new(self, schema)
207    }
208
209    /// Try to resolve the passed `ident`ifier to an actual existing identifier.
210    ///
211    /// In contrast to [`resolve`](IdentCache::resolve), this function will search
212    /// for the passed `ident`ifier in the context of the specified `schema`.
213    /// This means that it will try to resolve the type inside `schema`,
214    /// and if it is not found, it will try to resolve it in its dependencies.
215    /// As soon as a match is found, it will be returned, and the search will
216    /// not continue, so no error will be raised if multiple matches are found.
217    ///
218    /// # Errors
219    ///
220    /// Returns a [`InterpreterError::UnknownType`] if the identifier is not known
221    /// to the cache.
222    pub fn resolve_for_schema(
223        &self,
224        schema: SchemaId,
225        ident: TypeIdent,
226    ) -> Result<TypeIdent, InterpreterError> {
227        let mut visited = HashSet::new();
228
229        if let Some(entry) = self.search_in_schema(&mut visited, schema, &ident) {
230            return Ok(entry.make_ident(ident));
231        }
232
233        for ns in &self.global_namespaces {
234            for schema in self.namespaces.get(ns).into_iter().flatten() {
235                if let Some(entry) = self.search_in_schema(&mut visited, *schema, &ident) {
236                    return Ok(entry.make_ident(ident));
237                }
238            }
239        }
240
241        for entry in self.unknown_schema.values() {
242            if entry.matches(&ident) {
243                return Ok(entry.make_ident(ident));
244            }
245        }
246
247        Err(InterpreterError::UnknownType(ident))
248    }
249
250    fn search_in_schema(
251        &self,
252        visited: &mut HashSet<SchemaId>,
253        schema: SchemaId,
254        ident: &TypeIdent,
255    ) -> Option<&SchemaEntry> {
256        if !visited.insert(schema) {
257            return None;
258        }
259
260        let entry = self.schemas.get(&schema)?;
261        if entry.matches(ident) {
262            return Some(entry);
263        }
264
265        for dep in &entry.dependencies {
266            if let Some(found) = self.search_in_schema(visited, **dep, ident) {
267                return Some(found);
268            }
269        }
270
271        None
272    }
273}
274
275impl SchemaEntry {
276    fn matches(&self, ident: &TypeIdent) -> bool {
277        let ns_matches = ident.ns.is_unknown() || ident.ns == self.ns;
278        let schema_matches = ident.schema.is_unknown() || ident.schema == self.schema;
279        let contains_type = self
280            .types
281            .contains(&(ident.type_, Cow::Borrowed(ident.name.as_str())));
282
283        ns_matches && schema_matches && contains_type
284    }
285
286    fn make_ident(&self, ident: TypeIdent) -> TypeIdent {
287        TypeIdent {
288            ns: ident.ns.or(self.ns),
289            schema: ident.schema.or(self.schema),
290            type_: ident.type_,
291            name: ident.name,
292        }
293    }
294}
295
296impl<'a> SchemaSetIter<'a> {
297    fn new(cache: &'a IdentCache, schema: SchemaId) -> Self {
298        let schema = cache.schemas.get(&schema);
299        let types_iter = schema.map(|x| (x.ns, x.schema, x.types.iter()));
300        let dependencies_iter = schema.map(|x| x.dependencies.iter()).into_iter().collect();
301
302        Self {
303            cache,
304            visited: HashSet::new(),
305            emitted: HashSet::new(),
306            types_iter,
307            dependencies_iter,
308        }
309    }
310}
311
312impl Iterator for SchemaSetIter<'_> {
313    type Item = TypeIdent;
314
315    fn next(&mut self) -> Option<Self::Item> {
316        #[allow(clippy::redundant_else)]
317        loop {
318            if let Some((ns, schema, types_iter)) = &mut self.types_iter {
319                if let Some((type_, name)) = types_iter.next() {
320                    if !self.emitted.insert((*type_, Cow::Borrowed(name.as_ref()))) {
321                        continue;
322                    }
323
324                    break Some(TypeIdent {
325                        ns: *ns,
326                        schema: *schema,
327                        type_: *type_,
328                        name: Name::new_named(name.clone()),
329                    });
330                } else {
331                    self.types_iter = None;
332                }
333            } else if let Some(dependencies_iter) = self.dependencies_iter.last_mut() {
334                if let Some(dep) = dependencies_iter.next() {
335                    if self.visited.insert(**dep) {
336                        let Some(entry) = self.cache.schemas.get(&**dep) else {
337                            continue;
338                        };
339                        self.types_iter = Some((entry.ns, entry.schema, entry.types.iter()));
340                        self.dependencies_iter.push(entry.dependencies.iter());
341                    }
342                } else {
343                    self.dependencies_iter.pop();
344                }
345            } else {
346                break None;
347            }
348        }
349    }
350}
351
352impl FusedIterator for SchemaSetIter<'_> {}