1pub mod resolver;
4
5mod error;
6
7use std::collections::{BTreeMap, HashSet, VecDeque};
8use std::fmt::Debug;
9use std::io::BufRead;
10use std::path::Path;
11
12use quick_xml::{
13 events::Event,
14 name::{LocalName, PrefixIter, QName, ResolveResult},
15};
16use resolver::{FileResolver, NoOpResolver, ResolveRequest, Resolver};
17use tracing::instrument;
18use url::Url;
19
20use crate::quick_xml::{
21 DeserializeSync, Error as QuickXmlError, IoReader, SliceReader, XmlReader, XmlReaderSync,
22};
23use crate::schema::xs::{Import, Include, Schema, SchemaContent};
24use crate::schema::{Namespace, NamespacePrefix, Schemas};
25
26pub use self::error::Error;
27
28#[must_use]
39#[derive(Default, Debug)]
40pub struct Parser<TResolver = NoOpResolver> {
41 cache: HashSet<Url>,
42 schemas: Schemas,
43 pending: VecDeque<ResolveRequest>,
44
45 resolver: TResolver,
46 resolve_includes: bool,
47}
48
49impl Parser {
50 pub fn new() -> Self {
52 Self::default()
53 }
54}
55
56impl<TResolver> Parser<TResolver> {
57 pub fn with_default_resolver(self) -> Parser<FileResolver> {
61 self.with_resolver(FileResolver)
62 }
63
64 pub fn with_resolver<XResolver: Resolver + 'static>(
66 self,
67 resolver: XResolver,
68 ) -> Parser<XResolver> {
69 let Self { schemas, .. } = self;
70
71 let cache = HashSet::new();
72 let pending = VecDeque::new();
73
74 Parser {
75 cache,
76 schemas,
77 pending,
78
79 resolver,
80 resolve_includes: true,
81 }
82 }
83
84 pub fn resolve_includes(mut self, value: bool) -> Self {
86 self.resolve_includes = value;
87
88 self
89 }
90
91 pub fn finish(self) -> Schemas {
94 self.schemas
95 }
96}
97
98impl<TResolver> Parser<TResolver>
99where
100 TResolver: Resolver,
101{
102 pub fn with_default_namespaces(self) -> Self {
112 self.with_namespace(NamespacePrefix::XS, Namespace::XS)
113 .with_namespace(NamespacePrefix::XML, Namespace::XML)
114 }
115
116 pub fn with_namespace(mut self, prefix: NamespacePrefix, namespace: Namespace) -> Self {
129 self.schemas
130 .get_or_create_namespace_info_mut(Some(prefix), Some(namespace));
131
132 self
133 }
134}
135
136impl<TResolver> Parser<TResolver>
137where
138 TResolver: Resolver,
139 TResolver::Buffer: BufRead,
140{
141 #[instrument(err, level = "trace", skip(self, schema))]
151 pub fn add_schema_from_str(mut self, schema: &str) -> Result<Self, Error<TResolver::Error>> {
152 let reader = SliceReader::new(schema);
153 let mut reader = SchemaReader::new(reader);
154
155 let schema = Schema::deserialize(&mut reader)?;
156
157 self.add_schema(schema, &reader.namespaces, None);
158 self.resolve_pending()?;
159
160 Ok(self)
161 }
162
163 #[instrument(err, level = "trace", skip(self, reader))]
173 pub fn add_schema_from_reader<R: BufRead>(
174 mut self,
175 reader: R,
176 ) -> Result<Self, Error<TResolver::Error>> {
177 let reader = IoReader::new(reader);
178 let mut reader = SchemaReader::new(reader);
179
180 let schema = Schema::deserialize(&mut reader)?;
181
182 self.add_schema(schema, &reader.namespaces, None);
183 self.resolve_pending()?;
184
185 Ok(self)
186 }
187
188 #[instrument(err, level = "trace", skip(self))]
198 pub fn add_schema_from_file<P: AsRef<Path> + Debug>(
199 self,
200 path: P,
201 ) -> Result<Self, Error<TResolver::Error>> {
202 let path = path.as_ref().canonicalize()?;
203 let url = Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path))?;
204
205 self.add_schema_from_url(url)
206 }
207
208 #[instrument(err, level = "trace", skip(self))]
219 pub fn add_schema_from_url(mut self, url: Url) -> Result<Self, Error<TResolver::Error>> {
220 let req = ResolveRequest::new(url);
221
222 self.resolve_location(req)?;
223 self.resolve_pending()?;
224
225 Ok(self)
226 }
227
228 fn add_pending(&mut self, req: ResolveRequest) {
229 tracing::debug!("Add pending resolve request: {req:#?}");
230
231 self.pending.push_back(req);
232 }
233
234 fn resolve_pending(&mut self) -> Result<(), Error<TResolver::Error>> {
235 while let Some(req) = self.pending.pop_front() {
236 self.resolve_location(req)?;
237 }
238
239 Ok(())
240 }
241
242 #[instrument(err, level = "trace", skip(self))]
243 fn resolve_location(&mut self, req: ResolveRequest) -> Result<(), Error<TResolver::Error>> {
244 tracing::debug!("Process resolve request: {req:#?}");
245
246 let Some((location, buffer)) = self.resolver.resolve(&req).map_err(Error::resolver)? else {
247 return Err(Error::UnableToResolve(Box::new(req)));
248 };
249 if self.cache.contains(&location) {
250 return Ok(());
251 }
252
253 let reader = IoReader::new(buffer);
254 let reader = SchemaReader::new(reader);
255 let mut reader = reader.with_error_info();
256
257 let schema = Schema::deserialize(&mut reader)?;
258
259 let reader = reader.into_inner();
260
261 self.add_schema(schema, &reader.namespaces, Some(&location));
262 self.cache.insert(location);
263
264 Ok(())
265 }
266
267 fn add_schema(
268 &mut self,
269 schema: Schema,
270 namespaces: &Namespaces,
271 current_location: Option<&Url>,
272 ) {
273 tracing::debug!(
274 "Process schema (location={:?}, target_namespace={:?}",
275 current_location.as_ref().map(|url| url.as_str()),
276 &schema.target_namespace
277 );
278
279 let target_ns = schema
280 .target_namespace
281 .as_deref()
282 .map(|ns| Namespace::from(ns.as_bytes().to_owned()));
283 let prefix = namespaces.get(&target_ns).cloned().flatten();
284
285 if self.resolve_includes {
286 for content in &schema.content {
287 match content {
288 SchemaContent::Import(x) => {
289 if let Some(req) = import_req(x, target_ns.clone(), current_location) {
290 self.add_pending(req);
291 }
292 }
293 SchemaContent::Include(x) => {
294 self.add_pending(include_req(x, target_ns.clone(), current_location));
295 }
296 _ => (),
297 }
298 }
299 }
300
301 self.schemas.add_schema(prefix, target_ns, schema);
302 }
303}
304
305struct SchemaReader<R> {
306 inner: R,
307 namespaces: Namespaces,
308}
309
310type Namespaces = BTreeMap<Option<Namespace>, Option<NamespacePrefix>>;
311
312impl<R> SchemaReader<R> {
313 fn new(inner: R) -> Self {
314 Self {
315 inner,
316 namespaces: BTreeMap::new(),
317 }
318 }
319}
320
321impl<R> XmlReader for SchemaReader<R>
322where
323 R: XmlReader,
324{
325 fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult<'_>, LocalName<'n>) {
326 self.inner.resolve(name, attribute)
327 }
328
329 fn prefixes(&self) -> PrefixIter<'_> {
330 self.inner.prefixes()
331 }
332
333 fn current_position(&self) -> u64 {
334 self.inner.current_position()
335 }
336
337 fn error_position(&self) -> u64 {
338 self.inner.error_position()
339 }
340}
341
342impl<'a, R> XmlReaderSync<'a> for SchemaReader<R>
343where
344 R: XmlReaderSync<'a>,
345{
346 fn read_event(&mut self) -> Result<Event<'a>, QuickXmlError> {
347 let event = self.inner.read_event()?;
348
349 if let Event::Start(x) | Event::Empty(x) = &event {
350 for a in x.attributes() {
351 let a = a?;
352 if matches!(a.key.prefix(), Some(x) if x.as_ref() == b"xmlns") {
353 let prefix = NamespacePrefix::new(a.key.local_name().as_ref().to_owned());
354 let namespace = Namespace::new(a.value.into_owned());
355
356 self.namespaces
357 .entry(Some(namespace))
358 .or_insert(Some(prefix));
359 }
360 }
361 }
362
363 Ok(event)
364 }
365}
366
367fn import_req(
368 import: &Import,
369 current_ns: Option<Namespace>,
370 current_location: Option<&Url>,
371) -> Option<ResolveRequest> {
372 let location = import.schema_location.as_ref()?;
373
374 let mut req = ResolveRequest::new(location);
375
376 if let Some(ns) = current_ns {
377 req = req.current_ns(ns);
378 }
379
380 if let Some(ns) = &import.namespace {
381 req = req.requested_ns(Namespace::from(ns.as_bytes().to_owned()));
382 }
383
384 if let Some(current_location) = current_location {
385 req = req.current_location(current_location.clone());
386 }
387
388 Some(req)
389}
390
391fn include_req(
392 include: &Include,
393 current_ns: Option<Namespace>,
394 current_location: Option<&Url>,
395) -> ResolveRequest {
396 let mut req = ResolveRequest::new(&include.schema_location);
397
398 if let Some(ns) = current_ns {
399 req = req.current_ns(ns);
400 }
401
402 if let Some(current_location) = current_location {
403 req = req.current_location(current_location.clone());
404 }
405
406 req
407}