xsd_parser/pipeline/parser/
mod.rs1pub mod resolver;
21
22mod error;
23
24use std::collections::{BTreeMap, HashSet, VecDeque};
25use std::fmt::Debug;
26use std::io::BufRead;
27use std::path::Path;
28
29use quick_xml::{
30 events::Event,
31 name::{LocalName, QName, ResolveResult},
32};
33use resolver::{FileResolver, NoOpResolver, ResolveRequest};
34use tracing::instrument;
35use url::Url;
36
37use crate::models::schema::{
38 xs::{Import, Include, Schema, SchemaContent},
39 Namespace, NamespacePrefix, Schemas,
40};
41use crate::pipeline::parser::resolver::ResolveRequestType;
42use crate::quick_xml::{
43 DeserializeSync, Error as QuickXmlError, IoReader, SliceReader, XmlReader, XmlReaderSync,
44};
45use crate::xml::NamespacesShared;
46
47pub use self::error::Error;
48pub use self::resolver::Resolver;
49
50#[must_use]
65#[derive(Default, Debug)]
66pub struct Parser<TResolver = NoOpResolver> {
67 cache: HashSet<Url>,
68 schemas: Schemas,
69 pending: VecDeque<ResolveRequest>,
70
71 resolver: TResolver,
72 resolve_includes: bool,
73}
74
75impl Parser {
76 pub fn new() -> Self {
78 Self::default()
79 }
80}
81
82impl<TResolver> Parser<TResolver> {
83 pub fn with_default_resolver(self) -> Parser<FileResolver> {
87 self.with_resolver(FileResolver)
88 }
89
90 pub fn with_resolver<XResolver: Resolver + 'static>(
92 self,
93 resolver: XResolver,
94 ) -> Parser<XResolver> {
95 let Self { schemas, .. } = self;
96
97 let cache = HashSet::new();
98 let pending = VecDeque::new();
99
100 Parser {
101 cache,
102 schemas,
103 pending,
104
105 resolver,
106 resolve_includes: true,
107 }
108 }
109
110 pub fn resolve_includes(mut self, value: bool) -> Self {
112 self.resolve_includes = value;
113
114 self
115 }
116
117 pub fn finish(self) -> Schemas {
120 self.schemas
121 }
122}
123
124impl<TResolver> Parser<TResolver>
125where
126 TResolver: Resolver,
127{
128 pub fn with_default_namespaces(self) -> Self {
138 self.with_namespace(NamespacePrefix::XS, Namespace::XS)
139 .with_namespace(NamespacePrefix::XML, Namespace::XML)
140 }
141
142 pub fn with_namespace(mut self, prefix: NamespacePrefix, namespace: Namespace) -> Self {
155 self.schemas
156 .get_or_create_namespace_info_mut(Some(prefix), Some(namespace));
157
158 self
159 }
160}
161
162impl<TResolver> Parser<TResolver>
163where
164 TResolver: Resolver,
165 TResolver::Buffer: BufRead,
166{
167 #[instrument(err, level = "trace", skip(self, schema))]
177 pub fn add_schema_from_str(self, schema: &str) -> Result<Self, Error<TResolver::Error>> {
178 self.add_named_schema_from_str_impl(None, schema)
179 }
180
181 #[instrument(err, level = "trace", skip(self, schema))]
192 pub fn add_named_schema_from_str(
193 self,
194 name: String,
195 schema: &str,
196 ) -> Result<Self, Error<TResolver::Error>> {
197 self.add_named_schema_from_str_impl(Some(name), schema)
198 }
199
200 #[instrument(err, level = "trace", skip(self, schema))]
201 fn add_named_schema_from_str_impl(
202 mut self,
203 name: Option<String>,
204 schema: &str,
205 ) -> Result<Self, Error<TResolver::Error>> {
206 let reader = SliceReader::new(schema);
207 let mut reader = SchemaReader::new(reader);
208
209 let schema = Schema::deserialize(&mut reader)?;
210
211 self.add_schema(name, schema, None, &reader.namespaces);
212 self.resolve_pending()?;
213
214 Ok(self)
215 }
216
217 pub fn add_schema_from_reader<R: BufRead>(
227 self,
228 reader: R,
229 ) -> Result<Self, Error<TResolver::Error>> {
230 self.add_named_schema_from_reader_impl(None, reader)
231 }
232
233 pub fn add_named_schema_from_reader<R: BufRead>(
244 self,
245 name: String,
246 reader: R,
247 ) -> Result<Self, Error<TResolver::Error>> {
248 self.add_named_schema_from_reader_impl(Some(name), reader)
249 }
250
251 #[instrument(err, level = "trace", skip(self, reader))]
252 fn add_named_schema_from_reader_impl<R: BufRead>(
253 mut self,
254 name: Option<String>,
255 reader: R,
256 ) -> Result<Self, Error<TResolver::Error>> {
257 let reader = IoReader::new(reader);
258 let mut reader = SchemaReader::new(reader);
259
260 let schema = Schema::deserialize(&mut reader)?;
261
262 self.add_schema(name, schema, None, &reader.namespaces);
263 self.resolve_pending()?;
264
265 Ok(self)
266 }
267
268 #[instrument(err, level = "trace", skip(self))]
278 pub fn add_schema_from_file<P: AsRef<Path> + Debug>(
279 self,
280 path: P,
281 ) -> Result<Self, Error<TResolver::Error>> {
282 let path = path.as_ref().canonicalize()?;
283 let url = Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path))?;
284
285 self.add_schema_from_url(url)
286 }
287
288 #[instrument(err, level = "trace", skip(self))]
295 pub fn add_schema_from_files<I>(mut self, paths: I) -> Result<Self, Error<TResolver::Error>>
296 where
297 I: IntoIterator + Debug,
298 I::Item: AsRef<Path> + Debug,
299 {
300 for path in paths {
301 self = self.add_schema_from_file(path)?;
302 }
303
304 Ok(self)
305 }
306
307 #[instrument(err, level = "trace", skip(self))]
318 pub fn add_schema_from_url(mut self, url: Url) -> Result<Self, Error<TResolver::Error>> {
319 let req = ResolveRequest::new(url, ResolveRequestType::UserDefined);
320
321 self.resolve_location(req)?;
322 self.resolve_pending()?;
323
324 Ok(self)
325 }
326
327 fn add_pending(&mut self, req: ResolveRequest) {
328 tracing::debug!("Add pending resolve request: {req:#?}");
329
330 self.pending.push_back(req);
331 }
332
333 fn resolve_pending(&mut self) -> Result<(), Error<TResolver::Error>> {
334 while let Some(req) = self.pending.pop_front() {
335 self.resolve_location(req)?;
336 }
337
338 Ok(())
339 }
340
341 #[instrument(err, level = "trace", skip(self))]
342 fn resolve_location(&mut self, req: ResolveRequest) -> Result<(), Error<TResolver::Error>> {
343 tracing::debug!("Process resolve request: {req:#?}");
344
345 let Some((name, location, buffer)) =
346 self.resolver.resolve(&req).map_err(Error::resolver)?
347 else {
348 return Err(Error::UnableToResolve(Box::new(req)));
349 };
350 if self.cache.contains(&location) {
351 return Ok(());
352 }
353
354 let reader = IoReader::new(buffer);
355 let reader = SchemaReader::new(reader);
356 let mut reader = reader.with_error_info();
357
358 let mut schema = Schema::deserialize(&mut reader)?;
359
360 if schema.target_namespace.is_none()
361 && ResolveRequestType::IncludeRequest == req.request_type
362 {
363 if let Some(current_ns) = req.current_ns {
364 let inherited_ns = current_ns.to_string();
365 schema.target_namespace = Some(inherited_ns);
366 }
367 }
368
369 let reader = reader.into_inner();
370
371 self.add_schema(name, schema, Some(location.clone()), &reader.namespaces);
372 self.cache.insert(location);
373
374 Ok(())
375 }
376
377 fn add_schema(
378 &mut self,
379 name: Option<String>,
380 schema: Schema,
381 location: Option<Url>,
382 namespaces: &Namespaces,
383 ) {
384 tracing::debug!(
385 "Process schema (location={:?}, target_namespace={:?}",
386 location.as_ref().map(Url::as_str),
387 &schema.target_namespace
388 );
389
390 let target_ns = schema
391 .target_namespace
392 .as_deref()
393 .map(|ns| Namespace::from(ns.as_bytes().to_owned()));
394 let prefix = namespaces.get(&target_ns).cloned().flatten();
395
396 if self.resolve_includes {
397 for content in &schema.content {
398 match content {
399 SchemaContent::Import(x) => {
400 if let Some(req) = import_req(x, target_ns.clone(), location.as_ref()) {
401 self.add_pending(req);
402 }
403 }
404 SchemaContent::Include(x) => {
405 self.add_pending(include_req(x, target_ns.clone(), location.as_ref()));
406 }
407 _ => (),
408 }
409 }
410 }
411
412 self.schemas
413 .add_schema(prefix, target_ns, name, schema, location);
414 }
415}
416
417struct SchemaReader<R> {
418 inner: R,
419 namespaces: Namespaces,
420}
421
422type Namespaces = BTreeMap<Option<Namespace>, Option<NamespacePrefix>>;
423
424impl<R> SchemaReader<R> {
425 fn new(inner: R) -> Self {
426 Self {
427 inner,
428 namespaces: BTreeMap::new(),
429 }
430 }
431}
432
433impl<R> XmlReader for SchemaReader<R>
434where
435 R: XmlReader,
436{
437 fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult<'_>, LocalName<'n>) {
438 self.inner.resolve(name, attribute)
439 }
440
441 fn namespaces(&self) -> NamespacesShared<'static> {
442 self.inner.namespaces()
443 }
444
445 fn current_position(&self) -> u64 {
446 self.inner.current_position()
447 }
448
449 fn error_position(&self) -> u64 {
450 self.inner.error_position()
451 }
452}
453
454impl<'a, R> XmlReaderSync<'a> for SchemaReader<R>
455where
456 R: XmlReaderSync<'a>,
457{
458 fn read_event(&mut self) -> Result<Event<'a>, QuickXmlError> {
459 let event = self.inner.read_event()?;
460
461 if let Event::Start(x) | Event::Empty(x) = &event {
462 for a in x.attributes() {
463 let a = a?;
464 if matches!(a.key.prefix(), Some(x) if x.as_ref() == b"xmlns") {
465 let prefix = NamespacePrefix::new(a.key.local_name().as_ref().to_owned());
466 let namespace = Namespace::new(a.value.into_owned());
467
468 self.namespaces
469 .entry(Some(namespace))
470 .or_insert(Some(prefix));
471 }
472 }
473 }
474
475 Ok(event)
476 }
477}
478
479fn import_req(
480 import: &Import,
481 current_ns: Option<Namespace>,
482 current_location: Option<&Url>,
483) -> Option<ResolveRequest> {
484 let location = import.schema_location.as_ref()?;
485
486 let mut req = ResolveRequest::new(location, ResolveRequestType::ImportRequest);
487
488 if let Some(ns) = current_ns {
489 req = req.current_ns(ns);
490 }
491
492 if let Some(ns) = &import.namespace {
493 req = req.requested_ns(Namespace::from(ns.as_bytes().to_owned()));
494 }
495
496 if let Some(current_location) = current_location {
497 req = req.current_location(current_location.clone());
498 }
499
500 Some(req)
501}
502
503fn include_req(
504 include: &Include,
505 current_ns: Option<Namespace>,
506 current_location: Option<&Url>,
507) -> ResolveRequest {
508 let mut req = ResolveRequest::new(&include.schema_location, ResolveRequestType::IncludeRequest);
509
510 if let Some(ns) = current_ns {
511 req = req.current_ns(ns);
512 }
513
514 if let Some(current_location) = current_location {
515 req = req.current_location(current_location.clone());
516 }
517
518 req
519}