1pub mod resolver;
21
22mod error;
23
24use std::borrow::Cow;
25use std::collections::{btree_map::Entry, BTreeMap, HashMap, HashSet, VecDeque};
26use std::fmt::Debug;
27use std::io::BufRead;
28use std::path::Path;
29
30use quick_xml::events::Event;
31use resolver::{FileResolver, NoOpResolver, ResolveRequest};
32use tracing::instrument;
33use url::Url;
34
35use xsd_parser_types::misc::{Namespace, NamespacePrefix};
36use xsd_parser_types::quick_xml::{
37 DeserializeSync, Error as QuickXmlError, IoReader, SliceReader, XmlReader, XmlReaderSync,
38};
39
40use crate::models::schema::{
41 xs::{Import, Include, Schema, SchemaContent},
42 NamespaceId, NamespaceInfo, Schemas,
43};
44use crate::models::schema::{SchemaId, SchemaInfo};
45use crate::pipeline::parser::resolver::ResolveRequestType;
46
47pub use self::error::Error;
48pub use self::resolver::Resolver;
49
50#[must_use]
65#[derive(Default, Debug)]
66pub struct Parser<TResolver = NoOpResolver> {
67 cache: HashSet<Url>,
68 entries: Vec<ParserEntry>,
69 pending: VecDeque<ResolveRequest>,
70
71 resolver: TResolver,
72 resolve_includes: bool,
73 generate_prefixes: bool,
74 alternative_prefixes: bool,
75}
76
77#[derive(Debug)]
78#[allow(clippy::large_enum_variant)]
79enum ParserEntry {
80 Namespace {
81 prefix: NamespacePrefix,
82 namespace: Namespace,
83 },
84 Schema {
85 name: Option<String>,
86 schema: Schema,
87 location: Option<Url>,
88 target_ns: Option<Namespace>,
89 namespaces: Namespaces,
90 },
91}
92
93#[derive(Debug)]
94struct SchemasBuilder {
95 schemas: Schemas,
96 prefix_cache: HashMap<Option<Namespace>, PrefixEntry>,
97
98 generate_prefixes: bool,
99 alternative_prefixes: bool,
100}
101
102#[derive(Default, Debug)]
103struct PrefixEntry {
104 prefix: Option<NamespacePrefix>,
105 alt_prefixes: HashSet<NamespacePrefix>,
106}
107
108impl Parser {
109 pub fn new() -> Self {
111 Self::default()
112 }
113}
114
115impl<TResolver> Parser<TResolver> {
116 pub fn with_default_resolver(self) -> Parser<FileResolver> {
120 self.with_resolver(FileResolver)
121 }
122
123 pub fn with_resolver<XResolver: Resolver + 'static>(
125 self,
126 resolver: XResolver,
127 ) -> Parser<XResolver> {
128 let Self { entries, .. } = self;
129
130 let cache = HashSet::new();
131 let pending = VecDeque::new();
132
133 Parser {
134 cache,
135 entries,
136 pending,
137
138 resolver,
139 resolve_includes: true,
140 generate_prefixes: true,
141 alternative_prefixes: true,
142 }
143 }
144
145 pub fn resolve_includes(mut self, value: bool) -> Self {
147 self.resolve_includes = value;
148
149 self
150 }
151
152 pub fn generate_prefixes(mut self, value: bool) -> Self {
155 self.generate_prefixes = value;
156
157 self
158 }
159
160 pub fn alternative_prefixes(mut self, value: bool) -> Self {
164 self.alternative_prefixes = value;
165
166 self
167 }
168
169 pub fn finish(self) -> Schemas {
172 let builder = SchemasBuilder {
173 schemas: Schemas::default(),
174 prefix_cache: HashMap::new(),
175
176 generate_prefixes: self.generate_prefixes,
177 alternative_prefixes: self.alternative_prefixes,
178 };
179
180 builder.build(self.entries)
181 }
182}
183
184impl<TResolver> Parser<TResolver>
185where
186 TResolver: Resolver,
187{
188 pub fn with_default_namespaces(self) -> Self {
198 self.with_namespace(NamespacePrefix::XS, Namespace::XS)
199 .with_namespace(NamespacePrefix::XML, Namespace::XML)
200 }
201
202 pub fn with_namespace(mut self, prefix: NamespacePrefix, namespace: Namespace) -> Self {
215 self.entries
216 .push(ParserEntry::Namespace { prefix, namespace });
217
218 self
219 }
220}
221
222impl<TResolver> Parser<TResolver>
223where
224 TResolver: Resolver,
225 TResolver::Buffer: BufRead,
226{
227 #[instrument(err, level = "trace", skip(self, schema))]
237 pub fn add_schema_from_str(self, schema: &str) -> Result<Self, Error<TResolver::Error>> {
238 self.add_named_schema_from_str_impl(None, schema)
239 }
240
241 #[instrument(err, level = "trace", skip(self, schema))]
252 pub fn add_named_schema_from_str(
253 self,
254 name: String,
255 schema: &str,
256 ) -> Result<Self, Error<TResolver::Error>> {
257 self.add_named_schema_from_str_impl(Some(name), schema)
258 }
259
260 #[instrument(err, level = "trace", skip(self, schema))]
261 fn add_named_schema_from_str_impl(
262 mut self,
263 name: Option<String>,
264 schema: &str,
265 ) -> Result<Self, Error<TResolver::Error>> {
266 let reader = SliceReader::new(schema);
267 let mut reader = SchemaReader::new(reader);
268
269 let schema = Schema::deserialize(&mut reader)?;
270
271 self.add_schema(name, schema, None, reader.namespaces);
272 self.resolve_pending()?;
273
274 Ok(self)
275 }
276
277 pub fn add_schema_from_reader<R: BufRead>(
287 self,
288 reader: R,
289 ) -> Result<Self, Error<TResolver::Error>> {
290 self.add_named_schema_from_reader_impl(None, reader)
291 }
292
293 pub fn add_named_schema_from_reader<R: BufRead>(
304 self,
305 name: String,
306 reader: R,
307 ) -> Result<Self, Error<TResolver::Error>> {
308 self.add_named_schema_from_reader_impl(Some(name), reader)
309 }
310
311 #[instrument(err, level = "trace", skip(self, reader))]
312 fn add_named_schema_from_reader_impl<R: BufRead>(
313 mut self,
314 name: Option<String>,
315 reader: R,
316 ) -> Result<Self, Error<TResolver::Error>> {
317 let reader = IoReader::new(reader);
318 let mut reader = SchemaReader::new(reader);
319
320 let schema = Schema::deserialize(&mut reader)?;
321
322 self.add_schema(name, schema, None, reader.namespaces);
323 self.resolve_pending()?;
324
325 Ok(self)
326 }
327
328 #[instrument(err, level = "trace", skip(self))]
338 pub fn add_schema_from_file<P: AsRef<Path> + Debug>(
339 self,
340 path: P,
341 ) -> Result<Self, Error<TResolver::Error>> {
342 let path = path.as_ref().canonicalize()?;
343 let url = Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path))?;
344
345 self.add_schema_from_url(url)
346 }
347
348 #[instrument(err, level = "trace", skip(self))]
355 pub fn add_schema_from_files<I>(mut self, paths: I) -> Result<Self, Error<TResolver::Error>>
356 where
357 I: IntoIterator + Debug,
358 I::Item: AsRef<Path> + Debug,
359 {
360 for path in paths {
361 self = self.add_schema_from_file(path)?;
362 }
363
364 Ok(self)
365 }
366
367 #[instrument(err, level = "trace", skip(self))]
378 pub fn add_schema_from_url(mut self, url: Url) -> Result<Self, Error<TResolver::Error>> {
379 let req = ResolveRequest::new(url, ResolveRequestType::UserDefined);
380
381 self.resolve_location(req)?;
382 self.resolve_pending()?;
383
384 Ok(self)
385 }
386
387 fn add_pending(&mut self, req: ResolveRequest) {
388 tracing::debug!("Add pending resolve request: {req:#?}");
389
390 self.pending.push_back(req);
391 }
392
393 fn resolve_pending(&mut self) -> Result<(), Error<TResolver::Error>> {
394 while let Some(req) = self.pending.pop_front() {
395 self.resolve_location(req)?;
396 }
397
398 Ok(())
399 }
400
401 #[instrument(err, level = "trace", skip(self))]
402 fn resolve_location(&mut self, req: ResolveRequest) -> Result<(), Error<TResolver::Error>> {
403 tracing::debug!("Process resolve request: {req:#?}");
404
405 let Some((name, location, buffer)) =
406 self.resolver.resolve(&req).map_err(Error::resolver)?
407 else {
408 return Err(Error::UnableToResolve(Box::new(req)));
409 };
410 if self.cache.contains(&location) {
411 return Ok(());
412 }
413
414 let reader = IoReader::new(buffer);
415 let reader = SchemaReader::new(reader);
416 let mut reader = reader.with_error_info();
417
418 let mut schema = Schema::deserialize(&mut reader)?;
419
420 if schema.target_namespace.is_none()
421 && ResolveRequestType::IncludeRequest == req.request_type
422 {
423 if let Some(current_ns) = req.current_ns {
424 let inherited_ns = current_ns.to_string();
425 schema.target_namespace = Some(inherited_ns);
426 }
427 }
428
429 let reader = reader.into_inner();
430
431 self.add_schema(name, schema, Some(location.clone()), reader.namespaces);
432 self.cache.insert(location);
433
434 Ok(())
435 }
436
437 fn add_schema(
438 &mut self,
439 name: Option<String>,
440 schema: Schema,
441 location: Option<Url>,
442 namespaces: Namespaces,
443 ) {
444 tracing::debug!(
445 "Process schema (location={:?}, target_namespace={:?}",
446 location.as_ref().map(Url::as_str),
447 &schema.target_namespace
448 );
449
450 let target_ns = schema
451 .target_namespace
452 .as_deref()
453 .map(|ns| Namespace::from(ns.as_bytes().to_owned()));
454
455 if self.resolve_includes {
456 for content in &schema.content {
457 match content {
458 SchemaContent::Import(x) => {
459 if let Some(req) = import_req(x, target_ns.clone(), location.as_ref()) {
460 self.add_pending(req);
461 }
462 }
463 SchemaContent::Include(x) => {
464 self.add_pending(include_req(x, target_ns.clone(), location.as_ref()));
465 }
466 _ => (),
467 }
468 }
469 }
470
471 self.entries.push(ParserEntry::Schema {
472 name,
473 schema,
474 location,
475 target_ns,
476 namespaces,
477 });
478 }
479}
480
481struct SchemaReader<R> {
482 inner: R,
483 namespaces: Namespaces,
484}
485
486type Namespaces = BTreeMap<Option<Namespace>, Vec<NamespacePrefix>>;
487
488impl<R> SchemaReader<R> {
489 fn new(inner: R) -> Self {
490 Self {
491 inner,
492 namespaces: BTreeMap::new(),
493 }
494 }
495}
496
497impl<R> XmlReader for SchemaReader<R>
498where
499 R: XmlReader,
500{
501 fn extend_error(&self, error: QuickXmlError) -> QuickXmlError {
502 self.inner.extend_error(error)
503 }
504}
505
506impl<'a, R> XmlReaderSync<'a> for SchemaReader<R>
507where
508 R: XmlReaderSync<'a>,
509{
510 fn read_event(&mut self) -> Result<Event<'a>, QuickXmlError> {
511 let event = self.inner.read_event()?;
512
513 if let Event::Start(x) | Event::Empty(x) = &event {
514 for a in x.attributes() {
515 let a = a?;
516 if matches!(a.key.prefix(), Some(x) if x.as_ref() == b"xmlns") {
517 let prefix = NamespacePrefix::new(a.key.local_name().as_ref().to_owned());
518 let namespace = Namespace::new(a.value.into_owned());
519
520 self.namespaces
521 .entry(Some(namespace))
522 .or_default()
523 .push(prefix);
524 }
525 }
526 }
527
528 Ok(event)
529 }
530}
531
532impl SchemasBuilder {
533 fn build(mut self, entries: Vec<ParserEntry>) -> Schemas {
534 self.build_cache(&entries);
535
536 for entry in entries {
537 match entry {
538 ParserEntry::Namespace { namespace, .. } => {
539 self.get_or_create_namespace_info_mut(Some(namespace));
540 }
541 ParserEntry::Schema {
542 name,
543 schema,
544 location,
545 target_ns,
546 ..
547 } => {
548 self.add_schema(target_ns, name, location, schema);
549 }
550 }
551 }
552
553 self.determine_prefixes();
554
555 self.schemas
556 }
557
558 fn build_cache(&mut self, entries: &[ParserEntry]) {
559 for entry in entries {
560 match entry {
561 ParserEntry::Namespace { prefix, namespace } => {
562 self.prefix_cache
563 .entry(Some(namespace.clone()))
564 .or_default()
565 .prefix = Some(prefix.clone());
566 }
567 ParserEntry::Schema {
568 target_ns,
569 namespaces,
570 ..
571 } => {
572 let prefix = namespaces
573 .get(target_ns)
574 .and_then(|prefixes| prefixes.first())
575 .cloned();
576 let entry = self.prefix_cache.entry(target_ns.clone()).or_default();
577
578 if entry.prefix.is_none() {
579 entry.prefix = prefix;
580 } else if let Some(prefix) = prefix {
581 entry.alt_prefixes.insert(prefix);
582 }
583
584 for (namespace, prefixes) in namespaces {
585 for prefix in prefixes {
586 self.prefix_cache
587 .entry(namespace.clone())
588 .or_default()
589 .alt_prefixes
590 .insert(prefix.clone());
591 }
592 }
593 }
594 }
595 }
596 }
597
598 fn add_schema(
599 &mut self,
600 namespace: Option<Namespace>,
601 name: Option<String>,
602 location: Option<Url>,
603 schema: Schema,
604 ) {
605 let schema_id = SchemaId(self.schemas.next_schema_id);
606 self.schemas.next_schema_id = self.schemas.next_schema_id.wrapping_add(1);
607
608 let (namespace_id, namespace_info) = self.get_or_create_namespace_info_mut(namespace);
609 namespace_info.schemas.push(schema_id);
610
611 match self.schemas.schemas.entry(schema_id) {
612 Entry::Vacant(e) => e.insert(SchemaInfo {
613 name,
614 schema,
615 location,
616 namespace_id,
617 }),
618 Entry::Occupied(_) => crate::unreachable!(),
619 };
620 }
621
622 fn get_or_create_namespace_info_mut(
623 &mut self,
624 namespace: Option<Namespace>,
625 ) -> (NamespaceId, &mut NamespaceInfo) {
626 match self.schemas.known_namespaces.entry(namespace) {
627 Entry::Occupied(e) => {
628 let id = *e.get();
629 let info = self.schemas.namespace_infos.get_mut(&id).unwrap();
630
631 (id, info)
632 }
633 Entry::Vacant(e) => {
634 let id = NamespaceId(self.schemas.next_namespace_id);
635 self.schemas.next_namespace_id = self.schemas.next_namespace_id.wrapping_add(1);
636
637 let namespace = e.key().clone();
638 e.insert(id);
639
640 let info = match self.schemas.namespace_infos.entry(id) {
641 Entry::Vacant(e) => e.insert(NamespaceInfo::new(namespace)),
642 Entry::Occupied(_) => crate::unreachable!(),
643 };
644
645 (id, info)
646 }
647 }
648 }
649
650 fn determine_prefixes(&mut self) {
651 for (id, info) in &mut self.schemas.namespace_infos {
653 if info.prefix.is_some() {
654 continue;
655 }
656
657 let entry = &mut self.prefix_cache.get(&info.namespace).unwrap();
658 if let Some(prefix) = &entry.prefix {
659 if let Entry::Vacant(e) = self.schemas.known_prefixes.entry(prefix.clone()) {
660 info.prefix = Some(e.key().clone());
661 e.insert(*id);
662 }
663 }
664 }
665
666 if self.alternative_prefixes {
668 for (id, info) in &mut self.schemas.namespace_infos {
669 if info.prefix.is_some() {
670 continue;
671 }
672
673 let entry = &mut self.prefix_cache.get(&info.namespace).unwrap();
674 for alt in &entry.alt_prefixes {
675 if let Entry::Vacant(e) = self.schemas.known_prefixes.entry(alt.clone()) {
676 info.prefix = Some(e.key().clone());
677 e.insert(*id);
678 }
679 }
680 }
681 }
682
683 if self.generate_prefixes {
685 for (id, info) in &mut self.schemas.namespace_infos {
686 if info.prefix.is_some() {
687 continue;
688 }
689
690 let entry = &mut self.prefix_cache.get(&info.namespace).unwrap();
691 let prefix = entry
692 .prefix
693 .clone()
694 .or_else(|| entry.alt_prefixes.iter().next().cloned());
695 if let Some(prefix) = prefix {
696 let ext = format!("_{}", id.0);
697 let ext = ext.as_bytes();
698
699 let mut p = prefix.0.into_owned();
700 p.extend_from_slice(ext);
701
702 let prefix = NamespacePrefix(Cow::Owned(p));
703 self.schemas.known_prefixes.insert(prefix, *id);
704 }
705 }
706 }
707 }
708}
709
710fn import_req(
711 import: &Import,
712 current_ns: Option<Namespace>,
713 current_location: Option<&Url>,
714) -> Option<ResolveRequest> {
715 let location = import.schema_location.as_ref()?;
716
717 let mut req = ResolveRequest::new(location, ResolveRequestType::ImportRequest);
718
719 if let Some(ns) = current_ns {
720 req = req.current_ns(ns);
721 }
722
723 if let Some(ns) = &import.namespace {
724 req = req.requested_ns(Namespace::from(ns.as_bytes().to_owned()));
725 }
726
727 if let Some(current_location) = current_location {
728 req = req.current_location(current_location.clone());
729 }
730
731 Some(req)
732}
733
734fn include_req(
735 include: &Include,
736 current_ns: Option<Namespace>,
737 current_location: Option<&Url>,
738) -> ResolveRequest {
739 let mut req = ResolveRequest::new(&include.schema_location, ResolveRequestType::IncludeRequest);
740
741 if let Some(ns) = current_ns {
742 req = req.current_ns(ns);
743 }
744
745 if let Some(current_location) = current_location {
746 req = req.current_location(current_location.clone());
747 }
748
749 req
750}