1pub mod resolver;
4
5mod error;
6
7use std::collections::{BTreeMap, HashSet, VecDeque};
8use std::fmt::Debug;
9use std::io::BufRead;
10use std::path::Path;
11
12use quick_xml::{
13 events::Event,
14 name::{LocalName, PrefixIter, QName, ResolveResult},
15};
16use resolver::{FileResolver, NoOpResolver, ResolveRequest, Resolver};
17use tracing::instrument;
18use url::Url;
19
20use crate::quick_xml::{
21 DeserializeSync, Error as QuickXmlError, IoReader, SliceReader, XmlReader, XmlReaderSync,
22};
23use crate::schema::xs::{Import, Include, Schema, SchemaContent};
24use crate::schema::{Namespace, NamespacePrefix, Schemas};
25
26pub use self::error::Error;
27
28#[must_use]
39#[derive(Default, Debug)]
40pub struct Parser<TResolver = NoOpResolver> {
41 cache: HashSet<Url>,
42 schemas: Schemas,
43 pending: VecDeque<ResolveRequest>,
44
45 resolver: TResolver,
46 resolve_includes: bool,
47}
48
49impl Parser {
50 pub fn new() -> Self {
52 Self::default()
53 }
54}
55
56impl<TResolver> Parser<TResolver> {
57 pub fn with_default_resolver(self) -> Parser<FileResolver> {
61 self.with_resolver(FileResolver)
62 }
63
64 pub fn with_resolver<XResolver: Resolver + 'static>(
66 self,
67 resolver: XResolver,
68 ) -> Parser<XResolver> {
69 let Self { schemas, .. } = self;
70
71 let cache = HashSet::new();
72 let pending = VecDeque::new();
73
74 Parser {
75 cache,
76 schemas,
77 pending,
78
79 resolver,
80 resolve_includes: true,
81 }
82 }
83
84 pub fn resolve_includes(mut self, value: bool) -> Self {
86 self.resolve_includes = value;
87
88 self
89 }
90
91 pub fn finish(self) -> Schemas {
94 self.schemas
95 }
96}
97
98impl<TResolver> Parser<TResolver>
99where
100 TResolver: Resolver,
101{
102 pub fn with_default_namespaces(self) -> Self {
112 self.with_namespace(NamespacePrefix::XS, Namespace::XS)
113 .with_namespace(NamespacePrefix::XML, Namespace::XML)
114 }
115
116 pub fn with_namespace(mut self, prefix: NamespacePrefix, namespace: Namespace) -> Self {
129 self.schemas
130 .get_or_create_namespace_info_mut(Some(prefix), Some(namespace));
131
132 self
133 }
134}
135
136impl<TResolver> Parser<TResolver>
137where
138 TResolver: Resolver,
139 TResolver::Buffer: BufRead,
140{
141 #[instrument(err, level = "trace", skip(self, schema))]
151 pub fn add_schema_from_str(mut self, schema: &str) -> Result<Self, Error<TResolver::Error>> {
152 let reader = SliceReader::new(schema);
153 let mut reader = SchemaReader::new(reader);
154
155 let schema = Schema::deserialize(&mut reader)?;
156
157 self.add_schema(schema, &reader.namespaces, None);
158 self.resolve_pending()?;
159
160 Ok(self)
161 }
162
163 #[instrument(err, level = "trace", skip(self, reader))]
173 pub fn add_schema_from_reader<R: BufRead>(
174 mut self,
175 reader: R,
176 ) -> Result<Self, Error<TResolver::Error>> {
177 let reader = IoReader::new(reader);
178 let mut reader = SchemaReader::new(reader);
179
180 let schema = Schema::deserialize(&mut reader)?;
181
182 self.add_schema(schema, &reader.namespaces, None);
183 self.resolve_pending()?;
184
185 Ok(self)
186 }
187
188 #[instrument(err, level = "trace", skip(self))]
198 pub fn add_schema_from_file<P: AsRef<Path> + Debug>(
199 self,
200 path: P,
201 ) -> Result<Self, Error<TResolver::Error>> {
202 let path = path.as_ref().canonicalize()?;
203 let url = Url::from_file_path(&path).map_err(|()| Error::InvalidFilePath(path))?;
204
205 self.add_schema_from_url(url)
206 }
207
208 #[instrument(err, level = "trace", skip(self))]
219 pub fn add_schema_from_url(mut self, url: Url) -> Result<Self, Error<TResolver::Error>> {
220 let req = ResolveRequest::new(url);
221
222 self.resolve_location(req)?;
223 self.resolve_pending()?;
224
225 Ok(self)
226 }
227
228 fn add_pending(&mut self, req: ResolveRequest) {
229 tracing::debug!("Add pending resolve request: {req:#?}");
230
231 self.pending.push_back(req);
232 }
233
234 fn resolve_pending(&mut self) -> Result<(), Error<TResolver::Error>> {
235 while let Some(req) = self.pending.pop_front() {
236 self.resolve_location(req)?;
237 }
238
239 Ok(())
240 }
241
242 #[instrument(err, level = "trace", skip(self))]
243 fn resolve_location(&mut self, req: ResolveRequest) -> Result<(), Error<TResolver::Error>> {
244 tracing::debug!("Process resolve request: {req:#?}");
245
246 let Some((location, buffer)) = self.resolver.resolve(&req).map_err(Error::resolver)? else {
247 return Err(Error::UnableToResolve(Box::new(req)));
248 };
249 if self.cache.contains(&location) {
250 return Ok(());
251 }
252
253 let reader = IoReader::new(buffer);
254 let mut reader = SchemaReader::new(reader);
255
256 let schema = Schema::deserialize(&mut reader)?;
257
258 self.add_schema(schema, &reader.namespaces, Some(&location));
259 self.cache.insert(location);
260
261 Ok(())
262 }
263
264 fn add_schema(
265 &mut self,
266 schema: Schema,
267 namespaces: &Namespaces,
268 current_location: Option<&Url>,
269 ) {
270 tracing::debug!(
271 "Process schema (location={:?}, target_namespace={:?}",
272 current_location.as_ref().map(|url| url.as_str()),
273 &schema.target_namespace
274 );
275
276 let target_ns = schema
277 .target_namespace
278 .as_deref()
279 .map(|ns| Namespace::from(ns.as_bytes().to_owned()));
280 let prefix = namespaces.get(&target_ns).cloned().flatten();
281
282 if self.resolve_includes {
283 for content in &schema.content {
284 match content {
285 SchemaContent::Import(x) => {
286 if let Some(req) = import_req(x, target_ns.clone(), current_location) {
287 self.add_pending(req);
288 }
289 }
290 SchemaContent::Include(x) => {
291 self.add_pending(include_req(x, target_ns.clone(), current_location));
292 }
293 _ => (),
294 }
295 }
296 }
297
298 self.schemas.add_schema(prefix, target_ns, schema);
299 }
300}
301
302struct SchemaReader<R> {
303 inner: R,
304 namespaces: Namespaces,
305}
306
307type Namespaces = BTreeMap<Option<Namespace>, Option<NamespacePrefix>>;
308
309impl<R> SchemaReader<R> {
310 fn new(inner: R) -> Self {
311 Self {
312 inner,
313 namespaces: BTreeMap::new(),
314 }
315 }
316}
317
318impl<R> XmlReader for SchemaReader<R>
319where
320 R: XmlReader,
321{
322 fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult<'_>, LocalName<'n>) {
323 self.inner.resolve(name, attribute)
324 }
325
326 fn prefixes(&self) -> PrefixIter<'_> {
327 self.inner.prefixes()
328 }
329
330 fn current_position(&self) -> u64 {
331 self.inner.current_position()
332 }
333
334 fn error_position(&self) -> u64 {
335 self.inner.error_position()
336 }
337}
338
339impl<'a, R> XmlReaderSync<'a> for SchemaReader<R>
340where
341 R: XmlReaderSync<'a>,
342{
343 fn read_event(&mut self) -> Result<Event<'a>, QuickXmlError> {
344 let event = self.inner.read_event()?;
345
346 if let Event::Start(x) | Event::Empty(x) = &event {
347 for a in x.attributes() {
348 let a = a?;
349 if matches!(a.key.prefix(), Some(x) if x.as_ref() == b"xmlns") {
350 let prefix = NamespacePrefix::new(a.key.local_name().as_ref().to_owned());
351 let namespace = Namespace::new(a.value.into_owned());
352
353 self.namespaces
354 .entry(Some(namespace))
355 .or_insert(Some(prefix));
356 }
357 }
358 }
359
360 Ok(event)
361 }
362}
363
364fn import_req(
365 import: &Import,
366 current_ns: Option<Namespace>,
367 current_location: Option<&Url>,
368) -> Option<ResolveRequest> {
369 let location = import.schema_location.as_ref()?;
370
371 let mut req = ResolveRequest::new(location);
372
373 if let Some(ns) = current_ns {
374 req = req.current_ns(ns);
375 }
376
377 if let Some(ns) = &import.namespace {
378 req = req.requested_ns(Namespace::from(ns.as_bytes().to_owned()));
379 }
380
381 if let Some(current_location) = current_location {
382 req = req.current_location(current_location.clone());
383 }
384
385 Some(req)
386}
387
388fn include_req(
389 include: &Include,
390 current_ns: Option<Namespace>,
391 current_location: Option<&Url>,
392) -> ResolveRequest {
393 let mut req = ResolveRequest::new(&include.schema_location);
394
395 if let Some(ns) = current_ns {
396 req = req.current_ns(ns);
397 }
398
399 if let Some(current_location) = current_location {
400 req = req.current_location(current_location.clone());
401 }
402
403 req
404}