json_ld_core/loader/
mod.rs

1use hashbrown::HashSet;
2use iref::{Iri, IriBuf};
3use mime::Mime;
4use rdf_types::vocabulary::{IriVocabulary, IriVocabularyMut};
5use static_iref::iri;
6use std::{borrow::Cow, hash::Hash};
7
8pub mod chain;
9pub mod fs;
10pub mod map;
11pub mod none;
12
13pub use chain::ChainLoader;
14pub use fs::FsLoader;
15pub use none::NoLoader;
16
17#[cfg(feature = "reqwest")]
18pub mod reqwest;
19
20#[cfg(feature = "reqwest")]
21pub use self::reqwest::ReqwestLoader;
22
/// Result of loading a remote document: the loaded [`RemoteDocument`] on
/// success, a [`LoadError`] on failure.
pub type LoadingResult<I = IriBuf> = Result<RemoteDocument<I>, LoadError>;

/// Reference to a remote context: a [`RemoteDocumentReference`] whose loaded
/// content is a `json_ld_syntax::Context` instead of a raw JSON value.
pub type RemoteContextReference<I = IriBuf> = RemoteDocumentReference<I, json_ld_syntax::Context>;
26
/// Remote document, loaded or not.
///
/// Either an IRI or the actual document content.
///
/// `I` is the type used to represent IRIs and `T` is the type of the document
/// content once loaded.
#[derive(Clone)]
pub enum RemoteDocumentReference<I = IriBuf, T = json_syntax::Value> {
	/// IRI to the remote document.
	Iri(I),

	/// Remote document content.
	Loaded(RemoteDocument<I, T>),
}
38
39impl<I, T> RemoteDocumentReference<I, T> {
40	/// Creates an IRI to a `json_syntax::Value` JSON document.
41	///
42	/// This method can replace `RemoteDocumentReference::Iri` to help the type
43	/// inference in the case where `T = json_syntax::Value`.
44	pub fn iri(iri: I) -> Self {
45		Self::Iri(iri)
46	}
47}
48
49impl<I> RemoteDocumentReference<I> {
50	/// Loads the remote document with the given `vocabulary` and `loader`.
51	///
52	/// If the document is already [`Self::Loaded`], simply returns the inner
53	/// [`RemoteDocument`].
54	pub async fn load_with<V>(self, vocabulary: &mut V, loader: &impl Loader) -> LoadingResult<I>
55	where
56		V: IriVocabularyMut<Iri = I>,
57		I: Clone + Eq + Hash,
58	{
59		match self {
60			Self::Iri(r) => Ok(loader.load_with(vocabulary, r).await?.map(Into::into)),
61			Self::Loaded(doc) => Ok(doc),
62		}
63	}
64
65	/// Loads the remote document with the given `vocabulary` and `loader`.
66	///
67	/// For [`Self::Iri`] returns an owned [`RemoteDocument`] with
68	/// [`Cow::Owned`].
69	/// For [`Self::Loaded`] returns a reference to the inner [`RemoteDocument`]
70	/// with [`Cow::Borrowed`].
71	pub async fn loaded_with<V>(
72		&self,
73		vocabulary: &mut V,
74		loader: &impl Loader,
75	) -> Result<Cow<'_, RemoteDocument<V::Iri>>, LoadError>
76	where
77		V: IriVocabularyMut<Iri = I>,
78		I: Clone + Eq + Hash,
79	{
80		match self {
81			Self::Iri(r) => Ok(Cow::Owned(
82				loader
83					.load_with(vocabulary, r.clone())
84					.await?
85					.map(Into::into),
86			)),
87			Self::Loaded(doc) => Ok(Cow::Borrowed(doc)),
88		}
89	}
90}
91
/// Error raised while loading a remote context.
#[derive(Debug, thiserror::Error)]
pub enum ContextLoadError {
	/// The remote document could not be loaded.
	#[error(transparent)]
	LoadingDocumentFailed(#[from] LoadError),

	/// The document was loaded, but no context could be extracted from it.
	#[error("context extraction failed")]
	ContextExtractionFailed(#[from] ExtractContextError),
}
100
101impl<I> RemoteContextReference<I> {
102	/// Loads the remote context with the given `vocabulary` and `loader`.
103	///
104	/// If the context is already [`Self::Loaded`], simply returns the inner
105	/// [`RemoteContext`].
106	pub async fn load_context_with<V, L: Loader>(
107		self,
108		vocabulary: &mut V,
109		loader: &L,
110	) -> Result<RemoteContext<I>, ContextLoadError>
111	where
112		V: IriVocabularyMut<Iri = I>,
113		I: Clone + Eq + Hash,
114	{
115		match self {
116			Self::Iri(r) => Ok(loader
117				.load_with(vocabulary, r)
118				.await?
119				.try_map(|d| d.into_ld_context())?),
120			Self::Loaded(doc) => Ok(doc),
121		}
122	}
123
124	/// Loads the remote context with the given `vocabulary` and `loader`.
125	///
126	/// For [`Self::Iri`] returns an owned [`RemoteContext`] with
127	/// [`Cow::Owned`].
128	/// For [`Self::Loaded`] returns a reference to the inner [`RemoteContext`]
129	/// with [`Cow::Borrowed`].
130	pub async fn loaded_context_with<V, L: Loader>(
131		&self,
132		vocabulary: &mut V,
133		loader: &L,
134	) -> Result<Cow<'_, RemoteContext<I>>, ContextLoadError>
135	where
136		V: IriVocabularyMut<Iri = I>,
137		I: Clone + Eq + Hash,
138	{
139		match self {
140			Self::Iri(r) => Ok(Cow::Owned(
141				loader
142					.load_with(vocabulary, r.clone())
143					.await?
144					.try_map(|d| d.into_ld_context())?,
145			)),
146			Self::Loaded(doc) => Ok(Cow::Borrowed(doc)),
147		}
148	}
149}
150
/// Remote document.
///
/// Stores the content of a loaded remote document along with its original URL.
#[derive(Debug, Clone)]
pub struct RemoteDocument<I = IriBuf, T = json_syntax::Value> {
	/// The final URL of the loaded document, after any redirection.
	pub url: Option<I>,

	/// The HTTP `Content-Type` header value of the loaded document, exclusive
	/// of any optional parameters.
	pub content_type: Option<Mime>,

	/// If available, the value of the HTTP `Link Header` [RFC 8288] using the
	/// `http://www.w3.org/ns/json-ld#context` link relation in the response.
	///
	/// If the response's `Content-Type` is `application/ld+json`, the HTTP
	/// `Link Header` is ignored. If multiple HTTP `Link Headers` using the
	/// `http://www.w3.org/ns/json-ld#context` link relation are found, the
	/// loader fails with a `multiple context link headers` error.
	///
	/// [RFC 8288]: https://www.rfc-editor.org/rfc/rfc8288
	pub context_url: Option<I>,

	/// Values of the `profile` parameter retrieved as part of the original
	/// content type.
	pub profile: HashSet<Profile<I>>,

	/// The retrieved document.
	pub document: T,
}
179
/// Remote context: a [`RemoteDocument`] whose content is a
/// `json_ld_syntax::context::Context`.
pub type RemoteContext<I = IriBuf> = RemoteDocument<I, json_ld_syntax::context::Context>;
181
182impl<I, T> RemoteDocument<I, T> {
183	/// Creates a new remote document.
184	///
185	/// `url` is the final URL of the loaded document, after eventual
186	/// redirection.
187	/// `content_type` is the HTTP `Content-Type` header value of the loaded
188	/// document, exclusive of any optional parameters.
189	pub fn new(url: Option<I>, content_type: Option<Mime>, document: T) -> Self {
190		Self::new_full(url, content_type, None, HashSet::new(), document)
191	}
192
193	/// Creates a new remote document.
194	///
195	/// `url` is the final URL of the loaded document, after eventual
196	/// redirection.
197	/// `content_type` is the HTTP `Content-Type` header value of the loaded
198	/// document, exclusive of any optional parameters.
199	/// `context_url` is the value of the HTTP `Link Header` [RFC 8288] using the
200	/// `http://www.w3.org/ns/json-ld#context` link relation in the response,
201	/// if any.
202	/// `profile` is the value of any profile parameter retrieved as part of the
203	/// original contentType.
204	///
205	/// [RFC 8288]: https://www.rfc-editor.org/rfc/rfc8288
206	pub fn new_full(
207		url: Option<I>,
208		content_type: Option<Mime>,
209		context_url: Option<I>,
210		profile: HashSet<Profile<I>>,
211		document: T,
212	) -> Self {
213		Self {
214			url,
215			content_type,
216			context_url,
217			profile,
218			document,
219		}
220	}
221
222	/// Maps the content of the remote document.
223	pub fn map<U>(self, f: impl Fn(T) -> U) -> RemoteDocument<I, U> {
224		RemoteDocument {
225			url: self.url,
226			content_type: self.content_type,
227			context_url: self.context_url,
228			profile: self.profile,
229			document: f(self.document),
230		}
231	}
232
233	/// Tries to map the content of the remote document.
234	pub fn try_map<U, E>(self, f: impl Fn(T) -> Result<U, E>) -> Result<RemoteDocument<I, U>, E> {
235		Ok(RemoteDocument {
236			url: self.url,
237			content_type: self.content_type,
238			context_url: self.context_url,
239			profile: self.profile,
240			document: f(self.document)?,
241		})
242	}
243
244	/// Maps all the IRIs.
245	pub fn map_iris<J>(self, mut f: impl FnMut(I) -> J) -> RemoteDocument<J, T>
246	where
247		J: Eq + Hash,
248	{
249		RemoteDocument {
250			url: self.url.map(&mut f),
251			content_type: self.content_type,
252			context_url: self.context_url.map(&mut f),
253			profile: self
254				.profile
255				.into_iter()
256				.map(|p| p.map_iri(&mut f))
257				.collect(),
258			document: self.document,
259		}
260	}
261
262	/// Returns a reference to the final URL of the loaded document, after eventual redirection.
263	pub fn url(&self) -> Option<&I> {
264		self.url.as_ref()
265	}
266
267	/// Returns the HTTP `Content-Type` header value of the loaded document,
268	/// exclusive of any optional parameters.
269	pub fn content_type(&self) -> Option<&Mime> {
270		self.content_type.as_ref()
271	}
272
273	/// Returns the value of the HTTP `Link Header` [RFC 8288] using the
274	/// `http://www.w3.org/ns/json-ld#context` link relation in the response,
275	/// if any.
276	///
277	/// If the response's `Content-Type` is `application/ld+json`, the HTTP
278	/// `Link Header` is ignored. If multiple HTTP `Link Headers` using the
279	/// `http://www.w3.org/ns/json-ld#context` link relation are found, the
280	/// loader fails with a `multiple context link headers` error.
281	///
282	/// [RFC 8288]: https://www.rfc-editor.org/rfc/rfc8288
283	pub fn context_url(&self) -> Option<&I> {
284		self.context_url.as_ref()
285	}
286
287	/// Returns a reference to the content of the document.
288	pub fn document(&self) -> &T {
289		&self.document
290	}
291
292	/// Returns a mutable reference to the content of the document.
293	pub fn document_mut(&mut self) -> &mut T {
294		&mut self.document
295	}
296
297	/// Drops the original URL and returns the content of the document.
298	pub fn into_document(self) -> T {
299		self.document
300	}
301
302	/// Drops the content and returns the original URL of the document.
303	pub fn into_url(self) -> Option<I> {
304		self.url
305	}
306
307	/// Sets the URL of the document.
308	pub fn set_url(&mut self, url: Option<I>) {
309		self.url = url
310	}
311}
312
/// Standard `profile` parameter values defined for the `application/ld+json`
/// media type.
///
/// Each variant corresponds to one `http://www.w3.org/ns/json-ld#…` IRI.
///
/// See: <https://www.w3.org/TR/json-ld11/#iana-considerations>
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StandardProfile {
	/// To request or specify expanded JSON-LD document form.
	Expanded,

	/// To request or specify compacted JSON-LD document form.
	Compacted,

	/// To request or specify a JSON-LD context document.
	Context,

	/// To request or specify flattened JSON-LD document form.
	Flattened,

	// /// To request or specify a JSON-LD frame document.
	// Frame,

	/// To request or specify a JSON-LD framed document.
	Framed,
}
335
336impl StandardProfile {
337	pub fn from_iri(iri: &Iri) -> Option<Self> {
338		if iri == iri!("http://www.w3.org/ns/json-ld#expanded") {
339			Some(Self::Expanded)
340		} else if iri == iri!("http://www.w3.org/ns/json-ld#compacted") {
341			Some(Self::Compacted)
342		} else if iri == iri!("http://www.w3.org/ns/json-ld#context") {
343			Some(Self::Context)
344		} else if iri == iri!("http://www.w3.org/ns/json-ld#flattened") {
345			Some(Self::Flattened)
346		} else if iri == iri!("http://www.w3.org/ns/json-ld#framed") {
347			Some(Self::Framed)
348		} else {
349			None
350		}
351	}
352
353	pub fn iri(&self) -> &'static Iri {
354		match self {
355			Self::Expanded => iri!("http://www.w3.org/ns/json-ld#expanded"),
356			Self::Compacted => iri!("http://www.w3.org/ns/json-ld#compacted"),
357			Self::Context => iri!("http://www.w3.org/ns/json-ld#context"),
358			Self::Flattened => iri!("http://www.w3.org/ns/json-ld#flattened"),
359			Self::Framed => iri!("http://www.w3.org/ns/json-ld#framed"),
360		}
361	}
362}
363
/// Value for the `profile` parameter defined for the `application/ld+json`
/// media type.
///
/// Standard values defined by the JSON-LD specification are defined by the
/// [`StandardProfile`] type.
///
/// See: <https://www.w3.org/TR/json-ld11/#iana-considerations>
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Profile<I = IriBuf> {
	/// One of the standard profiles.
	Standard(StandardProfile),

	/// Any other profile, identified by its IRI.
	Custom(I),
}
375
376impl Profile {
377	pub fn new(iri: &Iri) -> Self {
378		match StandardProfile::from_iri(iri) {
379			Some(p) => Self::Standard(p),
380			None => Self::Custom(iri.to_owned()),
381		}
382	}
383
384	pub fn iri(&self) -> &Iri {
385		match self {
386			Self::Standard(s) => s.iri(),
387			Self::Custom(c) => c,
388		}
389	}
390}
391
392impl<I> Profile<I> {
393	pub fn new_with(iri: &Iri, vocabulary: &mut impl IriVocabularyMut<Iri = I>) -> Self {
394		match StandardProfile::from_iri(iri) {
395			Some(p) => Self::Standard(p),
396			None => Self::Custom(vocabulary.insert(iri)),
397		}
398	}
399
400	pub fn iri_with<'a>(&'a self, vocabulary: &'a impl IriVocabulary<Iri = I>) -> &'a Iri {
401		match self {
402			Self::Standard(s) => s.iri(),
403			Self::Custom(c) => vocabulary.iri(c).unwrap(),
404		}
405	}
406
407	pub fn map_iri<J>(self, f: impl FnOnce(I) -> J) -> Profile<J> {
408		match self {
409			Self::Standard(p) => Profile::Standard(p),
410			Self::Custom(i) => Profile::Custom(f(i)),
411		}
412	}
413}
414
/// Boxed, type-erased cause of a [`LoadError`].
pub type LoadErrorCause = Box<dyn std::error::Error + Send + Sync>;

/// Loading error.
///
/// Pairs the IRI of the document that failed to load with the underlying
/// cause; both appear in the error message.
#[derive(Debug, thiserror::Error)]
#[error("loading document `{target}` failed: {cause}")]
pub struct LoadError {
	/// IRI of the document that failed to load.
	pub target: IriBuf,

	/// Underlying error that made the loading fail.
	pub cause: LoadErrorCause,
}
424
425impl LoadError {
426	pub fn new(target: IriBuf, cause: impl 'static + std::error::Error + Send + Sync) -> Self {
427		Self {
428			target,
429			cause: Box::new(cause),
430		}
431	}
432}
433
434/// Document loader.
435///
436/// A document loader is required by most processing functions to fetch remote
437/// documents identified by an IRI. In particular, the loader is in charge of
438/// fetching all the remote contexts imported in a `@context` entry.
439///
440/// This library provides a few default loader implementations:
441///   - [`NoLoader`] dummy loader that always fail. Perfect if you are certain
442///     that the processing will not require any loading.
443///   - Standard [`HashMap`](std::collection::HashMap) and
444///     [`BTreeMap`](std::collection::BTreeMap) mapping IRIs to pre-loaded
445///     documents. This way no network calls are performed and the loaded
446///     content can be trusted.
447///   - [`FsLoader`] that redirecting registered IRI prefixes to a local
448///     directory on the file system. This also avoids network calls. The loaded
449///     content can be trusted as long as the file system is trusted.
450///   - `ReqwestLoader` actually downloading the remote documents using the
451///     [`reqwest`](https://crates.io/crates/reqwest) library.
452///     This requires the `reqwest` feature to be enabled.
453pub trait Loader {
454	/// Loads the document behind the given IRI, using the given vocabulary.
455	#[allow(async_fn_in_trait)]
456	async fn load_with<V>(&self, vocabulary: &mut V, url: V::Iri) -> LoadingResult<V::Iri>
457	where
458		V: IriVocabularyMut,
459		V::Iri: Clone + Eq + Hash,
460	{
461		let lexical_url = vocabulary.iri(&url).unwrap();
462		let document = self.load(lexical_url).await?;
463		Ok(document.map_iris(|i| vocabulary.insert_owned(i)))
464	}
465
466	/// Loads the document behind the given IRI.
467	#[allow(async_fn_in_trait)]
468	async fn load(&self, url: &Iri) -> Result<RemoteDocument<IriBuf>, LoadError>;
469}
470
471impl<'l, L: Loader> Loader for &'l L {
472	async fn load_with<V>(&self, vocabulary: &mut V, url: V::Iri) -> LoadingResult<V::Iri>
473	where
474		V: IriVocabularyMut,
475		V::Iri: Clone + Eq + Hash,
476	{
477		L::load_with(self, vocabulary, url).await
478	}
479
480	async fn load(&self, url: &Iri) -> Result<RemoteDocument<IriBuf>, LoadError> {
481		L::load(self, url).await
482	}
483}
484
485impl<'l, L: Loader> Loader for &'l mut L {
486	async fn load_with<V>(&self, vocabulary: &mut V, url: V::Iri) -> LoadingResult<V::Iri>
487	where
488		V: IriVocabularyMut,
489		V::Iri: Clone + Eq + Hash,
490	{
491		L::load_with(self, vocabulary, url).await
492	}
493
494	async fn load(&self, url: &Iri) -> Result<RemoteDocument<IriBuf>, LoadError> {
495		L::load(self, url).await
496	}
497}
498
/// Context extraction error.
///
/// Returned by [`ExtractContext::into_ld_context`].
#[derive(Debug, thiserror::Error)]
pub enum ExtractContextError {
	/// Unexpected JSON value.
	///
	/// Carries the kind of the value that was found.
	#[error("unexpected {0}")]
	Unexpected(json_syntax::Kind),

	/// No context definition found.
	#[error("missing `@context` entry")]
	NoContext,

	/// Multiple context definitions found.
	#[error("duplicate `@context` entry")]
	DuplicateContext,

	/// The `@context` entry is not a valid JSON-LD context.
	#[error("JSON-LD context syntax error: {0}")]
	Syntax(json_ld_syntax::context::InvalidContext),
}
518
519impl ExtractContextError {
520	fn duplicate_context(
521		json_syntax::object::Duplicate(_, _): json_syntax::object::Duplicate<
522			json_syntax::object::Entry,
523		>,
524	) -> Self {
525		Self::DuplicateContext
526	}
527}
528
/// Conversion of a value into the JSON-LD context it holds.
pub trait ExtractContext {
	/// Consumes `self` and returns the JSON-LD context it holds.
	///
	/// # Errors
	///
	/// Returns an [`ExtractContextError`] if no context can be extracted.
	fn into_ld_context(self) -> Result<json_ld_syntax::context::Context, ExtractContextError>;
}
532
533impl ExtractContext for json_syntax::Value {
534	fn into_ld_context(self) -> Result<json_ld_syntax::context::Context, ExtractContextError> {
535		match self {
536			Self::Object(mut o) => match o
537				.remove_unique("@context")
538				.map_err(ExtractContextError::duplicate_context)?
539			{
540				Some(context) => {
541					use json_ld_syntax::TryFromJson;
542					json_ld_syntax::context::Context::try_from_json(context.value)
543						.map_err(ExtractContextError::Syntax)
544				}
545				None => Err(ExtractContextError::NoContext),
546			},
547			other => Err(ExtractContextError::Unexpected(other.kind())),
548		}
549	}
550}