json_ld_core_next/loader/
mod.rs

1use hashbrown::HashSet;
2use iref::{Iri, IriBuf};
3use mime::Mime;
4use rdf_types::vocabulary::{IriVocabulary, IriVocabularyMut};
5use static_iref::iri;
6use std::{borrow::Cow, hash::Hash};
7
8pub mod chain;
9pub mod fs;
10pub mod map;
11pub mod none;
12
13pub use chain::ChainLoader;
14pub use fs::FsLoader;
15pub use none::NoLoader;
16
17#[cfg(feature = "reqwest")]
18pub mod reqwest;
19
20#[cfg(feature = "reqwest")]
21pub use self::reqwest::ReqwestLoader;
22
/// Result of loading a document: the loaded [`RemoteDocument`] on success, or
/// a [`LoadError`] on failure.
pub type LoadingResult<I = IriBuf> = Result<RemoteDocument<I>, LoadError>;

/// Reference to a remote JSON-LD context, loaded or not.
///
/// This is a [`RemoteDocumentReference`] whose content type is
/// `json_ld_syntax_next::Context`.
pub type RemoteContextReference<I = IriBuf> =
	RemoteDocumentReference<I, json_ld_syntax_next::Context>;
27
28/// Remote document, loaded or not.
29///
30/// Either an IRI or the actual document content.
31#[derive(Clone)]
32pub enum RemoteDocumentReference<I = IriBuf, T = json_syntax::Value> {
33	/// IRI to the remote document.
34	Iri(I),
35
36	/// Remote document content.
37	Loaded(RemoteDocument<I, T>),
38}
39
40impl<I, T> RemoteDocumentReference<I, T> {
41	/// Creates an IRI to a `json_syntax::Value` JSON document.
42	///
43	/// This method can replace `RemoteDocumentReference::Iri` to help the type
44	/// inference in the case where `T = json_syntax::Value`.
45	pub fn iri(iri: I) -> Self {
46		Self::Iri(iri)
47	}
48}
49
50impl<I> RemoteDocumentReference<I> {
51	/// Loads the remote document with the given `vocabulary` and `loader`.
52	///
53	/// If the document is already [`Self::Loaded`], simply returns the inner
54	/// [`RemoteDocument`].
55	pub async fn load_with<V>(self, vocabulary: &mut V, loader: &impl Loader) -> LoadingResult<I>
56	where
57		V: IriVocabularyMut<Iri = I>,
58		I: Clone + Eq + Hash,
59	{
60		match self {
61			Self::Iri(r) => Ok(loader.load_with(vocabulary, r).await?.map(Into::into)),
62			Self::Loaded(doc) => Ok(doc),
63		}
64	}
65
66	/// Loads the remote document with the given `vocabulary` and `loader`.
67	///
68	/// For [`Self::Iri`] returns an owned [`RemoteDocument`] with
69	/// [`Cow::Owned`].
70	/// For [`Self::Loaded`] returns a reference to the inner [`RemoteDocument`]
71	/// with [`Cow::Borrowed`].
72	pub async fn loaded_with<V>(
73		&self,
74		vocabulary: &mut V,
75		loader: &impl Loader,
76	) -> Result<Cow<'_, RemoteDocument<V::Iri>>, LoadError>
77	where
78		V: IriVocabularyMut<Iri = I>,
79		I: Clone + Eq + Hash,
80	{
81		match self {
82			Self::Iri(r) => Ok(Cow::Owned(
83				loader
84					.load_with(vocabulary, r.clone())
85					.await?
86					.map(Into::into),
87			)),
88			Self::Loaded(doc) => Ok(Cow::Borrowed(doc)),
89		}
90	}
91}
92
/// Error raised when loading a remote JSON-LD context.
///
/// Either the document itself could not be loaded, or a context could not be
/// extracted from the loaded document.
#[derive(Debug, thiserror::Error)]
pub enum ContextLoadError {
	/// The document could not be loaded.
	///
	/// The display message is the one of the inner [`LoadError`].
	#[error(transparent)]
	LoadingDocumentFailed(#[from] LoadError),

	/// The document was loaded, but no context could be extracted from it.
	#[error("context extraction failed")]
	ContextExtractionFailed(#[from] ExtractContextError),
}
101
102impl<I> RemoteContextReference<I> {
103	/// Loads the remote context with the given `vocabulary` and `loader`.
104	///
105	/// If the context is already [`Self::Loaded`], simply returns the inner
106	/// [`RemoteContext`].
107	pub async fn load_context_with<V, L: Loader>(
108		self,
109		vocabulary: &mut V,
110		loader: &L,
111	) -> Result<RemoteContext<I>, ContextLoadError>
112	where
113		V: IriVocabularyMut<Iri = I>,
114		I: Clone + Eq + Hash,
115	{
116		match self {
117			Self::Iri(r) => Ok(loader
118				.load_with(vocabulary, r)
119				.await?
120				.try_map(|d| d.into_ld_context())?),
121			Self::Loaded(doc) => Ok(doc),
122		}
123	}
124
125	/// Loads the remote context with the given `vocabulary` and `loader`.
126	///
127	/// For [`Self::Iri`] returns an owned [`RemoteContext`] with
128	/// [`Cow::Owned`].
129	/// For [`Self::Loaded`] returns a reference to the inner [`RemoteContext`]
130	/// with [`Cow::Borrowed`].
131	pub async fn loaded_context_with<V, L: Loader>(
132		&self,
133		vocabulary: &mut V,
134		loader: &L,
135	) -> Result<Cow<'_, RemoteContext<I>>, ContextLoadError>
136	where
137		V: IriVocabularyMut<Iri = I>,
138		I: Clone + Eq + Hash,
139	{
140		match self {
141			Self::Iri(r) => Ok(Cow::Owned(
142				loader
143					.load_with(vocabulary, r.clone())
144					.await?
145					.try_map(|d| d.into_ld_context())?,
146			)),
147			Self::Loaded(doc) => Ok(Cow::Borrowed(doc)),
148		}
149	}
150}
151
/// Remote document.
///
/// Stores the content of a loaded remote document along with its original URL.
#[derive(Debug, Clone)]
pub struct RemoteDocument<I = IriBuf, T = json_syntax::Value> {
	/// The final URL of the loaded document, after eventual redirection.
	pub url: Option<I>,

	/// The HTTP `Content-Type` header value of the loaded document, exclusive
	/// of any optional parameters.
	pub content_type: Option<Mime>,

	/// If available, the value of the HTTP `Link Header` [RFC 8288] using the
	/// `http://www.w3.org/ns/json-ld#context` link relation in the response.
	///
	/// If the response's `Content-Type` is `application/ld+json`, the HTTP
	/// `Link Header` is ignored. If multiple HTTP `Link Headers` using the
	/// `http://www.w3.org/ns/json-ld#context` link relation are found, the
	/// loader fails with a `multiple context link headers` error.
	///
	/// [RFC 8288]: https://www.rfc-editor.org/rfc/rfc8288
	pub context_url: Option<I>,

	/// The values of any `profile` parameter retrieved as part of the
	/// original content type.
	pub profile: HashSet<Profile<I>>,

	/// The retrieved document.
	pub document: T,
}
180
/// Remote JSON-LD context: a [`RemoteDocument`] whose content is a
/// `json_ld_syntax_next::context::Context`.
pub type RemoteContext<I = IriBuf> = RemoteDocument<I, json_ld_syntax_next::context::Context>;
182
183impl<I, T> RemoteDocument<I, T> {
184	/// Creates a new remote document.
185	///
186	/// `url` is the final URL of the loaded document, after eventual
187	/// redirection.
188	/// `content_type` is the HTTP `Content-Type` header value of the loaded
189	/// document, exclusive of any optional parameters.
190	pub fn new(url: Option<I>, content_type: Option<Mime>, document: T) -> Self {
191		Self::new_full(url, content_type, None, HashSet::new(), document)
192	}
193
194	/// Creates a new remote document.
195	///
196	/// `url` is the final URL of the loaded document, after eventual
197	/// redirection.
198	/// `content_type` is the HTTP `Content-Type` header value of the loaded
199	/// document, exclusive of any optional parameters.
200	/// `context_url` is the value of the HTTP `Link Header` [RFC 8288] using the
201	/// `http://www.w3.org/ns/json-ld#context` link relation in the response,
202	/// if any.
203	/// `profile` is the value of any profile parameter retrieved as part of the
204	/// original contentType.
205	///
206	/// [RFC 8288]: https://www.rfc-editor.org/rfc/rfc8288
207	pub fn new_full(
208		url: Option<I>,
209		content_type: Option<Mime>,
210		context_url: Option<I>,
211		profile: HashSet<Profile<I>>,
212		document: T,
213	) -> Self {
214		Self {
215			url,
216			content_type,
217			context_url,
218			profile,
219			document,
220		}
221	}
222
223	/// Maps the content of the remote document.
224	pub fn map<U>(self, f: impl Fn(T) -> U) -> RemoteDocument<I, U> {
225		RemoteDocument {
226			url: self.url,
227			content_type: self.content_type,
228			context_url: self.context_url,
229			profile: self.profile,
230			document: f(self.document),
231		}
232	}
233
234	/// Tries to map the content of the remote document.
235	pub fn try_map<U, E>(self, f: impl Fn(T) -> Result<U, E>) -> Result<RemoteDocument<I, U>, E> {
236		Ok(RemoteDocument {
237			url: self.url,
238			content_type: self.content_type,
239			context_url: self.context_url,
240			profile: self.profile,
241			document: f(self.document)?,
242		})
243	}
244
245	/// Maps all the IRIs.
246	pub fn map_iris<J>(self, mut f: impl FnMut(I) -> J) -> RemoteDocument<J, T>
247	where
248		J: Eq + Hash,
249	{
250		RemoteDocument {
251			url: self.url.map(&mut f),
252			content_type: self.content_type,
253			context_url: self.context_url.map(&mut f),
254			profile: self
255				.profile
256				.into_iter()
257				.map(|p| p.map_iri(&mut f))
258				.collect(),
259			document: self.document,
260		}
261	}
262
263	/// Returns a reference to the final URL of the loaded document, after eventual redirection.
264	pub fn url(&self) -> Option<&I> {
265		self.url.as_ref()
266	}
267
268	/// Returns the HTTP `Content-Type` header value of the loaded document,
269	/// exclusive of any optional parameters.
270	pub fn content_type(&self) -> Option<&Mime> {
271		self.content_type.as_ref()
272	}
273
274	/// Returns the value of the HTTP `Link Header` [RFC 8288] using the
275	/// `http://www.w3.org/ns/json-ld#context` link relation in the response,
276	/// if any.
277	///
278	/// If the response's `Content-Type` is `application/ld+json`, the HTTP
279	/// `Link Header` is ignored. If multiple HTTP `Link Headers` using the
280	/// `http://www.w3.org/ns/json-ld#context` link relation are found, the
281	/// loader fails with a `multiple context link headers` error.
282	///
283	/// [RFC 8288]: https://www.rfc-editor.org/rfc/rfc8288
284	pub fn context_url(&self) -> Option<&I> {
285		self.context_url.as_ref()
286	}
287
288	/// Returns a reference to the content of the document.
289	pub fn document(&self) -> &T {
290		&self.document
291	}
292
293	/// Returns a mutable reference to the content of the document.
294	pub fn document_mut(&mut self) -> &mut T {
295		&mut self.document
296	}
297
298	/// Drops the original URL and returns the content of the document.
299	pub fn into_document(self) -> T {
300		self.document
301	}
302
303	/// Drops the content and returns the original URL of the document.
304	pub fn into_url(self) -> Option<I> {
305		self.url
306	}
307
308	/// Sets the URL of the document.
309	pub fn set_url(&mut self, url: Option<I>) {
310		self.url = url
311	}
312}
313
/// Standard `profile` parameter values defined for the `application/ld+json`
/// media type.
///
/// Each variant is identified by an IRI in the `http://www.w3.org/ns/json-ld#`
/// namespace.
///
/// See: <https://www.w3.org/TR/json-ld11/#iana-considerations>
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StandardProfile {
	/// To request or specify expanded JSON-LD document form.
	Expanded,

	/// To request or specify compacted JSON-LD document form.
	Compacted,

	/// To request or specify a JSON-LD context document.
	Context,

	/// To request or specify flattened JSON-LD document form.
	Flattened,

	// NOTE(review): the `Frame` profile is currently disabled; the reason is
	// not visible from this file.
	// /// To request or specify a JSON-LD frame document.
	// Frame,
	/// To request or specify a JSON-LD framed document.
	Framed,
}
336
337impl StandardProfile {
338	pub fn from_iri(iri: &Iri) -> Option<Self> {
339		if iri == iri!("http://www.w3.org/ns/json-ld#expanded") {
340			Some(Self::Expanded)
341		} else if iri == iri!("http://www.w3.org/ns/json-ld#compacted") {
342			Some(Self::Compacted)
343		} else if iri == iri!("http://www.w3.org/ns/json-ld#context") {
344			Some(Self::Context)
345		} else if iri == iri!("http://www.w3.org/ns/json-ld#flattened") {
346			Some(Self::Flattened)
347		} else if iri == iri!("http://www.w3.org/ns/json-ld#framed") {
348			Some(Self::Framed)
349		} else {
350			None
351		}
352	}
353
354	pub fn iri(&self) -> &'static Iri {
355		match self {
356			Self::Expanded => iri!("http://www.w3.org/ns/json-ld#expanded"),
357			Self::Compacted => iri!("http://www.w3.org/ns/json-ld#compacted"),
358			Self::Context => iri!("http://www.w3.org/ns/json-ld#context"),
359			Self::Flattened => iri!("http://www.w3.org/ns/json-ld#flattened"),
360			Self::Framed => iri!("http://www.w3.org/ns/json-ld#framed"),
361		}
362	}
363}
364
/// Value for the `profile` parameter defined for the `application/ld+json`.
///
/// Standard values defined by the JSON-LD specification are defined by the
/// [`StandardProfile`] type.
///
/// See: <https://www.w3.org/TR/json-ld11/#iana-considerations>
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Profile<I = IriBuf> {
	/// One of the standard profiles.
	Standard(StandardProfile),

	/// Any other profile, identified by its IRI.
	Custom(I),
}
376
377impl Profile {
378	pub fn new(iri: &Iri) -> Self {
379		match StandardProfile::from_iri(iri) {
380			Some(p) => Self::Standard(p),
381			None => Self::Custom(iri.to_owned()),
382		}
383	}
384
385	pub fn iri(&self) -> &Iri {
386		match self {
387			Self::Standard(s) => s.iri(),
388			Self::Custom(c) => c,
389		}
390	}
391}
392
393impl<I> Profile<I> {
394	pub fn new_with(iri: &Iri, vocabulary: &mut impl IriVocabularyMut<Iri = I>) -> Self {
395		match StandardProfile::from_iri(iri) {
396			Some(p) => Self::Standard(p),
397			None => Self::Custom(vocabulary.insert(iri)),
398		}
399	}
400
401	pub fn iri_with<'a>(&'a self, vocabulary: &'a impl IriVocabulary<Iri = I>) -> &'a Iri {
402		match self {
403			Self::Standard(s) => s.iri(),
404			Self::Custom(c) => vocabulary.iri(c).unwrap(),
405		}
406	}
407
408	pub fn map_iri<J>(self, f: impl FnOnce(I) -> J) -> Profile<J> {
409		match self {
410			Self::Standard(p) => Profile::Standard(p),
411			Self::Custom(i) => Profile::Custom(f(i)),
412		}
413	}
414}
415
/// Type-erased cause of a [`LoadError`].
pub type LoadErrorCause = Box<dyn std::error::Error + Send + Sync>;

/// Loading error.
///
/// Associates the IRI of the document that failed to load with the
/// underlying cause. Both appear in the display message.
#[derive(Debug, thiserror::Error)]
#[error("loading document `{target}` failed: {cause}")]
pub struct LoadError {
	/// IRI of the document that could not be loaded.
	pub target: IriBuf,

	/// Reason why the loading failed.
	pub cause: LoadErrorCause,
}
425
426impl LoadError {
427	pub fn new(target: IriBuf, cause: impl 'static + std::error::Error + Send + Sync) -> Self {
428		Self {
429			target,
430			cause: Box::new(cause),
431		}
432	}
433}
434
/// Document loader.
///
/// A document loader is required by most processing functions to fetch remote
/// documents identified by an IRI. In particular, the loader is in charge of
/// fetching all the remote contexts imported in a `@context` entry.
///
/// This library provides a few default loader implementations:
///   - [`NoLoader`], a dummy loader that always fails. Perfect if you are
///     certain that the processing will not require any loading.
///   - Standard [`HashMap`](std::collections::HashMap) and
///     [`BTreeMap`](std::collections::BTreeMap) mapping IRIs to pre-loaded
///     documents. This way no network calls are performed and the loaded
///     content can be trusted.
///   - [`FsLoader`], which redirects registered IRI prefixes to a local
///     directory on the file system. This also avoids network calls. The loaded
///     content can be trusted as long as the file system is trusted.
///   - `ReqwestLoader`, which actually downloads the remote documents using
///     the [`reqwest`](https://crates.io/crates/reqwest) library.
///     This requires the `reqwest` feature to be enabled.
pub trait Loader {
	/// Loads the document behind the given IRI, using the given vocabulary.
	///
	/// The default implementation resolves `url` to its lexical form through
	/// `vocabulary`, calls [`Self::load`], then inserts every IRI of the
	/// loaded document into `vocabulary`.
	///
	/// # Panics
	///
	/// The default implementation panics if `vocabulary.iri` returns `None`
	/// for `url`.
	// NOTE(review): presumably silences the lint warning that `async fn` in a
	// public trait cannot specify auto-trait bounds (such as `Send`) on the
	// returned future — confirm this is intended.
	#[allow(async_fn_in_trait)]
	async fn load_with<V>(&self, vocabulary: &mut V, url: V::Iri) -> LoadingResult<V::Iri>
	where
		V: IriVocabularyMut,
		V::Iri: Clone + Eq + Hash,
	{
		let lexical_url = vocabulary.iri(&url).unwrap();
		let document = self.load(lexical_url).await?;
		Ok(document.map_iris(|i| vocabulary.insert_owned(i)))
	}

	/// Loads the document behind the given IRI.
	///
	/// This is the only method implementors are required to provide.
	#[allow(async_fn_in_trait)]
	async fn load(&self, url: &Iri) -> Result<RemoteDocument<IriBuf>, LoadError>;
}
471
472impl<L: Loader> Loader for &L {
473	async fn load_with<V>(&self, vocabulary: &mut V, url: V::Iri) -> LoadingResult<V::Iri>
474	where
475		V: IriVocabularyMut,
476		V::Iri: Clone + Eq + Hash,
477	{
478		L::load_with(self, vocabulary, url).await
479	}
480
481	async fn load(&self, url: &Iri) -> Result<RemoteDocument<IriBuf>, LoadError> {
482		L::load(self, url).await
483	}
484}
485
486impl<L: Loader> Loader for &mut L {
487	async fn load_with<V>(&self, vocabulary: &mut V, url: V::Iri) -> LoadingResult<V::Iri>
488	where
489		V: IriVocabularyMut,
490		V::Iri: Clone + Eq + Hash,
491	{
492		L::load_with(self, vocabulary, url).await
493	}
494
495	async fn load(&self, url: &Iri) -> Result<RemoteDocument<IriBuf>, LoadError> {
496		L::load(self, url).await
497	}
498}
499
/// Context extraction error.
///
/// Raised when a JSON-LD context cannot be extracted from a JSON document.
#[derive(Debug, thiserror::Error)]
pub enum ExtractContextError {
	/// Unexpected JSON value.
	///
	/// Holds the kind of the value that was found instead of an object.
	#[error("unexpected {0}")]
	Unexpected(json_syntax::Kind),

	/// No context definition found.
	#[error("missing `@context` entry")]
	NoContext,

	/// Multiple context definitions found.
	#[error("duplicate `@context` entry")]
	DuplicateContext,

	/// JSON-LD context syntax error: the `@context` entry value is not a
	/// valid context.
	#[error("JSON-LD context syntax error: {0}")]
	Syntax(json_ld_syntax_next::context::InvalidContext),
}
519
520impl ExtractContextError {
521	fn duplicate_context(
522		json_syntax::object::Duplicate(_, _): json_syntax::object::Duplicate<
523			json_syntax::object::Entry,
524		>,
525	) -> Self {
526		Self::DuplicateContext
527	}
528}
529
/// Types from which a JSON-LD context can be extracted.
pub trait ExtractContext {
	/// Consumes the value and returns the JSON-LD context it defines.
	///
	/// # Errors
	///
	/// Returns an [`ExtractContextError`] if no context can be extracted.
	fn into_ld_context(self) -> Result<json_ld_syntax_next::context::Context, ExtractContextError>;
}
533
534impl ExtractContext for json_syntax::Value {
535	fn into_ld_context(self) -> Result<json_ld_syntax_next::context::Context, ExtractContextError> {
536		match self {
537			Self::Object(mut o) => match o
538				.remove_unique("@context")
539				.map_err(ExtractContextError::duplicate_context)?
540			{
541				Some(context) => {
542					use json_ld_syntax_next::TryFromJson;
543					json_ld_syntax_next::context::Context::try_from_json(context.value)
544						.map_err(ExtractContextError::Syntax)
545				}
546				None => Err(ExtractContextError::NoContext),
547			},
548			other => Err(ExtractContextError::Unexpected(other.kind())),
549		}
550	}
551}