oxjsonld/to_rdf.rs
1use crate::context::{JsonLdLoadDocumentOptions, JsonLdRemoteDocument, JsonLdTermDefinition};
2use crate::error::{JsonLdParseError, JsonLdSyntaxError};
3use crate::expansion::{JsonLdEvent, JsonLdExpansionConverter, JsonLdValue};
4use crate::profile::{JsonLdProcessingMode, JsonLdProfile, JsonLdProfileSet};
5#[cfg(feature = "async-tokio")]
6use json_event_parser::TokioAsyncReaderJsonParser;
7use json_event_parser::{JsonEvent, ReaderJsonParser, SliceJsonParser, WriterJsonSerializer};
8use oxiri::{Iri, IriParseError};
9#[cfg(feature = "rdf-12")]
10use oxrdf::BaseDirection;
11use oxrdf::vocab::{rdf, xsd};
12use oxrdf::{BlankNode, GraphName, Literal, NamedNode, NamedNodeRef, NamedOrBlankNode, Quad};
13use std::error::Error;
14use std::fmt::Write;
15use std::io::Read;
16use std::panic::{RefUnwindSafe, UnwindSafe};
17use std::str;
18use std::str::FromStr;
19#[cfg(feature = "async-tokio")]
20use tokio::io::AsyncRead;
21
22/// A [JSON-LD](https://www.w3.org/TR/json-ld/) parser.
23///
24/// The parser supports two modes:
25/// - regular JSON-LD parsing that needs to buffer the full file into memory.
26/// - [Streaming JSON-LD](https://www.w3.org/TR/json-ld11-streaming/) that can avoid buffering in a few cases.
27/// To enable it call the [`with_profile(JsonLdProfile::Streaming)`](JsonLdParser::with_profile) method.
28///
29/// Count the number of people:
30/// ```
31/// use oxjsonld::JsonLdParser;
32/// use oxrdf::NamedNodeRef;
33/// use oxrdf::vocab::rdf;
34///
35/// let file = r#"{
36/// "@context": {"schema": "http://schema.org/"},
37/// "@graph": [
38/// {
39/// "@type": "schema:Person",
40/// "@id": "http://example.com/foo",
41/// "schema:name": "Foo"
42/// },
43/// {
44/// "@type": "schema:Person",
45/// "schema:name": "Bar"
46/// }
47/// ]
48/// }"#;
49///
50/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
51/// let mut count = 0;
52/// for quad in JsonLdParser::new().for_reader(file.as_bytes()) {
53/// let quad = quad?;
54/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
55/// count += 1;
56/// }
57/// }
58/// assert_eq!(2, count);
59/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
60/// ```
#[derive(Default, Clone)]
#[must_use]
pub struct JsonLdParser {
    /// Processing mode handed to the expansion step; set by `with_processing_mode`.
    processing_mode: JsonLdProcessingMode,
    /// Whether some validations are skipped; switched on by `lenient`.
    lenient: bool,
    /// Profiles assumed for the input; only `JsonLdProfile::Streaming` is looked up when building the parser state.
    profile: JsonLdProfileSet,
    /// Optional base IRI given by the caller through `with_base_iri`.
    base: Option<Iri<String>>,
}
69
70impl JsonLdParser {
71 /// Builds a new [`JsonLdParser`].
72 #[inline]
73 pub fn new() -> Self {
74 Self::default()
75 }
76
77 /// Assumes the file is valid to make parsing faster.
78 ///
79 /// It will skip some validations.
80 ///
81 /// Note that if the file is actually not valid, the parser might emit broken RDF.
82 #[inline]
83 pub fn lenient(mut self) -> Self {
84 self.lenient = true;
85 self
86 }
87
88 /// Assume the given profile(s) during parsing.
89 ///
90 /// If you set the [Streaming JSON-LD](https://www.w3.org/TR/json-ld11-streaming/) profile ([`JsonLdProfile::Streaming`]),
91 /// the parser will skip some buffering to make parsing faster and memory consumption lower.
92 ///
93 /// ```
94 /// use oxjsonld::{JsonLdParser, JsonLdProfile};
95 /// use oxrdf::NamedNodeRef;
96 /// use oxrdf::vocab::rdf;
97 ///
98 /// let file = r#"{
99 /// "@context": {"schema": "http://schema.org/"},
100 /// "@graph": [
101 /// {
102 /// "@type": "schema:Person",
103 /// "@id": "http://example.com/foo",
104 /// "schema:name": "Foo"
105 /// }
106 /// ]
107 /// }"#;
108 ///
109 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
110 /// let mut count = 0;
111 /// for quad in JsonLdParser::new()
112 /// .with_profile(JsonLdProfile::Streaming)
113 /// .for_slice(file)
114 /// {
115 /// let quad = quad?;
116 /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
117 /// count += 1;
118 /// }
119 /// }
120 /// assert_eq!(1, count);
121 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
122 /// ```
123 #[inline]
124 pub fn with_profile(mut self, profile: impl Into<JsonLdProfileSet>) -> Self {
125 self.profile = profile.into();
126 self
127 }
128
129 /// Set the [processing mode](https://www.w3.org/TR/json-ld11/#dfn-processing-mode) of the parser.
130 #[inline]
131 pub fn with_processing_mode(mut self, processing_mode: JsonLdProcessingMode) -> Self {
132 self.processing_mode = processing_mode;
133 self
134 }
135
136 /// Base IRI to use when expanding the document.
137 ///
138 /// It corresponds to the [`base` option from the algorithm specification](https://www.w3.org/TR/json-ld-api/#dom-jsonldoptions-base).
139 #[inline]
140 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
141 self.base = Some(Iri::parse(base_iri.into())?);
142 Ok(self)
143 }
144
145 /// Parses a JSON-LD file from a [`Read`] implementation.
146 ///
147 /// Count the number of people:
148 /// ```
149 /// use oxjsonld::JsonLdParser;
150 /// use oxrdf::NamedNodeRef;
151 /// use oxrdf::vocab::rdf;
152 ///
153 /// let file = r#"{
154 /// "@context": {"schema": "http://schema.org/"},
155 /// "@graph": [
156 /// {
157 /// "@type": "schema:Person",
158 /// "@id": "http://example.com/foo",
159 /// "schema:name": "Foo"
160 /// },
161 /// {
162 /// "@type": "schema:Person",
163 /// "schema:name": "Bar"
164 /// }
165 /// ]
166 /// }"#;
167 ///
168 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
169 /// let mut count = 0;
170 /// for quad in JsonLdParser::new().for_reader(file.as_bytes()) {
171 /// let quad = quad?;
172 /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
173 /// count += 1;
174 /// }
175 /// }
176 /// assert_eq!(2, count);
177 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
178 /// ```
179 pub fn for_reader<R: Read>(self, reader: R) -> ReaderJsonLdParser<R> {
180 ReaderJsonLdParser {
181 results: Vec::new(),
182 errors: Vec::new(),
183 inner: self.into_inner(),
184 json_parser: ReaderJsonParser::new(reader),
185 }
186 }
187
/// Parses a JSON-LD file from an [`AsyncRead`] implementation.
189 ///
190 /// Count the number of people:
191 /// ```
192 /// # #[tokio::main(flavor = "current_thread")]
193 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
194 /// use oxjsonld::JsonLdParser;
195 /// use oxrdf::NamedNodeRef;
196 /// use oxrdf::vocab::rdf;
197 ///
198 /// let file = r#"{
199 /// "@context": {"schema": "http://schema.org/"},
200 /// "@graph": [
201 /// {
202 /// "@type": "schema:Person",
203 /// "@id": "http://example.com/foo",
204 /// "schema:name": "Foo"
205 /// },
206 /// {
207 /// "@type": "schema:Person",
208 /// "schema:name": "Bar"
209 /// }
210 /// ]
211 /// }"#;
212 ///
213 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
214 /// let mut count = 0;
215 /// let mut parser = JsonLdParser::new().for_tokio_async_reader(file.as_bytes());
216 /// while let Some(quad) = parser.next().await {
217 /// let quad = quad?;
218 /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
219 /// count += 1;
220 /// }
221 /// }
222 /// assert_eq!(2, count);
223 /// # Ok(())
224 /// # }
225 /// ```
226 #[cfg(feature = "async-tokio")]
227 pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
228 self,
229 reader: R,
230 ) -> TokioAsyncReaderJsonLdParser<R> {
231 TokioAsyncReaderJsonLdParser {
232 results: Vec::new(),
233 errors: Vec::new(),
234 inner: self.into_inner(),
235 json_parser: TokioAsyncReaderJsonParser::new(reader),
236 }
237 }
238
239 /// Parses a JSON-LD file from a byte slice.
240 ///
241 /// Count the number of people:
242 /// ```
243 /// use oxjsonld::JsonLdParser;
244 /// use oxrdf::NamedNodeRef;
245 /// use oxrdf::vocab::rdf;
246 ///
247 /// let file = r#"{
248 /// "@context": {"schema": "http://schema.org/"},
249 /// "@graph": [
250 /// {
251 /// "@type": "schema:Person",
252 /// "@id": "http://example.com/foo",
253 /// "schema:name": "Foo"
254 /// },
255 /// {
256 /// "@type": "schema:Person",
257 /// "schema:name": "Bar"
258 /// }
259 /// ]
260 /// }"#;
261 ///
262 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
263 /// let mut count = 0;
264 /// for quad in JsonLdParser::new().for_slice(file) {
265 /// let quad = quad?;
266 /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
267 /// count += 1;
268 /// }
269 /// }
270 /// assert_eq!(2, count);
271 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
272 /// ```
273 pub fn for_slice(self, slice: &(impl AsRef<[u8]> + ?Sized)) -> SliceJsonLdParser<'_> {
274 SliceJsonLdParser {
275 results: Vec::new(),
276 errors: Vec::new(),
277 inner: self.into_inner(),
278 json_parser: SliceJsonParser::new(slice.as_ref()),
279 }
280 }
281
282 fn into_inner(self) -> InternalJsonLdParser {
283 InternalJsonLdParser {
284 expansion: JsonLdExpansionConverter::new(
285 self.base,
286 self.profile.contains(JsonLdProfile::Streaming),
287 self.lenient,
288 self.processing_mode,
289 ),
290 expended_events: Vec::new(),
291 to_rdf: JsonLdToRdfConverter {
292 state: vec![JsonLdToRdfState::Graph(Some(GraphName::DefaultGraph))],
293 lenient: self.lenient,
294 },
295 json_error: false,
296 }
297 }
298}
299
300/// Parses a JSON-LD file from a [`Read`] implementation.
301///
302/// Can be built using [`JsonLdParser::for_reader`].
303///
304/// Count the number of people:
305/// ```
306/// use oxjsonld::JsonLdParser;
307/// use oxrdf::NamedNodeRef;
308/// use oxrdf::vocab::rdf;
309///
310/// let file = r#"{
311/// "@context": {"schema": "http://schema.org/"},
312/// "@graph": [
313/// {
314/// "@type": "schema:Person",
315/// "@id": "http://example.com/foo",
316/// "schema:name": "Foo"
317/// },
318/// {
319/// "@type": "schema:Person",
320/// "schema:name": "Bar"
321/// }
322/// ]
323/// }"#;
324///
325/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
326/// let mut count = 0;
327/// for quad in JsonLdParser::new().for_reader(file.as_bytes()) {
328/// let quad = quad?;
329/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
330/// count += 1;
331/// }
332/// }
333/// assert_eq!(2, count);
334/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
335/// ```
#[must_use]
pub struct ReaderJsonLdParser<R: Read> {
    /// Quads produced by the last parsing step and not yet returned; popped from the end by `next`.
    results: Vec<Quad>,
    /// Syntax errors produced by the last parsing step; `next` returns them before any pending quad.
    errors: Vec<JsonLdSyntaxError>,
    /// Source-independent parsing state (expansion and RDF conversion).
    inner: InternalJsonLdParser,
    /// JSON event parser pulling from the wrapped reader.
    json_parser: ReaderJsonParser<R>,
}
343
344impl<R: Read> Iterator for ReaderJsonLdParser<R> {
345 type Item = Result<Quad, JsonLdParseError>;
346
347 fn next(&mut self) -> Option<Self::Item> {
348 loop {
349 if let Some(error) = self.errors.pop() {
350 return Some(Err(error.into()));
351 } else if let Some(quad) = self.results.pop() {
352 return Some(Ok(quad));
353 } else if self.inner.is_end() {
354 return None;
355 }
356 let step = self.parse_step();
357 if let Err(e) = step {
358 return Some(Err(e));
359 }
360 // We make sure to have data in the right order
361 self.results.reverse();
362 self.errors.reverse();
363 }
364 }
365}
366
367impl<R: Read> ReaderJsonLdParser<R> {
368 /// Allows setting a callback to load remote documents and contexts
369 ///
370 /// The first argument is the document URL.
371 ///
372 /// It corresponds to the [`documentLoader` option from the algorithm specification](https://www.w3.org/TR/json-ld11-api/#dom-jsonldoptions-documentloader).
373 ///
374 /// See [`LoadDocumentCallback` API documentation](https://www.w3.org/TR/json-ld-api/#loaddocumentcallback) for more details
375 ///
376 /// ```
377 /// use oxjsonld::{JsonLdParser, JsonLdRemoteDocument};
378 /// use oxrdf::NamedNodeRef;
379 /// use oxrdf::vocab::rdf;
380 ///
381 /// let file = r#"{
382 /// "@context": "file://context.jsonld",
383 /// "@type": "schema:Person",
384 /// "@id": "http://example.com/foo",
385 /// "schema:name": "Foo"
386 /// }"#;
387 ///
388 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
389 /// let mut count = 0;
390 /// for quad in JsonLdParser::new()
391 /// .for_reader(file.as_bytes())
392 /// .with_load_document_callback(|url, _options| {
393 /// assert_eq!(url, "file://context.jsonld");
394 /// Ok(JsonLdRemoteDocument {
395 /// document: br#"{"@context":{"schema": "http://schema.org/"}}"#.to_vec(),
396 /// document_url: "file://context.jsonld".into(),
397 /// })
398 /// })
399 /// {
400 /// let quad = quad?;
401 /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
402 /// count += 1;
403 /// }
404 /// }
405 /// assert_eq!(1, count);
406 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
407 /// ```
408 pub fn with_load_document_callback(
409 mut self,
410 callback: impl Fn(
411 &str,
412 &JsonLdLoadDocumentOptions,
413 ) -> Result<JsonLdRemoteDocument, Box<dyn Error + Send + Sync>>
414 + Send
415 + Sync
416 + UnwindSafe
417 + RefUnwindSafe
418 + 'static,
419 ) -> Self {
420 self.inner.expansion = self.inner.expansion.with_load_document_callback(callback);
421 self
422 }
423
424 /// The list of IRI prefixes considered at the current step of the parsing.
425 ///
426 /// This method returns (prefix name, prefix value) tuples.
427 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
428 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
429 ///
430 /// ```
431 /// use oxjsonld::JsonLdParser;
432 ///
433 /// let file = r#"{
434 /// "@context": {"schema": "http://schema.org/", "@base": "http://example.com/"},
435 /// "@type": "schema:Person",
436 /// "@id": "foo",
437 /// "schema:name": "Foo"
438 /// }"#;
439 ///
440 /// let mut parser = JsonLdParser::new().for_reader(file.as_bytes());
441 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
442 ///
443 /// parser.next().unwrap()?; // We read the first quad
444 /// assert_eq!(
445 /// parser.prefixes().collect::<Vec<_>>(),
446 /// [("schema", "http://schema.org/")]
447 /// ); // There are now prefixes
448 /// //
449 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
450 /// ```
451 pub fn prefixes(&self) -> JsonLdPrefixesIter<'_> {
452 self.inner.prefixes()
453 }
454
455 /// The base IRI considered at the current step of the parsing.
456 ///
457 /// ```
458 /// use oxjsonld::JsonLdParser;
459 ///
460 /// let file = r#"{
461 /// "@context": {"schema": "http://schema.org/", "@base": "http://example.com/"},
462 /// "@type": "schema:Person",
463 /// "@id": "foo",
464 /// "schema:name": "Foo"
465 /// }"#;
466 ///
467 /// let mut parser = JsonLdParser::new().for_reader(file.as_bytes());
468 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
469 ///
470 /// parser.next().unwrap()?; // We read the first quad
471 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
472 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
473 /// ```
474 pub fn base_iri(&self) -> Option<&str> {
475 self.inner.base_iri()
476 }
477
478 fn parse_step(&mut self) -> Result<(), JsonLdParseError> {
479 let event = self.json_parser.parse_next().inspect_err(|_| {
480 self.inner.json_error = true;
481 })?;
482 self.inner
483 .parse_event(event, &mut self.results, &mut self.errors);
484 Ok(())
485 }
486}
487
/// Parses a JSON-LD file from an [`AsyncRead`] implementation.
489///
490/// Can be built using [`JsonLdParser::for_tokio_async_reader`].
491///
492/// Count the number of people:
493/// ```
494/// # #[tokio::main(flavor = "current_thread")]
495/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
496/// use oxjsonld::JsonLdParser;
497/// use oxrdf::NamedNodeRef;
498/// use oxrdf::vocab::rdf;
499///
500/// let file = r#"{
501/// "@context": {"schema": "http://schema.org/"},
502/// "@graph": [
503/// {
504/// "@type": "schema:Person",
505/// "@id": "http://example.com/foo",
506/// "schema:name": "Foo"
507/// },
508/// {
509/// "@type": "schema:Person",
510/// "schema:name": "Bar"
511/// }
512/// ]
513/// }"#;
514///
515/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
516/// let mut count = 0;
517/// let mut parser = JsonLdParser::new().for_tokio_async_reader(file.as_bytes());
518/// while let Some(quad) = parser.next().await {
519/// let quad = quad?;
520/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
521/// count += 1;
522/// }
523/// }
524/// assert_eq!(2, count);
525/// # Ok(())
526/// # }
527/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderJsonLdParser<R: AsyncRead + Unpin> {
    /// Quads produced by the last parsing step and not yet returned; popped from the end by `next`.
    results: Vec<Quad>,
    /// Syntax errors produced by the last parsing step; `next` returns them before any pending quad.
    errors: Vec<JsonLdSyntaxError>,
    /// Source-independent parsing state (expansion and RDF conversion).
    inner: InternalJsonLdParser,
    /// JSON event parser pulling from the wrapped asynchronous reader.
    json_parser: TokioAsyncReaderJsonParser<R>,
}
536
537#[cfg(feature = "async-tokio")]
538impl<R: AsyncRead + Unpin> TokioAsyncReaderJsonLdParser<R> {
539 /// Reads the next quad or returns `None` if the file is finished.
540 pub async fn next(&mut self) -> Option<Result<Quad, JsonLdParseError>> {
541 loop {
542 if let Some(error) = self.errors.pop() {
543 return Some(Err(error.into()));
544 } else if let Some(quad) = self.results.pop() {
545 return Some(Ok(quad));
546 } else if self.inner.is_end() {
547 return None;
548 }
549 if let Err(e) = self.parse_step().await {
550 return Some(Err(e));
551 }
552 // We make sure to have data in the right order
553 self.results.reverse();
554 self.errors.reverse();
555 }
556 }
557
558 /// The list of IRI prefixes considered at the current step of the parsing.
559 ///
560 /// This method returns (prefix name, prefix value) tuples.
561 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
562 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
563 ///
564 /// ```
565 /// # #[tokio::main(flavor = "current_thread")]
566 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
567 /// use oxjsonld::JsonLdParser;
568 ///
569 /// let file = r#"{
570 /// "@context": {"schema": "http://schema.org/", "@base": "http://example.com/"},
571 /// "@type": "schema:Person",
572 /// "@id": "foo",
573 /// "schema:name": "Foo"
574 /// }"#;
575 ///
576 /// let mut parser = JsonLdParser::new().for_tokio_async_reader(file.as_bytes());
577 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
578 ///
579 /// parser.next().await.unwrap()?; // We read the first quad
580 /// assert_eq!(
581 /// parser.prefixes().collect::<Vec<_>>(),
582 /// [("schema", "http://schema.org/")]
583 /// ); // There are now prefixes
584 /// //
585 /// # Ok(())
586 /// # }
587 /// ```
588 pub fn prefixes(&self) -> JsonLdPrefixesIter<'_> {
589 self.inner.prefixes()
590 }
591
592 /// The base IRI considered at the current step of the parsing.
593 ///
594 /// ```
595 /// # #[tokio::main(flavor = "current_thread")]
596 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
597 /// use oxjsonld::JsonLdParser;
598 ///
599 /// let file = r#"{
600 /// "@context": {"schema": "http://schema.org/", "@base": "http://example.com/"},
601 /// "@type": "schema:Person",
602 /// "@id": "foo",
603 /// "schema:name": "Foo"
604 /// }"#;
605 ///
606 /// let mut parser = JsonLdParser::new().for_tokio_async_reader(file.as_bytes());
607 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
608 ///
609 /// parser.next().await.unwrap()?; // We read the first quad
610 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
611 /// # Ok(())
612 /// # }
613 /// ```
614 pub fn base_iri(&self) -> Option<&str> {
615 self.inner.base_iri()
616 }
617
618 async fn parse_step(&mut self) -> Result<(), JsonLdParseError> {
619 let event = self.json_parser.parse_next().await.inspect_err(|_| {
620 self.inner.json_error = true;
621 })?;
622 self.inner
623 .parse_event(event, &mut self.results, &mut self.errors);
624 Ok(())
625 }
626}
627
628/// Parses a JSON-LD file from a byte slice.
629///
630/// Can be built using [`JsonLdParser::for_slice`].
631///
632/// Count the number of people:
633/// ```
634/// use oxjsonld::JsonLdParser;
635/// use oxrdf::NamedNodeRef;
636/// use oxrdf::vocab::rdf;
637///
638/// let file = r#"{
639/// "@context": {"schema": "http://schema.org/"},
640/// "@graph": [
641/// {
642/// "@type": "schema:Person",
643/// "@id": "http://example.com/foo",
644/// "schema:name": "Foo"
645/// },
646/// {
647/// "@type": "schema:Person",
648/// "schema:name": "Bar"
649/// }
650/// ]
651/// }"#;
652///
653/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
654/// let mut count = 0;
655/// for quad in JsonLdParser::new().for_slice(file) {
656/// let quad = quad?;
657/// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
658/// count += 1;
659/// }
660/// }
661/// assert_eq!(2, count);
662/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
663/// ```
#[must_use]
pub struct SliceJsonLdParser<'a> {
    /// Quads produced by the last parsing step and not yet returned; popped from the end by `next`.
    results: Vec<Quad>,
    /// Syntax errors produced by the last parsing step; `next` returns them before any pending quad.
    errors: Vec<JsonLdSyntaxError>,
    /// Source-independent parsing state (expansion and RDF conversion).
    inner: InternalJsonLdParser,
    /// JSON event parser reading from the borrowed byte slice.
    json_parser: SliceJsonParser<'a>,
}
671
672impl Iterator for SliceJsonLdParser<'_> {
673 type Item = Result<Quad, JsonLdSyntaxError>;
674
675 fn next(&mut self) -> Option<Self::Item> {
676 loop {
677 if let Some(error) = self.errors.pop() {
678 return Some(Err(error));
679 } else if let Some(quad) = self.results.pop() {
680 return Some(Ok(quad));
681 } else if self.inner.is_end() {
682 return None;
683 }
684 if let Err(e) = self.parse_step() {
685 // I/O errors cannot happen
686 return Some(Err(e));
687 }
688 // We make sure to have data in the right order
689 self.results.reverse();
690 self.errors.reverse();
691 }
692 }
693}
694
695impl SliceJsonLdParser<'_> {
696 /// Allows setting a callback to load remote documents and contexts
697 ///
698 /// The first argument is the document URL.
699 ///
700 /// It corresponds to the [`documentLoader` option from the algorithm specification](https://www.w3.org/TR/json-ld11-api/#dom-jsonldoptions-documentloader).
701 ///
702 /// See [`LoadDocumentCallback` API documentation](https://www.w3.org/TR/json-ld-api/#loaddocumentcallback) for more details
703 ///
704 /// ```
705 /// use oxjsonld::{JsonLdParser, JsonLdRemoteDocument};
706 /// use oxrdf::NamedNodeRef;
707 /// use oxrdf::vocab::rdf;
708 ///
709 /// let file = r#"{
710 /// "@context": "file://context.jsonld",
711 /// "@type": "schema:Person",
712 /// "@id": "http://example.com/foo",
713 /// "schema:name": "Foo"
714 /// }"#;
715 ///
716 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
717 /// let mut count = 0;
718 /// for quad in JsonLdParser::new()
719 /// .for_slice(file)
720 /// .with_load_document_callback(|url, _options| {
721 /// assert_eq!(url, "file://context.jsonld");
722 /// Ok(JsonLdRemoteDocument {
723 /// document: br#"{"@context":{"schema": "http://schema.org/"}}"#.to_vec(),
724 /// document_url: "file://context.jsonld".into(),
725 /// })
726 /// })
727 /// {
728 /// let quad = quad?;
729 /// if quad.predicate == rdf::TYPE && quad.object == schema_person.into() {
730 /// count += 1;
731 /// }
732 /// }
733 /// assert_eq!(1, count);
734 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
735 /// ```
736 pub fn with_load_document_callback(
737 mut self,
738 callback: impl Fn(
739 &str,
740 &JsonLdLoadDocumentOptions,
741 ) -> Result<JsonLdRemoteDocument, Box<dyn Error + Send + Sync>>
742 + Send
743 + Sync
744 + UnwindSafe
745 + RefUnwindSafe
746 + 'static,
747 ) -> Self {
748 self.inner.expansion = self.inner.expansion.with_load_document_callback(callback);
749 self
750 }
751
752 /// The list of IRI prefixes considered at the current step of the parsing.
753 ///
754 /// This method returns (prefix name, prefix value) tuples.
755 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
756 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
757 ///
758 /// ```
759 /// use oxjsonld::JsonLdParser;
760 ///
761 /// let file = r#"{
762 /// "@context": {"schema": "http://schema.org/", "@base": "http://example.com/"},
763 /// "@type": "schema:Person",
764 /// "@id": "foo",
765 /// "schema:name": "Foo"
766 /// }"#;
767 ///
768 /// let mut parser = JsonLdParser::new().for_slice(file);
769 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
770 ///
771 /// parser.next().unwrap()?; // We read the first quad
772 /// assert_eq!(
773 /// parser.prefixes().collect::<Vec<_>>(),
774 /// [("schema", "http://schema.org/")]
775 /// ); // There are now prefixes
776 /// //
777 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
778 /// ```
779 pub fn prefixes(&self) -> JsonLdPrefixesIter<'_> {
780 self.inner.prefixes()
781 }
782
783 /// The base IRI considered at the current step of the parsing.
784 ///
785 /// ```
786 /// use oxjsonld::JsonLdParser;
787 ///
788 /// let file = r#"{
789 /// "@context": {"schema": "http://schema.org/", "@base": "http://example.com/"},
790 /// "@type": "schema:Person",
791 /// "@id": "foo",
792 /// "schema:name": "Foo"
793 /// }"#;
794 ///
795 /// let mut parser = JsonLdParser::new().for_slice(file);
796 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
797 ///
798 /// parser.next().unwrap()?; // We read the first quad
799 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
800 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
801 /// ```
802 pub fn base_iri(&self) -> Option<&str> {
803 self.inner.base_iri()
804 }
805
806 fn parse_step(&mut self) -> Result<(), JsonLdSyntaxError> {
807 let event = self.json_parser.parse_next().inspect_err(|_| {
808 self.inner.json_error = true;
809 })?;
810 self.inner
811 .parse_event(event, &mut self.results, &mut self.errors);
812 Ok(())
813 }
814}
815
816/// Iterator on the file prefixes.
817///
818/// See [`ReaderJsonLdParser::prefixes`].
pub struct JsonLdPrefixesIter<'a> {
    /// Underlying iterator over the term definitions of the active context (term name -> definition).
    term_definitions: std::collections::hash_map::Iter<'a, String, JsonLdTermDefinition>,
    /// When `true`, IRI mappings are returned without checking that they parse as IRIs.
    lenient: bool,
}
823
824impl<'a> Iterator for JsonLdPrefixesIter<'a> {
825 type Item = (&'a str, &'a str);
826
827 #[inline]
828 fn next(&mut self) -> Option<Self::Item> {
829 loop {
830 let (prefix, term_definition) = self.term_definitions.next()?;
831 if term_definition.prefix_flag {
832 if let Some(Some(mapping)) = &term_definition.iri_mapping {
833 if self.lenient || Iri::parse(mapping.as_str()).is_ok() {
834 return Some((prefix, mapping));
835 }
836 }
837 }
838 }
839 }
840
841 #[inline]
842 fn size_hint(&self) -> (usize, Option<usize>) {
843 (0, self.term_definitions.size_hint().1)
844 }
845}
846
/// Parsing state shared by the reader, async reader and slice front-ends.
struct InternalJsonLdParser {
    /// First stage: receives raw JSON events and produces `JsonLdEvent`s (and syntax errors).
    expansion: JsonLdExpansionConverter,
    /// Scratch buffer filled by `expansion` and fully drained on every `parse_event` call.
    expended_events: Vec<JsonLdEvent>,
    /// Second stage: turns `JsonLdEvent`s into quads.
    to_rdf: JsonLdToRdfConverter,
    /// Set by the front-ends when the JSON parser fails; makes `is_end` return `true`.
    json_error: bool,
}
853
854impl InternalJsonLdParser {
855 fn parse_event(
856 &mut self,
857 event: JsonEvent<'_>,
858 results: &mut Vec<Quad>,
859 errors: &mut Vec<JsonLdSyntaxError>,
860 ) {
861 self.expansion
862 .convert_event(event, &mut self.expended_events, errors);
863 for event in self.expended_events.drain(..) {
864 self.to_rdf.convert_event(event, results);
865 }
866 }
867
868 fn is_end(&self) -> bool {
869 self.json_error || self.expansion.is_end()
870 }
871
872 fn base_iri(&self) -> Option<&str> {
873 Some(self.expansion.active_context().base_iri.as_ref()?.as_str())
874 }
875
876 fn prefixes(&self) -> JsonLdPrefixesIter<'_> {
877 JsonLdPrefixesIter {
878 term_definitions: self.expansion.active_context().term_definitions.iter(),
879 lenient: self.to_rdf.lenient,
880 }
881 }
882}
883
/// One entry of the state stack used while converting JSON-LD events to quads.
enum JsonLdToRdfState {
    /// An object has been opened but its identifier is not known yet:
    /// events are buffered until an `Id` or the matching `EndObject` event arrives.
    StartObject {
        /// Events received before the `@id` event
        buffer: Vec<JsonLdEvent>,
        /// Nesting level of objects opened inside the buffered events, useful during buffering
        nesting: usize,
    },
    /// Inside an object whose identifier has been decided
    /// (presumably `None` when the identifier could not be converted to a node).
    Object(Option<NamedOrBlankNode>),
    /// Inside a property of the enclosing object.
    Property {
        /// The predicate, or `None` if nothing must be emitted for this property
        id: Option<NamedNode>,
        /// Flag copied from the `StartProperty` event
        reverse: bool,
    },
    /// Inside a list; holds the current list node, `None` as long as the list has no element.
    List(Option<NamedOrBlankNode>),
    /// Inside a graph; holds its name (the stack starts with the default graph).
    Graph(Option<GraphName>),
    /// Inside an included block (pushed on `StartIncluded`).
    Included,
}
900
/// Converts a stream of `JsonLdEvent`s into quads.
struct JsonLdToRdfConverter {
    /// Stack of nested states; `convert_event` pops the top entry for each event and panics on an empty stack.
    state: Vec<JsonLdToRdfState>,
    /// Copy of the parser's lenient flag.
    lenient: bool,
}
905
906impl JsonLdToRdfConverter {
907 fn convert_event(&mut self, event: JsonLdEvent, results: &mut Vec<Quad>) {
908 #[expect(clippy::expect_used)]
909 let state = self.state.pop().expect("Empty stack");
910 match state {
911 JsonLdToRdfState::StartObject {
912 mut buffer,
913 nesting,
914 } => {
915 match event {
916 JsonLdEvent::Id(id) => {
917 if nesting > 0 {
918 buffer.push(JsonLdEvent::Id(id));
919 self.state
920 .push(JsonLdToRdfState::StartObject { buffer, nesting });
921 } else {
922 let id = self.convert_named_or_blank_node(id);
923 self.emit_quads_for_new_object(id.as_ref(), results);
924 self.state.push(JsonLdToRdfState::Object(id));
925 for event in buffer {
926 self.convert_event(event, results);
927 }
928 }
929 }
930 JsonLdEvent::EndObject => {
931 if nesting > 0 {
932 buffer.push(JsonLdEvent::EndObject);
933 self.state.push(JsonLdToRdfState::StartObject {
934 buffer,
935 nesting: nesting - 1,
936 });
937 } else {
938 let id = Some(BlankNode::default().into());
939 self.emit_quads_for_new_object(id.as_ref(), results);
940 if !buffer.is_empty() {
941 self.state.push(JsonLdToRdfState::Object(id));
942 for event in buffer {
943 self.convert_event(event, results);
944 }
945 // We properly end after playing the buffer
946 self.convert_event(JsonLdEvent::EndObject, results);
947 }
948 }
949 }
950 JsonLdEvent::StartObject => {
951 buffer.push(event);
952 self.state.push(JsonLdToRdfState::StartObject {
953 buffer,
954 nesting: nesting + 1,
955 });
956 }
957 _ => {
958 buffer.push(event);
959 self.state
960 .push(JsonLdToRdfState::StartObject { buffer, nesting });
961 }
962 }
963 }
964 JsonLdToRdfState::Object(id) => match event {
965 JsonLdEvent::Id(_) => {
966 // TODO: add a warning?
967 self.state.push(JsonLdToRdfState::Object(id));
968 }
969 JsonLdEvent::Type(t) => {
970 if let (Some(s), Some(o), Some(g)) = (
971 &id,
972 self.convert_named_or_blank_node(t),
973 self.last_graph_name(),
974 ) {
975 results.push(Quad::new(s.clone(), rdf::TYPE, o, g.clone()))
976 }
977 self.state.push(JsonLdToRdfState::Object(id));
978 }
979 JsonLdEvent::EndObject => (),
980 JsonLdEvent::StartProperty { name, reverse } => {
981 self.state.push(JsonLdToRdfState::Object(id));
982 self.state.push(JsonLdToRdfState::Property {
983 id: if self.has_defined_last_predicate() {
984 self.convert_named_node(name)
985 } else {
986 None // We do not want to emit if one of the parent property is not emitted
987 },
988 reverse,
989 });
990 }
991 JsonLdEvent::StartGraph => {
992 let graph_name = id.clone().map(Into::into);
993 self.state.push(JsonLdToRdfState::Object(id));
994 self.state.push(JsonLdToRdfState::Graph(graph_name));
995 }
996 JsonLdEvent::StartIncluded => {
997 self.state.push(JsonLdToRdfState::Object(id));
998 self.state.push(JsonLdToRdfState::Included);
999 }
1000 JsonLdEvent::StartObject
1001 | JsonLdEvent::Value { .. }
1002 | JsonLdEvent::Json(_)
1003 | JsonLdEvent::EndProperty
1004 | JsonLdEvent::EndGraph
1005 | JsonLdEvent::StartList
1006 | JsonLdEvent::EndList
1007 | JsonLdEvent::StartSet
1008 | JsonLdEvent::EndSet
1009 | JsonLdEvent::EndIncluded => unreachable!(),
1010 },
1011 JsonLdToRdfState::Property { .. } => match event {
1012 JsonLdEvent::StartObject => {
1013 self.state.push(state);
1014 self.state.push(JsonLdToRdfState::StartObject {
1015 buffer: Vec::new(),
1016 nesting: 0,
1017 });
1018 }
1019 JsonLdEvent::Value {
1020 value,
1021 r#type,
1022 language,
1023 direction,
1024 } => {
1025 self.state.push(state);
1026 self.emit_quad_for_new_literal(
1027 self.convert_literal(value, language, direction, r#type),
1028 results,
1029 )
1030 }
1031 JsonLdEvent::Json(value) => {
1032 self.state.push(state);
1033 self.emit_quad_for_new_literal(Some(Self::convert_json(value)), results)
1034 }
1035 JsonLdEvent::EndProperty => (),
1036 JsonLdEvent::StartList => {
1037 self.state.push(state);
1038 self.state.push(JsonLdToRdfState::List(None));
1039 }
1040 JsonLdEvent::StartSet | JsonLdEvent::EndSet => {
1041 self.state.push(state);
1042 }
1043 JsonLdEvent::StartProperty { .. }
1044 | JsonLdEvent::Id(_)
1045 | JsonLdEvent::Type(_)
1046 | JsonLdEvent::EndObject
1047 | JsonLdEvent::StartGraph
1048 | JsonLdEvent::EndGraph
1049 | JsonLdEvent::EndList
1050 | JsonLdEvent::StartIncluded
1051 | JsonLdEvent::EndIncluded => unreachable!(),
1052 },
1053 JsonLdToRdfState::List(current_node) => match event {
1054 JsonLdEvent::StartObject => {
1055 self.add_new_list_node_state(current_node, results);
1056 self.state.push(JsonLdToRdfState::StartObject {
1057 buffer: Vec::new(),
1058 nesting: 0,
1059 })
1060 }
1061 JsonLdEvent::Value {
1062 value,
1063 r#type,
1064 language,
1065 direction,
1066 } => {
1067 self.add_new_list_node_state(current_node, results);
1068 self.emit_quad_for_new_literal(
1069 self.convert_literal(value, language, direction, r#type),
1070 results,
1071 )
1072 }
1073 JsonLdEvent::Json(value) => {
1074 self.add_new_list_node_state(current_node, results);
1075 self.emit_quad_for_new_literal(Some(Self::convert_json(value)), results)
1076 }
1077 JsonLdEvent::StartList => {
1078 self.add_new_list_node_state(current_node, results);
1079 self.state.push(JsonLdToRdfState::List(None));
1080 }
1081 JsonLdEvent::EndList => {
1082 if let Some(previous_node) = current_node {
1083 if let Some(graph_name) = self.last_graph_name() {
1084 results.push(Quad::new(
1085 previous_node,
1086 rdf::REST,
1087 rdf::NIL.into_owned(),
1088 graph_name.clone(),
1089 ));
1090 }
1091 } else {
1092 self.emit_quads_for_new_object(Some(&rdf::NIL.into_owned().into()), results)
1093 }
1094 }
1095 JsonLdEvent::StartSet | JsonLdEvent::EndSet => {
1096 // TODO: this is bad
1097 self.state.push(JsonLdToRdfState::List(current_node));
1098 }
1099 JsonLdEvent::EndObject
1100 | JsonLdEvent::StartProperty { .. }
1101 | JsonLdEvent::EndProperty
1102 | JsonLdEvent::Id(_)
1103 | JsonLdEvent::Type(_)
1104 | JsonLdEvent::StartGraph
1105 | JsonLdEvent::EndGraph
1106 | JsonLdEvent::StartIncluded
1107 | JsonLdEvent::EndIncluded => unreachable!(),
1108 },
1109 JsonLdToRdfState::Graph(_) => match event {
1110 JsonLdEvent::StartObject => {
1111 self.state.push(state);
1112 self.state.push(JsonLdToRdfState::StartObject {
1113 buffer: Vec::new(),
1114 nesting: 0,
1115 });
1116 }
1117 JsonLdEvent::Value { .. } | JsonLdEvent::Json(_) => {
1118 self.state.push(state);
1119 }
1120 JsonLdEvent::EndGraph => (),
1121 JsonLdEvent::StartGraph
1122 | JsonLdEvent::StartProperty { .. }
1123 | JsonLdEvent::EndProperty
1124 | JsonLdEvent::Id(_)
1125 | JsonLdEvent::Type(_)
1126 | JsonLdEvent::EndObject
1127 | JsonLdEvent::StartList
1128 | JsonLdEvent::EndList
1129 | JsonLdEvent::StartSet
1130 | JsonLdEvent::EndSet
1131 | JsonLdEvent::StartIncluded
1132 | JsonLdEvent::EndIncluded => unreachable!(),
1133 },
1134 JsonLdToRdfState::Included => match event {
1135 JsonLdEvent::StartObject => {
1136 self.state.push(JsonLdToRdfState::Included);
1137 self.state.push(JsonLdToRdfState::StartObject {
1138 buffer: Vec::new(),
1139 nesting: 0,
1140 });
1141 }
1142 JsonLdEvent::Value { .. } | JsonLdEvent::Json(_) => {
1143 // Illegal but might happen in "lenient" mode
1144 self.state.push(JsonLdToRdfState::Included);
1145 }
1146 JsonLdEvent::EndIncluded => (),
1147 JsonLdEvent::StartGraph
1148 | JsonLdEvent::EndGraph
1149 | JsonLdEvent::StartProperty { .. }
1150 | JsonLdEvent::EndProperty
1151 | JsonLdEvent::Id(_)
1152 | JsonLdEvent::Type(_)
1153 | JsonLdEvent::EndObject
1154 | JsonLdEvent::StartList
1155 | JsonLdEvent::EndList
1156 | JsonLdEvent::StartSet
1157 | JsonLdEvent::EndSet
1158 | JsonLdEvent::StartIncluded => unreachable!(),
1159 },
1160 }
1161 }
1162
1163 fn emit_quads_for_new_object(&self, id: Option<&NamedOrBlankNode>, results: &mut Vec<Quad>) {
1164 let Some(id) = id else {
1165 return;
1166 };
1167 let Some(graph_name) = self.last_graph_name() else {
1168 return;
1169 };
1170 if let (Some(subject), Some((predicate, reverse))) =
1171 (self.last_subject(), self.last_predicate())
1172 {
1173 results.push(if reverse {
1174 Quad::new(id.clone(), predicate, subject.clone(), graph_name.clone())
1175 } else {
1176 Quad::new(subject.clone(), predicate, id.clone(), graph_name.clone())
1177 })
1178 }
1179 }
1180
1181 fn emit_quad_for_new_literal(&self, literal: Option<Literal>, results: &mut Vec<Quad>) {
1182 let Some(literal) = literal else {
1183 return;
1184 };
1185 let Some(graph_name) = self.last_graph_name() else {
1186 return;
1187 };
1188 let Some(subject) = self.last_subject() else {
1189 return;
1190 };
1191 let Some((predicate, reverse)) = self.last_predicate() else {
1192 return;
1193 };
1194 if reverse {
1195 return;
1196 }
1197 results.push(Quad::new(
1198 subject.clone(),
1199 predicate,
1200 literal,
1201 graph_name.clone(),
1202 ))
1203 }
1204
1205 fn add_new_list_node_state(
1206 &mut self,
1207 current_node: Option<NamedOrBlankNode>,
1208 results: &mut Vec<Quad>,
1209 ) {
1210 let new_node = BlankNode::default();
1211 if let Some(previous_node) = current_node {
1212 if let Some(graph_name) = self.last_graph_name() {
1213 results.push(Quad::new(
1214 previous_node,
1215 rdf::REST,
1216 new_node.clone(),
1217 graph_name.clone(),
1218 ));
1219 }
1220 } else {
1221 self.emit_quads_for_new_object(Some(&new_node.clone().into()), results)
1222 }
1223 self.state
1224 .push(JsonLdToRdfState::List(Some(new_node.into())));
1225 }
1226
1227 fn convert_named_or_blank_node(&self, value: String) -> Option<NamedOrBlankNode> {
1228 Some(if let Some(bnode_id) = value.strip_prefix("_:") {
1229 if self.lenient {
1230 Some(BlankNode::new_unchecked(bnode_id))
1231 } else {
1232 BlankNode::new(bnode_id).ok()
1233 }?
1234 .into()
1235 } else {
1236 self.convert_named_node(value)?.into()
1237 })
1238 }
1239
1240 fn convert_named_node(&self, value: String) -> Option<NamedNode> {
1241 if self.lenient {
1242 Some(NamedNode::new_unchecked(value))
1243 } else {
1244 NamedNode::new(&value).ok()
1245 }
1246 }
1247
1248 #[cfg_attr(not(feature = "rdf-12"), expect(unused_variables))]
1249 fn convert_literal(
1250 &self,
1251 value: JsonLdValue,
1252 language: Option<String>,
1253 direction: Option<&'static str>,
1254 r#type: Option<String>,
1255 ) -> Option<Literal> {
1256 let r#type = if let Some(t) = r#type {
1257 Some(self.convert_named_node(t)?)
1258 } else {
1259 None
1260 };
1261 Some(match value {
1262 JsonLdValue::String(value) => {
1263 if let Some(language) = language {
1264 #[cfg(feature = "rdf-12")]
1265 if let Some(direction) = direction {
1266 if r#type.is_some_and(|t| t != rdf::DIR_LANG_STRING) {
1267 return None; // Expansion already returns an error
1268 }
1269 let direction = match direction {
1270 "ltr" => BaseDirection::Ltr,
1271 "rtl" => BaseDirection::Rtl,
1272 _ => return None, // Expansion already returns an error
1273 };
1274 return if self.lenient {
1275 Some(Literal::new_directional_language_tagged_literal_unchecked(
1276 value, language, direction,
1277 ))
1278 } else {
1279 Literal::new_directional_language_tagged_literal(
1280 value, &language, direction,
1281 )
1282 .ok()
1283 };
1284 }
1285 if r#type.is_some_and(|t| t != rdf::LANG_STRING) {
1286 return None; // Expansion already returns an error
1287 }
1288 if self.lenient {
1289 Literal::new_language_tagged_literal_unchecked(value, language)
1290 } else {
1291 Literal::new_language_tagged_literal(value, &language).ok()?
1292 }
1293 } else if let Some(datatype) = r#type {
1294 Literal::new_typed_literal(value, datatype)
1295 } else {
1296 Literal::new_simple_literal(value)
1297 }
1298 }
1299 JsonLdValue::Number(value) => {
1300 if language.is_some() {
1301 return None; // Expansion already returns an error
1302 }
1303 let value = canonicalize_xsd_number(
1304 &value,
1305 r#type.as_ref().is_some_and(|t| *t == xsd::DOUBLE),
1306 )
1307 .unwrap_or(RdfJsonNumber::Double(value));
1308 match value {
1309 RdfJsonNumber::Integer(value) => Literal::new_typed_literal(
1310 value,
1311 r#type.unwrap_or_else(|| xsd::INTEGER.into()),
1312 ),
1313 RdfJsonNumber::Double(value) => Literal::new_typed_literal(
1314 value,
1315 r#type.unwrap_or_else(|| xsd::DOUBLE.into()),
1316 ),
1317 }
1318 }
1319 JsonLdValue::Boolean(value) => {
1320 if language.is_some() {
1321 return None; // Expansion already returns an error
1322 }
1323 Literal::new_typed_literal(
1324 if value { "true" } else { "false" },
1325 r#type.unwrap_or_else(|| xsd::BOOLEAN.into()),
1326 )
1327 }
1328 })
1329 }
1330
1331 fn convert_json(value: Vec<JsonEvent<'static>>) -> Literal {
1332 let mut writer = WriterJsonSerializer::new(Vec::new());
1333 serialize_canonical_json(value, &mut writer);
1334 Literal::new_typed_literal(
1335 String::from_utf8(writer.finish().unwrap()).unwrap(),
1336 #[cfg(feature = "rdf-12")]
1337 rdf::JSON,
1338 #[cfg(not(feature = "rdf-12"))]
1339 NamedNodeRef::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#JSON"),
1340 )
1341 }
1342
1343 fn last_subject(&self) -> Option<&NamedOrBlankNode> {
1344 for state in self.state.iter().rev() {
1345 match state {
1346 JsonLdToRdfState::Object(id) => {
1347 return id.as_ref();
1348 }
1349 JsonLdToRdfState::StartObject { .. } => {
1350 unreachable!()
1351 }
1352 JsonLdToRdfState::Property { .. } => (),
1353 JsonLdToRdfState::List(id) => return id.as_ref(),
1354 JsonLdToRdfState::Graph(_) | JsonLdToRdfState::Included => {
1355 return None;
1356 }
1357 }
1358 }
1359 None
1360 }
1361
1362 fn last_predicate(&self) -> Option<(NamedNodeRef<'_>, bool)> {
1363 for state in self.state.iter().rev() {
1364 match state {
1365 JsonLdToRdfState::Property { id, reverse } => {
1366 return Some((id.as_ref()?.as_ref(), *reverse));
1367 }
1368 JsonLdToRdfState::StartObject { .. } | JsonLdToRdfState::Object(_) => (),
1369 JsonLdToRdfState::List(_) => return Some((rdf::FIRST, false)),
1370 JsonLdToRdfState::Graph(_) | JsonLdToRdfState::Included => {
1371 return None;
1372 }
1373 }
1374 }
1375 None
1376 }
1377
1378 fn has_defined_last_predicate(&self) -> bool {
1379 for state in self.state.iter().rev() {
1380 if let JsonLdToRdfState::Property { id, .. } = state {
1381 return id.is_some();
1382 }
1383 }
1384 true
1385 }
1386
1387 fn last_graph_name(&self) -> Option<&GraphName> {
1388 for state in self.state.iter().rev() {
1389 match state {
1390 JsonLdToRdfState::Graph(graph) => {
1391 return graph.as_ref();
1392 }
1393 JsonLdToRdfState::StartObject { .. }
1394 | JsonLdToRdfState::Object(_)
1395 | JsonLdToRdfState::Property { .. }
1396 | JsonLdToRdfState::List(_)
1397 | JsonLdToRdfState::Included => (),
1398 }
1399 }
1400 None
1401 }
1402}
1403
/// Result of [`canonicalize_xsd_number`]: a lexical form tagged with its kind.
#[derive(Eq, PartialEq, Debug, Clone)]
enum RdfJsonNumber {
    /// Plain digits with an optional leading `-`, e.g. `-12`.
    Integer(String),
    /// Scientific notation with a single digit before the dot, e.g. `-1.2E1`.
    Double(String),
}

/// Canonicalizes a JSON number to the lexical form of an `xsd:integer` or `xsd:double`.
///
/// The integer form is used when `always_double` is `false`, the number has no
/// fractional part and its absolute value is strictly below 10^21. The double
/// form (`D.DDDE<exp>`) is used in every other case.
///
/// Returns `None` if the exponent is not a valid `i64` or if the exponent
/// arithmetic overflows.
fn canonicalize_xsd_number(value: &str, always_double: bool) -> Option<RdfJsonNumber> {
    // We parse the sign, the mantissa and the exponent
    let (value, is_negative) = if let Some(value) = value.strip_prefix('-') {
        (value, true)
    } else {
        (value.strip_prefix('+').unwrap_or(value), false)
    };
    let (value, exp) = value.split_once(['e', 'E']).unwrap_or((value, "0"));
    let (mut integer_part, mut decimal_part) = value.split_once('.').unwrap_or((value, ""));
    let mut exp = exp.parse::<i64>().ok()?;

    // We normalize
    // We trim the zeros that carry no information
    while let Some(c) = integer_part.strip_prefix('0') {
        integer_part = c;
    }
    while let Some(c) = decimal_part.strip_suffix('0') {
        decimal_part = c;
    }
    // Zeros at the end of the integer part are moved into the exponent
    if decimal_part.is_empty() {
        while let Some(c) = integer_part.strip_suffix('0') {
            integer_part = c;
            exp = exp.checked_add(1)?;
        }
    }
    // Zeros at the start of the decimal part are moved into the exponent
    if integer_part.is_empty() {
        while let Some(c) = decimal_part.strip_prefix('0') {
            decimal_part = c;
            exp = exp.checked_sub(1)?;
        }
    }

    // We set the exponent in the 0.XXXEYYY form:
    // for a value >= 1, `exp` is now the number of digits of its integer part
    let exp_change = i64::try_from(integer_part.len()).ok()?;
    exp = exp.checked_add(exp_change)?;

    // We handle the zero case
    if integer_part.is_empty() && decimal_part.is_empty() {
        integer_part = "0";
        exp = 1;
    }

    // We serialize
    let mut buffer = String::with_capacity(value.len());
    // Zero never carries a sign
    if is_negative && !(decimal_part.is_empty() && integer_part == "0") {
        buffer.push('-');
    }
    let digits_count = i64::try_from(integer_part.len() + decimal_part.len()).ok()?;
    // `exp >= digits_count` means there is no fractional part.
    // `exp <= 21` means at most 21 integer digits i.e. an absolute value < 10^21.
    Some(if !always_double && exp >= digits_count && exp <= 21 {
        buffer.push_str(integer_part);
        buffer.push_str(decimal_part);
        // We restore the zeros that were moved into the exponent
        for _ in digits_count..exp {
            buffer.push('0');
        }
        RdfJsonNumber::Integer(buffer)
    } else {
        let mut all_digits = integer_part.chars().chain(decimal_part.chars());
        buffer.push(all_digits.next()?);
        buffer.push('.');
        if digits_count == 1 {
            // There is always at least one digit after the dot
            buffer.push('0');
        } else {
            buffer.extend(all_digits);
        }
        // From the 0.XXXEYYY form to the X.XXEYYY one
        write!(&mut buffer, "E{}", exp.checked_sub(1)?).ok()?;
        RdfJsonNumber::Double(buffer)
    })
}
1480
1481fn serialize_canonical_json(
1482 events: Vec<JsonEvent<'static>>,
1483 writer: &mut WriterJsonSerializer<Vec<u8>>,
1484) {
1485 let mut iter = events.into_iter();
1486 while let Some(event) = iter.next() {
1487 match event {
1488 JsonEvent::StartObject => {
1489 writer.serialize_event(JsonEvent::StartObject).unwrap();
1490 let mut key_values = Vec::new();
1491 let mut nesting = 1;
1492 for event in iter.by_ref() {
1493 match event {
1494 JsonEvent::ObjectKey(k) if nesting == 1 => {
1495 key_values.push((k, Vec::new()));
1496 }
1497 JsonEvent::StartObject => {
1498 nesting += 1;
1499 key_values.last_mut().unwrap().1.push(event);
1500 }
1501 JsonEvent::EndObject => {
1502 nesting -= 1;
1503 if nesting == 0 {
1504 break;
1505 }
1506 key_values.last_mut().unwrap().1.push(event);
1507 }
1508 _ => {
1509 key_values.last_mut().unwrap().1.push(event);
1510 }
1511 }
1512 }
1513 key_values.sort_unstable_by(|(k1, _), (k2, _)| k1.cmp(k2));
1514 for (k, v) in key_values {
1515 writer.serialize_event(JsonEvent::ObjectKey(k)).unwrap();
1516 serialize_canonical_json(v, writer);
1517 }
1518 writer.serialize_event(JsonEvent::EndObject).unwrap();
1519 }
1520 JsonEvent::Number(value) => {
1521 let value = f64::from_str(&value).unwrap();
1522 let mut buffer = ryu_js::Buffer::new();
1523 writer
1524 .serialize_event(JsonEvent::Number(buffer.format(value).into()))
1525 .unwrap();
1526 }
1527 _ => {
1528 writer.serialize_event(event).unwrap();
1529 }
1530 }
1531 }
1532}
1533
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected result for a number canonicalized as an integer.
    fn integer(lexical: &str) -> Option<RdfJsonNumber> {
        Some(RdfJsonNumber::Integer(lexical.into()))
    }

    /// Expected result for a number canonicalized as a double.
    fn double(lexical: &str) -> Option<RdfJsonNumber> {
        Some(RdfJsonNumber::Double(lexical.into()))
    }

    #[test]
    fn test_canonicalize_xsd_number() {
        // (input, always_double, expected)
        let cases = [
            ("12", false, integer("12")),
            ("-12", false, integer("-12")),
            ("1", true, double("1.0E0")),
            ("+1", true, double("1.0E0")),
            ("-1", true, double("-1.0E0")),
            ("12", true, double("1.2E1")),
            ("-12", true, double("-1.2E1")),
            ("12.3456E3", false, double("1.23456E4")),
            ("12.3456e3", false, double("1.23456E4")),
            ("-12.3456E3", false, double("-1.23456E4")),
            ("12.34E-3", false, double("1.234E-2")),
            ("12.340E-3", false, double("1.234E-2")),
            ("0.01234E-1", false, double("1.234E-3")),
            ("1.0", false, integer("1")),
            ("1.0E0", false, integer("1")),
            ("0.01E2", false, integer("1")),
            ("1E2", false, integer("100")),
            ("1E21", false, double("1.0E21")),
            ("0", false, integer("0")),
            ("0", true, double("0.0E0")),
            ("-0", true, double("0.0E0")),
            ("0E-10", true, double("0.0E0")),
        ];
        for (input, always_double, expected) in cases {
            assert_eq!(
                canonicalize_xsd_number(input, always_double),
                expected,
                "unexpected canonical form for {input:?} (always_double: {always_double})"
            );
        }
    }
}