json_ld_core/
serialization.rs

1use hashbrown::HashSet;
2use iref::Iri;
3use json_syntax::Parse;
4use linked_data::{FromLinkedDataError, LinkedDataDeserialize};
5use rdf_types::{
6	dataset::{PatternMatchingDataset, TraversableDataset},
7	interpretation::{
8		ReverseIdInterpretation, ReverseIriInterpretation, ReverseTermInterpretation,
9	},
10	vocabulary::{BlankIdVocabulary, IriVocabulary},
11	LiteralTypeRef, Quad, Term, Vocabulary,
12};
13use static_iref::iri;
14use std::{
15	collections::{BTreeMap, BTreeSet},
16	hash::Hash,
17	str::FromStr,
18};
19
20use crate::{
21	object::{List, Literal},
22	rdf::{
23		RDF_FIRST, RDF_JSON, RDF_NIL, RDF_REST, RDF_TYPE, XSD_BOOLEAN, XSD_DOUBLE, XSD_INTEGER,
24		XSD_STRING,
25	},
26	ExpandedDocument, Id, Indexed, IndexedObject, LangString, Node, Object, ValidId, Value,
27};
28
29struct SerDataset<R> {
30	named_graphs: BTreeMap<R, SerGraph<R>>,
31	default_graph: SerGraph<R>,
32}
33
34impl<R> SerDataset<R> {
35	fn new() -> Self {
36		Self {
37			named_graphs: BTreeMap::new(),
38			default_graph: SerGraph::new(),
39		}
40	}
41}
42
43impl<R: Ord> SerDataset<R> {
44	fn graph_mut(&mut self, label: Option<R>) -> &mut SerGraph<R>
45	where
46		R: Ord,
47	{
48		match label {
49			Some(g) => self.named_graphs.entry(g).or_insert_with(SerGraph::new),
50			None => &mut self.default_graph,
51		}
52	}
53
54	fn fold_into_default_graph(mut self) -> SerGraph<R> {
55		for (id, graph) in self.named_graphs {
56			self.default_graph.resource_mut(id).graph = Some(graph);
57		}
58
59		self.default_graph
60	}
61}
62
63struct SerGraph<R> {
64	resources: BTreeMap<R, SerResource<R>>,
65}
66
/// RDF list information attached to a resource.
struct SerList<R> {
	/// Objects of the resource's `rdf:first` quads.
	first: HashSet<R>,

	/// Objects of the resource's `rdf:rest` quads.
	rest: HashSet<R>,

	/// Resources recorded as reaching this one through `rdf:rest`.
	reverse_rest: HashSet<R>,

	/// Items of the list starting at this resource, once it has been
	/// recognized as the head of a list.
	values: Option<Vec<R>>,
}

impl<R> Default for SerList<R> {
	/// Creates list data with no `rdf:first`/`rdf:rest` value and no items.
	fn default() -> Self {
		let first = HashSet::new();
		let rest = HashSet::new();
		let reverse_rest = HashSet::new();

		Self {
			first,
			rest,
			reverse_rest,
			values: None,
		}
	}
}

impl<R> SerList<R> {
	/// Checks that there is exactly one `rdf:first` value and exactly one
	/// `rdf:rest` value.
	fn is_well_formed(&self) -> bool {
		matches!((self.first.len(), self.rest.len()), (1, 1))
	}

	/// Checks that there is neither an `rdf:first` nor an `rdf:rest` value.
	fn is_empty(&self) -> bool {
		matches!((self.first.len(), self.rest.len()), (0, 0))
	}
}
94
95struct SerResource<R> {
96	types: BTreeSet<RdfType<R>>,
97	properties: BTreeMap<R, BTreeSet<R>>,
98	graph: Option<SerGraph<R>>,
99	list: SerList<R>,
100	references: usize,
101}
102
103impl<R> Default for SerResource<R> {
104	fn default() -> Self {
105		Self {
106			types: BTreeSet::new(),
107			properties: BTreeMap::new(),
108			graph: None,
109			list: SerList::default(),
110			references: 0,
111		}
112	}
113}
114
115impl<R> SerResource<R> {
116	fn is_empty(&self) -> bool {
117		self.types.is_empty()
118			&& self.properties.is_empty()
119			&& self.graph.is_none()
120			&& self.list.is_empty()
121	}
122
123	fn is_list_node(&self) -> bool {
124		self.types.iter().all(|ty| ty.is_list())
125			&& self.properties.is_empty()
126			&& self.graph.is_none()
127			&& self.list.is_well_formed()
128	}
129
130	fn insert(&mut self, prop: R, object: R)
131	where
132		R: Ord,
133	{
134		self.properties.entry(prop).or_default().insert(object);
135	}
136}
137
138impl<R> SerGraph<R> {
139	fn new() -> Self {
140		Self {
141			resources: BTreeMap::new(),
142		}
143	}
144
145	fn get(&self, id: &R) -> Option<&SerResource<R>>
146	where
147		R: Ord,
148	{
149		self.resources.get(id)
150	}
151
152	fn resource_mut(&mut self, id: R) -> &mut SerResource<R>
153	where
154		R: Ord,
155	{
156		self.resources.entry(id).or_default()
157	}
158}
159
160enum RdfProperty {
161	Type,
162	First,
163	Rest,
164}
165
166fn rdf_property<V: IriVocabulary, I: ReverseIriInterpretation<Iri = V::Iri>>(
167	vocabulary: &V,
168	interpretation: &I,
169	id: &I::Resource,
170) -> Option<RdfProperty> {
171	for i in interpretation.iris_of(id) {
172		let iri = vocabulary.iri(i).unwrap();
173		if iri == RDF_TYPE {
174			return Some(RdfProperty::Type);
175		} else if iri == RDF_FIRST {
176			return Some(RdfProperty::First);
177		} else if iri == RDF_REST {
178			return Some(RdfProperty::Rest);
179		}
180	}
181
182	None
183}
184
/// Type of a resource, with `rdf:List` singled out from any other type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum RdfType<R> {
	/// The `rdf:List` type.
	List,

	/// Any other type, identified by its resource.
	Other(R),
}

impl<R> RdfType<R> {
	/// Checks whether this type is `rdf:List`.
	fn is_list(&self) -> bool {
		match self {
			Self::List => true,
			Self::Other(_) => false,
		}
	}
}
196
197const RDF_LIST: &Iri = iri!("http://www.w3.org/1999/02/22-rdf-syntax-ns#List");
198
199fn rdf_type<'a, V: IriVocabulary, I: ReverseIriInterpretation<Iri = V::Iri>>(
200	vocabulary: &V,
201	interpretation: &I,
202	id: &'a I::Resource,
203) -> RdfType<&'a I::Resource> {
204	for i in interpretation.iris_of(id) {
205		let iri = vocabulary.iri(i).unwrap();
206		if iri == RDF_LIST {
207			return RdfType::List;
208		}
209	}
210
211	RdfType::Other(id)
212}
213
214fn is_anonymous<I: ReverseTermInterpretation>(interpretation: &I, id: &I::Resource) -> bool {
215	interpretation.iris_of(id).next().is_none() && interpretation.literals_of(id).next().is_none()
216}
217
218#[derive(Debug, thiserror::Error)]
219pub enum SerializationError {
220	#[error("invalid JSON")]
221	InvalidJson(linked_data::ContextIris, json_syntax::parse::Error),
222
223	#[error("invalid boolean value")]
224	InvalidBoolean(linked_data::ContextIris, String),
225
226	#[error("invalid number value")]
227	Number(linked_data::ContextIris, String),
228}
229
/// Resources denoting well-known RDF terms, as encountered in the input
/// quads. A field stays `None` until the corresponding term has been seen.
#[derive(Clone, Copy)]
pub struct RdfTerms<R> {
	/// Resource denoting `rdf:List`.
	list: Option<R>,

	/// Resource denoting `rdf:first`.
	first: Option<R>,

	/// Resource denoting `rdf:rest`.
	rest: Option<R>,
}
236
237impl<I, B> ExpandedDocument<I, B> {
238	pub fn from_interpreted_quads_in<'a, V, T>(
239		vocabulary: &V,
240		interpretation: &T,
241		quads: impl IntoIterator<
242			Item = Quad<&'a T::Resource, &'a T::Resource, &'a T::Resource, &'a T::Resource>,
243		>,
244		context: linked_data::Context<T>,
245	) -> Result<Self, SerializationError>
246	where
247		V: Vocabulary<Iri = I, BlankId = B>,
248		T: ReverseTermInterpretation<Iri = I, BlankId = B, Literal = V::Literal>,
249		T::Resource: 'a + Ord + Hash,
250		I: Clone + Eq + Hash,
251		B: Clone + Eq + Hash,
252	{
253		let mut node_map: SerDataset<&'a T::Resource> = SerDataset::new();
254
255		let mut nil = None;
256		let mut rdf_terms = RdfTerms {
257			list: None,
258			first: None,
259			rest: None,
260		};
261
262		for quad in quads {
263			let graph = node_map.graph_mut(quad.3);
264			let subject = graph.resource_mut(quad.0);
265
266			match rdf_property(vocabulary, interpretation, quad.1) {
267				Some(RdfProperty::Type) => {
268					rdf_terms.first = Some(quad.1);
269					let ty = rdf_type(vocabulary, interpretation, quad.2);
270
271					if ty.is_list() {
272						rdf_terms.list = Some(quad.2);
273					}
274
275					subject.types.insert(ty);
276				}
277				Some(RdfProperty::First) => {
278					rdf_terms.first = Some(quad.1);
279					subject.list.first.insert(quad.2);
280				}
281				Some(RdfProperty::Rest) => {
282					rdf_terms.rest = Some(quad.1);
283					if nil.is_none() {
284						for i in interpretation.iris_of(quad.2) {
285							let iri = vocabulary.iri(i).unwrap();
286							if iri == RDF_NIL {
287								nil = Some(quad.2);
288							}
289						}
290					}
291
292					subject.list.rest.insert(quad.2);
293					graph.resource_mut(quad.2).list.reverse_rest.insert(quad.1);
294				}
295				None => {
296					subject.insert(quad.1, quad.2);
297				}
298			}
299
300			let object = graph.resource_mut(quad.2);
301			if quad.1 == quad.2 {
302				object.references = usize::MAX;
303			} else {
304				let r = object.references;
305				object.references = r.saturating_add(1)
306			}
307		}
308
309		let mut graph = node_map.fold_into_default_graph();
310
311		let mut lists = Vec::new();
312		if let Some(nil_id) = nil {
313			if let Some(nil) = graph.get(&nil_id) {
314				for &node_id in &nil.list.reverse_rest {
315					let mut head_id = node_id;
316					if is_anonymous(interpretation, head_id) {
317						if let Some(mut head) = graph.get(&head_id) {
318							if head.references == 1 && head.is_list_node() {
319								let mut values = Vec::new();
320
321								loop {
322									let first = head.list.first.iter().next().copied().unwrap();
323									let parent_id =
324										head.list.reverse_rest.iter().next().copied().unwrap();
325									values.push(first);
326
327									if is_anonymous(interpretation, parent_id) {
328										if let Some(parent) = graph.get(&parent_id) {
329											if parent.references == 1 && parent.is_list_node() {
330												head_id = parent_id;
331												head = parent;
332												continue;
333											}
334										}
335									}
336
337									break;
338								}
339
340								values.reverse();
341								lists.push((head_id, values))
342							}
343						}
344					}
345				}
346			}
347		}
348
349		for (id, values) in lists {
350			graph.resource_mut(id).list.values = Some(values)
351		}
352
353		let mut result = ExpandedDocument::new();
354		for (id, resource) in &graph.resources {
355			if resource.references != 1 && !resource.is_empty() {
356				result.insert(render_object(
357					vocabulary,
358					interpretation,
359					rdf_terms,
360					&graph,
361					id,
362					resource,
363					context,
364				)?);
365			}
366		}
367
368		Ok(result)
369	}
370
371	pub fn from_interpreted_quads<'a, V, T>(
372		vocabulary: &V,
373		interpretation: &T,
374		quads: impl IntoIterator<
375			Item = Quad<&'a T::Resource, &'a T::Resource, &'a T::Resource, &'a T::Resource>,
376		>,
377	) -> Result<Self, SerializationError>
378	where
379		V: Vocabulary<Iri = I, BlankId = B>,
380		T: ReverseTermInterpretation<Iri = I, BlankId = B, Literal = V::Literal>,
381		T::Resource: 'a + Ord + Hash,
382		I: Clone + Eq + Hash,
383		B: Clone + Eq + Hash,
384	{
385		Self::from_interpreted_quads_in(
386			vocabulary,
387			interpretation,
388			quads,
389			linked_data::Context::default(),
390		)
391	}
392}
393
394fn render_object<V, I>(
395	vocabulary: &V,
396	interpretation: &I,
397	rdf_terms: RdfTerms<&I::Resource>,
398	graph: &SerGraph<&I::Resource>,
399	id: &I::Resource,
400	resource: &SerResource<&I::Resource>,
401	context: linked_data::Context<I>,
402) -> Result<IndexedObject<V::Iri, V::BlankId>, SerializationError>
403where
404	V: Vocabulary,
405	I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
406	V::Iri: Clone + Eq + Hash,
407	V::BlankId: Clone + Eq + Hash,
408	I::Resource: Ord,
409{
410	let context = context.with_subject(id);
411	if resource.is_empty() {
412		render_reference(vocabulary, interpretation, id, context)
413	} else {
414		match &resource.list.values {
415			Some(values) => {
416				let mut objects = Vec::with_capacity(values.len());
417
418				for value in values {
419					objects.push(render_object_or_reference(
420						vocabulary,
421						interpretation,
422						rdf_terms,
423						graph,
424						value,
425						context,
426					)?);
427				}
428
429				Ok(Indexed::none(Object::List(List::new(objects))))
430			}
431			None => {
432				let mut node: Node<V::Iri, V::BlankId> = Node::new();
433
434				if let Some(id) = id_of(interpretation, id) {
435					node.id = Some(id)
436				}
437
438				let mut types = Vec::with_capacity(resource.types.len());
439				for ty in &resource.types {
440					let ty_resource = match ty {
441						RdfType::List => rdf_terms.list.unwrap(),
442						RdfType::Other(o) => o,
443					};
444
445					if let Some(ty_id) = id_of(interpretation, ty_resource) {
446						types.push(ty_id)
447					}
448				}
449
450				if !types.is_empty() {
451					node.types = Some(types);
452				}
453
454				if let Some(graph) = &resource.graph {
455					let mut value = crate::object::Graph::new();
456
457					for (id, resource) in &graph.resources {
458						if resource.references != 1 && !resource.is_empty() {
459							value.insert(render_object(
460								vocabulary,
461								interpretation,
462								rdf_terms,
463								graph,
464								id,
465								resource,
466								context,
467							)?);
468						}
469					}
470
471					node.graph = Some(value)
472				}
473
474				for (prop, objects) in &resource.properties {
475					insert_property(
476						vocabulary,
477						interpretation,
478						rdf_terms,
479						graph,
480						&mut node,
481						prop,
482						objects.iter().copied(),
483						context,
484					)?;
485				}
486
487				if !resource.list.first.is_empty() {
488					let rdf_first_id = rdf_terms.first.unwrap();
489					insert_property(
490						vocabulary,
491						interpretation,
492						rdf_terms,
493						graph,
494						&mut node,
495						rdf_first_id,
496						resource.list.first.iter().copied(),
497						context,
498					)?;
499				}
500
501				if !resource.list.rest.is_empty() {
502					let rdf_rest_id = rdf_terms.rest.unwrap();
503					insert_property(
504						vocabulary,
505						interpretation,
506						rdf_terms,
507						graph,
508						&mut node,
509						rdf_rest_id,
510						resource.list.rest.iter().copied(),
511						context,
512					)?;
513				}
514
515				Ok(Indexed::none(Object::node(node)))
516			}
517		}
518	}
519}
520
521#[allow(clippy::too_many_arguments)]
522fn insert_property<'a, V, I, O>(
523	vocabulary: &V,
524	interpretation: &I,
525	rdf_terms: RdfTerms<&'a I::Resource>,
526	graph: &SerGraph<&'a I::Resource>,
527	node: &mut Node<V::Iri, V::BlankId>,
528	prop: &I::Resource,
529	values: O,
530	context: linked_data::Context<I>,
531) -> Result<(), SerializationError>
532where
533	V: Vocabulary,
534	I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
535	V::Iri: Clone + Eq + Hash,
536	V::BlankId: Clone + Eq + Hash,
537	I::Resource: 'a + Ord,
538	O: IntoIterator<Item = &'a I::Resource>,
539	O::IntoIter: ExactSizeIterator,
540{
541	let context = context.with_predicate(prop);
542	match id_of(interpretation, prop) {
543		Some(prop) => {
544			let mut values = values.into_iter();
545
546			while values.len() > 1 {
547				let value = values.next().unwrap();
548				let v = render_object_or_reference(
549					vocabulary,
550					interpretation,
551					rdf_terms,
552					graph,
553					value,
554					context,
555				)?;
556				node.insert(prop.clone(), v);
557			}
558
559			if let Some(value) = values.next() {
560				let v = render_object_or_reference(
561					vocabulary,
562					interpretation,
563					rdf_terms,
564					graph,
565					value,
566					context,
567				)?;
568				node.insert(prop, v);
569			}
570
571			Ok(())
572		}
573		None => Ok(()),
574	}
575}
576
577fn render_object_or_reference<V, I>(
578	vocabulary: &V,
579	interpretation: &I,
580	rdf_terms: RdfTerms<&I::Resource>,
581	graph: &SerGraph<&I::Resource>,
582	id: &I::Resource,
583	context: linked_data::Context<I>,
584) -> Result<IndexedObject<V::Iri, V::BlankId>, SerializationError>
585where
586	V: Vocabulary,
587	I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
588	V::Iri: Clone + Eq + Hash,
589	V::BlankId: Clone + Eq + Hash,
590	I::Resource: Ord,
591{
592	match graph.get(&id) {
593		Some(resource) => {
594			if resource.references == 1 && !resource.is_empty() {
595				render_object(
596					vocabulary,
597					interpretation,
598					rdf_terms,
599					graph,
600					id,
601					resource,
602					context,
603				)
604			} else {
605				render_reference(vocabulary, interpretation, id, context)
606			}
607		}
608		None => render_reference(vocabulary, interpretation, id, context),
609	}
610}
611
612fn render_reference<V, I>(
613	vocabulary: &V,
614	interpretation: &I,
615	id: &I::Resource,
616	context: linked_data::Context<I>,
617) -> Result<IndexedObject<V::Iri, V::BlankId>, SerializationError>
618where
619	V: Vocabulary,
620	I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
621	V::Iri: Clone,
622	V::BlankId: Clone,
623	I::Resource: Ord,
624{
625	match term_of(vocabulary, interpretation, id, context)? {
626		Some(Term::Id(id)) => Ok(Indexed::none(Object::node(Node::with_id(id)))),
627		Some(Term::Literal(value)) => Ok(Indexed::none(Object::Value(value))),
628		None => Ok(Indexed::none(Object::node(Node::new()))),
629	}
630}
631
632fn id_of<T>(interpretation: &T, resource: &T::Resource) -> Option<Id<T::Iri, T::BlankId>>
633where
634	T: ReverseIdInterpretation,
635	T::Iri: Clone,
636	T::BlankId: Clone,
637{
638	interpretation
639		.iris_of(resource)
640		.next()
641		.map(|i| Id::Valid(ValidId::Iri(i.clone())))
642		.or_else(|| {
643			interpretation
644				.blank_ids_of(resource)
645				.next()
646				.map(|b| Id::Valid(ValidId::Blank(b.clone())))
647		})
648}
649
650type ResourceTerm<V> = Term<
651	Id<<V as IriVocabulary>::Iri, <V as BlankIdVocabulary>::BlankId>,
652	Value<<V as IriVocabulary>::Iri>,
653>;
654
655fn term_of<V, T>(
656	vocabulary: &V,
657	interpretation: &T,
658	resource: &T::Resource,
659	context: linked_data::Context<T>,
660) -> Result<Option<ResourceTerm<V>>, SerializationError>
661where
662	V: Vocabulary,
663	T: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
664	V::Iri: Clone,
665	V::BlankId: Clone,
666{
667	match id_of(interpretation, resource) {
668		Some(id) => Ok(Some(Term::Id(id))),
669		None => match interpretation.literals_of(resource).next() {
670			Some(l) => {
671				let l = vocabulary.literal(l).unwrap();
672				let value = match l.type_ {
673					LiteralTypeRef::Any(i) => {
674						let ty = vocabulary.iri(i).unwrap();
675						if ty == RDF_JSON {
676							let (json, _) =
677								json_syntax::Value::parse_str(l.value).map_err(|e| {
678									SerializationError::InvalidJson(
679										context.into_iris(vocabulary, interpretation),
680										e,
681									)
682								})?;
683							Value::Json(json)
684						} else if ty == XSD_BOOLEAN {
685							let b = match l.as_ref() {
686								"true" | "1" => true,
687								"false" | "0" => false,
688								other => {
689									return Err(SerializationError::InvalidBoolean(
690										context.into_iris(vocabulary, interpretation),
691										other.to_owned(),
692									))
693								}
694							};
695
696							Value::Literal(Literal::Boolean(b), Some(i.clone()))
697						} else if ty == XSD_INTEGER || ty == XSD_DOUBLE {
698							let n = json_syntax::NumberBuf::from_str(l.as_str()).map_err(|_| {
699								SerializationError::Number(
700									context.into_iris(vocabulary, interpretation),
701									l.as_ref().to_owned(),
702								)
703							})?;
704							Value::Literal(Literal::Number(n), Some(i.clone()))
705						} else if ty == XSD_STRING {
706							Value::Literal(Literal::String(l.as_ref().into()), None)
707						} else {
708							Value::Literal(Literal::String(l.as_ref().into()), Some(i.clone()))
709						}
710					}
711					LiteralTypeRef::LangString(tag) => Value::LangString(
712						LangString::new(l.value.into(), Some(tag.to_owned().into()), None).unwrap(),
713					),
714				};
715
716				Ok(Some(Term::Literal(value)))
717			}
718			None => Ok(None),
719		},
720	}
721}
722
723impl<V, I> LinkedDataDeserialize<V, I> for ExpandedDocument<V::Iri, V::BlankId>
724where
725	V: Vocabulary,
726	I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
727	I::Resource: Ord + Hash,
728	V::Iri: Clone + Eq + Hash,
729	V::BlankId: Clone + Eq + Hash,
730{
731	fn deserialize_dataset_in(
732		vocabulary: &V,
733		interpretation: &I,
734		dataset: &(impl TraversableDataset<Resource = I::Resource> + PatternMatchingDataset),
735		context: linked_data::Context<I>,
736	) -> Result<Self, FromLinkedDataError> {
737		Self::from_interpreted_quads(vocabulary, interpretation, dataset.quads()).map_err(|_| {
738			FromLinkedDataError::InvalidLiteral(context.into_iris(vocabulary, interpretation))
739		})
740	}
741}