1use hashbrown::HashSet;
2use iref::Iri;
3use json_syntax::Parse;
4use linked_data::{FromLinkedDataError, LinkedDataDeserialize};
5use rdf_types::{
6 dataset::{PatternMatchingDataset, TraversableDataset},
7 interpretation::{
8 ReverseIdInterpretation, ReverseIriInterpretation, ReverseTermInterpretation,
9 },
10 vocabulary::{BlankIdVocabulary, IriVocabulary},
11 LiteralTypeRef, Quad, Term, Vocabulary,
12};
13use static_iref::iri;
14use std::{
15 collections::{BTreeMap, BTreeSet},
16 hash::Hash,
17 str::FromStr,
18};
19
20use crate::{
21 object::{List, Literal},
22 rdf::{
23 RDF_FIRST, RDF_JSON, RDF_NIL, RDF_REST, RDF_TYPE, XSD_BOOLEAN, XSD_DOUBLE, XSD_INTEGER,
24 XSD_STRING,
25 },
26 ExpandedDocument, Id, Indexed, IndexedObject, LangString, Node, Object, ValidId, Value,
27};
28
29struct SerDataset<R> {
30 named_graphs: BTreeMap<R, SerGraph<R>>,
31 default_graph: SerGraph<R>,
32}
33
34impl<R> SerDataset<R> {
35 fn new() -> Self {
36 Self {
37 named_graphs: BTreeMap::new(),
38 default_graph: SerGraph::new(),
39 }
40 }
41}
42
43impl<R: Ord> SerDataset<R> {
44 fn graph_mut(&mut self, label: Option<R>) -> &mut SerGraph<R>
45 where
46 R: Ord,
47 {
48 match label {
49 Some(g) => self.named_graphs.entry(g).or_insert_with(SerGraph::new),
50 None => &mut self.default_graph,
51 }
52 }
53
54 fn fold_into_default_graph(mut self) -> SerGraph<R> {
55 for (id, graph) in self.named_graphs {
56 self.default_graph.resource_mut(id).graph = Some(graph);
57 }
58
59 self.default_graph
60 }
61}
62
63struct SerGraph<R> {
64 resources: BTreeMap<R, SerResource<R>>,
65}
66
67struct SerList<R> {
68 first: HashSet<R>,
69 rest: HashSet<R>,
70 reverse_rest: HashSet<R>,
71 values: Option<Vec<R>>,
72}
73
74impl<R> Default for SerList<R> {
75 fn default() -> Self {
76 Self {
77 first: HashSet::new(),
78 rest: HashSet::new(),
79 reverse_rest: HashSet::new(),
80 values: None,
81 }
82 }
83}
84
85impl<R> SerList<R> {
86 fn is_well_formed(&self) -> bool {
87 self.first.len() == 1 && self.rest.len() == 1
88 }
89
90 fn is_empty(&self) -> bool {
91 self.first.is_empty() && self.rest.is_empty()
92 }
93}
94
95struct SerResource<R> {
96 types: BTreeSet<RdfType<R>>,
97 properties: BTreeMap<R, BTreeSet<R>>,
98 graph: Option<SerGraph<R>>,
99 list: SerList<R>,
100 references: usize,
101}
102
103impl<R> Default for SerResource<R> {
104 fn default() -> Self {
105 Self {
106 types: BTreeSet::new(),
107 properties: BTreeMap::new(),
108 graph: None,
109 list: SerList::default(),
110 references: 0,
111 }
112 }
113}
114
115impl<R> SerResource<R> {
116 fn is_empty(&self) -> bool {
117 self.types.is_empty()
118 && self.properties.is_empty()
119 && self.graph.is_none()
120 && self.list.is_empty()
121 }
122
123 fn is_list_node(&self) -> bool {
124 self.types.iter().all(|ty| ty.is_list())
125 && self.properties.is_empty()
126 && self.graph.is_none()
127 && self.list.is_well_formed()
128 }
129
130 fn insert(&mut self, prop: R, object: R)
131 where
132 R: Ord,
133 {
134 self.properties.entry(prop).or_default().insert(object);
135 }
136}
137
138impl<R> SerGraph<R> {
139 fn new() -> Self {
140 Self {
141 resources: BTreeMap::new(),
142 }
143 }
144
145 fn get(&self, id: &R) -> Option<&SerResource<R>>
146 where
147 R: Ord,
148 {
149 self.resources.get(id)
150 }
151
152 fn resource_mut(&mut self, id: R) -> &mut SerResource<R>
153 where
154 R: Ord,
155 {
156 self.resources.entry(id).or_default()
157 }
158}
159
/// RDF properties that receive a dedicated treatment.
enum RdfProperty {
    /// `rdf:type`.
    Type,

    /// `rdf:first`.
    First,

    /// `rdf:rest`.
    Rest,
}
165
166fn rdf_property<V: IriVocabulary, I: ReverseIriInterpretation<Iri = V::Iri>>(
167 vocabulary: &V,
168 interpretation: &I,
169 id: &I::Resource,
170) -> Option<RdfProperty> {
171 for i in interpretation.iris_of(id) {
172 let iri = vocabulary.iri(i).unwrap();
173 if iri == RDF_TYPE {
174 return Some(RdfProperty::Type);
175 } else if iri == RDF_FIRST {
176 return Some(RdfProperty::First);
177 } else if iri == RDF_REST {
178 return Some(RdfProperty::Rest);
179 }
180 }
181
182 None
183}
184
/// Type of a resource, with `rdf:List` singled out.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum RdfType<R> {
    /// The `rdf:List` type.
    List,

    /// Any other type, identified by its resource.
    Other(R),
}

impl<R> RdfType<R> {
    /// Checks whether this is the `rdf:List` type.
    fn is_list(&self) -> bool {
        match self {
            Self::List => true,
            Self::Other(_) => false,
        }
    }
}
196
197const RDF_LIST: &Iri = iri!("http://www.w3.org/1999/02/22-rdf-syntax-ns#List");
198
199fn rdf_type<'a, V: IriVocabulary, I: ReverseIriInterpretation<Iri = V::Iri>>(
200 vocabulary: &V,
201 interpretation: &I,
202 id: &'a I::Resource,
203) -> RdfType<&'a I::Resource> {
204 for i in interpretation.iris_of(id) {
205 let iri = vocabulary.iri(i).unwrap();
206 if iri == RDF_LIST {
207 return RdfType::List;
208 }
209 }
210
211 RdfType::Other(id)
212}
213
214fn is_anonymous<I: ReverseTermInterpretation>(interpretation: &I, id: &I::Resource) -> bool {
215 interpretation.iris_of(id).next().is_none() && interpretation.literals_of(id).next().is_none()
216}
217
218#[derive(Debug, thiserror::Error)]
219pub enum SerializationError {
220 #[error("invalid JSON")]
221 InvalidJson(linked_data::ContextIris, json_syntax::parse::Error),
222
223 #[error("invalid boolean value")]
224 InvalidBoolean(linked_data::ContextIris, String),
225
226 #[error("invalid number value")]
227 Number(linked_data::ContextIris, String),
228}
229
/// Resources found in the input that denote the RDF list vocabulary.
///
/// Each field stays `None` until a matching term has been seen.
#[derive(Clone, Copy)]
pub struct RdfTerms<R> {
    /// Resource denoting `rdf:List`.
    list: Option<R>,

    /// Resource denoting `rdf:first`.
    first: Option<R>,

    /// Resource denoting `rdf:rest`.
    rest: Option<R>,
}
236
237impl<I, B> ExpandedDocument<I, B> {
238 pub fn from_interpreted_quads_in<'a, V, T>(
239 vocabulary: &V,
240 interpretation: &T,
241 quads: impl IntoIterator<
242 Item = Quad<&'a T::Resource, &'a T::Resource, &'a T::Resource, &'a T::Resource>,
243 >,
244 context: linked_data::Context<T>,
245 ) -> Result<Self, SerializationError>
246 where
247 V: Vocabulary<Iri = I, BlankId = B>,
248 T: ReverseTermInterpretation<Iri = I, BlankId = B, Literal = V::Literal>,
249 T::Resource: 'a + Ord + Hash,
250 I: Clone + Eq + Hash,
251 B: Clone + Eq + Hash,
252 {
253 let mut node_map: SerDataset<&'a T::Resource> = SerDataset::new();
254
255 let mut nil = None;
256 let mut rdf_terms = RdfTerms {
257 list: None,
258 first: None,
259 rest: None,
260 };
261
262 for quad in quads {
263 let graph = node_map.graph_mut(quad.3);
264 let subject = graph.resource_mut(quad.0);
265
266 match rdf_property(vocabulary, interpretation, quad.1) {
267 Some(RdfProperty::Type) => {
268 rdf_terms.first = Some(quad.1);
269 let ty = rdf_type(vocabulary, interpretation, quad.2);
270
271 if ty.is_list() {
272 rdf_terms.list = Some(quad.2);
273 }
274
275 subject.types.insert(ty);
276 }
277 Some(RdfProperty::First) => {
278 rdf_terms.first = Some(quad.1);
279 subject.list.first.insert(quad.2);
280 }
281 Some(RdfProperty::Rest) => {
282 rdf_terms.rest = Some(quad.1);
283 if nil.is_none() {
284 for i in interpretation.iris_of(quad.2) {
285 let iri = vocabulary.iri(i).unwrap();
286 if iri == RDF_NIL {
287 nil = Some(quad.2);
288 }
289 }
290 }
291
292 subject.list.rest.insert(quad.2);
293 graph.resource_mut(quad.2).list.reverse_rest.insert(quad.1);
294 }
295 None => {
296 subject.insert(quad.1, quad.2);
297 }
298 }
299
300 let object = graph.resource_mut(quad.2);
301 if quad.1 == quad.2 {
302 object.references = usize::MAX;
303 } else {
304 let r = object.references;
305 object.references = r.saturating_add(1)
306 }
307 }
308
309 let mut graph = node_map.fold_into_default_graph();
310
311 let mut lists = Vec::new();
312 if let Some(nil_id) = nil {
313 if let Some(nil) = graph.get(&nil_id) {
314 for &node_id in &nil.list.reverse_rest {
315 let mut head_id = node_id;
316 if is_anonymous(interpretation, head_id) {
317 if let Some(mut head) = graph.get(&head_id) {
318 if head.references == 1 && head.is_list_node() {
319 let mut values = Vec::new();
320
321 loop {
322 let first = head.list.first.iter().next().copied().unwrap();
323 let parent_id =
324 head.list.reverse_rest.iter().next().copied().unwrap();
325 values.push(first);
326
327 if is_anonymous(interpretation, parent_id) {
328 if let Some(parent) = graph.get(&parent_id) {
329 if parent.references == 1 && parent.is_list_node() {
330 head_id = parent_id;
331 head = parent;
332 continue;
333 }
334 }
335 }
336
337 break;
338 }
339
340 values.reverse();
341 lists.push((head_id, values))
342 }
343 }
344 }
345 }
346 }
347 }
348
349 for (id, values) in lists {
350 graph.resource_mut(id).list.values = Some(values)
351 }
352
353 let mut result = ExpandedDocument::new();
354 for (id, resource) in &graph.resources {
355 if resource.references != 1 && !resource.is_empty() {
356 result.insert(render_object(
357 vocabulary,
358 interpretation,
359 rdf_terms,
360 &graph,
361 id,
362 resource,
363 context,
364 )?);
365 }
366 }
367
368 Ok(result)
369 }
370
371 pub fn from_interpreted_quads<'a, V, T>(
372 vocabulary: &V,
373 interpretation: &T,
374 quads: impl IntoIterator<
375 Item = Quad<&'a T::Resource, &'a T::Resource, &'a T::Resource, &'a T::Resource>,
376 >,
377 ) -> Result<Self, SerializationError>
378 where
379 V: Vocabulary<Iri = I, BlankId = B>,
380 T: ReverseTermInterpretation<Iri = I, BlankId = B, Literal = V::Literal>,
381 T::Resource: 'a + Ord + Hash,
382 I: Clone + Eq + Hash,
383 B: Clone + Eq + Hash,
384 {
385 Self::from_interpreted_quads_in(
386 vocabulary,
387 interpretation,
388 quads,
389 linked_data::Context::default(),
390 )
391 }
392}
393
394fn render_object<V, I>(
395 vocabulary: &V,
396 interpretation: &I,
397 rdf_terms: RdfTerms<&I::Resource>,
398 graph: &SerGraph<&I::Resource>,
399 id: &I::Resource,
400 resource: &SerResource<&I::Resource>,
401 context: linked_data::Context<I>,
402) -> Result<IndexedObject<V::Iri, V::BlankId>, SerializationError>
403where
404 V: Vocabulary,
405 I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
406 V::Iri: Clone + Eq + Hash,
407 V::BlankId: Clone + Eq + Hash,
408 I::Resource: Ord,
409{
410 let context = context.with_subject(id);
411 if resource.is_empty() {
412 render_reference(vocabulary, interpretation, id, context)
413 } else {
414 match &resource.list.values {
415 Some(values) => {
416 let mut objects = Vec::with_capacity(values.len());
417
418 for value in values {
419 objects.push(render_object_or_reference(
420 vocabulary,
421 interpretation,
422 rdf_terms,
423 graph,
424 value,
425 context,
426 )?);
427 }
428
429 Ok(Indexed::none(Object::List(List::new(objects))))
430 }
431 None => {
432 let mut node: Node<V::Iri, V::BlankId> = Node::new();
433
434 if let Some(id) = id_of(interpretation, id) {
435 node.id = Some(id)
436 }
437
438 let mut types = Vec::with_capacity(resource.types.len());
439 for ty in &resource.types {
440 let ty_resource = match ty {
441 RdfType::List => rdf_terms.list.unwrap(),
442 RdfType::Other(o) => o,
443 };
444
445 if let Some(ty_id) = id_of(interpretation, ty_resource) {
446 types.push(ty_id)
447 }
448 }
449
450 if !types.is_empty() {
451 node.types = Some(types);
452 }
453
454 if let Some(graph) = &resource.graph {
455 let mut value = crate::object::Graph::new();
456
457 for (id, resource) in &graph.resources {
458 if resource.references != 1 && !resource.is_empty() {
459 value.insert(render_object(
460 vocabulary,
461 interpretation,
462 rdf_terms,
463 graph,
464 id,
465 resource,
466 context,
467 )?);
468 }
469 }
470
471 node.graph = Some(value)
472 }
473
474 for (prop, objects) in &resource.properties {
475 insert_property(
476 vocabulary,
477 interpretation,
478 rdf_terms,
479 graph,
480 &mut node,
481 prop,
482 objects.iter().copied(),
483 context,
484 )?;
485 }
486
487 if !resource.list.first.is_empty() {
488 let rdf_first_id = rdf_terms.first.unwrap();
489 insert_property(
490 vocabulary,
491 interpretation,
492 rdf_terms,
493 graph,
494 &mut node,
495 rdf_first_id,
496 resource.list.first.iter().copied(),
497 context,
498 )?;
499 }
500
501 if !resource.list.rest.is_empty() {
502 let rdf_rest_id = rdf_terms.rest.unwrap();
503 insert_property(
504 vocabulary,
505 interpretation,
506 rdf_terms,
507 graph,
508 &mut node,
509 rdf_rest_id,
510 resource.list.rest.iter().copied(),
511 context,
512 )?;
513 }
514
515 Ok(Indexed::none(Object::node(node)))
516 }
517 }
518 }
519}
520
521#[allow(clippy::too_many_arguments)]
522fn insert_property<'a, V, I, O>(
523 vocabulary: &V,
524 interpretation: &I,
525 rdf_terms: RdfTerms<&'a I::Resource>,
526 graph: &SerGraph<&'a I::Resource>,
527 node: &mut Node<V::Iri, V::BlankId>,
528 prop: &I::Resource,
529 values: O,
530 context: linked_data::Context<I>,
531) -> Result<(), SerializationError>
532where
533 V: Vocabulary,
534 I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
535 V::Iri: Clone + Eq + Hash,
536 V::BlankId: Clone + Eq + Hash,
537 I::Resource: 'a + Ord,
538 O: IntoIterator<Item = &'a I::Resource>,
539 O::IntoIter: ExactSizeIterator,
540{
541 let context = context.with_predicate(prop);
542 match id_of(interpretation, prop) {
543 Some(prop) => {
544 let mut values = values.into_iter();
545
546 while values.len() > 1 {
547 let value = values.next().unwrap();
548 let v = render_object_or_reference(
549 vocabulary,
550 interpretation,
551 rdf_terms,
552 graph,
553 value,
554 context,
555 )?;
556 node.insert(prop.clone(), v);
557 }
558
559 if let Some(value) = values.next() {
560 let v = render_object_or_reference(
561 vocabulary,
562 interpretation,
563 rdf_terms,
564 graph,
565 value,
566 context,
567 )?;
568 node.insert(prop, v);
569 }
570
571 Ok(())
572 }
573 None => Ok(()),
574 }
575}
576
577fn render_object_or_reference<V, I>(
578 vocabulary: &V,
579 interpretation: &I,
580 rdf_terms: RdfTerms<&I::Resource>,
581 graph: &SerGraph<&I::Resource>,
582 id: &I::Resource,
583 context: linked_data::Context<I>,
584) -> Result<IndexedObject<V::Iri, V::BlankId>, SerializationError>
585where
586 V: Vocabulary,
587 I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
588 V::Iri: Clone + Eq + Hash,
589 V::BlankId: Clone + Eq + Hash,
590 I::Resource: Ord,
591{
592 match graph.get(&id) {
593 Some(resource) => {
594 if resource.references == 1 && !resource.is_empty() {
595 render_object(
596 vocabulary,
597 interpretation,
598 rdf_terms,
599 graph,
600 id,
601 resource,
602 context,
603 )
604 } else {
605 render_reference(vocabulary, interpretation, id, context)
606 }
607 }
608 None => render_reference(vocabulary, interpretation, id, context),
609 }
610}
611
612fn render_reference<V, I>(
613 vocabulary: &V,
614 interpretation: &I,
615 id: &I::Resource,
616 context: linked_data::Context<I>,
617) -> Result<IndexedObject<V::Iri, V::BlankId>, SerializationError>
618where
619 V: Vocabulary,
620 I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
621 V::Iri: Clone,
622 V::BlankId: Clone,
623 I::Resource: Ord,
624{
625 match term_of(vocabulary, interpretation, id, context)? {
626 Some(Term::Id(id)) => Ok(Indexed::none(Object::node(Node::with_id(id)))),
627 Some(Term::Literal(value)) => Ok(Indexed::none(Object::Value(value))),
628 None => Ok(Indexed::none(Object::node(Node::new()))),
629 }
630}
631
632fn id_of<T>(interpretation: &T, resource: &T::Resource) -> Option<Id<T::Iri, T::BlankId>>
633where
634 T: ReverseIdInterpretation,
635 T::Iri: Clone,
636 T::BlankId: Clone,
637{
638 interpretation
639 .iris_of(resource)
640 .next()
641 .map(|i| Id::Valid(ValidId::Iri(i.clone())))
642 .or_else(|| {
643 interpretation
644 .blank_ids_of(resource)
645 .next()
646 .map(|b| Id::Valid(ValidId::Blank(b.clone())))
647 })
648}
649
650type ResourceTerm<V> = Term<
651 Id<<V as IriVocabulary>::Iri, <V as BlankIdVocabulary>::BlankId>,
652 Value<<V as IriVocabulary>::Iri>,
653>;
654
655fn term_of<V, T>(
656 vocabulary: &V,
657 interpretation: &T,
658 resource: &T::Resource,
659 context: linked_data::Context<T>,
660) -> Result<Option<ResourceTerm<V>>, SerializationError>
661where
662 V: Vocabulary,
663 T: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
664 V::Iri: Clone,
665 V::BlankId: Clone,
666{
667 match id_of(interpretation, resource) {
668 Some(id) => Ok(Some(Term::Id(id))),
669 None => match interpretation.literals_of(resource).next() {
670 Some(l) => {
671 let l = vocabulary.literal(l).unwrap();
672 let value = match l.type_ {
673 LiteralTypeRef::Any(i) => {
674 let ty = vocabulary.iri(i).unwrap();
675 if ty == RDF_JSON {
676 let (json, _) =
677 json_syntax::Value::parse_str(l.value).map_err(|e| {
678 SerializationError::InvalidJson(
679 context.into_iris(vocabulary, interpretation),
680 e,
681 )
682 })?;
683 Value::Json(json)
684 } else if ty == XSD_BOOLEAN {
685 let b = match l.as_ref() {
686 "true" | "1" => true,
687 "false" | "0" => false,
688 other => {
689 return Err(SerializationError::InvalidBoolean(
690 context.into_iris(vocabulary, interpretation),
691 other.to_owned(),
692 ))
693 }
694 };
695
696 Value::Literal(Literal::Boolean(b), Some(i.clone()))
697 } else if ty == XSD_INTEGER || ty == XSD_DOUBLE {
698 let n = json_syntax::NumberBuf::from_str(l.as_str()).map_err(|_| {
699 SerializationError::Number(
700 context.into_iris(vocabulary, interpretation),
701 l.as_ref().to_owned(),
702 )
703 })?;
704 Value::Literal(Literal::Number(n), Some(i.clone()))
705 } else if ty == XSD_STRING {
706 Value::Literal(Literal::String(l.as_ref().into()), None)
707 } else {
708 Value::Literal(Literal::String(l.as_ref().into()), Some(i.clone()))
709 }
710 }
711 LiteralTypeRef::LangString(tag) => Value::LangString(
712 LangString::new(l.value.into(), Some(tag.to_owned().into()), None).unwrap(),
713 ),
714 };
715
716 Ok(Some(Term::Literal(value)))
717 }
718 None => Ok(None),
719 },
720 }
721}
722
723impl<V, I> LinkedDataDeserialize<V, I> for ExpandedDocument<V::Iri, V::BlankId>
724where
725 V: Vocabulary,
726 I: ReverseTermInterpretation<Iri = V::Iri, BlankId = V::BlankId, Literal = V::Literal>,
727 I::Resource: Ord + Hash,
728 V::Iri: Clone + Eq + Hash,
729 V::BlankId: Clone + Eq + Hash,
730{
731 fn deserialize_dataset_in(
732 vocabulary: &V,
733 interpretation: &I,
734 dataset: &(impl TraversableDataset<Resource = I::Resource> + PatternMatchingDataset),
735 context: linked_data::Context<I>,
736 ) -> Result<Self, FromLinkedDataError> {
737 Self::from_interpreted_quads(vocabulary, interpretation, dataset.quads()).map_err(|_| {
738 FromLinkedDataError::InvalidLiteral(context.into_iris(vocabulary, interpretation))
739 })
740 }
741}