grdf/utils/isomorphism/
btree.rs

1use super::blank_term_matches;
2use crate::{Dataset, Quad};
3use educe::Educe;
4use rdf_types::{AsRdfTerm, BlankIdBuf, Id};
5use std::collections::btree_map::Entry;
6use std::collections::{BTreeMap, BTreeSet};
7
8pub(crate) trait FindBTreeBlankIdBijection<V: Dataset>: Dataset {
9	fn blank_quad_matches<'u, 'v, UI, UB, UL, VI, VB, VL>(
10		a: Quad<&'u Self::Subject, &'u Self::Predicate, &'u Self::Object, &'u Self::GraphLabel>,
11		b: Quad<&'v V::Subject, &'v V::Predicate, &'v V::Object, &'v V::GraphLabel>,
12	) -> bool
13	where
14		Self::Subject: AsRdfTerm<UI, UB, UL>,
15		Self::Predicate: AsRdfTerm<UI, UB, UL>,
16		Self::Object: AsRdfTerm<UI, UB, UL>,
17		Self::GraphLabel: AsRdfTerm<UI, UB, UL>,
18		V::Subject: AsRdfTerm<VI, VB, VL>,
19		V::Predicate: AsRdfTerm<VI, VB, VL>,
20		V::Object: AsRdfTerm<VI, VB, VL>,
21		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
22		UI: PartialEq<VI>,
23		UL: PartialEq<VL>;
24
25	fn blank_quad_matches_with<'u, 'v, UI, UB, UL, VI, VB, VL>(
26		sigma: &BTreeBijection<'u, 'v, UB, VB>,
27		a: Quad<&'u Self::Subject, &'u Self::Predicate, &'u Self::Object, &'u Self::GraphLabel>,
28		b: Quad<&'v V::Subject, &'v V::Predicate, &'v V::Object, &'v V::GraphLabel>,
29	) -> bool
30	where
31		Self::Subject: AsRdfTerm<UI, UB, UL>,
32		Self::Predicate: AsRdfTerm<UI, UB, UL>,
33		Self::Object: AsRdfTerm<UI, UB, UL>,
34		Self::GraphLabel: AsRdfTerm<UI, UB, UL>,
35		V::Subject: AsRdfTerm<VI, VB, VL>,
36		V::Predicate: AsRdfTerm<VI, VB, VL>,
37		V::Object: AsRdfTerm<VI, VB, VL>,
38		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
39		UI: PartialEq<VI>,
40		UL: PartialEq<VL>,
41		UB: Ord,
42		VB: Ord;
43
44	/// Find a blank node identifier substitution from the blank no identifiers of `a` to the blank node identifiers of `b`
45	/// so that the sub-dataset of `a` containing all the quads with blank node identifier
46	/// is equal to the sub-dataset of `b` containing all the quads with a blank node identifier
47	/// once the substitution if applied.
48	///
49	/// The function ignores all the quads having no blank node identifiers.
50	#[allow(clippy::type_complexity)]
51	fn find_btree_blank_id_bijection<'u, 'v, UI: 'u, UB, UL: 'u, VI: 'v, VB, VL: 'v>(
52		a: &'u Self,
53		b: &'v V,
54	) -> Option<BTreeBijection<'u, 'v, UB, VB>>
55	where
56		Self::Subject: Ord + AsRdfTerm<UI, UB, UL>,
57		Self::Predicate: Ord + AsRdfTerm<UI, UB, UL>,
58		Self::Object: Ord + AsRdfTerm<UI, UB, UL>,
59		Self::GraphLabel: Ord + AsRdfTerm<UI, UB, UL>,
60		V::Subject: Ord + AsRdfTerm<VI, VB, VL>,
61		V::Predicate: Ord + AsRdfTerm<VI, VB, VL>,
62		V::Object: Ord + AsRdfTerm<VI, VB, VL>,
63		V::GraphLabel: Ord + AsRdfTerm<VI, VB, VL>,
64		UI: PartialEq<VI>,
65		UL: PartialEq<VL>,
66		UB: Ord,
67		VB: Ord;
68
69	#[allow(clippy::type_complexity)]
70	fn find_from_candidates<'a, 'b, UI, UB, UL, VI, VB, VL>(
71		candidates: std::collections::btree_map::Iter<&'a UB, BTreeSet<&'b VB>>,
72		sigma: BTreeBijection<'a, 'b, UB, VB>,
73		a: &BTreeMap<&'a UB, BlankSignature<'a, Self>>,
74		b: &BTreeMap<&'b VB, BlankSignature<'b, V>>,
75	) -> Option<BTreeBijection<'a, 'b, UB, VB>>
76	where
77		Self::Subject: AsRdfTerm<UI, UB, UL>,
78		Self::Predicate: AsRdfTerm<UI, UB, UL>,
79		Self::Object: AsRdfTerm<UI, UB, UL>,
80		Self::GraphLabel: AsRdfTerm<UI, UB, UL>,
81		V::Subject: AsRdfTerm<VI, VB, VL>,
82		V::Predicate: AsRdfTerm<VI, VB, VL>,
83		V::Object: AsRdfTerm<VI, VB, VL>,
84		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
85		UI: PartialEq<VI>,
86		UL: PartialEq<VL>,
87		UB: Ord,
88		VB: Ord;
89}
90
91impl<U: Dataset, V: Dataset> FindBTreeBlankIdBijection<V> for U {
92	fn blank_quad_matches<'u, 'v, UI, UB, UL, VI, VB, VL>(
93		a: Quad<&'u U::Subject, &'u U::Predicate, &'u U::Object, &'u U::GraphLabel>,
94		b: Quad<&'v V::Subject, &'v V::Predicate, &'v V::Object, &'v V::GraphLabel>,
95	) -> bool
96	where
97		U::Subject: AsRdfTerm<UI, UB, UL>,
98		U::Predicate: AsRdfTerm<UI, UB, UL>,
99		U::Object: AsRdfTerm<UI, UB, UL>,
100		U::GraphLabel: AsRdfTerm<UI, UB, UL>,
101		V::Subject: AsRdfTerm<VI, VB, VL>,
102		V::Predicate: AsRdfTerm<VI, VB, VL>,
103		V::Object: AsRdfTerm<VI, VB, VL>,
104		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
105		UI: PartialEq<VI>,
106		UL: PartialEq<VL>,
107	{
108		blank_term_matches(a.subject().as_rdf_term(), b.subject().as_rdf_term())
109			&& blank_term_matches(a.predicate().as_rdf_term(), b.predicate().as_rdf_term())
110			&& blank_term_matches(a.object().as_rdf_term(), b.object().as_rdf_term())
111			&& match (a.graph(), b.graph()) {
112				(Some(a), Some(b)) => blank_term_matches(a.as_rdf_term(), b.as_rdf_term()),
113				(None, None) => true,
114				_ => false,
115			}
116	}
117
118	fn blank_quad_matches_with<'u, 'v, UI, UB, UL, VI, VB, VL>(
119		sigma: &BTreeBijection<'u, 'v, UB, VB>,
120		a: Quad<&'u U::Subject, &'u U::Predicate, &'u U::Object, &'u U::GraphLabel>,
121		b: Quad<&'v V::Subject, &'v V::Predicate, &'v V::Object, &'v V::GraphLabel>,
122	) -> bool
123	where
124		U::Subject: AsRdfTerm<UI, UB, UL>,
125		U::Predicate: AsRdfTerm<UI, UB, UL>,
126		U::Object: AsRdfTerm<UI, UB, UL>,
127		U::GraphLabel: AsRdfTerm<UI, UB, UL>,
128		V::Subject: AsRdfTerm<VI, VB, VL>,
129		V::Predicate: AsRdfTerm<VI, VB, VL>,
130		V::Object: AsRdfTerm<VI, VB, VL>,
131		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
132		UI: PartialEq<VI>,
133		UL: PartialEq<VL>,
134		UB: Ord,
135		VB: Ord,
136	{
137		blank_term_matches_with(sigma, a.subject().as_rdf_term(), b.subject().as_rdf_term())
138			&& blank_term_matches_with(
139				sigma,
140				a.predicate().as_rdf_term(),
141				b.predicate().as_rdf_term(),
142			) && blank_term_matches_with(sigma, a.object().as_rdf_term(), b.object().as_rdf_term())
143			&& match (a.graph(), b.graph()) {
144				(Some(a), Some(b)) => {
145					blank_term_matches_with(sigma, a.as_rdf_term(), b.as_rdf_term())
146				}
147				(None, None) => true,
148				_ => false,
149			}
150	}
151
152	fn find_from_candidates<'u, 'v, UI, UB, UL, VI, VB, VL>(
153		mut candidates: std::collections::btree_map::Iter<&'u UB, BTreeSet<&'v VB>>,
154		sigma: BTreeBijection<'u, 'v, UB, VB>,
155		a: &BTreeMap<&'u UB, BlankSignature<'u, U>>,
156		b: &BTreeMap<&'v VB, BlankSignature<'v, V>>,
157	) -> Option<BTreeBijection<'u, 'v, UB, VB>>
158	where
159		U: FindBTreeBlankIdBijection<V>,
160		U::Subject: AsRdfTerm<UI, UB, UL>,
161		U::Predicate: AsRdfTerm<UI, UB, UL>,
162		U::Object: AsRdfTerm<UI, UB, UL>,
163		U::GraphLabel: AsRdfTerm<UI, UB, UL>,
164		V::Subject: AsRdfTerm<VI, VB, VL>,
165		V::Predicate: AsRdfTerm<VI, VB, VL>,
166		V::Object: AsRdfTerm<VI, VB, VL>,
167		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
168		UI: PartialEq<VI>,
169		UL: PartialEq<VL>,
170		UB: Ord,
171		VB: Ord,
172	{
173		match candidates.next() {
174			Some((a_blank_id, b_candidates)) => {
175				for b_candidate in b_candidates {
176					if !sigma.backward.contains_key(b_candidate) {
177						// eprintln!("analyzing candidate {} for {}", b_candidate, a_blank_id);
178
179						let mut new_sigma = sigma.clone();
180						new_sigma.insert(a_blank_id, b_candidate);
181						if a.get(a_blank_id)
182							.unwrap()
183							.matches_with(&new_sigma, b.get(b_candidate).unwrap())
184						{
185							// eprintln!("this is a valid candidate. continuing.");
186							if let Some(final_sigma) =
187								Self::find_from_candidates(candidates.clone(), new_sigma, a, b)
188							{
189								return Some(final_sigma);
190							}
191							// eprintln!("it didn't work out in the end. next candidate for {}.", a_blank_id);
192						}
193					}
194				}
195
196				// eprintln!("no valid candidate for {}", a_blank_id);
197				None
198			}
199			None => Some(sigma),
200		}
201	}
202
203	fn find_btree_blank_id_bijection<'u, 'v, UI: 'u, UB, UL: 'u, VI: 'v, VB, VL: 'v>(
204		a: &'u U,
205		b: &'v V,
206	) -> Option<BTreeBijection<'u, 'v, UB, VB>>
207	where
208		U::Subject: Ord + AsRdfTerm<UI, UB, UL>,
209		U::Predicate: Ord + AsRdfTerm<UI, UB, UL>,
210		U::Object: Ord + AsRdfTerm<UI, UB, UL>,
211		U::GraphLabel: Ord + AsRdfTerm<UI, UB, UL>,
212		V::Subject: Ord + AsRdfTerm<VI, VB, VL>,
213		V::Predicate: Ord + AsRdfTerm<VI, VB, VL>,
214		V::Object: Ord + AsRdfTerm<VI, VB, VL>,
215		V::GraphLabel: Ord + AsRdfTerm<VI, VB, VL>,
216		UI: PartialEq<VI>,
217		UL: PartialEq<VL>,
218		UB: Ord,
219		VB: Ord,
220	{
221		fn add_blank_quad<'d, D: Dataset, B: Ord>(
222			blanks: &mut BTreeMap<&'d B, BlankSignature<'d, D>>,
223			blank_id: &'d B,
224			quad: Quad<&'d D::Subject, &'d D::Predicate, &'d D::Object, &'d D::GraphLabel>,
225		) where
226			D::Subject: Ord,
227			D::Predicate: Ord,
228			D::Object: Ord,
229			D::GraphLabel: Ord,
230		{
231			match blanks.entry(blank_id) {
232				Entry::Vacant(entry) => {
233					entry.insert(BlankSignature::new(quad));
234				}
235				Entry::Occupied(mut entry) => entry.get_mut().insert(quad),
236			}
237		}
238
239		fn collect_signatures<'d, D: Dataset, I: 'd, B, L: 'd>(
240			map: &mut BTreeMap<&'d B, BlankSignature<'d, D>>,
241			ds: &'d D,
242		) where
243			D::Subject: Ord + AsRdfTerm<I, B, L>,
244			D::Predicate: Ord + AsRdfTerm<I, B, L>,
245			D::Object: Ord + AsRdfTerm<I, B, L>,
246			D::GraphLabel: Ord + AsRdfTerm<I, B, L>,
247			B: Ord,
248		{
249			for quad @ Quad(subject, predicate, object, graph) in ds.quads() {
250				let subject = subject.as_rdf_term();
251				let predicate = predicate.as_rdf_term();
252				let object = object.as_rdf_term();
253
254				if let Some(blank_id) = subject.as_blank() {
255					add_blank_quad(map, blank_id, quad);
256				}
257
258				if let Some(blank_id) = predicate.as_blank() {
259					add_blank_quad(map, blank_id, quad);
260				}
261
262				if let Some(blank_id) = object.as_blank() {
263					add_blank_quad(map, blank_id, quad);
264				}
265
266				if let Some(graph) = graph {
267					if let Some(blank_id) = graph.as_rdf_term().as_blank() {
268						add_blank_quad(map, blank_id, quad);
269					}
270				}
271			}
272		}
273
274		fn split_by_size<'s, 'd, D: Dataset, B: Ord>(
275			blanks: &'s BTreeMap<&'d B, BlankSignature<'d, D>>,
276		) -> BTreeMap<usize, BTreeMap<&'d B, &'s BlankSignature<'d, D>>> {
277			let mut result = BTreeMap::new();
278
279			for (&blank_id, sig) in blanks {
280				match result.entry(sig.len()) {
281					Entry::Vacant(entry) => {
282						let mut map = BTreeMap::new();
283						map.insert(blank_id, sig);
284						entry.insert(map);
285					}
286					Entry::Occupied(mut entry) => {
287						entry.get_mut().insert(blank_id, sig);
288					}
289				}
290			}
291
292			result
293		}
294
295		// Step 1: collect signatures.
296		let mut a_blanks_map = BTreeMap::new();
297		let mut b_blanks_map = BTreeMap::new();
298		collect_signatures(&mut a_blanks_map, a);
299		collect_signatures(&mut b_blanks_map, b);
300
301		if a_blanks_map.len() == b_blanks_map.len() {
302			// Step 2: split by sizes.
303			let a_groups = split_by_size(&a_blanks_map);
304			let b_groups = split_by_size(&b_blanks_map);
305
306			if a_groups.len() == b_groups.len() {
307				if a_groups.iter().all(|(len, _)| b_groups.contains_key(len)) {
308					// Step 3: find candidates for each blank id.
309					let mut candidates = BTreeMap::new();
310					for (len, a_group) in a_groups {
311						let b_group = b_groups.get(&len).unwrap();
312
313						for (a_blank_id, a_sig) in a_group {
314							let mut a_blank_id_candidates = BTreeSet::new();
315							for (b_blank_id, b_sig) in b_group {
316								if a_sig.matches(b_sig) {
317									a_blank_id_candidates.insert(*b_blank_id);
318								}
319							}
320
321							if a_blank_id_candidates.is_empty() {
322								// eprintln!("no candidates found for {}", a_blank_id);
323								return None;
324							}
325
326							candidates.insert(a_blank_id, a_blank_id_candidates);
327						}
328					}
329
330					Self::find_from_candidates(
331						candidates.iter(),
332						BTreeBijection::new(),
333						&a_blanks_map,
334						&b_blanks_map,
335					)
336				} else {
337					// eprintln!("different group lengths");
338					None
339				}
340			} else {
341				// eprintln!("different group count");
342				None
343			}
344		} else {
345			// eprintln!("different blank node count");
346			None
347		}
348	}
349}
350
351/// Blank node identifier bijection
352/// between two (isomorphic) datasets.
353#[derive(Educe)]
354#[educe(Clone)]
355pub struct BTreeBijection<'a, 'b, A: ?Sized = BlankIdBuf, B: ?Sized = BlankIdBuf> {
356	pub forward: BTreeMap<&'a A, &'b B>,
357	pub backward: BTreeMap<&'b B, &'a A>,
358}
359
360impl<'a, 'b, A: ?Sized, B: ?Sized> BTreeBijection<'a, 'b, A, B> {
361	fn new() -> Self {
362		Self {
363			forward: BTreeMap::new(),
364			backward: BTreeMap::new(),
365		}
366	}
367
368	fn insert(&mut self, a: &'a A, b: &'b B)
369	where
370		A: Ord,
371		B: Ord,
372	{
373		self.forward.insert(a, b);
374		self.backward.insert(b, a);
375	}
376}
377
378#[allow(clippy::type_complexity)]
379pub(crate) struct BlankSignature<'a, A: ?Sized + Dataset>(
380	BTreeSet<Quad<&'a A::Subject, &'a A::Predicate, &'a A::Object, &'a A::GraphLabel>>,
381);
382
383impl<'a, U: Dataset> BlankSignature<'a, U> {
384	pub fn new(
385		quad: Quad<&'a U::Subject, &'a U::Predicate, &'a U::Object, &'a U::GraphLabel>,
386	) -> Self
387	where
388		U::Subject: Ord,
389		U::Predicate: Ord,
390		U::Object: Ord,
391		U::GraphLabel: Ord,
392	{
393		Self(Some(quad).into_iter().collect())
394	}
395
396	pub fn insert(
397		&mut self,
398		quad: Quad<&'a U::Subject, &'a U::Predicate, &'a U::Object, &'a U::GraphLabel>,
399	) where
400		U::Subject: Ord,
401		U::Predicate: Ord,
402		U::Object: Ord,
403		U::GraphLabel: Ord,
404	{
405		self.0.insert(quad);
406	}
407
408	pub fn len(&self) -> usize {
409		self.0.len()
410	}
411
412	pub fn matches<V: Dataset, UI, UB, UL, VI, VB, VL>(&self, other: &BlankSignature<V>) -> bool
413	where
414		U: FindBTreeBlankIdBijection<V>,
415		U::Subject: AsRdfTerm<UI, UB, UL>,
416		U::Predicate: AsRdfTerm<UI, UB, UL>,
417		U::Object: AsRdfTerm<UI, UB, UL>,
418		U::GraphLabel: AsRdfTerm<UI, UB, UL>,
419		V::Subject: AsRdfTerm<VI, VB, VL>,
420		V::Predicate: AsRdfTerm<VI, VB, VL>,
421		V::Object: AsRdfTerm<VI, VB, VL>,
422		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
423		UI: PartialEq<VI>,
424		UL: PartialEq<VL>,
425		UB: Ord,
426		VB: Ord,
427	{
428		if self.len() == other.len() {
429			let mut other: Vec<_> = other.0.iter().map(|q| Some(*q)).collect();
430			'next_quad: for quad in &self.0 {
431				for other_quad in &mut other {
432					if let Some(oq) = other_quad {
433						if U::blank_quad_matches(*quad, *oq) {
434							other_quad.take();
435							continue 'next_quad;
436						}
437					}
438				}
439
440				return false;
441			}
442
443			true
444		} else {
445			false
446		}
447	}
448
449	pub fn matches_with<'b, V: Dataset, UI, UB, UL, VI, VB, VL>(
450		&self,
451		sigma: &BTreeBijection<'a, 'b, UB, VB>,
452		other: &BlankSignature<'b, V>,
453	) -> bool
454	where
455		U: FindBTreeBlankIdBijection<V>,
456		U::Subject: AsRdfTerm<UI, UB, UL>,
457		U::Predicate: AsRdfTerm<UI, UB, UL>,
458		U::Object: AsRdfTerm<UI, UB, UL>,
459		U::GraphLabel: AsRdfTerm<UI, UB, UL>,
460		V::Subject: AsRdfTerm<VI, VB, VL>,
461		V::Predicate: AsRdfTerm<VI, VB, VL>,
462		V::Object: AsRdfTerm<VI, VB, VL>,
463		V::GraphLabel: AsRdfTerm<VI, VB, VL>,
464		UI: PartialEq<VI>,
465		UL: PartialEq<VL>,
466		UB: Ord,
467		VB: Ord,
468	{
469		if self.len() == other.len() {
470			let mut other: Vec<_> = other.0.iter().map(|q| Some(*q)).collect();
471			'next_quad: for quad in &self.0 {
472				for other_quad in &mut other {
473					if let Some(oq) = other_quad {
474						if U::blank_quad_matches_with(sigma, *quad, *oq) {
475							// eprintln!("matching {} with {}", quad, oq);
476							other_quad.take();
477							continue 'next_quad;
478						}
479					}
480				}
481
482				// eprintln!("could not match {} with anything", quad);
483				return false;
484			}
485
486			true
487		} else {
488			false
489		}
490	}
491}
492
493fn blank_term_matches_with<'u, 'v, UI, UB, UL, VI, VB, VL>(
494	sigma: &BTreeBijection<'u, 'v, UB, VB>,
495	a: rdf_types::Term<Id<&'u UI, &'u UB>, &'u UL>,
496	b: rdf_types::Term<Id<&'v VI, &'v VB>, &'v VL>,
497) -> bool
498where
499	UI: PartialEq<VI>,
500	UL: PartialEq<VL>,
501	UB: Ord,
502	VB: Ord,
503{
504	let r = match (a, b) {
505		(rdf_types::Term::Id(Id::Blank(a)), rdf_types::Term::Id(Id::Blank(b))) => {
506			match sigma.forward.get(a) {
507				Some(&c) => c == b,
508				None => match sigma.backward.get(b) {
509					Some(&c) => a == c,
510					None => true,
511				},
512			}
513		}
514		(rdf_types::Term::Id(Id::Iri(a)), rdf_types::Term::Id(Id::Iri(b))) => a == b,
515		(rdf_types::Term::Literal(a), rdf_types::Term::Literal(b)) => a == b,
516		_ => false,
517	};
518
519	r
520}