relmath/provenance.rs
1//! Deterministic exact provenance support.
2//!
3//! In this first G3 slice, a provenance token or label is an explicit stable
4//! identifier attached directly to a stored fact. A witness is the exact,
5//! deterministic set of provenance tokens attached to one stored fact. A
6//! derived tuple is a tuple produced by later inference or algebraic
7//! derivation; this module does not compute derived tuples yet. The first
8//! explanation query in this module is
9//! [`crate::provenance::ProvenanceRelation::why`], which explains only stored
10//! base facts. When a fact is absent, `why` returns `None` rather than an
11//! empty [`crate::provenance::ProvenanceSet`]. No public rule API lives in
12//! this module yet; the first least-fixed-point rule layer is still
13//! documented as later G3 work. Relation-level exact support materialization
14//! for provenance surfaces is also available generically through
15//! [`crate::ExactSupport`].
16
17use std::collections::{BTreeMap, BTreeSet};
18
19use crate::{
20 BinaryRelation, NaryRelation, NaryRelationError, UnaryRelation, traits::FiniteRelation,
21};
22
23/// Deterministic provenance tokens attached to one stored fact.
24///
25/// `ProvenanceSet<P>` is the user-visible witness for a fact in this first G3
26/// slice. It is an exact `BTreeSet`-backed set of tokens or labels, not a
27/// derivation DAG.
28///
29/// # Examples
30///
31/// ```rust
32/// use relmath::provenance::{ProvenanceRelation, ProvenanceSet};
33///
34/// let empty = ProvenanceSet::<&str>::default();
35/// assert!(empty.is_empty());
36///
37/// let evidence = ProvenanceRelation::from_facts([
38/// (("BRCA1", "BreastCancer"), "paper_12"),
39/// (("BRCA1", "BreastCancer"), "curated_panel"),
40/// ]);
41/// let witness = evidence
42/// .why(&("BRCA1", "BreastCancer"))
43/// .expect("expected explanation");
44///
45/// assert_eq!(witness.len(), 2);
46/// assert!(witness.contains(&"paper_12"));
47/// assert!(witness.contains_token(&"curated_panel"));
48/// assert_eq!(
49/// witness.iter().copied().collect::<Vec<_>>(),
50/// vec!["curated_panel", "paper_12"]
51/// );
52/// assert_eq!(witness.to_vec(), vec!["curated_panel", "paper_12"]);
53/// ```
54#[derive(Clone, Debug, PartialEq, Eq)]
55pub struct ProvenanceSet<P: Ord> {
56 tokens: BTreeSet<P>,
57}
58
59impl<P: Ord> ProvenanceSet<P> {
60 /// Returns the number of provenance tokens in the set.
61 #[must_use]
62 pub fn len(&self) -> usize {
63 self.tokens.len()
64 }
65
66 /// Returns `true` when the provenance set contains no tokens.
67 #[must_use]
68 pub fn is_empty(&self) -> bool {
69 self.tokens.is_empty()
70 }
71
72 /// Returns `true` when the set contains the given token.
73 #[must_use]
74 pub fn contains(&self, token: &P) -> bool {
75 self.tokens.contains(token)
76 }
77
78 /// Returns `true` when the set contains the given provenance token.
79 #[must_use]
80 pub fn contains_token(&self, token: &P) -> bool {
81 self.contains(token)
82 }
83
84 /// Returns an iterator over provenance tokens in deterministic order.
85 pub fn iter(&self) -> impl Iterator<Item = &P> {
86 self.tokens.iter()
87 }
88
89 /// Converts the provenance set into a sorted vector of tokens.
90 #[must_use]
91 pub fn to_vec(&self) -> Vec<P>
92 where
93 P: Clone,
94 {
95 self.tokens.iter().cloned().collect()
96 }
97}
98
99impl<P: Ord> Default for ProvenanceSet<P> {
100 fn default() -> Self {
101 Self {
102 tokens: BTreeSet::new(),
103 }
104 }
105}
106
107/// Deterministic exact provenance attached to stored facts.
108///
109/// `ProvenanceRelation<F, P>` maps each stored fact to a deterministic set of
110/// provenance tokens. Repeated insertion of the same `(fact, token)` pair is
111/// idempotent. Inserting a new token for an existing fact combines provenance
112/// by exact set union.
113///
114/// In this first additive provenance slice, every stored fact is a base fact.
115/// The module does not yet derive new tuples. [`Self::why`] explains stored
116/// facts only; derived-tuple explanations and `why_not` queries remain later
117/// work.
118///
119/// # Examples
120///
121/// ```rust
122/// use relmath::{UnaryRelation, provenance::ProvenanceRelation};
123///
124/// let gene_disease = ProvenanceRelation::from_facts([
125/// (("BRCA1", "BreastCancer"), "paper_12"),
126/// (("BRCA1", "BreastCancer"), "curated_panel"),
127/// (("TP53", "BreastCancer"), "paper_77"),
128/// ]);
129///
130/// let supported = gene_disease.to_binary_relation();
131/// let brca1 = UnaryRelation::singleton("BRCA1");
132///
133/// assert_eq!(supported.image(&brca1).to_vec(), vec!["BreastCancer"]);
134/// assert_eq!(
135/// gene_disease
136/// .why(&("BRCA1", "BreastCancer"))
137/// .expect("expected explanation")
138/// .to_vec(),
139/// vec!["curated_panel", "paper_12"]
140/// );
141/// ```
142#[derive(Clone, Debug, PartialEq, Eq)]
143pub struct ProvenanceRelation<F: Ord, P: Ord> {
144 facts: BTreeMap<F, ProvenanceSet<P>>,
145}
146
147impl<F: Ord, P: Ord> ProvenanceRelation<F, P> {
148 /// Creates an empty provenance relation.
149 ///
150 /// # Examples
151 ///
152 /// ```rust
153 /// use relmath::provenance::ProvenanceRelation;
154 ///
155 /// let mut evidence = ProvenanceRelation::new();
156 ///
157 /// assert!(evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
158 /// assert!(!evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
159 /// assert!(evidence.contains_fact(&("BRCA1", "BreastCancer")));
160 /// assert_eq!(
161 /// evidence
162 /// .provenance_of(&("BRCA1", "BreastCancer"))
163 /// .expect("expected explanation")
164 /// .to_vec(),
165 /// vec!["paper_12"]
166 /// );
167 /// ```
168 #[must_use]
169 pub fn new() -> Self {
170 Self {
171 facts: BTreeMap::new(),
172 }
173 }
174
175 /// Creates a provenance relation from `(fact, token)` entries.
176 ///
177 /// Repeated facts accumulate tokens by exact set union.
178 #[must_use]
179 pub fn from_facts<I>(facts: I) -> Self
180 where
181 I: IntoIterator<Item = (F, P)>,
182 {
183 facts.into_iter().collect()
184 }
185
186 /// Inserts one provenance token for a fact.
187 ///
188 /// Returns `true` when the token was not already attached to the fact.
189 pub fn insert(&mut self, fact: F, token: P) -> bool {
190 self.facts.entry(fact).or_default().tokens.insert(token)
191 }
192
193 /// Returns `true` when the relation contains the given fact.
194 #[must_use]
195 pub fn contains_fact(&self, fact: &F) -> bool {
196 self.facts.contains_key(fact)
197 }
198
199 /// Returns why `fact` is present in this provenance relation.
200 ///
201 /// When `fact` is present, the returned witness is the deterministic exact
202 /// set of provenance tokens attached to that stored fact. When `fact` is
203 /// absent, this returns `None`.
204 ///
205 /// In this first G3 slice, every stored fact is a base fact with at least
206 /// one token, so `None` means the relation has no explanation for the fact
207 /// because the fact is absent.
208 ///
209 /// # Examples
210 ///
211 /// ```rust
212 /// use relmath::provenance::ProvenanceRelation;
213 ///
214 /// let evidence = ProvenanceRelation::from_facts([
215 /// (("BRCA1", "BreastCancer"), "paper_12"),
216 /// (("BRCA1", "BreastCancer"), "curated_panel"),
217 /// ]);
218 ///
219 /// let why = evidence
220 /// .why(&("BRCA1", "BreastCancer"))
221 /// .expect("expected explanation");
222 ///
223 /// assert!(why.contains_token(&"paper_12"));
224 /// assert!(why.contains_token(&"curated_panel"));
225 /// assert!(evidence.why(&("BRCA1", "Olaparib")).is_none());
226 /// ```
227 #[must_use]
228 pub fn why(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
229 self.facts.get(fact)
230 }
231
232 /// Returns the deterministic provenance set for one fact.
233 ///
234 /// Prefer [`Self::why`] for explanation-oriented queries.
235 #[must_use]
236 pub fn provenance_of(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
237 self.why(fact)
238 }
239
240 /// Returns an iterator over facts and provenance in deterministic order.
241 ///
242 /// # Examples
243 ///
244 /// ```rust
245 /// use relmath::provenance::ProvenanceRelation;
246 ///
247 /// let evidence = ProvenanceRelation::from_facts([
248 /// (("BRCA1", "BreastCancer"), "paper_12"),
249 /// (("BRCA1", "BreastCancer"), "curated_panel"),
250 /// (("TP53", "BreastCancer"), "paper_77"),
251 /// ]);
252 ///
253 /// assert_eq!(
254 /// evidence
255 /// .iter()
256 /// .map(|(fact, witness)| (*fact, witness.to_vec()))
257 /// .collect::<Vec<_>>(),
258 /// vec![
259 /// (("BRCA1", "BreastCancer"), vec!["curated_panel", "paper_12"]),
260 /// (("TP53", "BreastCancer"), vec!["paper_77"]),
261 /// ]
262 /// );
263 /// ```
264 pub fn iter(&self) -> impl Iterator<Item = (&F, &ProvenanceSet<P>)> {
265 self.facts.iter()
266 }
267
268 /// Returns the exact support relation of stored facts as a unary relation.
269 ///
270 /// This support forgets provenance and keeps only which facts are present.
271 /// Generic code can access the same relation-level boundary through
272 /// [`crate::ExactSupport::exact_support`].
273 ///
274 /// # Examples
275 ///
276 /// ```rust
277 /// use relmath::provenance::ProvenanceRelation;
278 ///
279 /// let evidence = ProvenanceRelation::from_facts([
280 /// (("BRCA1", "BreastCancer"), "paper_12"),
281 /// (("BRCA1", "BreastCancer"), "curated_panel"),
282 /// (("TP53", "BreastCancer"), "paper_77"),
283 /// ]);
284 ///
285 /// assert_eq!(
286 /// evidence.support().to_vec(),
287 /// vec![("BRCA1", "BreastCancer"), ("TP53", "BreastCancer")]
288 /// );
289 /// ```
290 #[must_use]
291 pub fn support(&self) -> UnaryRelation<F>
292 where
293 F: Clone,
294 {
295 self.facts.keys().cloned().collect()
296 }
297}
298
299impl<F: Ord, P: Ord> Default for ProvenanceRelation<F, P> {
300 fn default() -> Self {
301 Self::new()
302 }
303}
304
305impl<F: Ord, P: Ord> FiniteRelation for ProvenanceRelation<F, P> {
306 fn len(&self) -> usize {
307 self.facts.len()
308 }
309}
310
311impl<F: Ord, P: Ord> FromIterator<(F, P)> for ProvenanceRelation<F, P> {
312 fn from_iter<I: IntoIterator<Item = (F, P)>>(iter: I) -> Self {
313 let mut relation = Self::new();
314 relation.extend(iter);
315 relation
316 }
317}
318
319impl<F: Ord, P: Ord> Extend<(F, P)> for ProvenanceRelation<F, P> {
320 fn extend<I: IntoIterator<Item = (F, P)>>(&mut self, iter: I) {
321 for (fact, token) in iter {
322 self.insert(fact, token);
323 }
324 }
325}
326
327impl<T: Ord + Clone, P: Ord> ProvenanceRelation<T, P> {
328 /// Converts scalar facts into a unary relation support set.
329 ///
330 /// Generic code can access the same unary materialization boundary through
331 /// [`crate::ToExactUnaryRelation`].
332 ///
333 /// # Examples
334 ///
335 /// ```rust
336 /// use relmath::provenance::ProvenanceRelation;
337 ///
338 /// let concepts = ProvenanceRelation::from_facts([
339 /// ("Relations", "lecture_1"),
340 /// ("Relations", "worksheet"),
341 /// ("Closure", "lecture_2"),
342 /// ]);
343 ///
344 /// assert_eq!(concepts.to_unary_relation().to_vec(), vec!["Closure", "Relations"]);
345 /// ```
346 #[must_use]
347 pub fn to_unary_relation(&self) -> UnaryRelation<T> {
348 self.support()
349 }
350}
351
352impl<A: Ord + Clone, B: Ord + Clone, P: Ord> ProvenanceRelation<(A, B), P> {
353 /// Converts pair facts into a binary relation support set.
354 ///
355 /// Generic code can access the same binary materialization boundary
356 /// through [`crate::ToExactBinaryRelation`].
357 ///
358 /// # Examples
359 ///
360 /// ```rust
361 /// use relmath::provenance::ProvenanceRelation;
362 ///
363 /// let evidence = ProvenanceRelation::from_facts([
364 /// (("Alice", "Reader"), "directory"),
365 /// (("Bob", "Editor"), "directory"),
366 /// ]);
367 ///
368 /// assert_eq!(
369 /// evidence.to_binary_relation().to_vec(),
370 /// vec![("Alice", "Reader"), ("Bob", "Editor")]
371 /// );
372 /// ```
373 #[must_use]
374 pub fn to_binary_relation(&self) -> BinaryRelation<A, B> {
375 self.facts.keys().cloned().collect()
376 }
377}
378
379impl<T: Ord + Clone, P: Ord> ProvenanceRelation<Vec<T>, P> {
380 /// Converts row facts into an n-ary relation with the given schema.
381 ///
382 /// The explicit schema preserves the current exact n-ary boundary where row
383 /// values do not themselves encode column names. This method reuses the
384 /// current `NaryRelation` validation rules, so duplicate or blank column
385 /// names and row-arity mismatches return `NaryRelationError`. Generic code
386 /// can access the same n-ary materialization boundary through
387 /// [`crate::ToExactNaryRelation`].
388 ///
389 /// # Examples
390 ///
391 /// ```rust
392 /// use relmath::provenance::ProvenanceRelation;
393 ///
394 /// let rows = ProvenanceRelation::from_facts([
395 /// (vec!["Alice", "Math", "passed"], "gradebook"),
396 /// (vec!["Alice", "Math", "passed"], "reviewed"),
397 /// (vec!["Bob", "Physics", "passed"], "gradebook"),
398 /// ]);
399 ///
400 /// let relation = rows
401 /// .to_nary_relation(["student", "course", "status"])
402 /// .expect("expected valid support relation");
403 ///
404 /// assert_eq!(
405 /// relation.to_rows(),
406 /// vec![
407 /// vec!["Alice", "Math", "passed"],
408 /// vec!["Bob", "Physics", "passed"],
409 /// ]
410 /// );
411 /// ```
412 pub fn to_nary_relation<I, S>(&self, schema: I) -> Result<NaryRelation<T>, NaryRelationError>
413 where
414 I: IntoIterator<Item = S>,
415 S: Into<String>,
416 {
417 NaryRelation::from_rows(schema, self.facts.keys().cloned())
418 }
419}