Skip to main content

relmath/
provenance.rs

1//! Deterministic exact provenance support.
2//!
3//! In this first G3 slice, a provenance token or label is an explicit stable
4//! identifier attached directly to a stored fact. A witness is the exact,
5//! deterministic set of provenance tokens attached to one stored fact. A
6//! derived tuple is a tuple produced by later inference or algebraic
7//! derivation; this module does not compute derived tuples yet. The first
8//! explanation query in this module is
9//! [`crate::provenance::ProvenanceRelation::why`], which explains only stored
10//! base facts. When a fact is absent, `why` returns `None` rather than an
11//! empty [`crate::provenance::ProvenanceSet`]. No public rule API lives in
12//! this module yet; the first least-fixed-point rule layer is still
13//! documented as later G3 work.
14
15use std::collections::{BTreeMap, BTreeSet};
16
17use crate::{
18    BinaryRelation, NaryRelation, NaryRelationError, UnaryRelation, traits::FiniteRelation,
19};
20
21/// Deterministic provenance tokens attached to one stored fact.
22///
23/// `ProvenanceSet<P>` is the user-visible witness for a fact in this first G3
24/// slice. It is an exact `BTreeSet`-backed set of tokens or labels, not a
25/// derivation DAG.
26///
27/// # Examples
28///
29/// ```rust
30/// use relmath::provenance::{ProvenanceRelation, ProvenanceSet};
31///
32/// let empty = ProvenanceSet::<&str>::default();
33/// assert!(empty.is_empty());
34///
35/// let evidence = ProvenanceRelation::from_facts([
36///     (("BRCA1", "BreastCancer"), "paper_12"),
37///     (("BRCA1", "BreastCancer"), "curated_panel"),
38/// ]);
39/// let witness = evidence
40///     .why(&("BRCA1", "BreastCancer"))
41///     .expect("expected explanation");
42///
43/// assert_eq!(witness.len(), 2);
44/// assert!(witness.contains(&"paper_12"));
45/// assert!(witness.contains_token(&"curated_panel"));
46/// assert_eq!(
47///     witness.iter().copied().collect::<Vec<_>>(),
48///     vec!["curated_panel", "paper_12"]
49/// );
50/// assert_eq!(witness.to_vec(), vec!["curated_panel", "paper_12"]);
51/// ```
52#[derive(Clone, Debug, PartialEq, Eq)]
53pub struct ProvenanceSet<P: Ord> {
54    tokens: BTreeSet<P>,
55}
56
57impl<P: Ord> ProvenanceSet<P> {
58    /// Returns the number of provenance tokens in the set.
59    #[must_use]
60    pub fn len(&self) -> usize {
61        self.tokens.len()
62    }
63
64    /// Returns `true` when the provenance set contains no tokens.
65    #[must_use]
66    pub fn is_empty(&self) -> bool {
67        self.tokens.is_empty()
68    }
69
70    /// Returns `true` when the set contains the given token.
71    #[must_use]
72    pub fn contains(&self, token: &P) -> bool {
73        self.tokens.contains(token)
74    }
75
76    /// Returns `true` when the set contains the given provenance token.
77    #[must_use]
78    pub fn contains_token(&self, token: &P) -> bool {
79        self.contains(token)
80    }
81
82    /// Returns an iterator over provenance tokens in deterministic order.
83    pub fn iter(&self) -> impl Iterator<Item = &P> {
84        self.tokens.iter()
85    }
86
87    /// Converts the provenance set into a sorted vector of tokens.
88    #[must_use]
89    pub fn to_vec(&self) -> Vec<P>
90    where
91        P: Clone,
92    {
93        self.tokens.iter().cloned().collect()
94    }
95}
96
97impl<P: Ord> Default for ProvenanceSet<P> {
98    fn default() -> Self {
99        Self {
100            tokens: BTreeSet::new(),
101        }
102    }
103}
104
105/// Deterministic exact provenance attached to stored facts.
106///
107/// `ProvenanceRelation<F, P>` maps each stored fact to a deterministic set of
108/// provenance tokens. Repeated insertion of the same `(fact, token)` pair is
109/// idempotent. Inserting a new token for an existing fact combines provenance
110/// by exact set union.
111///
112/// In this first additive provenance slice, every stored fact is a base fact.
113/// The module does not yet derive new tuples. [`Self::why`] explains stored
114/// facts only; derived-tuple explanations and `why_not` queries remain later
115/// work.
116///
117/// # Examples
118///
119/// ```rust
120/// use relmath::{UnaryRelation, provenance::ProvenanceRelation};
121///
122/// let gene_disease = ProvenanceRelation::from_facts([
123///     (("BRCA1", "BreastCancer"), "paper_12"),
124///     (("BRCA1", "BreastCancer"), "curated_panel"),
125///     (("TP53", "BreastCancer"), "paper_77"),
126/// ]);
127///
128/// let supported = gene_disease.to_binary_relation();
129/// let brca1 = UnaryRelation::singleton("BRCA1");
130///
131/// assert_eq!(supported.image(&brca1).to_vec(), vec!["BreastCancer"]);
132/// assert_eq!(
133///     gene_disease
134///         .why(&("BRCA1", "BreastCancer"))
135///         .expect("expected explanation")
136///         .to_vec(),
137///     vec!["curated_panel", "paper_12"]
138/// );
139/// ```
140#[derive(Clone, Debug, PartialEq, Eq)]
141pub struct ProvenanceRelation<F: Ord, P: Ord> {
142    facts: BTreeMap<F, ProvenanceSet<P>>,
143}
144
145impl<F: Ord, P: Ord> ProvenanceRelation<F, P> {
146    /// Creates an empty provenance relation.
147    ///
148    /// # Examples
149    ///
150    /// ```rust
151    /// use relmath::provenance::ProvenanceRelation;
152    ///
153    /// let mut evidence = ProvenanceRelation::new();
154    ///
155    /// assert!(evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
156    /// assert!(!evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
157    /// assert!(evidence.contains_fact(&("BRCA1", "BreastCancer")));
158    /// assert_eq!(
159    ///     evidence
160    ///         .provenance_of(&("BRCA1", "BreastCancer"))
161    ///         .expect("expected explanation")
162    ///         .to_vec(),
163    ///     vec!["paper_12"]
164    /// );
165    /// ```
166    #[must_use]
167    pub fn new() -> Self {
168        Self {
169            facts: BTreeMap::new(),
170        }
171    }
172
173    /// Creates a provenance relation from `(fact, token)` entries.
174    ///
175    /// Repeated facts accumulate tokens by exact set union.
176    #[must_use]
177    pub fn from_facts<I>(facts: I) -> Self
178    where
179        I: IntoIterator<Item = (F, P)>,
180    {
181        facts.into_iter().collect()
182    }
183
184    /// Inserts one provenance token for a fact.
185    ///
186    /// Returns `true` when the token was not already attached to the fact.
187    pub fn insert(&mut self, fact: F, token: P) -> bool {
188        self.facts.entry(fact).or_default().tokens.insert(token)
189    }
190
191    /// Returns `true` when the relation contains the given fact.
192    #[must_use]
193    pub fn contains_fact(&self, fact: &F) -> bool {
194        self.facts.contains_key(fact)
195    }
196
197    /// Returns why `fact` is present in this provenance relation.
198    ///
199    /// When `fact` is present, the returned witness is the deterministic exact
200    /// set of provenance tokens attached to that stored fact. When `fact` is
201    /// absent, this returns `None`.
202    ///
203    /// In this first G3 slice, every stored fact is a base fact with at least
204    /// one token, so `None` means the relation has no explanation for the fact
205    /// because the fact is absent.
206    ///
207    /// # Examples
208    ///
209    /// ```rust
210    /// use relmath::provenance::ProvenanceRelation;
211    ///
212    /// let evidence = ProvenanceRelation::from_facts([
213    ///     (("BRCA1", "BreastCancer"), "paper_12"),
214    ///     (("BRCA1", "BreastCancer"), "curated_panel"),
215    /// ]);
216    ///
217    /// let why = evidence
218    ///     .why(&("BRCA1", "BreastCancer"))
219    ///     .expect("expected explanation");
220    ///
221    /// assert!(why.contains_token(&"paper_12"));
222    /// assert!(why.contains_token(&"curated_panel"));
223    /// assert!(evidence.why(&("BRCA1", "Olaparib")).is_none());
224    /// ```
225    #[must_use]
226    pub fn why(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
227        self.facts.get(fact)
228    }
229
230    /// Returns the deterministic provenance set for one fact.
231    ///
232    /// Prefer [`Self::why`] for explanation-oriented queries.
233    #[must_use]
234    pub fn provenance_of(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
235        self.why(fact)
236    }
237
238    /// Returns an iterator over facts and provenance in deterministic order.
239    ///
240    /// # Examples
241    ///
242    /// ```rust
243    /// use relmath::provenance::ProvenanceRelation;
244    ///
245    /// let evidence = ProvenanceRelation::from_facts([
246    ///     (("BRCA1", "BreastCancer"), "paper_12"),
247    ///     (("BRCA1", "BreastCancer"), "curated_panel"),
248    ///     (("TP53", "BreastCancer"), "paper_77"),
249    /// ]);
250    ///
251    /// assert_eq!(
252    ///     evidence
253    ///         .iter()
254    ///         .map(|(fact, witness)| (*fact, witness.to_vec()))
255    ///         .collect::<Vec<_>>(),
256    ///     vec![
257    ///         (("BRCA1", "BreastCancer"), vec!["curated_panel", "paper_12"]),
258    ///         (("TP53", "BreastCancer"), vec!["paper_77"]),
259    ///     ]
260    /// );
261    /// ```
262    pub fn iter(&self) -> impl Iterator<Item = (&F, &ProvenanceSet<P>)> {
263        self.facts.iter()
264    }
265
266    /// Returns the exact support relation of stored facts as a unary relation.
267    ///
268    /// This support forgets provenance and keeps only which facts are present.
269    ///
270    /// # Examples
271    ///
272    /// ```rust
273    /// use relmath::provenance::ProvenanceRelation;
274    ///
275    /// let evidence = ProvenanceRelation::from_facts([
276    ///     (("BRCA1", "BreastCancer"), "paper_12"),
277    ///     (("BRCA1", "BreastCancer"), "curated_panel"),
278    ///     (("TP53", "BreastCancer"), "paper_77"),
279    /// ]);
280    ///
281    /// assert_eq!(
282    ///     evidence.support().to_vec(),
283    ///     vec![("BRCA1", "BreastCancer"), ("TP53", "BreastCancer")]
284    /// );
285    /// ```
286    #[must_use]
287    pub fn support(&self) -> UnaryRelation<F>
288    where
289        F: Clone,
290    {
291        self.facts.keys().cloned().collect()
292    }
293}
294
295impl<F: Ord, P: Ord> Default for ProvenanceRelation<F, P> {
296    fn default() -> Self {
297        Self::new()
298    }
299}
300
301impl<F: Ord, P: Ord> FiniteRelation for ProvenanceRelation<F, P> {
302    fn len(&self) -> usize {
303        self.facts.len()
304    }
305}
306
307impl<F: Ord, P: Ord> FromIterator<(F, P)> for ProvenanceRelation<F, P> {
308    fn from_iter<I: IntoIterator<Item = (F, P)>>(iter: I) -> Self {
309        let mut relation = Self::new();
310        relation.extend(iter);
311        relation
312    }
313}
314
315impl<F: Ord, P: Ord> Extend<(F, P)> for ProvenanceRelation<F, P> {
316    fn extend<I: IntoIterator<Item = (F, P)>>(&mut self, iter: I) {
317        for (fact, token) in iter {
318            self.insert(fact, token);
319        }
320    }
321}
322
323impl<T: Ord + Clone, P: Ord> ProvenanceRelation<T, P> {
324    /// Converts scalar facts into a unary relation support set.
325    ///
326    /// # Examples
327    ///
328    /// ```rust
329    /// use relmath::provenance::ProvenanceRelation;
330    ///
331    /// let concepts = ProvenanceRelation::from_facts([
332    ///     ("Relations", "lecture_1"),
333    ///     ("Relations", "worksheet"),
334    ///     ("Closure", "lecture_2"),
335    /// ]);
336    ///
337    /// assert_eq!(concepts.to_unary_relation().to_vec(), vec!["Closure", "Relations"]);
338    /// ```
339    #[must_use]
340    pub fn to_unary_relation(&self) -> UnaryRelation<T> {
341        self.support()
342    }
343}
344
345impl<A: Ord + Clone, B: Ord + Clone, P: Ord> ProvenanceRelation<(A, B), P> {
346    /// Converts pair facts into a binary relation support set.
347    ///
348    /// # Examples
349    ///
350    /// ```rust
351    /// use relmath::provenance::ProvenanceRelation;
352    ///
353    /// let evidence = ProvenanceRelation::from_facts([
354    ///     (("Alice", "Reader"), "directory"),
355    ///     (("Bob", "Editor"), "directory"),
356    /// ]);
357    ///
358    /// assert_eq!(
359    ///     evidence.to_binary_relation().to_vec(),
360    ///     vec![("Alice", "Reader"), ("Bob", "Editor")]
361    /// );
362    /// ```
363    #[must_use]
364    pub fn to_binary_relation(&self) -> BinaryRelation<A, B> {
365        self.facts.keys().cloned().collect()
366    }
367}
368
369impl<T: Ord + Clone, P: Ord> ProvenanceRelation<Vec<T>, P> {
370    /// Converts row facts into an n-ary relation with the given schema.
371    ///
372    /// The explicit schema preserves the current exact n-ary boundary where row
373    /// values do not themselves encode column names. This method reuses the
374    /// current `NaryRelation` validation rules, so duplicate or blank column
375    /// names and row-arity mismatches return `NaryRelationError`.
376    ///
377    /// # Examples
378    ///
379    /// ```rust
380    /// use relmath::provenance::ProvenanceRelation;
381    ///
382    /// let rows = ProvenanceRelation::from_facts([
383    ///     (vec!["Alice", "Math", "passed"], "gradebook"),
384    ///     (vec!["Alice", "Math", "passed"], "reviewed"),
385    ///     (vec!["Bob", "Physics", "passed"], "gradebook"),
386    /// ]);
387    ///
388    /// let relation = rows
389    ///     .to_nary_relation(["student", "course", "status"])
390    ///     .expect("expected valid support relation");
391    ///
392    /// assert_eq!(
393    ///     relation.to_rows(),
394    ///     vec![
395    ///         vec!["Alice", "Math", "passed"],
396    ///         vec!["Bob", "Physics", "passed"],
397    ///     ]
398    /// );
399    /// ```
400    pub fn to_nary_relation<I, S>(&self, schema: I) -> Result<NaryRelation<T>, NaryRelationError>
401    where
402        I: IntoIterator<Item = S>,
403        S: Into<String>,
404    {
405        NaryRelation::from_rows(schema, self.facts.keys().cloned())
406    }
407}