Skip to main content

relmath/
provenance.rs

1//! Deterministic exact provenance support.
2//!
3//! In this first G3 slice, a provenance token or label is an explicit stable
4//! identifier attached directly to a stored fact. A witness is the exact,
5//! deterministic set of provenance tokens attached to one stored fact. A
6//! derived tuple is a tuple produced by later inference or algebraic
7//! derivation; this module does not compute derived tuples yet. The first
8//! explanation query in this module is
9//! [`crate::provenance::ProvenanceRelation::why`], which explains only stored
10//! base facts. When a fact is absent, `why` returns `None` rather than an
11//! empty [`crate::provenance::ProvenanceSet`]. No public rule API lives in
12//! this module yet; the first least-fixed-point rule layer is still
13//! documented as later G3 work. Relation-level exact support materialization
14//! for provenance surfaces is also available generically through
15//! [`crate::ExactSupport`].
16
17use std::collections::{BTreeMap, BTreeSet};
18
19use crate::{
20    BinaryRelation, NaryRelation, NaryRelationError, UnaryRelation, traits::FiniteRelation,
21};
22
23/// Deterministic provenance tokens attached to one stored fact.
24///
25/// `ProvenanceSet<P>` is the user-visible witness for a fact in this first G3
26/// slice. It is an exact `BTreeSet`-backed set of tokens or labels, not a
27/// derivation DAG.
28///
29/// # Examples
30///
31/// ```rust
32/// use relmath::provenance::{ProvenanceRelation, ProvenanceSet};
33///
34/// let empty = ProvenanceSet::<&str>::default();
35/// assert!(empty.is_empty());
36///
37/// let evidence = ProvenanceRelation::from_facts([
38///     (("BRCA1", "BreastCancer"), "paper_12"),
39///     (("BRCA1", "BreastCancer"), "curated_panel"),
40/// ]);
41/// let witness = evidence
42///     .why(&("BRCA1", "BreastCancer"))
43///     .expect("expected explanation");
44///
45/// assert_eq!(witness.len(), 2);
46/// assert!(witness.contains(&"paper_12"));
47/// assert!(witness.contains_token(&"curated_panel"));
48/// assert_eq!(
49///     witness.iter().copied().collect::<Vec<_>>(),
50///     vec!["curated_panel", "paper_12"]
51/// );
52/// assert_eq!(witness.to_vec(), vec!["curated_panel", "paper_12"]);
53/// ```
54#[derive(Clone, Debug, PartialEq, Eq)]
55pub struct ProvenanceSet<P: Ord> {
56    tokens: BTreeSet<P>,
57}
58
59impl<P: Ord> ProvenanceSet<P> {
60    /// Returns the number of provenance tokens in the set.
61    #[must_use]
62    pub fn len(&self) -> usize {
63        self.tokens.len()
64    }
65
66    /// Returns `true` when the provenance set contains no tokens.
67    #[must_use]
68    pub fn is_empty(&self) -> bool {
69        self.tokens.is_empty()
70    }
71
72    /// Returns `true` when the set contains the given token.
73    #[must_use]
74    pub fn contains(&self, token: &P) -> bool {
75        self.tokens.contains(token)
76    }
77
78    /// Returns `true` when the set contains the given provenance token.
79    #[must_use]
80    pub fn contains_token(&self, token: &P) -> bool {
81        self.contains(token)
82    }
83
84    /// Returns an iterator over provenance tokens in deterministic order.
85    pub fn iter(&self) -> impl Iterator<Item = &P> {
86        self.tokens.iter()
87    }
88
89    /// Converts the provenance set into a sorted vector of tokens.
90    #[must_use]
91    pub fn to_vec(&self) -> Vec<P>
92    where
93        P: Clone,
94    {
95        self.tokens.iter().cloned().collect()
96    }
97}
98
99impl<P: Ord> Default for ProvenanceSet<P> {
100    fn default() -> Self {
101        Self {
102            tokens: BTreeSet::new(),
103        }
104    }
105}
106
107/// Deterministic exact provenance attached to stored facts.
108///
109/// `ProvenanceRelation<F, P>` maps each stored fact to a deterministic set of
110/// provenance tokens. Repeated insertion of the same `(fact, token)` pair is
111/// idempotent. Inserting a new token for an existing fact combines provenance
112/// by exact set union.
113///
114/// In this first additive provenance slice, every stored fact is a base fact.
115/// The module does not yet derive new tuples. [`Self::why`] explains stored
116/// facts only; derived-tuple explanations and `why_not` queries remain later
117/// work.
118///
119/// # Examples
120///
121/// ```rust
122/// use relmath::{UnaryRelation, provenance::ProvenanceRelation};
123///
124/// let gene_disease = ProvenanceRelation::from_facts([
125///     (("BRCA1", "BreastCancer"), "paper_12"),
126///     (("BRCA1", "BreastCancer"), "curated_panel"),
127///     (("TP53", "BreastCancer"), "paper_77"),
128/// ]);
129///
130/// let supported = gene_disease.to_binary_relation();
131/// let brca1 = UnaryRelation::singleton("BRCA1");
132///
133/// assert_eq!(supported.image(&brca1).to_vec(), vec!["BreastCancer"]);
134/// assert_eq!(
135///     gene_disease
136///         .why(&("BRCA1", "BreastCancer"))
137///         .expect("expected explanation")
138///         .to_vec(),
139///     vec!["curated_panel", "paper_12"]
140/// );
141/// ```
142#[derive(Clone, Debug, PartialEq, Eq)]
143pub struct ProvenanceRelation<F: Ord, P: Ord> {
144    facts: BTreeMap<F, ProvenanceSet<P>>,
145}
146
147impl<F: Ord, P: Ord> ProvenanceRelation<F, P> {
148    /// Creates an empty provenance relation.
149    ///
150    /// # Examples
151    ///
152    /// ```rust
153    /// use relmath::provenance::ProvenanceRelation;
154    ///
155    /// let mut evidence = ProvenanceRelation::new();
156    ///
157    /// assert!(evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
158    /// assert!(!evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
159    /// assert!(evidence.contains_fact(&("BRCA1", "BreastCancer")));
160    /// assert_eq!(
161    ///     evidence
162    ///         .provenance_of(&("BRCA1", "BreastCancer"))
163    ///         .expect("expected explanation")
164    ///         .to_vec(),
165    ///     vec!["paper_12"]
166    /// );
167    /// ```
168    #[must_use]
169    pub fn new() -> Self {
170        Self {
171            facts: BTreeMap::new(),
172        }
173    }
174
175    /// Creates a provenance relation from `(fact, token)` entries.
176    ///
177    /// Repeated facts accumulate tokens by exact set union.
178    #[must_use]
179    pub fn from_facts<I>(facts: I) -> Self
180    where
181        I: IntoIterator<Item = (F, P)>,
182    {
183        facts.into_iter().collect()
184    }
185
186    /// Inserts one provenance token for a fact.
187    ///
188    /// Returns `true` when the token was not already attached to the fact.
189    pub fn insert(&mut self, fact: F, token: P) -> bool {
190        self.facts.entry(fact).or_default().tokens.insert(token)
191    }
192
193    /// Returns `true` when the relation contains the given fact.
194    #[must_use]
195    pub fn contains_fact(&self, fact: &F) -> bool {
196        self.facts.contains_key(fact)
197    }
198
199    /// Returns why `fact` is present in this provenance relation.
200    ///
201    /// When `fact` is present, the returned witness is the deterministic exact
202    /// set of provenance tokens attached to that stored fact. When `fact` is
203    /// absent, this returns `None`.
204    ///
205    /// In this first G3 slice, every stored fact is a base fact with at least
206    /// one token, so `None` means the relation has no explanation for the fact
207    /// because the fact is absent.
208    ///
209    /// # Examples
210    ///
211    /// ```rust
212    /// use relmath::provenance::ProvenanceRelation;
213    ///
214    /// let evidence = ProvenanceRelation::from_facts([
215    ///     (("BRCA1", "BreastCancer"), "paper_12"),
216    ///     (("BRCA1", "BreastCancer"), "curated_panel"),
217    /// ]);
218    ///
219    /// let why = evidence
220    ///     .why(&("BRCA1", "BreastCancer"))
221    ///     .expect("expected explanation");
222    ///
223    /// assert!(why.contains_token(&"paper_12"));
224    /// assert!(why.contains_token(&"curated_panel"));
225    /// assert!(evidence.why(&("BRCA1", "Olaparib")).is_none());
226    /// ```
227    #[must_use]
228    pub fn why(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
229        self.facts.get(fact)
230    }
231
232    /// Returns the deterministic provenance set for one fact.
233    ///
234    /// Prefer [`Self::why`] for explanation-oriented queries.
235    #[must_use]
236    pub fn provenance_of(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
237        self.why(fact)
238    }
239
240    /// Returns an iterator over facts and provenance in deterministic order.
241    ///
242    /// # Examples
243    ///
244    /// ```rust
245    /// use relmath::provenance::ProvenanceRelation;
246    ///
247    /// let evidence = ProvenanceRelation::from_facts([
248    ///     (("BRCA1", "BreastCancer"), "paper_12"),
249    ///     (("BRCA1", "BreastCancer"), "curated_panel"),
250    ///     (("TP53", "BreastCancer"), "paper_77"),
251    /// ]);
252    ///
253    /// assert_eq!(
254    ///     evidence
255    ///         .iter()
256    ///         .map(|(fact, witness)| (*fact, witness.to_vec()))
257    ///         .collect::<Vec<_>>(),
258    ///     vec![
259    ///         (("BRCA1", "BreastCancer"), vec!["curated_panel", "paper_12"]),
260    ///         (("TP53", "BreastCancer"), vec!["paper_77"]),
261    ///     ]
262    /// );
263    /// ```
264    pub fn iter(&self) -> impl Iterator<Item = (&F, &ProvenanceSet<P>)> {
265        self.facts.iter()
266    }
267
268    /// Returns the exact support relation of stored facts as a unary relation.
269    ///
270    /// This support forgets provenance and keeps only which facts are present.
271    /// Generic code can access the same relation-level boundary through
272    /// [`crate::ExactSupport::exact_support`].
273    ///
274    /// # Examples
275    ///
276    /// ```rust
277    /// use relmath::provenance::ProvenanceRelation;
278    ///
279    /// let evidence = ProvenanceRelation::from_facts([
280    ///     (("BRCA1", "BreastCancer"), "paper_12"),
281    ///     (("BRCA1", "BreastCancer"), "curated_panel"),
282    ///     (("TP53", "BreastCancer"), "paper_77"),
283    /// ]);
284    ///
285    /// assert_eq!(
286    ///     evidence.support().to_vec(),
287    ///     vec![("BRCA1", "BreastCancer"), ("TP53", "BreastCancer")]
288    /// );
289    /// ```
290    #[must_use]
291    pub fn support(&self) -> UnaryRelation<F>
292    where
293        F: Clone,
294    {
295        self.facts.keys().cloned().collect()
296    }
297}
298
299impl<F: Ord, P: Ord> Default for ProvenanceRelation<F, P> {
300    fn default() -> Self {
301        Self::new()
302    }
303}
304
305impl<F: Ord, P: Ord> FiniteRelation for ProvenanceRelation<F, P> {
306    fn len(&self) -> usize {
307        self.facts.len()
308    }
309}
310
311impl<F: Ord, P: Ord> FromIterator<(F, P)> for ProvenanceRelation<F, P> {
312    fn from_iter<I: IntoIterator<Item = (F, P)>>(iter: I) -> Self {
313        let mut relation = Self::new();
314        relation.extend(iter);
315        relation
316    }
317}
318
319impl<F: Ord, P: Ord> Extend<(F, P)> for ProvenanceRelation<F, P> {
320    fn extend<I: IntoIterator<Item = (F, P)>>(&mut self, iter: I) {
321        for (fact, token) in iter {
322            self.insert(fact, token);
323        }
324    }
325}
326
327impl<T: Ord + Clone, P: Ord> ProvenanceRelation<T, P> {
328    /// Converts scalar facts into a unary relation support set.
329    ///
330    /// Generic code can access the same unary materialization boundary through
331    /// [`crate::ToExactUnaryRelation`].
332    ///
333    /// # Examples
334    ///
335    /// ```rust
336    /// use relmath::provenance::ProvenanceRelation;
337    ///
338    /// let concepts = ProvenanceRelation::from_facts([
339    ///     ("Relations", "lecture_1"),
340    ///     ("Relations", "worksheet"),
341    ///     ("Closure", "lecture_2"),
342    /// ]);
343    ///
344    /// assert_eq!(concepts.to_unary_relation().to_vec(), vec!["Closure", "Relations"]);
345    /// ```
346    #[must_use]
347    pub fn to_unary_relation(&self) -> UnaryRelation<T> {
348        self.support()
349    }
350}
351
352impl<A: Ord + Clone, B: Ord + Clone, P: Ord> ProvenanceRelation<(A, B), P> {
353    /// Converts pair facts into a binary relation support set.
354    ///
355    /// Generic code can access the same binary materialization boundary
356    /// through [`crate::ToExactBinaryRelation`].
357    ///
358    /// # Examples
359    ///
360    /// ```rust
361    /// use relmath::provenance::ProvenanceRelation;
362    ///
363    /// let evidence = ProvenanceRelation::from_facts([
364    ///     (("Alice", "Reader"), "directory"),
365    ///     (("Bob", "Editor"), "directory"),
366    /// ]);
367    ///
368    /// assert_eq!(
369    ///     evidence.to_binary_relation().to_vec(),
370    ///     vec![("Alice", "Reader"), ("Bob", "Editor")]
371    /// );
372    /// ```
373    #[must_use]
374    pub fn to_binary_relation(&self) -> BinaryRelation<A, B> {
375        self.facts.keys().cloned().collect()
376    }
377}
378
379impl<T: Ord + Clone, P: Ord> ProvenanceRelation<Vec<T>, P> {
380    /// Converts row facts into an n-ary relation with the given schema.
381    ///
382    /// The explicit schema preserves the current exact n-ary boundary where row
383    /// values do not themselves encode column names. This method reuses the
384    /// current `NaryRelation` validation rules, so duplicate or blank column
385    /// names and row-arity mismatches return `NaryRelationError`. Generic code
386    /// can access the same n-ary materialization boundary through
387    /// [`crate::ToExactNaryRelation`].
388    ///
389    /// # Examples
390    ///
391    /// ```rust
392    /// use relmath::provenance::ProvenanceRelation;
393    ///
394    /// let rows = ProvenanceRelation::from_facts([
395    ///     (vec!["Alice", "Math", "passed"], "gradebook"),
396    ///     (vec!["Alice", "Math", "passed"], "reviewed"),
397    ///     (vec!["Bob", "Physics", "passed"], "gradebook"),
398    /// ]);
399    ///
400    /// let relation = rows
401    ///     .to_nary_relation(["student", "course", "status"])
402    ///     .expect("expected valid support relation");
403    ///
404    /// assert_eq!(
405    ///     relation.to_rows(),
406    ///     vec![
407    ///         vec!["Alice", "Math", "passed"],
408    ///         vec!["Bob", "Physics", "passed"],
409    ///     ]
410    /// );
411    /// ```
412    pub fn to_nary_relation<I, S>(&self, schema: I) -> Result<NaryRelation<T>, NaryRelationError>
413    where
414        I: IntoIterator<Item = S>,
415        S: Into<String>,
416    {
417        NaryRelation::from_rows(schema, self.facts.keys().cloned())
418    }
419}