relmath/provenance.rs
1//! Deterministic exact provenance support.
2//!
3//! In this first G3 slice, a provenance token or label is an explicit stable
4//! identifier attached directly to a stored fact. A witness is the exact,
5//! deterministic set of provenance tokens attached to one stored fact. A
6//! derived tuple is a tuple produced by later inference or algebraic
7//! derivation; this module does not compute derived tuples yet. The first
8//! explanation query in this module is
9//! [`crate::provenance::ProvenanceRelation::why`], which explains only stored
10//! base facts. When a fact is absent, `why` returns `None` rather than an
11//! empty [`crate::provenance::ProvenanceSet`]. No public rule API lives in
12//! this module yet; the first least-fixed-point rule layer is still
13//! documented as later G3 work.
14
15use std::collections::{BTreeMap, BTreeSet};
16
17use crate::{
18 BinaryRelation, NaryRelation, NaryRelationError, UnaryRelation, traits::FiniteRelation,
19};
20
21/// Deterministic provenance tokens attached to one stored fact.
22///
23/// `ProvenanceSet<P>` is the user-visible witness for a fact in this first G3
24/// slice. It is an exact `BTreeSet`-backed set of tokens or labels, not a
25/// derivation DAG.
26///
27/// # Examples
28///
29/// ```rust
30/// use relmath::provenance::{ProvenanceRelation, ProvenanceSet};
31///
32/// let empty = ProvenanceSet::<&str>::default();
33/// assert!(empty.is_empty());
34///
35/// let evidence = ProvenanceRelation::from_facts([
36/// (("BRCA1", "BreastCancer"), "paper_12"),
37/// (("BRCA1", "BreastCancer"), "curated_panel"),
38/// ]);
39/// let witness = evidence
40/// .why(&("BRCA1", "BreastCancer"))
41/// .expect("expected explanation");
42///
43/// assert_eq!(witness.len(), 2);
44/// assert!(witness.contains(&"paper_12"));
45/// assert!(witness.contains_token(&"curated_panel"));
46/// assert_eq!(
47/// witness.iter().copied().collect::<Vec<_>>(),
48/// vec!["curated_panel", "paper_12"]
49/// );
50/// assert_eq!(witness.to_vec(), vec!["curated_panel", "paper_12"]);
51/// ```
52#[derive(Clone, Debug, PartialEq, Eq)]
53pub struct ProvenanceSet<P: Ord> {
54 tokens: BTreeSet<P>,
55}
56
57impl<P: Ord> ProvenanceSet<P> {
58 /// Returns the number of provenance tokens in the set.
59 #[must_use]
60 pub fn len(&self) -> usize {
61 self.tokens.len()
62 }
63
64 /// Returns `true` when the provenance set contains no tokens.
65 #[must_use]
66 pub fn is_empty(&self) -> bool {
67 self.tokens.is_empty()
68 }
69
70 /// Returns `true` when the set contains the given token.
71 #[must_use]
72 pub fn contains(&self, token: &P) -> bool {
73 self.tokens.contains(token)
74 }
75
76 /// Returns `true` when the set contains the given provenance token.
77 #[must_use]
78 pub fn contains_token(&self, token: &P) -> bool {
79 self.contains(token)
80 }
81
82 /// Returns an iterator over provenance tokens in deterministic order.
83 pub fn iter(&self) -> impl Iterator<Item = &P> {
84 self.tokens.iter()
85 }
86
87 /// Converts the provenance set into a sorted vector of tokens.
88 #[must_use]
89 pub fn to_vec(&self) -> Vec<P>
90 where
91 P: Clone,
92 {
93 self.tokens.iter().cloned().collect()
94 }
95}
96
97impl<P: Ord> Default for ProvenanceSet<P> {
98 fn default() -> Self {
99 Self {
100 tokens: BTreeSet::new(),
101 }
102 }
103}
104
105/// Deterministic exact provenance attached to stored facts.
106///
107/// `ProvenanceRelation<F, P>` maps each stored fact to a deterministic set of
108/// provenance tokens. Repeated insertion of the same `(fact, token)` pair is
109/// idempotent. Inserting a new token for an existing fact combines provenance
110/// by exact set union.
111///
112/// In this first additive provenance slice, every stored fact is a base fact.
113/// The module does not yet derive new tuples. [`Self::why`] explains stored
114/// facts only; derived-tuple explanations and `why_not` queries remain later
115/// work.
116///
117/// # Examples
118///
119/// ```rust
120/// use relmath::{UnaryRelation, provenance::ProvenanceRelation};
121///
122/// let gene_disease = ProvenanceRelation::from_facts([
123/// (("BRCA1", "BreastCancer"), "paper_12"),
124/// (("BRCA1", "BreastCancer"), "curated_panel"),
125/// (("TP53", "BreastCancer"), "paper_77"),
126/// ]);
127///
128/// let supported = gene_disease.to_binary_relation();
129/// let brca1 = UnaryRelation::singleton("BRCA1");
130///
131/// assert_eq!(supported.image(&brca1).to_vec(), vec!["BreastCancer"]);
132/// assert_eq!(
133/// gene_disease
134/// .why(&("BRCA1", "BreastCancer"))
135/// .expect("expected explanation")
136/// .to_vec(),
137/// vec!["curated_panel", "paper_12"]
138/// );
139/// ```
140#[derive(Clone, Debug, PartialEq, Eq)]
141pub struct ProvenanceRelation<F: Ord, P: Ord> {
142 facts: BTreeMap<F, ProvenanceSet<P>>,
143}
144
145impl<F: Ord, P: Ord> ProvenanceRelation<F, P> {
146 /// Creates an empty provenance relation.
147 ///
148 /// # Examples
149 ///
150 /// ```rust
151 /// use relmath::provenance::ProvenanceRelation;
152 ///
153 /// let mut evidence = ProvenanceRelation::new();
154 ///
155 /// assert!(evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
156 /// assert!(!evidence.insert(("BRCA1", "BreastCancer"), "paper_12"));
157 /// assert!(evidence.contains_fact(&("BRCA1", "BreastCancer")));
158 /// assert_eq!(
159 /// evidence
160 /// .provenance_of(&("BRCA1", "BreastCancer"))
161 /// .expect("expected explanation")
162 /// .to_vec(),
163 /// vec!["paper_12"]
164 /// );
165 /// ```
166 #[must_use]
167 pub fn new() -> Self {
168 Self {
169 facts: BTreeMap::new(),
170 }
171 }
172
173 /// Creates a provenance relation from `(fact, token)` entries.
174 ///
175 /// Repeated facts accumulate tokens by exact set union.
176 #[must_use]
177 pub fn from_facts<I>(facts: I) -> Self
178 where
179 I: IntoIterator<Item = (F, P)>,
180 {
181 facts.into_iter().collect()
182 }
183
184 /// Inserts one provenance token for a fact.
185 ///
186 /// Returns `true` when the token was not already attached to the fact.
187 pub fn insert(&mut self, fact: F, token: P) -> bool {
188 self.facts.entry(fact).or_default().tokens.insert(token)
189 }
190
191 /// Returns `true` when the relation contains the given fact.
192 #[must_use]
193 pub fn contains_fact(&self, fact: &F) -> bool {
194 self.facts.contains_key(fact)
195 }
196
197 /// Returns why `fact` is present in this provenance relation.
198 ///
199 /// When `fact` is present, the returned witness is the deterministic exact
200 /// set of provenance tokens attached to that stored fact. When `fact` is
201 /// absent, this returns `None`.
202 ///
203 /// In this first G3 slice, every stored fact is a base fact with at least
204 /// one token, so `None` means the relation has no explanation for the fact
205 /// because the fact is absent.
206 ///
207 /// # Examples
208 ///
209 /// ```rust
210 /// use relmath::provenance::ProvenanceRelation;
211 ///
212 /// let evidence = ProvenanceRelation::from_facts([
213 /// (("BRCA1", "BreastCancer"), "paper_12"),
214 /// (("BRCA1", "BreastCancer"), "curated_panel"),
215 /// ]);
216 ///
217 /// let why = evidence
218 /// .why(&("BRCA1", "BreastCancer"))
219 /// .expect("expected explanation");
220 ///
221 /// assert!(why.contains_token(&"paper_12"));
222 /// assert!(why.contains_token(&"curated_panel"));
223 /// assert!(evidence.why(&("BRCA1", "Olaparib")).is_none());
224 /// ```
225 #[must_use]
226 pub fn why(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
227 self.facts.get(fact)
228 }
229
230 /// Returns the deterministic provenance set for one fact.
231 ///
232 /// Prefer [`Self::why`] for explanation-oriented queries.
233 #[must_use]
234 pub fn provenance_of(&self, fact: &F) -> Option<&ProvenanceSet<P>> {
235 self.why(fact)
236 }
237
238 /// Returns an iterator over facts and provenance in deterministic order.
239 ///
240 /// # Examples
241 ///
242 /// ```rust
243 /// use relmath::provenance::ProvenanceRelation;
244 ///
245 /// let evidence = ProvenanceRelation::from_facts([
246 /// (("BRCA1", "BreastCancer"), "paper_12"),
247 /// (("BRCA1", "BreastCancer"), "curated_panel"),
248 /// (("TP53", "BreastCancer"), "paper_77"),
249 /// ]);
250 ///
251 /// assert_eq!(
252 /// evidence
253 /// .iter()
254 /// .map(|(fact, witness)| (*fact, witness.to_vec()))
255 /// .collect::<Vec<_>>(),
256 /// vec![
257 /// (("BRCA1", "BreastCancer"), vec!["curated_panel", "paper_12"]),
258 /// (("TP53", "BreastCancer"), vec!["paper_77"]),
259 /// ]
260 /// );
261 /// ```
262 pub fn iter(&self) -> impl Iterator<Item = (&F, &ProvenanceSet<P>)> {
263 self.facts.iter()
264 }
265
266 /// Returns the exact support relation of stored facts as a unary relation.
267 ///
268 /// This support forgets provenance and keeps only which facts are present.
269 ///
270 /// # Examples
271 ///
272 /// ```rust
273 /// use relmath::provenance::ProvenanceRelation;
274 ///
275 /// let evidence = ProvenanceRelation::from_facts([
276 /// (("BRCA1", "BreastCancer"), "paper_12"),
277 /// (("BRCA1", "BreastCancer"), "curated_panel"),
278 /// (("TP53", "BreastCancer"), "paper_77"),
279 /// ]);
280 ///
281 /// assert_eq!(
282 /// evidence.support().to_vec(),
283 /// vec![("BRCA1", "BreastCancer"), ("TP53", "BreastCancer")]
284 /// );
285 /// ```
286 #[must_use]
287 pub fn support(&self) -> UnaryRelation<F>
288 where
289 F: Clone,
290 {
291 self.facts.keys().cloned().collect()
292 }
293}
294
295impl<F: Ord, P: Ord> Default for ProvenanceRelation<F, P> {
296 fn default() -> Self {
297 Self::new()
298 }
299}
300
301impl<F: Ord, P: Ord> FiniteRelation for ProvenanceRelation<F, P> {
302 fn len(&self) -> usize {
303 self.facts.len()
304 }
305}
306
307impl<F: Ord, P: Ord> FromIterator<(F, P)> for ProvenanceRelation<F, P> {
308 fn from_iter<I: IntoIterator<Item = (F, P)>>(iter: I) -> Self {
309 let mut relation = Self::new();
310 relation.extend(iter);
311 relation
312 }
313}
314
315impl<F: Ord, P: Ord> Extend<(F, P)> for ProvenanceRelation<F, P> {
316 fn extend<I: IntoIterator<Item = (F, P)>>(&mut self, iter: I) {
317 for (fact, token) in iter {
318 self.insert(fact, token);
319 }
320 }
321}
322
323impl<T: Ord + Clone, P: Ord> ProvenanceRelation<T, P> {
324 /// Converts scalar facts into a unary relation support set.
325 ///
326 /// # Examples
327 ///
328 /// ```rust
329 /// use relmath::provenance::ProvenanceRelation;
330 ///
331 /// let concepts = ProvenanceRelation::from_facts([
332 /// ("Relations", "lecture_1"),
333 /// ("Relations", "worksheet"),
334 /// ("Closure", "lecture_2"),
335 /// ]);
336 ///
337 /// assert_eq!(concepts.to_unary_relation().to_vec(), vec!["Closure", "Relations"]);
338 /// ```
339 #[must_use]
340 pub fn to_unary_relation(&self) -> UnaryRelation<T> {
341 self.support()
342 }
343}
344
345impl<A: Ord + Clone, B: Ord + Clone, P: Ord> ProvenanceRelation<(A, B), P> {
346 /// Converts pair facts into a binary relation support set.
347 ///
348 /// # Examples
349 ///
350 /// ```rust
351 /// use relmath::provenance::ProvenanceRelation;
352 ///
353 /// let evidence = ProvenanceRelation::from_facts([
354 /// (("Alice", "Reader"), "directory"),
355 /// (("Bob", "Editor"), "directory"),
356 /// ]);
357 ///
358 /// assert_eq!(
359 /// evidence.to_binary_relation().to_vec(),
360 /// vec![("Alice", "Reader"), ("Bob", "Editor")]
361 /// );
362 /// ```
363 #[must_use]
364 pub fn to_binary_relation(&self) -> BinaryRelation<A, B> {
365 self.facts.keys().cloned().collect()
366 }
367}
368
369impl<T: Ord + Clone, P: Ord> ProvenanceRelation<Vec<T>, P> {
370 /// Converts row facts into an n-ary relation with the given schema.
371 ///
372 /// The explicit schema preserves the current exact n-ary boundary where row
373 /// values do not themselves encode column names. This method reuses the
374 /// current `NaryRelation` validation rules, so duplicate or blank column
375 /// names and row-arity mismatches return `NaryRelationError`.
376 ///
377 /// # Examples
378 ///
379 /// ```rust
380 /// use relmath::provenance::ProvenanceRelation;
381 ///
382 /// let rows = ProvenanceRelation::from_facts([
383 /// (vec!["Alice", "Math", "passed"], "gradebook"),
384 /// (vec!["Alice", "Math", "passed"], "reviewed"),
385 /// (vec!["Bob", "Physics", "passed"], "gradebook"),
386 /// ]);
387 ///
388 /// let relation = rows
389 /// .to_nary_relation(["student", "course", "status"])
390 /// .expect("expected valid support relation");
391 ///
392 /// assert_eq!(
393 /// relation.to_rows(),
394 /// vec![
395 /// vec!["Alice", "Math", "passed"],
396 /// vec!["Bob", "Physics", "passed"],
397 /// ]
398 /// );
399 /// ```
400 pub fn to_nary_relation<I, S>(&self, schema: I) -> Result<NaryRelation<T>, NaryRelationError>
401 where
402 I: IntoIterator<Item = S>,
403 S: Into<String>,
404 {
405 NaryRelation::from_rows(schema, self.facts.keys().cloned())
406 }
407}