Skip to main content

relmath/core/
nary.rs

1use std::{
2    collections::{BTreeMap, BTreeSet},
3    error::Error,
4    fmt,
5};
6
7use crate::traits::FiniteRelation;
8
9/// Errors returned by schema-aware n-ary relation operations.
10#[derive(Clone, Debug, PartialEq, Eq)]
11pub enum NaryRelationError {
12    /// The provided schema has no columns.
13    EmptySchema,
14    /// A column name is blank or only whitespace.
15    BlankColumnName {
16        /// The zero-based schema position with the blank name.
17        index: usize,
18    },
19    /// A column name is repeated where uniqueness is required.
20    DuplicateColumn {
21        /// The repeated column name.
22        column: String,
23    },
24    /// A row does not match the relation arity.
25    RowArityMismatch {
26        /// The schema arity expected by the relation.
27        expected: usize,
28        /// The number of values that actually appeared in the row.
29        actual: usize,
30    },
31    /// An operation referenced a column that is not present in the schema.
32    UnknownColumn {
33        /// The missing column name.
34        column: String,
35    },
36    /// A named-row input omitted a column required by the schema.
37    MissingColumn {
38        /// The missing column name.
39        column: String,
40    },
41    /// A named-row input provided a column that is not in the schema.
42    UnexpectedColumn {
43        /// The unexpected column name.
44        column: String,
45    },
46    /// An operation requires identical schemas but received different ones.
47    SchemaMismatch {
48        /// The left-hand schema.
49        left: Vec<String>,
50        /// The right-hand schema.
51        right: Vec<String>,
52    },
53}
54
55impl fmt::Display for NaryRelationError {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57        match self {
58            Self::EmptySchema => f.write_str("n-ary relation schema cannot be empty"),
59            Self::BlankColumnName { index } => {
60                write!(f, "column at index {index} must have a non-blank name")
61            }
62            Self::DuplicateColumn { column } => {
63                write!(f, "column `{column}` appears more than once")
64            }
65            Self::RowArityMismatch { expected, actual } => write!(
66                f,
67                "row arity mismatch: expected {expected} values but found {actual}"
68            ),
69            Self::UnknownColumn { column } => {
70                write!(f, "column `{column}` is not present in the schema")
71            }
72            Self::MissingColumn { column } => {
73                write!(f, "named row is missing required column `{column}`")
74            }
75            Self::UnexpectedColumn { column } => {
76                write!(f, "named row contains unexpected column `{column}`")
77            }
78            Self::SchemaMismatch { left, right } => {
79                write!(f, "schema mismatch: left = {:?}, right = {:?}", left, right)
80            }
81        }
82    }
83}
84
85impl Error for NaryRelationError {}
86
87/// A deterministic exact grouping of an n-ary relation by named key columns.
88///
89/// `GroupedRelation<T>` is the first G2 grouping surface for exact n-ary
90/// relations. Groups are keyed by projected key rows and stored in a
91/// `BTreeMap`, so group iteration order is deterministic.
92///
93/// In this first exact grouping slice, each grouped member relation keeps the
94/// original relation schema rather than removing key columns. This avoids
95/// introducing zero-ary relation semantics while keeping the grouped output
96/// relation-first and easy to inspect.
97///
98/// The first exact aggregate on grouped output is [`Self::counts`], which
99/// returns the number of stored rows in each group.
100///
101/// # Examples
102///
103/// ```rust
104/// use relmath::NaryRelation;
105///
106/// let completed = NaryRelation::from_rows(
107///     ["student", "course", "term"],
108///     [
109///         ["Alice", "Math", "Fall"],
110///         ["Alice", "Physics", "Fall"],
111///         ["Bob", "Math", "Spring"],
112///     ],
113/// )?;
114///
115/// let grouped = completed.group_by(["term", "student"])?;
116///
117/// assert_eq!(
118///     grouped.key_schema(),
119///     &["term".to_string(), "student".to_string()]
120/// );
121/// assert_eq!(
122///     grouped.counts(),
123///     vec![(vec!["Fall", "Alice"], 2), (vec!["Spring", "Bob"], 1)]
124/// );
125/// # Ok::<(), relmath::NaryRelationError>(())
126/// ```
127#[derive(Clone, Debug, PartialEq, Eq)]
128pub struct GroupedRelation<T: Ord> {
129    key_schema: Vec<String>,
130    member_schema: Vec<String>,
131    groups: BTreeMap<Vec<T>, NaryRelation<T>>,
132}
133
134impl<T: Ord> GroupedRelation<T> {
135    /// Returns the grouping key schema in order.
136    #[must_use]
137    pub fn key_schema(&self) -> &[String] {
138        &self.key_schema
139    }
140
141    /// Returns the schema of each member relation.
142    ///
143    /// In the current exact grouping slice this is the full original relation
144    /// schema.
145    #[must_use]
146    pub fn member_schema(&self) -> &[String] {
147        &self.member_schema
148    }
149
150    /// Returns the number of groups.
151    #[must_use]
152    pub fn len(&self) -> usize {
153        self.groups.len()
154    }
155
156    /// Returns `true` when the grouping contains no groups.
157    #[must_use]
158    pub fn is_empty(&self) -> bool {
159        self.groups.is_empty()
160    }
161
162    /// Returns the member relation for the given grouping key.
163    #[must_use]
164    pub fn group(&self, key: &[T]) -> Option<&NaryRelation<T>> {
165        self.groups.get(key)
166    }
167
168    /// Returns an iterator over groups in deterministic key order.
169    pub fn iter(&self) -> impl Iterator<Item = (&[T], &NaryRelation<T>)> {
170        self.groups
171            .iter()
172            .map(|(key, group)| (key.as_slice(), group))
173    }
174
175    /// Returns the first exact aggregate over the grouped relation: row counts.
176    ///
177    /// Each count is the number of stored rows in the corresponding group after
178    /// the base relation's set semantics have already deduplicated equal rows.
179    /// Results follow the deterministic key order of the grouping.
180    #[must_use]
181    pub fn counts(&self) -> Vec<(Vec<T>, usize)>
182    where
183        T: Clone,
184    {
185        self.groups
186            .iter()
187            .map(|(key, group)| (key.clone(), group.len()))
188            .collect()
189    }
190}
191
192/// A deterministic exact n-ary relation with a named schema.
193///
194/// `NaryRelation<T>` is the first G2 building block for schema-aware
195/// relations. It stores column names explicitly and keeps rows in a `BTreeSet`
196/// so iteration order is deterministic.
197///
198/// In this initial G2 step, all cells in a row share the same value type `T`.
199///
200/// # Examples
201///
202/// ```rust
203/// use relmath::NaryRelation;
204///
205/// let mut passed = NaryRelation::new(["student", "course", "status"])?;
206///
207/// assert_eq!(passed.column_index("course")?, 1);
208/// assert!(passed.insert_row(["Alice", "Math", "passed"])?);
209/// assert!(passed.insert_row(["Alice", "Physics", "passed"])?);
210/// assert!(passed.insert_row(["Bob", "Physics", "in_progress"])?);
211///
212/// let completed = passed.select(|row| row[2] == "passed");
213/// let students = completed.project(["student"])?;
214/// let renamed = students.rename("student", "learner")?;
215///
216/// assert_eq!(passed.arity(), 3);
217/// assert!(completed.contains_row(&["Alice", "Math", "passed"]));
218/// assert_eq!(renamed.schema(), &["learner".to_string()]);
219/// # Ok::<(), relmath::NaryRelationError>(())
220/// ```
221#[derive(Clone, Debug, PartialEq, Eq)]
222pub struct NaryRelation<T: Ord> {
223    schema: Vec<String>,
224    rows: BTreeSet<Vec<T>>,
225}
226
227impl<T: Ord> NaryRelation<T> {
228    /// Creates an empty n-ary relation with the given schema.
229    ///
230    /// Column names must be unique and non-blank.
231    pub fn new<I, S>(schema: I) -> Result<Self, NaryRelationError>
232    where
233        I: IntoIterator<Item = S>,
234        S: Into<String>,
235    {
236        Ok(Self {
237            schema: Self::validate_schema(schema)?,
238            rows: BTreeSet::new(),
239        })
240    }
241
242    /// Creates an n-ary relation from a schema and rows.
243    ///
244    /// Column names must be unique and non-blank, and every row must have the
245    /// same arity as the schema.
246    pub fn from_rows<I, S, R, Row>(schema: I, rows: R) -> Result<Self, NaryRelationError>
247    where
248        I: IntoIterator<Item = S>,
249        S: Into<String>,
250        R: IntoIterator<Item = Row>,
251        Row: IntoIterator<Item = T>,
252    {
253        let schema = Self::validate_schema(schema)?;
254        let arity = schema.len();
255        let mut dedup_rows = BTreeSet::new();
256
257        for row in rows {
258            dedup_rows.insert(Self::collect_row(arity, row)?);
259        }
260
261        Ok(Self {
262            schema,
263            rows: dedup_rows,
264        })
265    }
266
267    /// Creates an n-ary relation from a schema and named rows.
268    ///
269    /// This is the current std-only G2 onboarding boundary. Each input row
270    /// must provide exactly the schema columns by name. The schema order stays
271    /// explicit in the relation rather than being inferred from map iteration.
272    ///
273    /// # Examples
274    ///
275    /// ```rust
276    /// use std::collections::BTreeMap;
277    ///
278    /// use relmath::NaryRelation;
279    ///
280    /// let progress = NaryRelation::from_named_rows(
281    ///     ["student", "course", "status"],
282    ///     [
283    ///         BTreeMap::from([
284    ///             ("course", "Math"),
285    ///             ("status", "passed"),
286    ///             ("student", "Alice"),
287    ///         ]),
288    ///         BTreeMap::from([
289    ///             ("course", "Physics"),
290    ///             ("status", "in_progress"),
291    ///             ("student", "Bob"),
292    ///         ]),
293    ///     ],
294    /// )?;
295    ///
296    /// assert_eq!(
297    ///     progress.to_rows(),
298    ///     vec![
299    ///         vec!["Alice", "Math", "passed"],
300    ///         vec!["Bob", "Physics", "in_progress"],
301    ///     ]
302    /// );
303    /// # Ok::<(), relmath::NaryRelationError>(())
304    /// ```
305    pub fn from_named_rows<I, S, R, K>(schema: I, rows: R) -> Result<Self, NaryRelationError>
306    where
307        I: IntoIterator<Item = S>,
308        S: Into<String>,
309        R: IntoIterator<Item = BTreeMap<K, T>>,
310        K: Into<String> + Ord,
311    {
312        let schema = Self::validate_schema(schema)?;
313        let mut dedup_rows = BTreeSet::new();
314
315        for named_row in rows {
316            dedup_rows.insert(Self::collect_named_row(&schema, named_row)?);
317        }
318
319        Ok(Self {
320            schema,
321            rows: dedup_rows,
322        })
323    }
324
325    /// Returns the schema column names in order.
326    #[must_use]
327    pub fn schema(&self) -> &[String] {
328        &self.schema
329    }
330
331    /// Returns the arity of the relation.
332    #[must_use]
333    pub fn arity(&self) -> usize {
334        self.schema.len()
335    }
336
337    /// Returns the zero-based position of a named column.
338    ///
339    /// # Examples
340    ///
341    /// ```rust
342    /// use relmath::NaryRelation;
343    ///
344    /// let students = NaryRelation::<&str>::new(["student", "course"])?;
345    ///
346    /// assert_eq!(students.column_index("student")?, 0);
347    /// assert_eq!(students.column_index("course")?, 1);
348    /// # Ok::<(), relmath::NaryRelationError>(())
349    /// ```
350    pub fn column_index(&self, column: &str) -> Result<usize, NaryRelationError> {
351        self.schema
352            .iter()
353            .position(|candidate| candidate == column)
354            .ok_or_else(|| NaryRelationError::UnknownColumn {
355                column: column.to_string(),
356            })
357    }
358
359    /// Inserts a row into the relation.
360    ///
361    /// Returns `true` when the row was not already present.
362    pub fn insert_row<I>(&mut self, row: I) -> Result<bool, NaryRelationError>
363    where
364        I: IntoIterator<Item = T>,
365    {
366        Ok(self.rows.insert(Self::collect_row(self.arity(), row)?))
367    }
368
369    /// Returns `true` when the relation contains the given row.
370    #[must_use]
371    pub fn contains_row(&self, row: &[T]) -> bool {
372        self.rows.contains(row)
373    }
374
375    /// Returns an iterator over the stored rows in deterministic order.
376    pub fn iter(&self) -> impl Iterator<Item = &[T]> {
377        self.rows.iter().map(Vec::as_slice)
378    }
379
380    /// Returns the rows as a deterministically ordered vector.
381    #[must_use]
382    pub fn to_rows(&self) -> Vec<Vec<T>>
383    where
384        T: Clone,
385    {
386        self.rows.iter().cloned().collect()
387    }
388
389    /// Exports the relation as named rows.
390    ///
391    /// The returned vector follows the relation's deterministic row order.
392    /// Each row is materialized as a `BTreeMap`, so key lookup is by column
393    /// name rather than schema position. Preserve [`Self::schema`] separately
394    /// when schema order matters to a downstream consumer.
395    ///
396    /// # Examples
397    ///
398    /// ```rust
399    /// use std::collections::BTreeMap;
400    ///
401    /// use relmath::NaryRelation;
402    ///
403    /// let progress = NaryRelation::from_rows(
404    ///     ["student", "course"],
405    ///     [["Alice", "Math"], ["Bob", "Physics"]],
406    /// )?;
407    ///
408    /// assert_eq!(
409    ///     progress.to_named_rows(),
410    ///     vec![
411    ///         BTreeMap::from([
412    ///             ("course".to_string(), "Math"),
413    ///             ("student".to_string(), "Alice"),
414    ///         ]),
415    ///         BTreeMap::from([
416    ///             ("course".to_string(), "Physics"),
417    ///             ("student".to_string(), "Bob"),
418    ///         ]),
419    ///     ]
420    /// );
421    /// # Ok::<(), relmath::NaryRelationError>(())
422    /// ```
423    #[must_use]
424    pub fn to_named_rows(&self) -> Vec<BTreeMap<String, T>>
425    where
426        T: Clone,
427    {
428        self.rows
429            .iter()
430            .map(|row| {
431                self.schema
432                    .iter()
433                    .cloned()
434                    .zip(row.iter().cloned())
435                    .collect()
436            })
437            .collect()
438    }
439
440    /// Groups the relation by the named key columns.
441    ///
442    /// Group keys follow the exact order of `columns`. Empty key selections,
443    /// duplicate key columns, and unknown columns are rejected by the current
444    /// exact core.
445    ///
446    /// Each member relation keeps the original relation schema and contains the
447    /// subset of rows whose projected key matches the group key. Groups are
448    /// stored in a `BTreeMap`, so group iteration order is deterministic.
449    ///
450    /// The first exact aggregate over the grouped output is
451    /// [`GroupedRelation::counts`], which reports the number of stored rows in
452    /// each group.
453    ///
454    /// # Examples
455    ///
456    /// ```rust
457    /// use relmath::NaryRelation;
458    ///
459    /// let completed = NaryRelation::from_rows(
460    ///     ["student", "course", "term"],
461    ///     [
462    ///         ["Alice", "Math", "Fall"],
463    ///         ["Alice", "Physics", "Fall"],
464    ///         ["Bob", "Math", "Spring"],
465    ///     ],
466    /// )?;
467    ///
468    /// let grouped = completed.group_by(["term", "student"])?;
469    ///
470    /// assert_eq!(
471    ///     grouped.key_schema(),
472    ///     &["term".to_string(), "student".to_string()]
473    /// );
474    /// assert_eq!(
475    ///     grouped.counts(),
476    ///     vec![(vec!["Fall", "Alice"], 2), (vec!["Spring", "Bob"], 1)]
477    /// );
478    /// # Ok::<(), relmath::NaryRelationError>(())
479    /// ```
480    pub fn group_by<I, S>(&self, columns: I) -> Result<GroupedRelation<T>, NaryRelationError>
481    where
482        I: IntoIterator<Item = S>,
483        S: AsRef<str>,
484        T: Clone,
485    {
486        let columns: Vec<String> = columns
487            .into_iter()
488            .map(|column| column.as_ref().to_string())
489            .collect();
490        let key_schema = Self::validate_schema(columns)?;
491        let mut indices = Vec::with_capacity(key_schema.len());
492
493        for column in &key_schema {
494            indices.push(self.column_index(column)?);
495        }
496
497        let mut raw_groups: BTreeMap<Vec<T>, BTreeSet<Vec<T>>> = BTreeMap::new();
498        for row in &self.rows {
499            let key = indices.iter().map(|index| row[*index].clone()).collect();
500            raw_groups.entry(key).or_default().insert(row.clone());
501        }
502
503        let member_schema = self.schema.clone();
504        let groups = raw_groups
505            .into_iter()
506            .map(|(key, rows)| {
507                (
508                    key,
509                    Self {
510                        schema: member_schema.clone(),
511                        rows,
512                    },
513                )
514            })
515            .collect();
516
517        Ok(GroupedRelation {
518            key_schema,
519            member_schema,
520            groups,
521        })
522    }
523
524    /// Returns the subset of rows that satisfy `predicate`.
525    ///
526    /// The output keeps the same schema as `self`. Surviving rows remain in the
527    /// deterministic order induced by the underlying `BTreeSet`.
528    ///
529    /// # Examples
530    ///
531    /// ```rust
532    /// use relmath::NaryRelation;
533    ///
534    /// let progress = NaryRelation::from_rows(
535    ///     ["student", "course", "status"],
536    ///     [
537    ///         ["Alice", "Math", "passed"],
538    ///         ["Alice", "Physics", "passed"],
539    ///         ["Bob", "Physics", "in_progress"],
540    ///     ],
541    /// )?;
542    ///
543    /// let completed = progress.select(|row| row[2] == "passed");
544    ///
545    /// assert_eq!(
546    ///     completed.to_rows(),
547    ///     vec![
548    ///         vec!["Alice", "Math", "passed"],
549    ///         vec!["Alice", "Physics", "passed"],
550    ///     ]
551    /// );
552    /// # Ok::<(), relmath::NaryRelationError>(())
553    /// ```
554    #[must_use]
555    pub fn select<F>(&self, mut predicate: F) -> Self
556    where
557        F: FnMut(&[T]) -> bool,
558        T: Clone,
559    {
560        let rows = self
561            .rows
562            .iter()
563            .filter(|row| predicate(row))
564            .cloned()
565            .collect();
566
567        Self {
568            schema: self.schema.clone(),
569            rows,
570        }
571    }
572
573    /// Projects the relation onto the named columns in the requested order.
574    ///
575    /// The output schema follows the exact order of `columns`. Duplicate
576    /// column names, unknown columns, and empty projections are rejected by the
577    /// current exact core.
578    ///
579    /// # Examples
580    ///
581    /// ```rust
582    /// use relmath::NaryRelation;
583    ///
584    /// let passed = NaryRelation::from_rows(
585    ///     ["student", "course", "status"],
586    ///     [
587    ///         ["Alice", "Math", "passed"],
588    ///         ["Cara", "Math", "passed"],
589    ///         ["Alice", "Physics", "passed"],
590    ///     ],
591    /// )?;
592    ///
593    /// let projected = passed.project(["course", "student"])?;
594    ///
595    /// assert_eq!(
596    ///     projected.schema(),
597    ///     &["course".to_string(), "student".to_string()]
598    /// );
599    /// assert_eq!(
600    ///     projected.to_rows(),
601    ///     vec![
602    ///         vec!["Math", "Alice"],
603    ///         vec!["Math", "Cara"],
604    ///         vec!["Physics", "Alice"],
605    ///     ]
606    /// );
607    /// # Ok::<(), relmath::NaryRelationError>(())
608    /// ```
609    pub fn project<I, S>(&self, columns: I) -> Result<Self, NaryRelationError>
610    where
611        I: IntoIterator<Item = S>,
612        S: AsRef<str>,
613        T: Clone,
614    {
615        let columns: Vec<String> = columns
616            .into_iter()
617            .map(|column| column.as_ref().to_string())
618            .collect();
619        let projected_schema = Self::validate_schema(columns)?;
620        let mut indices = Vec::with_capacity(projected_schema.len());
621
622        for column in &projected_schema {
623            indices.push(self.column_index(column)?);
624        }
625
626        let rows = self
627            .rows
628            .iter()
629            .map(|row| indices.iter().map(|index| row[*index].clone()).collect())
630            .collect();
631
632        Ok(Self {
633            schema: projected_schema,
634            rows,
635        })
636    }
637
638    /// Returns a relation with one column renamed.
639    ///
640    /// Renaming a column to its current name is a no-op. The new column name
641    /// must be non-blank and must not duplicate another column in the schema.
642    ///
643    /// # Examples
644    ///
645    /// ```rust
646    /// use relmath::NaryRelation;
647    ///
648    /// let completed = NaryRelation::from_rows(
649    ///     ["student", "course"],
650    ///     [["Alice", "Math"], ["Bob", "Physics"]],
651    /// )?;
652    ///
653    /// let renamed = completed.rename("student", "learner")?;
654    ///
655    /// assert_eq!(
656    ///     renamed.schema(),
657    ///     &["learner".to_string(), "course".to_string()]
658    /// );
659    /// assert_eq!(
660    ///     renamed.to_rows(),
661    ///     vec![vec!["Alice", "Math"], vec!["Bob", "Physics"]]
662    /// );
663    /// # Ok::<(), relmath::NaryRelationError>(())
664    /// ```
665    pub fn rename<S>(&self, from: &str, to: S) -> Result<Self, NaryRelationError>
666    where
667        S: Into<String>,
668        T: Clone,
669    {
670        let to = to.into();
671        let index = self.column_index(from)?;
672
673        Self::validate_column_name(&to, index)?;
674
675        if from != to && self.schema.iter().any(|column| column == &to) {
676            return Err(NaryRelationError::DuplicateColumn { column: to });
677        }
678
679        let mut schema = self.schema.clone();
680        schema[index] = to;
681
682        Ok(Self {
683            schema,
684            rows: self.rows.clone(),
685        })
686    }
687
688    /// Returns the union of `self` and `other` when schemas match exactly.
689    ///
690    /// Schema compatibility for the current exact core means exact schema
691    /// equality, including column order.
692    ///
693    /// # Examples
694    ///
695    /// ```rust
696    /// use relmath::NaryRelation;
697    ///
698    /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
699    /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
700    ///
701    /// let union = left.union(&right)?;
702    ///
703    /// assert_eq!(
704    ///     union.to_rows(),
705    ///     vec![vec!["Alice"], vec!["Bob"], vec!["Cara"]]
706    /// );
707    /// # Ok::<(), relmath::NaryRelationError>(())
708    /// ```
709    pub fn union(&self, other: &Self) -> Result<Self, NaryRelationError>
710    where
711        T: Clone,
712    {
713        self.ensure_same_schema(other)?;
714
715        Ok(Self {
716            schema: self.schema.clone(),
717            rows: self.rows.union(&other.rows).cloned().collect(),
718        })
719    }
720
721    /// Returns the intersection of `self` and `other` when schemas match
722    /// exactly.
723    ///
724    /// Schema compatibility for the current exact core means exact schema
725    /// equality, including column order.
726    ///
727    /// # Examples
728    ///
729    /// ```rust
730    /// use relmath::NaryRelation;
731    ///
732    /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
733    /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
734    ///
735    /// let intersection = left.intersection(&right)?;
736    ///
737    /// assert_eq!(intersection.to_rows(), vec![vec!["Bob"]]);
738    /// # Ok::<(), relmath::NaryRelationError>(())
739    /// ```
740    pub fn intersection(&self, other: &Self) -> Result<Self, NaryRelationError>
741    where
742        T: Clone,
743    {
744        self.ensure_same_schema(other)?;
745
746        Ok(Self {
747            schema: self.schema.clone(),
748            rows: self.rows.intersection(&other.rows).cloned().collect(),
749        })
750    }
751
752    /// Returns the set difference `self \ other` when schemas match exactly.
753    ///
754    /// Schema compatibility for the current exact core means exact schema
755    /// equality, including column order.
756    ///
757    /// # Examples
758    ///
759    /// ```rust
760    /// use relmath::NaryRelation;
761    ///
762    /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
763    /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
764    ///
765    /// let only_left = left.difference(&right)?;
766    ///
767    /// assert_eq!(only_left.to_rows(), vec![vec!["Alice"]]);
768    /// # Ok::<(), relmath::NaryRelationError>(())
769    /// ```
770    pub fn difference(&self, other: &Self) -> Result<Self, NaryRelationError>
771    where
772        T: Clone,
773    {
774        self.ensure_same_schema(other)?;
775
776        Ok(Self {
777            schema: self.schema.clone(),
778            rows: self.rows.difference(&other.rows).cloned().collect(),
779        })
780    }
781
782    /// Returns the natural join of `self` and `other`.
783    ///
784    /// Shared column names constrain row matching: a pair of rows joins only
785    /// when every shared column has equal values on both sides. When the
786    /// schemas are disjoint, natural join behaves as a cartesian product.
787    ///
788    /// The output schema keeps the entire left-hand schema in order, followed
789    /// by the right-hand columns that are not shared, in their original order.
790    /// Output rows are stored in a `BTreeSet`, so iteration remains
791    /// deterministic. When no rows match, the result is empty but still keeps
792    /// the joined schema.
793    ///
794    /// # Examples
795    ///
796    /// ```rust
797    /// use relmath::NaryRelation;
798    ///
799    /// let completed = NaryRelation::from_rows(
800    ///     ["student", "course"],
801    ///     [
802    ///         ["Alice", "Math"],
803    ///         ["Alice", "Physics"],
804    ///         ["Bob", "Math"],
805    ///     ],
806    /// )?;
807    /// let rooms = NaryRelation::from_rows(
808    ///     ["course", "room"],
809    ///     [["Math", "R101"], ["Physics", "R201"]],
810    /// )?;
811    ///
812    /// let scheduled = completed.natural_join(&rooms);
813    ///
814    /// assert_eq!(
815    ///     scheduled.schema(),
816    ///     &["student".to_string(), "course".to_string(), "room".to_string()]
817    /// );
818    /// assert_eq!(
819    ///     scheduled.to_rows(),
820    ///     vec![
821    ///         vec!["Alice", "Math", "R101"],
822    ///         vec!["Alice", "Physics", "R201"],
823    ///         vec!["Bob", "Math", "R101"],
824    ///     ]
825    /// );
826    /// # Ok::<(), relmath::NaryRelationError>(())
827    /// ```
828    pub fn natural_join(&self, other: &Self) -> Self
829    where
830        T: Clone,
831    {
832        let right_index_by_name: BTreeMap<&str, usize> = other
833            .schema
834            .iter()
835            .enumerate()
836            .map(|(index, column)| (column.as_str(), index))
837            .collect();
838
839        let mut shared_indices = Vec::new();
840        let mut shared_right_indices = BTreeSet::new();
841        for (left_index, column) in self.schema.iter().enumerate() {
842            if let Some(&right_index) = right_index_by_name.get(column.as_str()) {
843                shared_indices.push((left_index, right_index));
844                shared_right_indices.insert(right_index);
845            }
846        }
847
848        let mut schema = self.schema.clone();
849        let mut right_only_indices = Vec::new();
850        for (right_index, column) in other.schema.iter().enumerate() {
851            if !shared_right_indices.contains(&right_index) {
852                schema.push(column.clone());
853                right_only_indices.push(right_index);
854            }
855        }
856
857        let mut rows = BTreeSet::new();
858        for left_row in &self.rows {
859            for right_row in &other.rows {
860                if shared_indices.iter().all(|(left_index, right_index)| {
861                    left_row[*left_index] == right_row[*right_index]
862                }) {
863                    let mut joined_row =
864                        Vec::with_capacity(self.arity() + right_only_indices.len());
865                    joined_row.extend(left_row.iter().cloned());
866                    for right_index in &right_only_indices {
867                        joined_row.push(right_row[*right_index].clone());
868                    }
869                    rows.insert(joined_row);
870                }
871            }
872        }
873
874        Self { schema, rows }
875    }
876
877    fn validate_schema<I, S>(schema: I) -> Result<Vec<String>, NaryRelationError>
878    where
879        I: IntoIterator<Item = S>,
880        S: Into<String>,
881    {
882        let schema: Vec<String> = schema.into_iter().map(Into::into).collect();
883
884        if schema.is_empty() {
885            return Err(NaryRelationError::EmptySchema);
886        }
887
888        let mut seen = BTreeSet::new();
889        for (index, column) in schema.iter().enumerate() {
890            Self::validate_column_name(column, index)?;
891
892            if !seen.insert(column.clone()) {
893                return Err(NaryRelationError::DuplicateColumn {
894                    column: column.clone(),
895                });
896            }
897        }
898
899        Ok(schema)
900    }
901
902    fn collect_row<I>(arity: usize, row: I) -> Result<Vec<T>, NaryRelationError>
903    where
904        I: IntoIterator<Item = T>,
905    {
906        let row: Vec<T> = row.into_iter().collect();
907
908        if row.len() != arity {
909            return Err(NaryRelationError::RowArityMismatch {
910                expected: arity,
911                actual: row.len(),
912            });
913        }
914
915        Ok(row)
916    }
917
918    fn collect_named_row<K>(
919        schema: &[String],
920        named_row: BTreeMap<K, T>,
921    ) -> Result<Vec<T>, NaryRelationError>
922    where
923        K: Into<String> + Ord,
924    {
925        let mut normalized_row = BTreeMap::new();
926        for (column, value) in named_row {
927            let column = column.into();
928            if normalized_row.insert(column.clone(), value).is_some() {
929                return Err(NaryRelationError::DuplicateColumn { column });
930            }
931        }
932
933        let mut row = Vec::with_capacity(schema.len());
934        for column in schema {
935            row.push(normalized_row.remove(column.as_str()).ok_or_else(|| {
936                NaryRelationError::MissingColumn {
937                    column: column.clone(),
938                }
939            })?);
940        }
941
942        if let Some((column, _)) = normalized_row.into_iter().next() {
943            return Err(NaryRelationError::UnexpectedColumn { column });
944        }
945
946        Ok(row)
947    }
948
949    fn validate_column_name(column: &str, index: usize) -> Result<(), NaryRelationError> {
950        if column.trim().is_empty() {
951            Err(NaryRelationError::BlankColumnName { index })
952        } else {
953            Ok(())
954        }
955    }
956
957    fn ensure_same_schema(&self, other: &Self) -> Result<(), NaryRelationError> {
958        if self.schema == other.schema {
959            Ok(())
960        } else {
961            Err(NaryRelationError::SchemaMismatch {
962                left: self.schema.clone(),
963                right: other.schema.clone(),
964            })
965        }
966    }
967}
968
969impl<T: Ord> FiniteRelation for NaryRelation<T> {
970    fn len(&self) -> usize {
971        self.rows.len()
972    }
973}