relmath/core/nary.rs
1use std::{
2 collections::{BTreeMap, BTreeSet},
3 error::Error,
4 fmt,
5};
6
7use crate::traits::FiniteRelation;
8
9/// Errors returned by schema-aware n-ary relation operations.
10#[derive(Clone, Debug, PartialEq, Eq)]
11pub enum NaryRelationError {
12 /// The provided schema has no columns.
13 EmptySchema,
14 /// A column name is blank or only whitespace.
15 BlankColumnName {
16 /// The zero-based schema position with the blank name.
17 index: usize,
18 },
19 /// A column name is repeated where uniqueness is required.
20 DuplicateColumn {
21 /// The repeated column name.
22 column: String,
23 },
24 /// A row does not match the relation arity.
25 RowArityMismatch {
26 /// The schema arity expected by the relation.
27 expected: usize,
28 /// The number of values that actually appeared in the row.
29 actual: usize,
30 },
31 /// An operation referenced a column that is not present in the schema.
32 UnknownColumn {
33 /// The missing column name.
34 column: String,
35 },
36 /// A named-row input omitted a column required by the schema.
37 MissingColumn {
38 /// The missing column name.
39 column: String,
40 },
41 /// A named-row input provided a column that is not in the schema.
42 UnexpectedColumn {
43 /// The unexpected column name.
44 column: String,
45 },
46 /// An operation requires identical schemas but received different ones.
47 SchemaMismatch {
48 /// The left-hand schema.
49 left: Vec<String>,
50 /// The right-hand schema.
51 right: Vec<String>,
52 },
53}
54
55impl fmt::Display for NaryRelationError {
56 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57 match self {
58 Self::EmptySchema => f.write_str("n-ary relation schema cannot be empty"),
59 Self::BlankColumnName { index } => {
60 write!(f, "column at index {index} must have a non-blank name")
61 }
62 Self::DuplicateColumn { column } => {
63 write!(f, "column `{column}` appears more than once")
64 }
65 Self::RowArityMismatch { expected, actual } => write!(
66 f,
67 "row arity mismatch: expected {expected} values but found {actual}"
68 ),
69 Self::UnknownColumn { column } => {
70 write!(f, "column `{column}` is not present in the schema")
71 }
72 Self::MissingColumn { column } => {
73 write!(f, "named row is missing required column `{column}`")
74 }
75 Self::UnexpectedColumn { column } => {
76 write!(f, "named row contains unexpected column `{column}`")
77 }
78 Self::SchemaMismatch { left, right } => {
79 write!(f, "schema mismatch: left = {:?}, right = {:?}", left, right)
80 }
81 }
82 }
83}
84
85impl Error for NaryRelationError {}
86
87/// A deterministic exact grouping of an n-ary relation by named key columns.
88///
89/// `GroupedRelation<T>` is the first G2 grouping surface for exact n-ary
90/// relations. Groups are keyed by projected key rows and stored in a
91/// `BTreeMap`, so group iteration order is deterministic.
92///
93/// In this first exact grouping slice, each grouped member relation keeps the
94/// original relation schema rather than removing key columns. This avoids
95/// introducing zero-ary relation semantics while keeping the grouped output
96/// relation-first and easy to inspect.
97///
98/// The first exact aggregate on grouped output is [`Self::counts`], which
99/// returns the number of stored rows in each group.
100///
101/// # Examples
102///
103/// ```rust
104/// use relmath::NaryRelation;
105///
106/// let completed = NaryRelation::from_rows(
107/// ["student", "course", "term"],
108/// [
109/// ["Alice", "Math", "Fall"],
110/// ["Alice", "Physics", "Fall"],
111/// ["Bob", "Math", "Spring"],
112/// ],
113/// )?;
114///
115/// let grouped = completed.group_by(["term", "student"])?;
116///
117/// assert_eq!(
118/// grouped.key_schema(),
119/// &["term".to_string(), "student".to_string()]
120/// );
121/// assert_eq!(
122/// grouped.counts(),
123/// vec![(vec!["Fall", "Alice"], 2), (vec!["Spring", "Bob"], 1)]
124/// );
125/// # Ok::<(), relmath::NaryRelationError>(())
126/// ```
127#[derive(Clone, Debug, PartialEq, Eq)]
128pub struct GroupedRelation<T: Ord> {
129 key_schema: Vec<String>,
130 member_schema: Vec<String>,
131 groups: BTreeMap<Vec<T>, NaryRelation<T>>,
132}
133
134impl<T: Ord> GroupedRelation<T> {
135 /// Returns the grouping key schema in order.
136 #[must_use]
137 pub fn key_schema(&self) -> &[String] {
138 &self.key_schema
139 }
140
141 /// Returns the schema of each member relation.
142 ///
143 /// In the current exact grouping slice this is the full original relation
144 /// schema.
145 #[must_use]
146 pub fn member_schema(&self) -> &[String] {
147 &self.member_schema
148 }
149
150 /// Returns the number of groups.
151 #[must_use]
152 pub fn len(&self) -> usize {
153 self.groups.len()
154 }
155
156 /// Returns `true` when the grouping contains no groups.
157 #[must_use]
158 pub fn is_empty(&self) -> bool {
159 self.groups.is_empty()
160 }
161
162 /// Returns the member relation for the given grouping key.
163 #[must_use]
164 pub fn group(&self, key: &[T]) -> Option<&NaryRelation<T>> {
165 self.groups.get(key)
166 }
167
168 /// Returns an iterator over groups in deterministic key order.
169 pub fn iter(&self) -> impl Iterator<Item = (&[T], &NaryRelation<T>)> {
170 self.groups
171 .iter()
172 .map(|(key, group)| (key.as_slice(), group))
173 }
174
175 /// Returns the first exact aggregate over the grouped relation: row counts.
176 ///
177 /// Each count is the number of stored rows in the corresponding group after
178 /// the base relation's set semantics have already deduplicated equal rows.
179 /// Results follow the deterministic key order of the grouping.
180 #[must_use]
181 pub fn counts(&self) -> Vec<(Vec<T>, usize)>
182 where
183 T: Clone,
184 {
185 self.groups
186 .iter()
187 .map(|(key, group)| (key.clone(), group.len()))
188 .collect()
189 }
190}
191
192/// A deterministic exact n-ary relation with a named schema.
193///
194/// `NaryRelation<T>` is the first G2 building block for schema-aware
195/// relations. It stores column names explicitly and keeps rows in a `BTreeSet`
196/// so iteration order is deterministic.
197///
198/// In this initial G2 step, all cells in a row share the same value type `T`.
199///
200/// # Examples
201///
202/// ```rust
203/// use relmath::NaryRelation;
204///
205/// let mut passed = NaryRelation::new(["student", "course", "status"])?;
206///
207/// assert_eq!(passed.column_index("course")?, 1);
208/// assert!(passed.insert_row(["Alice", "Math", "passed"])?);
209/// assert!(passed.insert_row(["Alice", "Physics", "passed"])?);
210/// assert!(passed.insert_row(["Bob", "Physics", "in_progress"])?);
211///
212/// let completed = passed.select(|row| row[2] == "passed");
213/// let students = completed.project(["student"])?;
214/// let renamed = students.rename("student", "learner")?;
215///
216/// assert_eq!(passed.arity(), 3);
217/// assert!(completed.contains_row(&["Alice", "Math", "passed"]));
218/// assert_eq!(renamed.schema(), &["learner".to_string()]);
219/// # Ok::<(), relmath::NaryRelationError>(())
220/// ```
221#[derive(Clone, Debug, PartialEq, Eq)]
222pub struct NaryRelation<T: Ord> {
223 schema: Vec<String>,
224 rows: BTreeSet<Vec<T>>,
225}
226
227impl<T: Ord> NaryRelation<T> {
228 /// Creates an empty n-ary relation with the given schema.
229 ///
230 /// Column names must be unique and non-blank.
231 pub fn new<I, S>(schema: I) -> Result<Self, NaryRelationError>
232 where
233 I: IntoIterator<Item = S>,
234 S: Into<String>,
235 {
236 Ok(Self {
237 schema: Self::validate_schema(schema)?,
238 rows: BTreeSet::new(),
239 })
240 }
241
242 /// Creates an n-ary relation from a schema and rows.
243 ///
244 /// Column names must be unique and non-blank, and every row must have the
245 /// same arity as the schema.
246 pub fn from_rows<I, S, R, Row>(schema: I, rows: R) -> Result<Self, NaryRelationError>
247 where
248 I: IntoIterator<Item = S>,
249 S: Into<String>,
250 R: IntoIterator<Item = Row>,
251 Row: IntoIterator<Item = T>,
252 {
253 let schema = Self::validate_schema(schema)?;
254 let arity = schema.len();
255 let mut dedup_rows = BTreeSet::new();
256
257 for row in rows {
258 dedup_rows.insert(Self::collect_row(arity, row)?);
259 }
260
261 Ok(Self {
262 schema,
263 rows: dedup_rows,
264 })
265 }
266
267 /// Creates an n-ary relation from a schema and named rows.
268 ///
269 /// This is the current std-only G2 onboarding boundary. Each input row
270 /// must provide exactly the schema columns by name. The schema order stays
271 /// explicit in the relation rather than being inferred from map iteration.
272 ///
273 /// # Examples
274 ///
275 /// ```rust
276 /// use std::collections::BTreeMap;
277 ///
278 /// use relmath::NaryRelation;
279 ///
280 /// let progress = NaryRelation::from_named_rows(
281 /// ["student", "course", "status"],
282 /// [
283 /// BTreeMap::from([
284 /// ("course", "Math"),
285 /// ("status", "passed"),
286 /// ("student", "Alice"),
287 /// ]),
288 /// BTreeMap::from([
289 /// ("course", "Physics"),
290 /// ("status", "in_progress"),
291 /// ("student", "Bob"),
292 /// ]),
293 /// ],
294 /// )?;
295 ///
296 /// assert_eq!(
297 /// progress.to_rows(),
298 /// vec![
299 /// vec!["Alice", "Math", "passed"],
300 /// vec!["Bob", "Physics", "in_progress"],
301 /// ]
302 /// );
303 /// # Ok::<(), relmath::NaryRelationError>(())
304 /// ```
305 pub fn from_named_rows<I, S, R, K>(schema: I, rows: R) -> Result<Self, NaryRelationError>
306 where
307 I: IntoIterator<Item = S>,
308 S: Into<String>,
309 R: IntoIterator<Item = BTreeMap<K, T>>,
310 K: Into<String> + Ord,
311 {
312 let schema = Self::validate_schema(schema)?;
313 let mut dedup_rows = BTreeSet::new();
314
315 for named_row in rows {
316 dedup_rows.insert(Self::collect_named_row(&schema, named_row)?);
317 }
318
319 Ok(Self {
320 schema,
321 rows: dedup_rows,
322 })
323 }
324
325 /// Returns the schema column names in order.
326 #[must_use]
327 pub fn schema(&self) -> &[String] {
328 &self.schema
329 }
330
331 /// Returns the arity of the relation.
332 #[must_use]
333 pub fn arity(&self) -> usize {
334 self.schema.len()
335 }
336
337 /// Returns the zero-based position of a named column.
338 ///
339 /// # Examples
340 ///
341 /// ```rust
342 /// use relmath::NaryRelation;
343 ///
344 /// let students = NaryRelation::<&str>::new(["student", "course"])?;
345 ///
346 /// assert_eq!(students.column_index("student")?, 0);
347 /// assert_eq!(students.column_index("course")?, 1);
348 /// # Ok::<(), relmath::NaryRelationError>(())
349 /// ```
350 pub fn column_index(&self, column: &str) -> Result<usize, NaryRelationError> {
351 self.schema
352 .iter()
353 .position(|candidate| candidate == column)
354 .ok_or_else(|| NaryRelationError::UnknownColumn {
355 column: column.to_string(),
356 })
357 }
358
359 /// Inserts a row into the relation.
360 ///
361 /// Returns `true` when the row was not already present.
362 pub fn insert_row<I>(&mut self, row: I) -> Result<bool, NaryRelationError>
363 where
364 I: IntoIterator<Item = T>,
365 {
366 Ok(self.rows.insert(Self::collect_row(self.arity(), row)?))
367 }
368
369 /// Returns `true` when the relation contains the given row.
370 #[must_use]
371 pub fn contains_row(&self, row: &[T]) -> bool {
372 self.rows.contains(row)
373 }
374
375 /// Returns an iterator over the stored rows in deterministic order.
376 pub fn iter(&self) -> impl Iterator<Item = &[T]> {
377 self.rows.iter().map(Vec::as_slice)
378 }
379
380 /// Returns the rows as a deterministically ordered vector.
381 #[must_use]
382 pub fn to_rows(&self) -> Vec<Vec<T>>
383 where
384 T: Clone,
385 {
386 self.rows.iter().cloned().collect()
387 }
388
389 /// Exports the relation as named rows.
390 ///
391 /// The returned vector follows the relation's deterministic row order.
392 /// Each row is materialized as a `BTreeMap`, so key lookup is by column
393 /// name rather than schema position. Preserve [`Self::schema`] separately
394 /// when schema order matters to a downstream consumer.
395 ///
396 /// # Examples
397 ///
398 /// ```rust
399 /// use std::collections::BTreeMap;
400 ///
401 /// use relmath::NaryRelation;
402 ///
403 /// let progress = NaryRelation::from_rows(
404 /// ["student", "course"],
405 /// [["Alice", "Math"], ["Bob", "Physics"]],
406 /// )?;
407 ///
408 /// assert_eq!(
409 /// progress.to_named_rows(),
410 /// vec![
411 /// BTreeMap::from([
412 /// ("course".to_string(), "Math"),
413 /// ("student".to_string(), "Alice"),
414 /// ]),
415 /// BTreeMap::from([
416 /// ("course".to_string(), "Physics"),
417 /// ("student".to_string(), "Bob"),
418 /// ]),
419 /// ]
420 /// );
421 /// # Ok::<(), relmath::NaryRelationError>(())
422 /// ```
423 #[must_use]
424 pub fn to_named_rows(&self) -> Vec<BTreeMap<String, T>>
425 where
426 T: Clone,
427 {
428 self.rows
429 .iter()
430 .map(|row| {
431 self.schema
432 .iter()
433 .cloned()
434 .zip(row.iter().cloned())
435 .collect()
436 })
437 .collect()
438 }
439
440 /// Groups the relation by the named key columns.
441 ///
442 /// Group keys follow the exact order of `columns`. Empty key selections,
443 /// duplicate key columns, and unknown columns are rejected by the current
444 /// exact core.
445 ///
446 /// Each member relation keeps the original relation schema and contains the
447 /// subset of rows whose projected key matches the group key. Groups are
448 /// stored in a `BTreeMap`, so group iteration order is deterministic.
449 ///
450 /// The first exact aggregate over the grouped output is
451 /// [`GroupedRelation::counts`], which reports the number of stored rows in
452 /// each group.
453 ///
454 /// # Examples
455 ///
456 /// ```rust
457 /// use relmath::NaryRelation;
458 ///
459 /// let completed = NaryRelation::from_rows(
460 /// ["student", "course", "term"],
461 /// [
462 /// ["Alice", "Math", "Fall"],
463 /// ["Alice", "Physics", "Fall"],
464 /// ["Bob", "Math", "Spring"],
465 /// ],
466 /// )?;
467 ///
468 /// let grouped = completed.group_by(["term", "student"])?;
469 ///
470 /// assert_eq!(
471 /// grouped.key_schema(),
472 /// &["term".to_string(), "student".to_string()]
473 /// );
474 /// assert_eq!(
475 /// grouped.counts(),
476 /// vec![(vec!["Fall", "Alice"], 2), (vec!["Spring", "Bob"], 1)]
477 /// );
478 /// # Ok::<(), relmath::NaryRelationError>(())
479 /// ```
480 pub fn group_by<I, S>(&self, columns: I) -> Result<GroupedRelation<T>, NaryRelationError>
481 where
482 I: IntoIterator<Item = S>,
483 S: AsRef<str>,
484 T: Clone,
485 {
486 let columns: Vec<String> = columns
487 .into_iter()
488 .map(|column| column.as_ref().to_string())
489 .collect();
490 let key_schema = Self::validate_schema(columns)?;
491 let mut indices = Vec::with_capacity(key_schema.len());
492
493 for column in &key_schema {
494 indices.push(self.column_index(column)?);
495 }
496
497 let mut raw_groups: BTreeMap<Vec<T>, BTreeSet<Vec<T>>> = BTreeMap::new();
498 for row in &self.rows {
499 let key = indices.iter().map(|index| row[*index].clone()).collect();
500 raw_groups.entry(key).or_default().insert(row.clone());
501 }
502
503 let member_schema = self.schema.clone();
504 let groups = raw_groups
505 .into_iter()
506 .map(|(key, rows)| {
507 (
508 key,
509 Self {
510 schema: member_schema.clone(),
511 rows,
512 },
513 )
514 })
515 .collect();
516
517 Ok(GroupedRelation {
518 key_schema,
519 member_schema,
520 groups,
521 })
522 }
523
524 /// Returns the subset of rows that satisfy `predicate`.
525 ///
526 /// The output keeps the same schema as `self`. Surviving rows remain in the
527 /// deterministic order induced by the underlying `BTreeSet`.
528 ///
529 /// # Examples
530 ///
531 /// ```rust
532 /// use relmath::NaryRelation;
533 ///
534 /// let progress = NaryRelation::from_rows(
535 /// ["student", "course", "status"],
536 /// [
537 /// ["Alice", "Math", "passed"],
538 /// ["Alice", "Physics", "passed"],
539 /// ["Bob", "Physics", "in_progress"],
540 /// ],
541 /// )?;
542 ///
543 /// let completed = progress.select(|row| row[2] == "passed");
544 ///
545 /// assert_eq!(
546 /// completed.to_rows(),
547 /// vec![
548 /// vec!["Alice", "Math", "passed"],
549 /// vec!["Alice", "Physics", "passed"],
550 /// ]
551 /// );
552 /// # Ok::<(), relmath::NaryRelationError>(())
553 /// ```
554 #[must_use]
555 pub fn select<F>(&self, mut predicate: F) -> Self
556 where
557 F: FnMut(&[T]) -> bool,
558 T: Clone,
559 {
560 let rows = self
561 .rows
562 .iter()
563 .filter(|row| predicate(row))
564 .cloned()
565 .collect();
566
567 Self {
568 schema: self.schema.clone(),
569 rows,
570 }
571 }
572
573 /// Projects the relation onto the named columns in the requested order.
574 ///
575 /// The output schema follows the exact order of `columns`. Duplicate
576 /// column names, unknown columns, and empty projections are rejected by the
577 /// current exact core.
578 ///
579 /// # Examples
580 ///
581 /// ```rust
582 /// use relmath::NaryRelation;
583 ///
584 /// let passed = NaryRelation::from_rows(
585 /// ["student", "course", "status"],
586 /// [
587 /// ["Alice", "Math", "passed"],
588 /// ["Cara", "Math", "passed"],
589 /// ["Alice", "Physics", "passed"],
590 /// ],
591 /// )?;
592 ///
593 /// let projected = passed.project(["course", "student"])?;
594 ///
595 /// assert_eq!(
596 /// projected.schema(),
597 /// &["course".to_string(), "student".to_string()]
598 /// );
599 /// assert_eq!(
600 /// projected.to_rows(),
601 /// vec![
602 /// vec!["Math", "Alice"],
603 /// vec!["Math", "Cara"],
604 /// vec!["Physics", "Alice"],
605 /// ]
606 /// );
607 /// # Ok::<(), relmath::NaryRelationError>(())
608 /// ```
609 pub fn project<I, S>(&self, columns: I) -> Result<Self, NaryRelationError>
610 where
611 I: IntoIterator<Item = S>,
612 S: AsRef<str>,
613 T: Clone,
614 {
615 let columns: Vec<String> = columns
616 .into_iter()
617 .map(|column| column.as_ref().to_string())
618 .collect();
619 let projected_schema = Self::validate_schema(columns)?;
620 let mut indices = Vec::with_capacity(projected_schema.len());
621
622 for column in &projected_schema {
623 indices.push(self.column_index(column)?);
624 }
625
626 let rows = self
627 .rows
628 .iter()
629 .map(|row| indices.iter().map(|index| row[*index].clone()).collect())
630 .collect();
631
632 Ok(Self {
633 schema: projected_schema,
634 rows,
635 })
636 }
637
638 /// Returns a relation with one column renamed.
639 ///
640 /// Renaming a column to its current name is a no-op. The new column name
641 /// must be non-blank and must not duplicate another column in the schema.
642 ///
643 /// # Examples
644 ///
645 /// ```rust
646 /// use relmath::NaryRelation;
647 ///
648 /// let completed = NaryRelation::from_rows(
649 /// ["student", "course"],
650 /// [["Alice", "Math"], ["Bob", "Physics"]],
651 /// )?;
652 ///
653 /// let renamed = completed.rename("student", "learner")?;
654 ///
655 /// assert_eq!(
656 /// renamed.schema(),
657 /// &["learner".to_string(), "course".to_string()]
658 /// );
659 /// assert_eq!(
660 /// renamed.to_rows(),
661 /// vec![vec!["Alice", "Math"], vec!["Bob", "Physics"]]
662 /// );
663 /// # Ok::<(), relmath::NaryRelationError>(())
664 /// ```
665 pub fn rename<S>(&self, from: &str, to: S) -> Result<Self, NaryRelationError>
666 where
667 S: Into<String>,
668 T: Clone,
669 {
670 let to = to.into();
671 let index = self.column_index(from)?;
672
673 Self::validate_column_name(&to, index)?;
674
675 if from != to && self.schema.iter().any(|column| column == &to) {
676 return Err(NaryRelationError::DuplicateColumn { column: to });
677 }
678
679 let mut schema = self.schema.clone();
680 schema[index] = to;
681
682 Ok(Self {
683 schema,
684 rows: self.rows.clone(),
685 })
686 }
687
688 /// Returns the union of `self` and `other` when schemas match exactly.
689 ///
690 /// Schema compatibility for the current exact core means exact schema
691 /// equality, including column order.
692 ///
693 /// # Examples
694 ///
695 /// ```rust
696 /// use relmath::NaryRelation;
697 ///
698 /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
699 /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
700 ///
701 /// let union = left.union(&right)?;
702 ///
703 /// assert_eq!(
704 /// union.to_rows(),
705 /// vec![vec!["Alice"], vec!["Bob"], vec!["Cara"]]
706 /// );
707 /// # Ok::<(), relmath::NaryRelationError>(())
708 /// ```
709 pub fn union(&self, other: &Self) -> Result<Self, NaryRelationError>
710 where
711 T: Clone,
712 {
713 self.ensure_same_schema(other)?;
714
715 Ok(Self {
716 schema: self.schema.clone(),
717 rows: self.rows.union(&other.rows).cloned().collect(),
718 })
719 }
720
721 /// Returns the intersection of `self` and `other` when schemas match
722 /// exactly.
723 ///
724 /// Schema compatibility for the current exact core means exact schema
725 /// equality, including column order.
726 ///
727 /// # Examples
728 ///
729 /// ```rust
730 /// use relmath::NaryRelation;
731 ///
732 /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
733 /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
734 ///
735 /// let intersection = left.intersection(&right)?;
736 ///
737 /// assert_eq!(intersection.to_rows(), vec![vec!["Bob"]]);
738 /// # Ok::<(), relmath::NaryRelationError>(())
739 /// ```
740 pub fn intersection(&self, other: &Self) -> Result<Self, NaryRelationError>
741 where
742 T: Clone,
743 {
744 self.ensure_same_schema(other)?;
745
746 Ok(Self {
747 schema: self.schema.clone(),
748 rows: self.rows.intersection(&other.rows).cloned().collect(),
749 })
750 }
751
752 /// Returns the set difference `self \ other` when schemas match exactly.
753 ///
754 /// Schema compatibility for the current exact core means exact schema
755 /// equality, including column order.
756 ///
757 /// # Examples
758 ///
759 /// ```rust
760 /// use relmath::NaryRelation;
761 ///
762 /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
763 /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
764 ///
765 /// let only_left = left.difference(&right)?;
766 ///
767 /// assert_eq!(only_left.to_rows(), vec![vec!["Alice"]]);
768 /// # Ok::<(), relmath::NaryRelationError>(())
769 /// ```
770 pub fn difference(&self, other: &Self) -> Result<Self, NaryRelationError>
771 where
772 T: Clone,
773 {
774 self.ensure_same_schema(other)?;
775
776 Ok(Self {
777 schema: self.schema.clone(),
778 rows: self.rows.difference(&other.rows).cloned().collect(),
779 })
780 }
781
782 /// Returns the natural join of `self` and `other`.
783 ///
784 /// Shared column names constrain row matching: a pair of rows joins only
785 /// when every shared column has equal values on both sides. When the
786 /// schemas are disjoint, natural join behaves as a cartesian product.
787 ///
788 /// The output schema keeps the entire left-hand schema in order, followed
789 /// by the right-hand columns that are not shared, in their original order.
790 /// Output rows are stored in a `BTreeSet`, so iteration remains
791 /// deterministic. When no rows match, the result is empty but still keeps
792 /// the joined schema.
793 ///
794 /// # Examples
795 ///
796 /// ```rust
797 /// use relmath::NaryRelation;
798 ///
799 /// let completed = NaryRelation::from_rows(
800 /// ["student", "course"],
801 /// [
802 /// ["Alice", "Math"],
803 /// ["Alice", "Physics"],
804 /// ["Bob", "Math"],
805 /// ],
806 /// )?;
807 /// let rooms = NaryRelation::from_rows(
808 /// ["course", "room"],
809 /// [["Math", "R101"], ["Physics", "R201"]],
810 /// )?;
811 ///
812 /// let scheduled = completed.natural_join(&rooms);
813 ///
814 /// assert_eq!(
815 /// scheduled.schema(),
816 /// &["student".to_string(), "course".to_string(), "room".to_string()]
817 /// );
818 /// assert_eq!(
819 /// scheduled.to_rows(),
820 /// vec![
821 /// vec!["Alice", "Math", "R101"],
822 /// vec!["Alice", "Physics", "R201"],
823 /// vec!["Bob", "Math", "R101"],
824 /// ]
825 /// );
826 /// # Ok::<(), relmath::NaryRelationError>(())
827 /// ```
828 pub fn natural_join(&self, other: &Self) -> Self
829 where
830 T: Clone,
831 {
832 let right_index_by_name: BTreeMap<&str, usize> = other
833 .schema
834 .iter()
835 .enumerate()
836 .map(|(index, column)| (column.as_str(), index))
837 .collect();
838
839 let mut shared_indices = Vec::new();
840 let mut shared_right_indices = BTreeSet::new();
841 for (left_index, column) in self.schema.iter().enumerate() {
842 if let Some(&right_index) = right_index_by_name.get(column.as_str()) {
843 shared_indices.push((left_index, right_index));
844 shared_right_indices.insert(right_index);
845 }
846 }
847
848 let mut schema = self.schema.clone();
849 let mut right_only_indices = Vec::new();
850 for (right_index, column) in other.schema.iter().enumerate() {
851 if !shared_right_indices.contains(&right_index) {
852 schema.push(column.clone());
853 right_only_indices.push(right_index);
854 }
855 }
856
857 let mut rows = BTreeSet::new();
858 for left_row in &self.rows {
859 for right_row in &other.rows {
860 if shared_indices.iter().all(|(left_index, right_index)| {
861 left_row[*left_index] == right_row[*right_index]
862 }) {
863 let mut joined_row =
864 Vec::with_capacity(self.arity() + right_only_indices.len());
865 joined_row.extend(left_row.iter().cloned());
866 for right_index in &right_only_indices {
867 joined_row.push(right_row[*right_index].clone());
868 }
869 rows.insert(joined_row);
870 }
871 }
872 }
873
874 Self { schema, rows }
875 }
876
877 fn validate_schema<I, S>(schema: I) -> Result<Vec<String>, NaryRelationError>
878 where
879 I: IntoIterator<Item = S>,
880 S: Into<String>,
881 {
882 let schema: Vec<String> = schema.into_iter().map(Into::into).collect();
883
884 if schema.is_empty() {
885 return Err(NaryRelationError::EmptySchema);
886 }
887
888 let mut seen = BTreeSet::new();
889 for (index, column) in schema.iter().enumerate() {
890 Self::validate_column_name(column, index)?;
891
892 if !seen.insert(column.clone()) {
893 return Err(NaryRelationError::DuplicateColumn {
894 column: column.clone(),
895 });
896 }
897 }
898
899 Ok(schema)
900 }
901
902 fn collect_row<I>(arity: usize, row: I) -> Result<Vec<T>, NaryRelationError>
903 where
904 I: IntoIterator<Item = T>,
905 {
906 let row: Vec<T> = row.into_iter().collect();
907
908 if row.len() != arity {
909 return Err(NaryRelationError::RowArityMismatch {
910 expected: arity,
911 actual: row.len(),
912 });
913 }
914
915 Ok(row)
916 }
917
918 fn collect_named_row<K>(
919 schema: &[String],
920 named_row: BTreeMap<K, T>,
921 ) -> Result<Vec<T>, NaryRelationError>
922 where
923 K: Into<String> + Ord,
924 {
925 let mut normalized_row = BTreeMap::new();
926 for (column, value) in named_row {
927 let column = column.into();
928 if normalized_row.insert(column.clone(), value).is_some() {
929 return Err(NaryRelationError::DuplicateColumn { column });
930 }
931 }
932
933 let mut row = Vec::with_capacity(schema.len());
934 for column in schema {
935 row.push(normalized_row.remove(column.as_str()).ok_or_else(|| {
936 NaryRelationError::MissingColumn {
937 column: column.clone(),
938 }
939 })?);
940 }
941
942 if let Some((column, _)) = normalized_row.into_iter().next() {
943 return Err(NaryRelationError::UnexpectedColumn { column });
944 }
945
946 Ok(row)
947 }
948
949 fn validate_column_name(column: &str, index: usize) -> Result<(), NaryRelationError> {
950 if column.trim().is_empty() {
951 Err(NaryRelationError::BlankColumnName { index })
952 } else {
953 Ok(())
954 }
955 }
956
957 fn ensure_same_schema(&self, other: &Self) -> Result<(), NaryRelationError> {
958 if self.schema == other.schema {
959 Ok(())
960 } else {
961 Err(NaryRelationError::SchemaMismatch {
962 left: self.schema.clone(),
963 right: other.schema.clone(),
964 })
965 }
966 }
967}
968
969impl<T: Ord> FiniteRelation for NaryRelation<T> {
970 fn len(&self) -> usize {
971 self.rows.len()
972 }
973}