relmath/core/nary.rs
1//! Deterministic exact n-ary relation foundations.
2
3use std::{
4 collections::{BTreeMap, BTreeSet},
5 error::Error,
6 fmt,
7};
8
9use crate::traits::FiniteRelation;
10
11/// Errors returned by schema-aware n-ary relation operations.
12#[derive(Clone, Debug, PartialEq, Eq)]
13pub enum NaryRelationError {
14 /// The provided schema has no columns.
15 EmptySchema,
16 /// A column name is blank or only whitespace.
17 BlankColumnName {
18 /// The zero-based schema position with the blank name.
19 index: usize,
20 },
21 /// A column name is repeated where uniqueness is required.
22 DuplicateColumn {
23 /// The repeated column name.
24 column: String,
25 },
26 /// A row does not match the relation arity.
27 RowArityMismatch {
28 /// The schema arity expected by the relation.
29 expected: usize,
30 /// The number of values that actually appeared in the row.
31 actual: usize,
32 },
33 /// An operation referenced a column that is not present in the schema.
34 UnknownColumn {
35 /// The missing column name.
36 column: String,
37 },
38 /// A named-row input omitted a column required by the schema.
39 MissingColumn {
40 /// The missing column name.
41 column: String,
42 },
43 /// A named-row input provided a column that is not in the schema.
44 UnexpectedColumn {
45 /// The unexpected column name.
46 column: String,
47 },
48 /// An operation requires identical schemas but received different ones.
49 SchemaMismatch {
50 /// The left-hand schema.
51 left: Vec<String>,
52 /// The right-hand schema.
53 right: Vec<String>,
54 },
55}
56
57impl fmt::Display for NaryRelationError {
58 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59 match self {
60 Self::EmptySchema => f.write_str("n-ary relation schema cannot be empty"),
61 Self::BlankColumnName { index } => {
62 write!(f, "column at index {index} must have a non-blank name")
63 }
64 Self::DuplicateColumn { column } => {
65 write!(f, "column `{column}` appears more than once")
66 }
67 Self::RowArityMismatch { expected, actual } => write!(
68 f,
69 "row arity mismatch: expected {expected} values but found {actual}"
70 ),
71 Self::UnknownColumn { column } => {
72 write!(f, "column `{column}` is not present in the schema")
73 }
74 Self::MissingColumn { column } => {
75 write!(f, "named row is missing required column `{column}`")
76 }
77 Self::UnexpectedColumn { column } => {
78 write!(f, "named row contains unexpected column `{column}`")
79 }
80 Self::SchemaMismatch { left, right } => {
81 write!(f, "schema mismatch: left = {:?}, right = {:?}", left, right)
82 }
83 }
84 }
85}
86
87impl Error for NaryRelationError {}
88
89/// A deterministic exact grouping of an n-ary relation by named key columns.
90///
91/// `GroupedRelation<T>` is the first G2 grouping surface for exact n-ary
92/// relations. Groups are keyed by projected key rows and stored in a
93/// `BTreeMap`, so group iteration order is deterministic.
94///
95/// In this first exact grouping slice, each grouped member relation keeps the
96/// original relation schema rather than removing key columns. This avoids
97/// introducing zero-ary relation semantics while keeping the grouped output
98/// relation-first and easy to inspect.
99///
100/// The first exact aggregate on grouped output is [`Self::counts`], which
101/// returns the number of stored rows in each group.
102///
103/// # Examples
104///
105/// ```rust
106/// use relmath::NaryRelation;
107///
108/// let completed = NaryRelation::from_rows(
109/// ["student", "course", "term"],
110/// [
111/// ["Alice", "Math", "Fall"],
112/// ["Alice", "Physics", "Fall"],
113/// ["Bob", "Math", "Spring"],
114/// ],
115/// )?;
116///
117/// let grouped = completed.group_by(["term", "student"])?;
118///
119/// assert_eq!(
120/// grouped.key_schema(),
121/// &["term".to_string(), "student".to_string()]
122/// );
123/// assert_eq!(
124/// grouped.counts(),
125/// vec![(vec!["Fall", "Alice"], 2), (vec!["Spring", "Bob"], 1)]
126/// );
127/// # Ok::<(), relmath::NaryRelationError>(())
128/// ```
129#[derive(Clone, Debug, PartialEq, Eq)]
130pub struct GroupedRelation<T: Ord> {
131 key_schema: Vec<String>,
132 member_schema: Vec<String>,
133 groups: BTreeMap<Vec<T>, NaryRelation<T>>,
134}
135
136impl<T: Ord> GroupedRelation<T> {
137 /// Returns the grouping key schema in order.
138 #[must_use]
139 pub fn key_schema(&self) -> &[String] {
140 &self.key_schema
141 }
142
143 /// Returns the schema of each member relation.
144 ///
145 /// In the current exact grouping slice this is the full original relation
146 /// schema.
147 #[must_use]
148 pub fn member_schema(&self) -> &[String] {
149 &self.member_schema
150 }
151
152 /// Returns the number of groups.
153 #[must_use]
154 pub fn len(&self) -> usize {
155 self.groups.len()
156 }
157
158 /// Returns `true` when the grouping contains no groups.
159 #[must_use]
160 pub fn is_empty(&self) -> bool {
161 self.groups.is_empty()
162 }
163
164 /// Returns the member relation for the given grouping key.
165 #[must_use]
166 pub fn group(&self, key: &[T]) -> Option<&NaryRelation<T>> {
167 self.groups.get(key)
168 }
169
170 /// Returns an iterator over groups in deterministic key order.
171 pub fn iter(&self) -> impl Iterator<Item = (&[T], &NaryRelation<T>)> {
172 self.groups
173 .iter()
174 .map(|(key, group)| (key.as_slice(), group))
175 }
176
177 /// Returns the first exact aggregate over the grouped relation: row counts.
178 ///
179 /// Each count is the number of stored rows in the corresponding group after
180 /// the base relation's set semantics have already deduplicated equal rows.
181 /// Results follow the deterministic key order of the grouping.
182 #[must_use]
183 pub fn counts(&self) -> Vec<(Vec<T>, usize)>
184 where
185 T: Clone,
186 {
187 self.groups
188 .iter()
189 .map(|(key, group)| (key.clone(), group.len()))
190 .collect()
191 }
192}
193
194/// A deterministic exact n-ary relation with a named schema.
195///
196/// `NaryRelation<T>` is the first G2 building block for schema-aware
197/// relations. It stores column names explicitly and keeps rows in a `BTreeSet`
198/// so iteration order is deterministic.
199///
200/// In this initial G2 step, all cells in a row share the same value type `T`.
201///
202/// # Examples
203///
204/// ```rust
205/// use relmath::NaryRelation;
206///
207/// let mut passed = NaryRelation::new(["student", "course", "status"])?;
208///
209/// assert_eq!(passed.column_index("course")?, 1);
210/// assert!(passed.insert_row(["Alice", "Math", "passed"])?);
211/// assert!(passed.insert_row(["Alice", "Physics", "passed"])?);
212/// assert!(passed.insert_row(["Bob", "Physics", "in_progress"])?);
213///
214/// let completed = passed.select(|row| row[2] == "passed");
215/// let students = completed.project(["student"])?;
216/// let renamed = students.rename("student", "learner")?;
217///
218/// assert_eq!(passed.arity(), 3);
219/// assert!(completed.contains_row(&["Alice", "Math", "passed"]));
220/// assert_eq!(renamed.schema(), &["learner".to_string()]);
221/// # Ok::<(), relmath::NaryRelationError>(())
222/// ```
223#[derive(Clone, Debug, PartialEq, Eq)]
224pub struct NaryRelation<T: Ord> {
225 schema: Vec<String>,
226 rows: BTreeSet<Vec<T>>,
227}
228
229impl<T: Ord> NaryRelation<T> {
230 /// Creates an empty n-ary relation with the given schema.
231 ///
232 /// Column names must be unique and non-blank.
233 pub fn new<I, S>(schema: I) -> Result<Self, NaryRelationError>
234 where
235 I: IntoIterator<Item = S>,
236 S: Into<String>,
237 {
238 Ok(Self {
239 schema: Self::validate_schema(schema)?,
240 rows: BTreeSet::new(),
241 })
242 }
243
244 /// Creates an n-ary relation from a schema and rows.
245 ///
246 /// Column names must be unique and non-blank, and every row must have the
247 /// same arity as the schema.
248 pub fn from_rows<I, S, R, Row>(schema: I, rows: R) -> Result<Self, NaryRelationError>
249 where
250 I: IntoIterator<Item = S>,
251 S: Into<String>,
252 R: IntoIterator<Item = Row>,
253 Row: IntoIterator<Item = T>,
254 {
255 let schema = Self::validate_schema(schema)?;
256 let arity = schema.len();
257 let mut dedup_rows = BTreeSet::new();
258
259 for row in rows {
260 dedup_rows.insert(Self::collect_row(arity, row)?);
261 }
262
263 Ok(Self {
264 schema,
265 rows: dedup_rows,
266 })
267 }
268
269 /// Creates an n-ary relation from a schema and named rows.
270 ///
271 /// This is the current std-only G2 onboarding boundary. Each input row
272 /// must provide exactly the schema columns by name. The schema order stays
273 /// explicit in the relation rather than being inferred from map iteration.
274 ///
275 /// # Examples
276 ///
277 /// ```rust
278 /// use std::collections::BTreeMap;
279 ///
280 /// use relmath::NaryRelation;
281 ///
282 /// let progress = NaryRelation::from_named_rows(
283 /// ["student", "course", "status"],
284 /// [
285 /// BTreeMap::from([
286 /// ("course", "Math"),
287 /// ("status", "passed"),
288 /// ("student", "Alice"),
289 /// ]),
290 /// BTreeMap::from([
291 /// ("course", "Physics"),
292 /// ("status", "in_progress"),
293 /// ("student", "Bob"),
294 /// ]),
295 /// ],
296 /// )?;
297 ///
298 /// assert_eq!(
299 /// progress.to_rows(),
300 /// vec![
301 /// vec!["Alice", "Math", "passed"],
302 /// vec!["Bob", "Physics", "in_progress"],
303 /// ]
304 /// );
305 /// # Ok::<(), relmath::NaryRelationError>(())
306 /// ```
307 pub fn from_named_rows<I, S, R, K>(schema: I, rows: R) -> Result<Self, NaryRelationError>
308 where
309 I: IntoIterator<Item = S>,
310 S: Into<String>,
311 R: IntoIterator<Item = BTreeMap<K, T>>,
312 K: Into<String> + Ord,
313 {
314 let schema = Self::validate_schema(schema)?;
315 let mut dedup_rows = BTreeSet::new();
316
317 for named_row in rows {
318 dedup_rows.insert(Self::collect_named_row(&schema, named_row)?);
319 }
320
321 Ok(Self {
322 schema,
323 rows: dedup_rows,
324 })
325 }
326
327 /// Returns the schema column names in order.
328 #[must_use]
329 pub fn schema(&self) -> &[String] {
330 &self.schema
331 }
332
333 /// Returns the arity of the relation.
334 #[must_use]
335 pub fn arity(&self) -> usize {
336 self.schema.len()
337 }
338
339 /// Returns the zero-based position of a named column.
340 ///
341 /// # Examples
342 ///
343 /// ```rust
344 /// use relmath::NaryRelation;
345 ///
346 /// let students = NaryRelation::<&str>::new(["student", "course"])?;
347 ///
348 /// assert_eq!(students.column_index("student")?, 0);
349 /// assert_eq!(students.column_index("course")?, 1);
350 /// # Ok::<(), relmath::NaryRelationError>(())
351 /// ```
352 pub fn column_index(&self, column: &str) -> Result<usize, NaryRelationError> {
353 self.schema
354 .iter()
355 .position(|candidate| candidate == column)
356 .ok_or_else(|| NaryRelationError::UnknownColumn {
357 column: column.to_string(),
358 })
359 }
360
361 /// Inserts a row into the relation.
362 ///
363 /// Returns `true` when the row was not already present.
364 pub fn insert_row<I>(&mut self, row: I) -> Result<bool, NaryRelationError>
365 where
366 I: IntoIterator<Item = T>,
367 {
368 Ok(self.rows.insert(Self::collect_row(self.arity(), row)?))
369 }
370
371 /// Returns `true` when the relation contains the given row.
372 #[must_use]
373 pub fn contains_row(&self, row: &[T]) -> bool {
374 self.rows.contains(row)
375 }
376
377 /// Returns an iterator over the stored rows in deterministic order.
378 pub fn iter(&self) -> impl Iterator<Item = &[T]> {
379 self.rows.iter().map(Vec::as_slice)
380 }
381
382 /// Returns the rows as a deterministically ordered vector.
383 #[must_use]
384 pub fn to_rows(&self) -> Vec<Vec<T>>
385 where
386 T: Clone,
387 {
388 self.rows.iter().cloned().collect()
389 }
390
391 /// Exports the relation as named rows.
392 ///
393 /// The returned vector follows the relation's deterministic row order.
394 /// Each row is materialized as a `BTreeMap`, so key lookup is by column
395 /// name rather than schema position. Preserve [`Self::schema`] separately
396 /// when schema order matters to a downstream consumer.
397 ///
398 /// # Examples
399 ///
400 /// ```rust
401 /// use std::collections::BTreeMap;
402 ///
403 /// use relmath::NaryRelation;
404 ///
405 /// let progress = NaryRelation::from_rows(
406 /// ["student", "course"],
407 /// [["Alice", "Math"], ["Bob", "Physics"]],
408 /// )?;
409 ///
410 /// assert_eq!(
411 /// progress.to_named_rows(),
412 /// vec![
413 /// BTreeMap::from([
414 /// ("course".to_string(), "Math"),
415 /// ("student".to_string(), "Alice"),
416 /// ]),
417 /// BTreeMap::from([
418 /// ("course".to_string(), "Physics"),
419 /// ("student".to_string(), "Bob"),
420 /// ]),
421 /// ]
422 /// );
423 /// # Ok::<(), relmath::NaryRelationError>(())
424 /// ```
425 #[must_use]
426 pub fn to_named_rows(&self) -> Vec<BTreeMap<String, T>>
427 where
428 T: Clone,
429 {
430 self.rows
431 .iter()
432 .map(|row| {
433 self.schema
434 .iter()
435 .cloned()
436 .zip(row.iter().cloned())
437 .collect()
438 })
439 .collect()
440 }
441
442 /// Groups the relation by the named key columns.
443 ///
444 /// Group keys follow the exact order of `columns`. Empty key selections,
445 /// duplicate key columns, and unknown columns are rejected by the current
446 /// exact core.
447 ///
448 /// Each member relation keeps the original relation schema and contains the
449 /// subset of rows whose projected key matches the group key. Groups are
450 /// stored in a `BTreeMap`, so group iteration order is deterministic.
451 ///
452 /// The first exact aggregate over the grouped output is
453 /// [`GroupedRelation::counts`], which reports the number of stored rows in
454 /// each group.
455 ///
456 /// # Examples
457 ///
458 /// ```rust
459 /// use relmath::NaryRelation;
460 ///
461 /// let completed = NaryRelation::from_rows(
462 /// ["student", "course", "term"],
463 /// [
464 /// ["Alice", "Math", "Fall"],
465 /// ["Alice", "Physics", "Fall"],
466 /// ["Bob", "Math", "Spring"],
467 /// ],
468 /// )?;
469 ///
470 /// let grouped = completed.group_by(["term", "student"])?;
471 ///
472 /// assert_eq!(
473 /// grouped.key_schema(),
474 /// &["term".to_string(), "student".to_string()]
475 /// );
476 /// assert_eq!(
477 /// grouped.counts(),
478 /// vec![(vec!["Fall", "Alice"], 2), (vec!["Spring", "Bob"], 1)]
479 /// );
480 /// # Ok::<(), relmath::NaryRelationError>(())
481 /// ```
482 pub fn group_by<I, S>(&self, columns: I) -> Result<GroupedRelation<T>, NaryRelationError>
483 where
484 I: IntoIterator<Item = S>,
485 S: AsRef<str>,
486 T: Clone,
487 {
488 let columns: Vec<String> = columns
489 .into_iter()
490 .map(|column| column.as_ref().to_string())
491 .collect();
492 let key_schema = Self::validate_schema(columns)?;
493 let mut indices = Vec::with_capacity(key_schema.len());
494
495 for column in &key_schema {
496 indices.push(self.column_index(column)?);
497 }
498
499 let mut raw_groups: BTreeMap<Vec<T>, BTreeSet<Vec<T>>> = BTreeMap::new();
500 for row in &self.rows {
501 let key = indices.iter().map(|index| row[*index].clone()).collect();
502 raw_groups.entry(key).or_default().insert(row.clone());
503 }
504
505 let member_schema = self.schema.clone();
506 let groups = raw_groups
507 .into_iter()
508 .map(|(key, rows)| {
509 (
510 key,
511 Self {
512 schema: member_schema.clone(),
513 rows,
514 },
515 )
516 })
517 .collect();
518
519 Ok(GroupedRelation {
520 key_schema,
521 member_schema,
522 groups,
523 })
524 }
525
526 /// Returns the subset of rows that satisfy `predicate`.
527 ///
528 /// The output keeps the same schema as `self`. Surviving rows remain in the
529 /// deterministic order induced by the underlying `BTreeSet`.
530 ///
531 /// # Examples
532 ///
533 /// ```rust
534 /// use relmath::NaryRelation;
535 ///
536 /// let progress = NaryRelation::from_rows(
537 /// ["student", "course", "status"],
538 /// [
539 /// ["Alice", "Math", "passed"],
540 /// ["Alice", "Physics", "passed"],
541 /// ["Bob", "Physics", "in_progress"],
542 /// ],
543 /// )?;
544 ///
545 /// let completed = progress.select(|row| row[2] == "passed");
546 ///
547 /// assert_eq!(
548 /// completed.to_rows(),
549 /// vec![
550 /// vec!["Alice", "Math", "passed"],
551 /// vec!["Alice", "Physics", "passed"],
552 /// ]
553 /// );
554 /// # Ok::<(), relmath::NaryRelationError>(())
555 /// ```
556 #[must_use]
557 pub fn select<F>(&self, mut predicate: F) -> Self
558 where
559 F: FnMut(&[T]) -> bool,
560 T: Clone,
561 {
562 let rows = self
563 .rows
564 .iter()
565 .filter(|row| predicate(row))
566 .cloned()
567 .collect();
568
569 Self {
570 schema: self.schema.clone(),
571 rows,
572 }
573 }
574
575 /// Projects the relation onto the named columns in the requested order.
576 ///
577 /// The output schema follows the exact order of `columns`. Duplicate
578 /// column names, unknown columns, and empty projections are rejected by the
579 /// current exact core.
580 ///
581 /// # Examples
582 ///
583 /// ```rust
584 /// use relmath::NaryRelation;
585 ///
586 /// let passed = NaryRelation::from_rows(
587 /// ["student", "course", "status"],
588 /// [
589 /// ["Alice", "Math", "passed"],
590 /// ["Cara", "Math", "passed"],
591 /// ["Alice", "Physics", "passed"],
592 /// ],
593 /// )?;
594 ///
595 /// let projected = passed.project(["course", "student"])?;
596 ///
597 /// assert_eq!(
598 /// projected.schema(),
599 /// &["course".to_string(), "student".to_string()]
600 /// );
601 /// assert_eq!(
602 /// projected.to_rows(),
603 /// vec![
604 /// vec!["Math", "Alice"],
605 /// vec!["Math", "Cara"],
606 /// vec!["Physics", "Alice"],
607 /// ]
608 /// );
609 /// # Ok::<(), relmath::NaryRelationError>(())
610 /// ```
611 pub fn project<I, S>(&self, columns: I) -> Result<Self, NaryRelationError>
612 where
613 I: IntoIterator<Item = S>,
614 S: AsRef<str>,
615 T: Clone,
616 {
617 let columns: Vec<String> = columns
618 .into_iter()
619 .map(|column| column.as_ref().to_string())
620 .collect();
621 let projected_schema = Self::validate_schema(columns)?;
622 let mut indices = Vec::with_capacity(projected_schema.len());
623
624 for column in &projected_schema {
625 indices.push(self.column_index(column)?);
626 }
627
628 let rows = self
629 .rows
630 .iter()
631 .map(|row| indices.iter().map(|index| row[*index].clone()).collect())
632 .collect();
633
634 Ok(Self {
635 schema: projected_schema,
636 rows,
637 })
638 }
639
640 /// Returns a relation with one column renamed.
641 ///
642 /// Renaming a column to its current name is a no-op. The new column name
643 /// must be non-blank and must not duplicate another column in the schema.
644 ///
645 /// # Examples
646 ///
647 /// ```rust
648 /// use relmath::NaryRelation;
649 ///
650 /// let completed = NaryRelation::from_rows(
651 /// ["student", "course"],
652 /// [["Alice", "Math"], ["Bob", "Physics"]],
653 /// )?;
654 ///
655 /// let renamed = completed.rename("student", "learner")?;
656 ///
657 /// assert_eq!(
658 /// renamed.schema(),
659 /// &["learner".to_string(), "course".to_string()]
660 /// );
661 /// assert_eq!(
662 /// renamed.to_rows(),
663 /// vec![vec!["Alice", "Math"], vec!["Bob", "Physics"]]
664 /// );
665 /// # Ok::<(), relmath::NaryRelationError>(())
666 /// ```
667 pub fn rename<S>(&self, from: &str, to: S) -> Result<Self, NaryRelationError>
668 where
669 S: Into<String>,
670 T: Clone,
671 {
672 let to = to.into();
673 let index = self.column_index(from)?;
674
675 Self::validate_column_name(&to, index)?;
676
677 if from != to && self.schema.iter().any(|column| column == &to) {
678 return Err(NaryRelationError::DuplicateColumn { column: to });
679 }
680
681 let mut schema = self.schema.clone();
682 schema[index] = to;
683
684 Ok(Self {
685 schema,
686 rows: self.rows.clone(),
687 })
688 }
689
690 /// Returns the union of `self` and `other` when schemas match exactly.
691 ///
692 /// Schema compatibility for the current exact core means exact schema
693 /// equality, including column order.
694 ///
695 /// # Examples
696 ///
697 /// ```rust
698 /// use relmath::NaryRelation;
699 ///
700 /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
701 /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
702 ///
703 /// let union = left.union(&right)?;
704 ///
705 /// assert_eq!(
706 /// union.to_rows(),
707 /// vec![vec!["Alice"], vec!["Bob"], vec!["Cara"]]
708 /// );
709 /// # Ok::<(), relmath::NaryRelationError>(())
710 /// ```
711 pub fn union(&self, other: &Self) -> Result<Self, NaryRelationError>
712 where
713 T: Clone,
714 {
715 self.ensure_same_schema(other)?;
716
717 Ok(Self {
718 schema: self.schema.clone(),
719 rows: self.rows.union(&other.rows).cloned().collect(),
720 })
721 }
722
723 /// Returns the intersection of `self` and `other` when schemas match
724 /// exactly.
725 ///
726 /// Schema compatibility for the current exact core means exact schema
727 /// equality, including column order.
728 ///
729 /// # Examples
730 ///
731 /// ```rust
732 /// use relmath::NaryRelation;
733 ///
734 /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
735 /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
736 ///
737 /// let intersection = left.intersection(&right)?;
738 ///
739 /// assert_eq!(intersection.to_rows(), vec![vec!["Bob"]]);
740 /// # Ok::<(), relmath::NaryRelationError>(())
741 /// ```
742 pub fn intersection(&self, other: &Self) -> Result<Self, NaryRelationError>
743 where
744 T: Clone,
745 {
746 self.ensure_same_schema(other)?;
747
748 Ok(Self {
749 schema: self.schema.clone(),
750 rows: self.rows.intersection(&other.rows).cloned().collect(),
751 })
752 }
753
754 /// Returns the set difference `self \ other` when schemas match exactly.
755 ///
756 /// Schema compatibility for the current exact core means exact schema
757 /// equality, including column order.
758 ///
759 /// # Examples
760 ///
761 /// ```rust
762 /// use relmath::NaryRelation;
763 ///
764 /// let left = NaryRelation::from_rows(["student"], [["Alice"], ["Bob"]])?;
765 /// let right = NaryRelation::from_rows(["student"], [["Bob"], ["Cara"]])?;
766 ///
767 /// let only_left = left.difference(&right)?;
768 ///
769 /// assert_eq!(only_left.to_rows(), vec![vec!["Alice"]]);
770 /// # Ok::<(), relmath::NaryRelationError>(())
771 /// ```
772 pub fn difference(&self, other: &Self) -> Result<Self, NaryRelationError>
773 where
774 T: Clone,
775 {
776 self.ensure_same_schema(other)?;
777
778 Ok(Self {
779 schema: self.schema.clone(),
780 rows: self.rows.difference(&other.rows).cloned().collect(),
781 })
782 }
783
784 /// Returns the natural join of `self` and `other`.
785 ///
786 /// Shared column names constrain row matching: a pair of rows joins only
787 /// when every shared column has equal values on both sides. When the
788 /// schemas are disjoint, natural join behaves as a cartesian product.
789 ///
790 /// The output schema keeps the entire left-hand schema in order, followed
791 /// by the right-hand columns that are not shared, in their original order.
792 /// Output rows are stored in a `BTreeSet`, so iteration remains
793 /// deterministic. When no rows match, the result is empty but still keeps
794 /// the joined schema.
795 ///
796 /// # Examples
797 ///
798 /// ```rust
799 /// use relmath::NaryRelation;
800 ///
801 /// let completed = NaryRelation::from_rows(
802 /// ["student", "course"],
803 /// [
804 /// ["Alice", "Math"],
805 /// ["Alice", "Physics"],
806 /// ["Bob", "Math"],
807 /// ],
808 /// )?;
809 /// let rooms = NaryRelation::from_rows(
810 /// ["course", "room"],
811 /// [["Math", "R101"], ["Physics", "R201"]],
812 /// )?;
813 ///
814 /// let scheduled = completed.natural_join(&rooms);
815 ///
816 /// assert_eq!(
817 /// scheduled.schema(),
818 /// &["student".to_string(), "course".to_string(), "room".to_string()]
819 /// );
820 /// assert_eq!(
821 /// scheduled.to_rows(),
822 /// vec![
823 /// vec!["Alice", "Math", "R101"],
824 /// vec!["Alice", "Physics", "R201"],
825 /// vec!["Bob", "Math", "R101"],
826 /// ]
827 /// );
828 /// # Ok::<(), relmath::NaryRelationError>(())
829 /// ```
830 pub fn natural_join(&self, other: &Self) -> Self
831 where
832 T: Clone,
833 {
834 let right_index_by_name: BTreeMap<&str, usize> = other
835 .schema
836 .iter()
837 .enumerate()
838 .map(|(index, column)| (column.as_str(), index))
839 .collect();
840
841 let mut shared_indices = Vec::new();
842 let mut shared_right_indices = BTreeSet::new();
843 for (left_index, column) in self.schema.iter().enumerate() {
844 if let Some(&right_index) = right_index_by_name.get(column.as_str()) {
845 shared_indices.push((left_index, right_index));
846 shared_right_indices.insert(right_index);
847 }
848 }
849
850 let mut schema = self.schema.clone();
851 let mut right_only_indices = Vec::new();
852 for (right_index, column) in other.schema.iter().enumerate() {
853 if !shared_right_indices.contains(&right_index) {
854 schema.push(column.clone());
855 right_only_indices.push(right_index);
856 }
857 }
858
859 let mut rows = BTreeSet::new();
860 for left_row in &self.rows {
861 for right_row in &other.rows {
862 if shared_indices.iter().all(|(left_index, right_index)| {
863 left_row[*left_index] == right_row[*right_index]
864 }) {
865 let mut joined_row =
866 Vec::with_capacity(self.arity() + right_only_indices.len());
867 joined_row.extend(left_row.iter().cloned());
868 for right_index in &right_only_indices {
869 joined_row.push(right_row[*right_index].clone());
870 }
871 rows.insert(joined_row);
872 }
873 }
874 }
875
876 Self { schema, rows }
877 }
878
879 fn validate_schema<I, S>(schema: I) -> Result<Vec<String>, NaryRelationError>
880 where
881 I: IntoIterator<Item = S>,
882 S: Into<String>,
883 {
884 let schema: Vec<String> = schema.into_iter().map(Into::into).collect();
885
886 if schema.is_empty() {
887 return Err(NaryRelationError::EmptySchema);
888 }
889
890 let mut seen = BTreeSet::new();
891 for (index, column) in schema.iter().enumerate() {
892 Self::validate_column_name(column, index)?;
893
894 if !seen.insert(column.clone()) {
895 return Err(NaryRelationError::DuplicateColumn {
896 column: column.clone(),
897 });
898 }
899 }
900
901 Ok(schema)
902 }
903
904 fn collect_row<I>(arity: usize, row: I) -> Result<Vec<T>, NaryRelationError>
905 where
906 I: IntoIterator<Item = T>,
907 {
908 let row: Vec<T> = row.into_iter().collect();
909
910 if row.len() != arity {
911 return Err(NaryRelationError::RowArityMismatch {
912 expected: arity,
913 actual: row.len(),
914 });
915 }
916
917 Ok(row)
918 }
919
920 fn collect_named_row<K>(
921 schema: &[String],
922 named_row: BTreeMap<K, T>,
923 ) -> Result<Vec<T>, NaryRelationError>
924 where
925 K: Into<String> + Ord,
926 {
927 let mut normalized_row = BTreeMap::new();
928 for (column, value) in named_row {
929 let column = column.into();
930 if normalized_row.insert(column.clone(), value).is_some() {
931 return Err(NaryRelationError::DuplicateColumn { column });
932 }
933 }
934
935 let mut row = Vec::with_capacity(schema.len());
936 for column in schema {
937 row.push(normalized_row.remove(column.as_str()).ok_or_else(|| {
938 NaryRelationError::MissingColumn {
939 column: column.clone(),
940 }
941 })?);
942 }
943
944 if let Some((column, _)) = normalized_row.into_iter().next() {
945 return Err(NaryRelationError::UnexpectedColumn { column });
946 }
947
948 Ok(row)
949 }
950
951 fn validate_column_name(column: &str, index: usize) -> Result<(), NaryRelationError> {
952 if column.trim().is_empty() {
953 Err(NaryRelationError::BlankColumnName { index })
954 } else {
955 Ok(())
956 }
957 }
958
959 fn ensure_same_schema(&self, other: &Self) -> Result<(), NaryRelationError> {
960 if self.schema == other.schema {
961 Ok(())
962 } else {
963 Err(NaryRelationError::SchemaMismatch {
964 left: self.schema.clone(),
965 right: other.schema.clone(),
966 })
967 }
968 }
969}
970
971impl<T: Ord> FiniteRelation for NaryRelation<T> {
972 fn len(&self) -> usize {
973 self.rows.len()
974 }
975}