substrait_validator/parse/relations/
set.rs1use std::sync::Arc;
12
13use crate::input::proto::substrait;
14use crate::output::diagnostic;
15use crate::parse::context;
16use crate::parse::types;
17
/// Kind of set operation, as resolved by `parse_set_rel` from the protobuf
/// `SetOp` value and from whether more than two input relations are present.
///
/// The variant only selects the description and summary attached to the
/// relation node; it carries no data of its own, so it is cheap to copy and
/// compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Operation {
    /// The set operation was left unspecified.
    Invalid,
    /// Rows of the first input that do not exist in the second input.
    Subtract,
    /// Rows of the first input that do not exist in any of the other inputs.
    SubtractByUnion,
    /// Rows of the first input that do not exist in all of the other inputs.
    SubtractByIntersection,
    /// Rows of the first input that exist in all inputs.
    Intersect,
    /// Rows of the first input that exist in any of the other inputs.
    IntersectWithUnion,
    /// Rows that exist in any input, with duplicates removed.
    Union,
    /// All rows from all inputs.
    Merge,
}
28
29pub fn parse_set_rel(x: &substrait::SetRel, y: &mut context::Context) -> diagnostic::Result<()> {
31 use substrait::set_rel::SetOp;
32
33 let in_types: Vec<_> = handle_rel_inputs!(x, y).collect();
35
36 if in_types.len() < 2 {
38 diagnostic!(
39 y,
40 Error,
41 RelationMissing,
42 "set operations require at least two input relations"
43 );
44 }
45 let mut schema = Arc::default();
46 for in_type in in_types.iter() {
47 schema = types::assert_equal(
48 y,
49 &in_type.strip_field_names(),
50 &schema,
51 "all set inputs must have matching schemas",
52 );
53 }
54 y.set_schema(schema);
55
56 let op = proto_required_enum_field!(x, y, op, SetOp)
58 .1
59 .unwrap_or_default();
60 let op = match (op, in_types.len() > 2) {
61 (SetOp::Unspecified, _) => Operation::Invalid,
62 (SetOp::MinusPrimary, true) => Operation::SubtractByUnion,
63 (SetOp::MinusPrimary, false) => Operation::Subtract,
64 (SetOp::MinusMultiset, true) => Operation::SubtractByIntersection,
65 (SetOp::MinusMultiset, false) => Operation::Subtract,
66 (SetOp::IntersectionPrimary, true) => Operation::IntersectWithUnion,
67 (SetOp::IntersectionPrimary, false) => Operation::Intersect,
68 (SetOp::IntersectionMultiset, _) => Operation::Intersect,
69 (SetOp::UnionDistinct, _) => Operation::Union,
70 (SetOp::UnionAll, _) => Operation::Merge,
71 (SetOp::MinusPrimaryAll, _) | (SetOp::IntersectionMultisetAll, _) => {
72 diagnostic!(
73 y,
74 Warning,
75 NotYetImplemented,
76 "Set variant {:?} not yet supported",
77 op
78 );
79
80 handle_rel_common!(x, y);
81 handle_advanced_extension!(x, y);
82 return Ok(());
83 }
84 };
85
86 match op {
88 Operation::Invalid => {
89 describe!(y, Relation, "Invalid set operation");
90 }
91 Operation::Subtract => {
92 describe!(y, Relation, "Set subtraction");
93 summary!(
94 y,
95 "Yields all rows from the first dataset that do not exist \
96 in the second dataset."
97 );
98 }
99 Operation::SubtractByUnion => {
100 describe!(y, Relation, "Set subtract by union");
101 summary!(
102 y,
103 "Yields all rows from the first dataset that do not exist \
104 in any of the other datasets."
105 );
106 }
107 Operation::SubtractByIntersection => {
108 describe!(y, Relation, "Set subtract by intersection");
109 summary!(
110 y,
111 "Yields all rows from the first dataset that do not exist in \
112 all of the other datasets."
113 );
114 }
115 Operation::Intersect => {
116 describe!(y, Relation, "Set intersection");
117 summary!(
118 y,
119 "Yields all rows from the first dataset that exist in all \
120 datasets."
121 );
122 }
123 Operation::IntersectWithUnion => {
124 describe!(y, Relation, "Set intersect with union");
125 summary!(
126 y,
127 "Yields all rows from the first dataset that exist in any of \
128 the other datasets."
129 );
130 }
131 Operation::Union => {
132 describe!(y, Relation, "Set union");
133 summary!(
134 y,
135 "Yields all rows that exist in any dataset, removing duplicates."
136 );
137 }
138 Operation::Merge => {
139 describe!(y, Relation, "Merge");
140 summary!(y, "Yields all rows from all incoming datasets.");
141 }
142 };
143
144 handle_rel_common!(x, y);
146
147 handle_advanced_extension!(x, y);
149
150 Ok(())
151}