1use std::fmt::{Debug, Display, Formatter};
5use std::hash::Hash;
6
7use vortex_array::compute::list_contains as compute_list_contains;
8use vortex_array::{ArrayRef, DeserializeMetadata, EmptyMetadata};
9use vortex_dtype::DType;
10use vortex_error::{VortexResult, vortex_bail};
11
12use crate::{
13 AnalysisExpr, ExprEncodingRef, ExprId, ExprRef, IntoExpr, LiteralVTable, Scope, StatsCatalog,
14 VTable, and, gt, lit, lt, or, vtable,
15};
16
// Invokes the crate's `vtable!` macro for the `ListContains` expression.
// NOTE(review): the macro body is not visible in this file; presumably it declares the
// `ListContainsVTable` type that the `VTable` impl below is written for — confirm.
vtable!(ListContains);
18
/// Expression node with two child expressions: a list and a value to look for in it.
///
/// Displayed as `contains(<list>, <value>)`; evaluation hands both evaluated children to the
/// `list_contains` compute function, list first.
// `Hash` is derived while `PartialEq` is written by hand in this file; both cover the same
// two fields (`list` and `value`), which is why the clippy lint is silenced here.
#[allow(clippy::derived_hash_with_manual_eq)]
#[derive(Debug, Clone, Hash, Eq)]
pub struct ListContainsExpr {
    /// Child expression that is searched (first operand).
    list: ExprRef,
    /// Child expression whose value is looked up in `list` (second operand).
    value: ExprRef,
}
25
26impl PartialEq for ListContainsExpr {
27 fn eq(&self, other: &Self) -> bool {
28 self.list.eq(&other.list) && self.value.eq(&other.value)
29 }
30}
31
/// Field-less marker type used as the `Encoding` associated type of the `ListContains` vtable.
pub struct ListContainsExprEncoding;
33
34impl VTable for ListContainsVTable {
35 type Expr = ListContainsExpr;
36 type Encoding = ListContainsExprEncoding;
37 type Metadata = EmptyMetadata;
38
39 fn id(_encoding: &Self::Encoding) -> ExprId {
40 ExprId::new_ref("list_contains")
41 }
42
43 fn encoding(_expr: &Self::Expr) -> ExprEncodingRef {
44 ExprEncodingRef::new_ref(ListContainsExprEncoding.as_ref())
45 }
46
47 fn metadata(_expr: &Self::Expr) -> Option<Self::Metadata> {
48 Some(EmptyMetadata)
49 }
50
51 fn children(expr: &Self::Expr) -> Vec<&ExprRef> {
52 vec![&expr.list, &expr.value]
53 }
54
55 fn with_children(_expr: &Self::Expr, children: Vec<ExprRef>) -> VortexResult<Self::Expr> {
56 Ok(ListContainsExpr::new(
57 children[0].clone(),
58 children[1].clone(),
59 ))
60 }
61
62 fn build(
63 _encoding: &Self::Encoding,
64 _metadata: &<Self::Metadata as DeserializeMetadata>::Output,
65 children: Vec<ExprRef>,
66 ) -> VortexResult<Self::Expr> {
67 if children.len() != 2 {
68 vortex_bail!(
69 "ListContains expression must have exactly 2 children, got {}",
70 children.len()
71 );
72 }
73 Ok(ListContainsExpr::new(
74 children[0].clone(),
75 children[1].clone(),
76 ))
77 }
78
79 fn evaluate(expr: &Self::Expr, scope: &Scope) -> VortexResult<ArrayRef> {
80 compute_list_contains(
81 expr.list.evaluate(scope)?.as_ref(),
82 expr.value.evaluate(scope)?.as_ref(),
83 )
84 }
85
86 fn return_dtype(expr: &Self::Expr, scope: &DType) -> VortexResult<DType> {
87 Ok(DType::Bool(
88 expr.list.return_dtype(scope)?.nullability()
89 | expr.value.return_dtype(scope)?.nullability(),
90 ))
91 }
92}
93
94impl ListContainsExpr {
95 pub fn new(list: ExprRef, value: ExprRef) -> Self {
96 Self { list, value }
97 }
98
99 pub fn new_expr(list: ExprRef, value: ExprRef) -> ExprRef {
100 Self::new(list, value).into_expr()
101 }
102
103 pub fn value(&self) -> &ExprRef {
104 &self.value
105 }
106}
107
108pub fn list_contains(list: ExprRef, value: ExprRef) -> ExprRef {
109 ListContainsExpr::new(list, value).into_expr()
110}
111
112impl Display for ListContainsExpr {
113 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
114 write!(f, "contains({}, {})", &self.list, &self.value)
115 }
116}
117
118impl AnalysisExpr for ListContainsExpr {
119 fn stat_falsification(&self, catalog: &mut dyn StatsCatalog) -> Option<ExprRef> {
123 let min = self.list.min(catalog)?;
124 let max = self.list.max(catalog)?;
125 if min == max {
127 let list_ = min
128 .as_opt::<LiteralVTable>()
129 .and_then(|l| l.value().as_list_opt())
130 .and_then(|l| l.elements())?;
131 if list_.is_empty() {
132 return Some(lit(true));
134 }
135 let value_max = self.value.max(catalog)?;
136 let value_min = self.value.min(catalog)?;
137
138 return list_
139 .iter()
140 .map(move |v| {
141 or(
142 lt(value_max.clone(), lit(v.clone())),
143 gt(value_min.clone(), lit(v.clone())),
144 )
145 })
146 .reduce(and);
147 }
148
149 None
150 }
151}
152
// Unit tests for `list_contains`: evaluation against small list arrays, the reported return
// dtype, and the pruning expression derived from a constant list.
#[cfg(test)]
mod tests {
    use vortex_array::arrays::{BoolArray, BooleanBuffer, ListArray, PrimitiveArray};
    use vortex_array::stats::Stat;
    use vortex_array::validity::Validity;
    use vortex_array::{Array, ArrayRef, IntoArray};
    use vortex_dtype::PType::I32;
    use vortex_dtype::{DType, Field, FieldPath, FieldPathSet, Nullability, StructFields};
    use vortex_scalar::Scalar;
    use vortex_utils::aliases::hash_map::HashMap;

    use crate::list_contains::list_contains;
    use crate::pruning::checked_pruning_expr;
    use crate::{Arc, HashSet, Scope, and, col, get_item, gt, lit, lt, or, root};

    // Shared fixture: a fully valid list array built from ten elements and the boundaries
    // `[0, 5, 10]` — presumably offsets, i.e. two lists `[1, 1, 2, 2, 2]` and
    // `[2, 2, 3, 3, 3]` (consistent with the assertions in the tests below).
    fn test_array() -> ArrayRef {
        ListArray::try_new(
            PrimitiveArray::from_iter(vec![1, 1, 2, 2, 2, 2, 2, 3, 3, 3]).into_array(),
            PrimitiveArray::from_iter(vec![0, 5, 10]).into_array(),
            Validity::AllValid,
        )
        .unwrap()
        .into_array()
    }

    // `1` is found in the first list only.
    #[test]
    pub fn test_one() {
        let arr = test_array();

        let expr = list_contains(root(), lit(1));
        let item = expr.evaluate(&Scope::new(arr)).unwrap();

        assert_eq!(
            item.scalar_at(0).unwrap(),
            Scalar::bool(true, Nullability::Nullable)
        );
        assert_eq!(
            item.scalar_at(1).unwrap(),
            Scalar::bool(false, Nullability::Nullable)
        );
    }

    // `2` is found in both lists.
    #[test]
    pub fn test_all() {
        let arr = test_array();

        let expr = list_contains(root(), lit(2));
        let item = expr.evaluate(&Scope::new(arr)).unwrap();

        assert_eq!(
            item.scalar_at(0).unwrap(),
            Scalar::bool(true, Nullability::Nullable)
        );
        assert_eq!(
            item.scalar_at(1).unwrap(),
            Scalar::bool(true, Nullability::Nullable)
        );
    }

    // `4` is found in neither list.
    #[test]
    pub fn test_none() {
        let arr = test_array();

        let expr = list_contains(root(), lit(4));
        let item = expr.evaluate(&Scope::new(arr)).unwrap();

        assert_eq!(
            item.scalar_at(0).unwrap(),
            Scalar::bool(false, Nullability::Nullable)
        );
        assert_eq!(
            item.scalar_at(1).unwrap(),
            Scalar::bool(false, Nullability::Nullable)
        );
    }

    // The second entry uses the boundaries `5..5` — presumably an empty list — and is
    // expected to evaluate to `false`.
    #[test]
    pub fn test_empty() {
        let arr = ListArray::try_new(
            PrimitiveArray::from_iter(vec![1, 1, 2, 2, 2]).into_array(),
            PrimitiveArray::from_iter(vec![0, 5, 5]).into_array(),
            Validity::AllValid,
        )
        .unwrap()
        .into_array();

        let expr = list_contains(root(), lit(2));
        let item = expr.evaluate(&Scope::new(arr)).unwrap();

        assert_eq!(
            item.scalar_at(0).unwrap(),
            Scalar::bool(true, Nullability::Nullable)
        );
        assert_eq!(
            item.scalar_at(1).unwrap(),
            Scalar::bool(false, Nullability::Nullable)
        );
    }

    // The validity mask marks the second entry as invalid; the result at that position is
    // expected to be invalid as well.
    #[test]
    pub fn test_nullable() {
        let arr = ListArray::try_new(
            PrimitiveArray::from_iter(vec![1, 1, 2, 2, 2]).into_array(),
            PrimitiveArray::from_iter(vec![0, 5, 5]).into_array(),
            Validity::Array(BoolArray::from(BooleanBuffer::from(vec![true, false])).into_array()),
        )
        .unwrap()
        .into_array();

        let expr = list_contains(root(), lit(2));
        let item = expr.evaluate(&Scope::new(arr)).unwrap();

        assert_eq!(
            item.scalar_at(0).unwrap(),
            Scalar::bool(true, Nullability::Nullable)
        );
        assert!(!item.is_valid(1).unwrap());
    }

    // A nullable list column searched for a literal must report a nullable boolean, even
    // though the enclosing struct scope itself is non-nullable.
    #[test]
    pub fn test_return_type() {
        let scope = DType::Struct(
            StructFields::new(
                ["array"].into(),
                vec![DType::List(
                    Arc::new(DType::Primitive(I32, Nullability::NonNullable)),
                    Nullability::Nullable,
                )],
            ),
            Nullability::NonNullable,
        );

        let expr = list_contains(get_item("array", root()), lit(2));

        assert_eq!(
            expr.return_dtype(&scope).unwrap(),
            DType::Bool(Nullability::Nullable)
        );
    }

    // Constant list `[1, 2, 3]` searched for column `a`: the pruning expression is the
    // left-nested conjunction of `a_max < e OR a_min > e` over the elements, and the only
    // stats it requires are the min and max of `a`.
    #[test]
    pub fn list_falsification() {
        let expr = list_contains(
            lit(Scalar::list(
                Arc::new(DType::Primitive(I32, Nullability::NonNullable)),
                vec![1.into(), 2.into(), 3.into()],
                Nullability::NonNullable,
            )),
            col("a"),
        );

        let (expr, st) = checked_pruning_expr(
            &expr,
            &FieldPathSet::from_iter([
                FieldPath::from_iter([Field::Name("a".into()), Field::Name("max".into())]),
                FieldPath::from_iter([Field::Name("a".into()), Field::Name("min".into())]),
            ]),
        )
        .unwrap();

        assert_eq!(
            &expr,
            &and(
                and(
                    or(lt(col("a_max"), lit(1i32)), gt(col("a_min"), lit(1i32)),),
                    or(lt(col("a_max"), lit(2i32)), gt(col("a_min"), lit(2i32)),)
                ),
                or(lt(col("a_max"), lit(3i32)), gt(col("a_min"), lit(3i32)),)
            )
        );

        assert_eq!(
            st.map(),
            &HashMap::from_iter([(
                FieldPath::from_name("a"),
                HashSet::from([Stat::Min, Stat::Max])
            )])
        );
    }
}