// datafusion_physical_expr/expressions/not.rs

use std::any::Any;
use std::fmt;
use std::hash::Hash;
use std::sync::Arc;

use crate::PhysicalExpr;

use arrow::datatypes::{DataType, FieldRef, Schema};
use arrow::record_batch::RecordBatch;
use datafusion_common::{Result, ScalarValue, cast::as_boolean_array, internal_err};
use datafusion_expr::ColumnarValue;
use datafusion_expr::interval_arithmetic::Interval;
use datafusion_expr::statistics::Distribution::{self, Bernoulli};
/// Physical expression for the logical `NOT` of a boolean expression.
#[derive(Debug, Eq)]
pub struct NotExpr {
    /// The boolean-typed input expression being negated.
    arg: Arc<dyn PhysicalExpr>,
}
40
impl PartialEq for NotExpr {
    /// Two `NotExpr`s are equal iff their inner expressions are equal.
    fn eq(&self, other: &Self) -> bool {
        self.arg.eq(&other.arg)
    }
}
47
impl Hash for NotExpr {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Hash only the inner expression, mirroring `PartialEq` so that
        // equal values hash equally.
        self.arg.hash(state);
    }
}
53
impl NotExpr {
    /// Creates a new `NotExpr` that negates `arg`.
    pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
        Self { arg }
    }

    /// Returns the underlying input expression.
    pub fn arg(&self) -> &Arc<dyn PhysicalExpr> {
        &self.arg
    }
}
65
66impl fmt::Display for NotExpr {
67 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
68 write!(f, "NOT {}", self.arg)
69 }
70}
71
impl PhysicalExpr for NotExpr {
    /// Returns `self` as [`Any`], enabling downcasting.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// `NOT` always produces a boolean value, regardless of the input schema.
    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
        Ok(DataType::Boolean)
    }

    /// Nullability follows the input expression (a NULL input yields a
    /// NULL output, see `evaluate`).
    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
        self.arg.nullable(input_schema)
    }

    /// Evaluates the inner expression and negates the boolean result.
    ///
    /// Array inputs are negated with Arrow's boolean `not` kernel; a NULL
    /// scalar input short-circuits to a NULL boolean scalar.
    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
        match self.arg.evaluate(batch)? {
            ColumnarValue::Array(array) => {
                // Errors if the child produced a non-boolean array.
                let array = as_boolean_array(&array)?;
                Ok(ColumnarValue::Array(Arc::new(
                    arrow::compute::kernels::boolean::not(array)?,
                )))
            }
            ColumnarValue::Scalar(scalar) => {
                if scalar.is_null() {
                    return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
                }
                let bool_value: bool = scalar.try_into()?;
                Ok(ColumnarValue::Scalar(ScalarValue::from(!bool_value)))
            }
        }
    }

    /// Delegates to the child: name and nullability come from the input
    /// expression. NOTE(review): assumes the child's field type is already
    /// Boolean (as `data_type` reports) — confirm for non-boolean children.
    fn return_field(&self, input_schema: &Schema) -> Result<FieldRef> {
        self.arg.return_field(input_schema)
    }

    fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {
        vec![&self.arg]
    }

    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn PhysicalExpr>>,
    ) -> Result<Arc<dyn PhysicalExpr>> {
        // Expects exactly one child; indexing panics if `children` is empty.
        Ok(Arc::new(NotExpr::new(Arc::clone(&children[0]))))
    }

    /// Interval arithmetic: the bounds of `NOT expr` are the logical
    /// complement of the child's bounds.
    fn evaluate_bounds(&self, children: &[&Interval]) -> Result<Interval> {
        children[0].not()
    }

    /// Pushes the parent's interval down to the child by intersecting the
    /// child's interval with the complement of the parent's interval.
    fn propagate_constraints(
        &self,
        interval: &Interval,
        children: &[&Interval],
    ) -> Result<Option<Vec<Interval>>> {
        let complemented_interval = interval.not()?;

        Ok(children[0]
            .intersect(complemented_interval)?
            .map(|result| vec![result]))
    }

    /// For a Bernoulli child with probability `p`, `NOT` is Bernoulli with
    /// probability `1 - p`; a NULL `p` is passed through unchanged. Any
    /// non-Bernoulli child is an internal error.
    fn evaluate_statistics(&self, children: &[&Distribution]) -> Result<Distribution> {
        match children[0] {
            Bernoulli(b) => {
                let p_value = b.p_value();
                if p_value.is_null() {
                    // Unknown probability: return the child's distribution
                    // as-is.
                    Ok(children[0].clone())
                } else {
                    let one = ScalarValue::new_one(&p_value.data_type())?;
                    Distribution::new_bernoulli(one.sub_checked(p_value)?)
                }
            }
            _ => internal_err!("NotExpr can only operate on Boolean datatypes"),
        }
    }

    /// Back-propagates a known parent outcome onto the child distribution:
    /// a certainly-TRUE parent forces the child to p = 0, a certainly-FALSE
    /// parent forces p = 1, and an uncertain parent yields no update
    /// (`Some(vec![])`).
    fn propagate_statistics(
        &self,
        parent: &Distribution,
        children: &[&Distribution],
    ) -> Result<Option<Vec<Distribution>>> {
        match (parent, children[0]) {
            (Bernoulli(parent), Bernoulli(child)) => {
                let parent_range = parent.range();
                let result = if parent_range == Interval::TRUE {
                    if child.range() == Interval::TRUE {
                        // Parent certainly TRUE but child also certainly
                        // TRUE — contradictory. NOTE(review): `None` appears
                        // to signal infeasibility; confirm against the
                        // trait's contract.
                        None
                    } else {
                        Some(vec![Distribution::new_bernoulli(ScalarValue::new_zero(
                            &child.data_type(),
                        )?)?])
                    }
                } else if parent_range == Interval::FALSE {
                    if child.range() == Interval::FALSE {
                        // Contradictory certainties, as above.
                        None
                    } else {
                        Some(vec![Distribution::new_bernoulli(ScalarValue::new_one(
                            &child.data_type(),
                        )?)?])
                    }
                } else {
                    // Parent outcome is uncertain: nothing to push down.
                    Some(vec![])
                };
                Ok(result)
            }
            _ => internal_err!("NotExpr can only operate on Boolean datatypes"),
        }
    }

    /// Writes the expression in SQL form: `NOT <arg-sql>`.
    fn fmt_sql(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "NOT ")?;
        self.arg.fmt_sql(f)
    }
}
188
189pub fn not(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
191 Ok(Arc::new(NotExpr::new(arg)))
192}
193
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use super::*;
    use crate::expressions::{Column, col};

    use arrow::{array::BooleanArray, datatypes::*};
    use datafusion_physical_expr_common::physical_expr::fmt_sql;

    /// `NOT` over a boolean array: values invert and NULLs propagate.
    #[test]
    fn neg_op() -> Result<()> {
        let schema = schema();

        let expr = not(col("a", &schema)?)?;
        assert_eq!(expr.data_type(&schema)?, DataType::Boolean);
        assert!(expr.nullable(&schema)?);

        let input = BooleanArray::from(vec![Some(true), None, Some(false)]);
        let expected = &BooleanArray::from(vec![Some(false), None, Some(true)]);

        let batch = RecordBatch::try_new(schema, vec![Arc::new(input)])?;

        let result = expr
            .evaluate(&batch)?
            .into_array(batch.num_rows())
            .expect("Failed to convert to array");
        let result =
            as_boolean_array(&result).expect("failed to downcast to BooleanArray");
        assert_eq!(result, expected);

        Ok(())
    }

    /// Interval negation: the uncertain interval `[false, true]` maps to
    /// itself, while certain intervals flip.
    #[test]
    fn test_evaluate_bounds() -> Result<()> {
        assert_evaluate_bounds(
            Interval::make(Some(false), Some(true))?,
            Interval::make(Some(false), Some(true))?,
        )?;
        assert_evaluate_bounds(
            Interval::make(Some(true), Some(true))?,
            Interval::make(Some(false), Some(false))?,
        )?;
        assert_evaluate_bounds(
            Interval::make(Some(false), Some(false))?,
            Interval::make(Some(true), Some(true))?,
        )?;
        Ok(())
    }

    /// Asserts that `NOT`'s `evaluate_bounds` over `interval` equals
    /// `expected_interval`.
    fn assert_evaluate_bounds(
        interval: Interval,
        expected_interval: Interval,
    ) -> Result<()> {
        let not_expr = not(col("a", &schema())?)?;
        assert_eq!(not_expr.evaluate_bounds(&[&interval])?, expected_interval);
        Ok(())
    }

    /// `evaluate_statistics` rejects non-Bernoulli inputs and complements
    /// the probability of Bernoulli inputs.
    #[test]
    fn test_evaluate_statistics() -> Result<()> {
        let a = Arc::new(Column::new("a", 0)) as _;
        let expr = not(a)?;

        // Uniform, exponential, and Gaussian inputs are rejected.
        assert!(
            expr.evaluate_statistics(&[&Distribution::new_uniform(
                Interval::make_unbounded(&DataType::Float64)?
            )?])
            .is_err()
        );

        assert!(
            expr.evaluate_statistics(&[&Distribution::new_exponential(
                ScalarValue::from(1.0),
                ScalarValue::from(1.0),
                true
            )?])
            .is_err()
        );

        assert!(
            expr.evaluate_statistics(&[&Distribution::new_gaussian(
                ScalarValue::from(1.0),
                ScalarValue::from(1.0),
            )?])
            .is_err()
        );

        // Bernoulli(p) maps to Bernoulli(1 - p).
        assert_eq!(
            expr.evaluate_statistics(&[&Distribution::new_bernoulli(
                ScalarValue::from(0.0),
            )?])?,
            Distribution::new_bernoulli(ScalarValue::from(1.))?
        );

        assert_eq!(
            expr.evaluate_statistics(&[&Distribution::new_bernoulli(
                ScalarValue::from(1.0),
            )?])?,
            Distribution::new_bernoulli(ScalarValue::from(0.))?
        );

        assert_eq!(
            expr.evaluate_statistics(&[&Distribution::new_bernoulli(
                ScalarValue::from(0.25),
            )?])?,
            Distribution::new_bernoulli(ScalarValue::from(0.75))?
        );

        // Generic distributions are rejected regardless of range type.
        assert!(
            expr.evaluate_statistics(&[&Distribution::new_generic(
                ScalarValue::Null,
                ScalarValue::Null,
                ScalarValue::Null,
                Interval::make_unbounded(&DataType::UInt8)?
            )?])
            .is_err()
        );

        assert!(
            expr.evaluate_statistics(&[&Distribution::new_generic(
                ScalarValue::Null,
                ScalarValue::Null,
                ScalarValue::Null,
                Interval::make_unbounded(&DataType::Float64)?
            )?])
            .is_err()
        );

        Ok(())
    }

    /// `Display` shows the resolved column (`a@0`); `fmt_sql` shows the
    /// SQL form (`a`).
    #[test]
    fn test_fmt_sql() -> Result<()> {
        let schema = schema();

        let expr = not(col("a", &schema)?)?;

        let display_string = expr.to_string();
        assert_eq!(display_string, "NOT a@0");

        let sql_string = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(sql_string, "NOT a");

        Ok(())
    }

    /// Shared single-column nullable boolean schema, built once.
    fn schema() -> SchemaRef {
        static SCHEMA: LazyLock<SchemaRef> = LazyLock::new(|| {
            Arc::new(Schema::new(vec![Field::new("a", DataType::Boolean, true)]))
        });
        Arc::clone(&SCHEMA)
    }
}