vortex_array/aggregate_fn/fns/mean/
mod.rs1use vortex_error::VortexResult;
5use vortex_error::vortex_bail;
6
7use crate::ArrayRef;
8use crate::ExecutionCtx;
9use crate::aggregate_fn::Accumulator;
10use crate::aggregate_fn::AggregateFnId;
11use crate::aggregate_fn::AggregateFnVTable;
12use crate::aggregate_fn::DynAccumulator;
13use crate::aggregate_fn::EmptyOptions;
14use crate::aggregate_fn::combined::BinaryCombined;
15use crate::aggregate_fn::combined::Combined;
16use crate::aggregate_fn::combined::CombinedOptions;
17use crate::aggregate_fn::combined::PairOptions;
18use crate::aggregate_fn::fns::count::Count;
19use crate::aggregate_fn::fns::sum::Sum;
20use crate::builtins::ArrayBuiltins;
21use crate::dtype::DType;
22use crate::dtype::Nullability;
23use crate::dtype::PType;
24use crate::scalar::Scalar;
25use crate::scalar_fn::fns::operators::Operator;
26
27pub fn mean(array: &ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Scalar> {
31 let mut acc = Accumulator::try_new(
32 Mean::combined(),
33 PairOptions(EmptyOptions, EmptyOptions),
34 array.dtype().clone(),
35 )?;
36 acc.accumulate(array, ctx)?;
37 acc.finish()
38}
39
/// Mean aggregate function.
///
/// Implemented in this file as a [`BinaryCombined`] pairing of [`Sum`] (left) and [`Count`]
/// (right) whose results are divided when the aggregation is finalized.
#[derive(Debug, Clone)]
pub struct Mean;
50
51impl Mean {
52 pub fn combined() -> Combined<Self> {
53 Combined(Mean)
54 }
55}
56
57impl BinaryCombined for Mean {
58 type Left = Sum;
59 type Right = Count;
60
61 fn id(&self) -> AggregateFnId {
62 AggregateFnId::new("vortex.mean")
63 }
64
65 fn left(&self) -> Sum {
66 Sum
67 }
68
69 fn right(&self) -> Count {
70 Count
71 }
72
73 fn left_name(&self) -> &'static str {
74 "sum"
75 }
76
77 fn right_name(&self) -> &'static str {
78 "count"
79 }
80
81 fn return_dtype(&self, input_dtype: &DType) -> Option<DType> {
82 Some(mean_output_dtype(input_dtype)?.with_nullability(Nullability::Nullable))
83 }
84
85 fn finalize(&self, sum: ArrayRef, count: ArrayRef) -> VortexResult<ArrayRef> {
86 let target = match sum.dtype() {
87 DType::Decimal(..) => sum.dtype().with_nullability(Nullability::Nullable),
88 _ => DType::Primitive(PType::F64, Nullability::Nullable),
89 };
90 let sum_cast = sum.cast(target.clone())?;
91 let count_cast = count.cast(target)?;
92 sum_cast.binary(count_cast, Operator::Div)
93 }
94
95 fn finalize_scalar(&self, left_scalar: Scalar, right_scalar: Scalar) -> VortexResult<Scalar> {
96 if let DType::Decimal(..) = left_scalar.dtype() {
97 vortex_bail!("mean::finalize_scalar not yet implemented for decimal inputs");
98 }
99
100 let target = DType::Primitive(PType::F64, Nullability::Nullable);
101 let sum_cast = left_scalar.cast(&target)?;
102 let count_cast = right_scalar.cast(&target)?;
103
104 let sum = sum_cast.as_primitive().typed_value::<f64>();
105 let count = count_cast.as_primitive().typed_value::<f64>();
106 let value = match (sum, count) {
107 (None, _) | (_, None) | (_, Some(0.0)) => return Ok(Scalar::null(target)), (Some(s), Some(c)) => s / c,
109 };
110 Ok(Scalar::primitive(value, Nullability::Nullable))
111 }
112
113 fn serialize(&self, _options: &CombinedOptions<Self>) -> VortexResult<Option<Vec<u8>>> {
114 unimplemented!("mean is not yet serializable");
115 }
116
117 fn coerce_args(
118 &self,
119 _options: &PairOptions<
120 <Sum as AggregateFnVTable>::Options,
121 <Count as AggregateFnVTable>::Options,
122 >,
123 input_dtype: &DType,
124 ) -> VortexResult<DType> {
125 Ok(coerced_input_dtype(input_dtype).unwrap_or_else(|| input_dtype.clone()))
128 }
129}
130
131fn coerced_input_dtype(input_dtype: &DType) -> Option<DType> {
137 match input_dtype {
138 DType::Bool(_) => Some(input_dtype.clone()),
139 DType::Primitive(_, n) => Some(DType::Primitive(PType::F64, *n)),
140 DType::Decimal(..) => {
141 unimplemented!("mean is not implemented for decimals yet")
142 }
143 _ => None,
144 }
145}
146
147fn mean_output_dtype(input_dtype: &DType) -> Option<DType> {
148 match input_dtype {
149 DType::Bool(_) | DType::Primitive(..) => {
150 Some(DType::Primitive(PType::F64, Nullability::Nullable))
151 }
152 DType::Decimal(..) => {
153 unimplemented!("mean for decimals is not yet implemented");
154 }
155 _ => None,
156 }
157}
158
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;

    use super::*;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::BoolArray;
    use crate::arrays::ChunkedArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::validity::Validity;

    /// Runs [`mean`] on `array` with a fresh execution context and extracts the `f64` result.
    fn mean_f64(array: ArrayRef) -> VortexResult<Option<f64>> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let scalar = mean(&array, &mut ctx)?;
        Ok(scalar.as_primitive().as_::<f64>())
    }

    #[test]
    fn mean_all_valid() -> VortexResult<()> {
        let values = buffer![1.0f64, 2.0, 3.0, 4.0, 5.0];
        let input = PrimitiveArray::new(values, Validity::NonNullable).into_array();
        assert_eq!(mean_f64(input)?, Some(3.0));
        Ok(())
    }

    #[test]
    fn mean_with_nulls() -> VortexResult<()> {
        // The null entry must not contribute to either the sum or the count.
        let input = PrimitiveArray::from_option_iter([Some(2.0f64), None, Some(4.0)]).into_array();
        assert_eq!(mean_f64(input)?, Some(3.0));
        Ok(())
    }

    #[test]
    fn mean_integers() -> VortexResult<()> {
        let input = PrimitiveArray::new(buffer![10i32, 20, 30], Validity::NonNullable).into_array();
        assert_eq!(mean_f64(input)?, Some(20.0));
        Ok(())
    }

    #[test]
    fn mean_bool() -> VortexResult<()> {
        // Three of the four values are `true`.
        let input: BoolArray = [true, false, true, true].into_iter().collect();
        assert_eq!(mean_f64(input.into_array())?, Some(0.75));
        Ok(())
    }

    #[test]
    fn mean_constant_non_null() -> VortexResult<()> {
        let input = ConstantArray::new(5.0f64, 4);
        assert_eq!(mean_f64(input.into_array())?, Some(5.0));
        Ok(())
    }

    #[test]
    fn mean_chunked() -> VortexResult<()> {
        let first = PrimitiveArray::from_option_iter([Some(1.0f64), None, Some(3.0)]);
        let second = PrimitiveArray::from_option_iter([Some(5.0f64), None]);
        let dtype = first.dtype().clone();
        let chunks = vec![first.into_array(), second.into_array()];
        let input = ChunkedArray::try_new(chunks, dtype)?;
        assert_eq!(mean_f64(input.into_array())?, Some(3.0));
        Ok(())
    }

    #[test]
    fn mean_all_null_returns_null() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<f64, _>([None, None, None]).into_array();
        assert_eq!(mean_f64(input)?, None);
        Ok(())
    }

    #[test]
    fn mean_multi_batch() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let mut acc = Accumulator::try_new(
            Mean::combined(),
            PairOptions(EmptyOptions, EmptyOptions),
            DType::Primitive(PType::F64, Nullability::NonNullable),
        )?;

        // Feed the accumulator two separate batches before finishing.
        let batches = [
            PrimitiveArray::new(buffer![1.0f64, 2.0, 3.0], Validity::NonNullable).into_array(),
            PrimitiveArray::new(buffer![4.0f64, 5.0], Validity::NonNullable).into_array(),
        ];
        for batch in &batches {
            acc.accumulate(batch, &mut ctx)?;
        }

        let outcome = acc.finish()?;
        assert_eq!(outcome.as_primitive().as_::<f64>(), Some(3.0));
        Ok(())
    }
}