vortex_array/arrays/primitive/array/
top_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5
6use rustc_hash::FxBuildHasher;
7use vortex_dtype::NativePType;
8use vortex_dtype::match_each_native_ptype;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_mask::AllOr;
12use vortex_mask::Mask;
13use vortex_scalar::PValue;
14use vortex_utils::aliases::hash_map::HashMap;
15
16use crate::arrays::NativeValue;
17use crate::arrays::PrimitiveArray;
18
19impl PrimitiveArray {
20    /// Compute most common present value of this array
21    pub fn top_value(&self) -> VortexResult<Option<(PValue, usize)>> {
22        if self.is_empty() {
23            return Ok(None);
24        }
25
26        if self.all_invalid() {
27            return Ok(None);
28        }
29
30        match_each_native_ptype!(self.ptype(), |P| {
31            let (top, count) = typed_top_value(self.as_slice::<P>(), self.validity_mask());
32            Ok(Some((top.into(), count)))
33        })
34    }
35}
36
37fn typed_top_value<T>(values: &[T], mask: Mask) -> (T, usize)
38where
39    T: NativePType,
40    NativeValue<T>: Eq + Hash,
41{
42    let mut distinct_values: HashMap<NativeValue<T>, usize, FxBuildHasher> =
43        HashMap::with_hasher(FxBuildHasher);
44    match mask.indices() {
45        AllOr::All => {
46            for value in values.iter().copied() {
47                *distinct_values.entry(NativeValue(value)).or_insert(0) += 1;
48            }
49        }
50        AllOr::None => unreachable!("All invalid arrays should be handled earlier"),
51        AllOr::Some(idxs) => {
52            for &i in idxs {
53                *distinct_values
54                    .entry(NativeValue(unsafe { *values.get_unchecked(i) }))
55                    .or_insert(0) += 1
56            }
57        }
58    }
59
60    let (&top_value, &top_count) = distinct_values
61        .iter()
62        .max_by_key(|&(_, &count)| count)
63        .vortex_expect("non-empty");
64    (top_value.0, top_count)
65}