Skip to main content

vortex_array/arrays/primitive/array/
top_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5
6use rustc_hash::FxBuildHasher;
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_mask::AllOr;
10use vortex_mask::Mask;
11use vortex_utils::aliases::hash_map::HashMap;
12
13use crate::LEGACY_SESSION;
14use crate::VortexSessionExecute;
15use crate::arrays::PrimitiveArray;
16use crate::arrays::primitive::NativeValue;
17use crate::dtype::NativePType;
18use crate::match_each_native_ptype;
19use crate::scalar::PValue;
20use crate::validity::Validity;
21
22impl PrimitiveArray {
23    /// Compute most common present value of this array
24    pub fn top_value(&self) -> VortexResult<Option<(PValue, usize)>> {
25        if self.is_empty() {
26            return Ok(None);
27        }
28
29        if matches!(self.validity()?, Validity::AllInvalid) {
30            return Ok(None);
31        }
32
33        match_each_native_ptype!(self.ptype(), |P| {
34            let (top, count) = typed_top_value(
35                self.as_slice::<P>(),
36                self.as_ref().validity()?.to_mask(
37                    self.as_ref().len(),
38                    &mut LEGACY_SESSION.create_execution_ctx(),
39                )?,
40            );
41            Ok(Some((top.into(), count)))
42        })
43    }
44}
45
46fn typed_top_value<T>(values: &[T], mask: Mask) -> (T, usize)
47where
48    T: NativePType,
49    NativeValue<T>: Eq + Hash,
50{
51    let mut distinct_values: HashMap<NativeValue<T>, usize, FxBuildHasher> =
52        HashMap::with_hasher(FxBuildHasher);
53    match mask.indices() {
54        AllOr::All => {
55            for value in values.iter().copied() {
56                *distinct_values.entry(NativeValue(value)).or_insert(0) += 1;
57            }
58        }
59        AllOr::None => unreachable!("All invalid arrays should be handled earlier"),
60        AllOr::Some(idxs) => {
61            for &i in idxs {
62                *distinct_values
63                    .entry(NativeValue(unsafe { *values.get_unchecked(i) }))
64                    .or_insert(0) += 1
65            }
66        }
67    }
68
69    let (&top_value, &top_count) = distinct_values
70        .iter()
71        .max_by_key(|&(_, &count)| count)
72        .vortex_expect("non-empty");
73    (top_value.0, top_count)
74}