vortex_array/arrays/primitive/
top_value.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::hash::Hash;
5
6use rustc_hash::FxBuildHasher;
7use vortex_dtype::{NativePType, match_each_native_ptype};
8use vortex_error::{VortexExpect, VortexResult};
9use vortex_mask::{AllOr, Mask};
10use vortex_scalar::PValue;
11use vortex_utils::aliases::hash_map::HashMap;
12
13use crate::arrays::{NativeValue, PrimitiveArray};
14
15impl PrimitiveArray {
16    /// Compute most common present value of this array
17    pub fn top_value(&self) -> VortexResult<Option<(PValue, usize)>> {
18        if self.is_empty() {
19            return Ok(None);
20        }
21
22        if self.all_invalid()? {
23            return Ok(None);
24        }
25
26        match_each_native_ptype!(self.ptype(), |P| {
27            let (top, count) = typed_top_value(self.as_slice::<P>(), self.validity_mask()?);
28            Ok(Some((top.into(), count)))
29        })
30    }
31}
32
33fn typed_top_value<T>(values: &[T], mask: Mask) -> (T, usize)
34where
35    T: NativePType,
36    NativeValue<T>: Eq + Hash,
37{
38    let mut distinct_values: HashMap<NativeValue<T>, usize, FxBuildHasher> =
39        HashMap::with_hasher(FxBuildHasher);
40    match mask.indices() {
41        AllOr::All => {
42            for value in values.iter().copied() {
43                *distinct_values.entry(NativeValue(value)).or_insert(0) += 1;
44            }
45        }
46        AllOr::None => unreachable!("All invalid arrays should be handled earlier"),
47        AllOr::Some(idxs) => {
48            for &i in idxs {
49                *distinct_values
50                    .entry(NativeValue(unsafe { *values.get_unchecked(i) }))
51                    .or_insert(0) += 1
52            }
53        }
54    }
55
56    let (&top_value, &top_count) = distinct_values
57        .iter()
58        .max_by_key(|&(_, &count)| count)
59        .vortex_expect("non-empty");
60    (top_value.0, top_count)
61}