lance_encoding/
statistics.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::{
5    fmt::{self},
6    hash::{Hash, RandomState},
7    sync::Arc,
8};
9
10use arrow::{array::AsArray, datatypes::UInt64Type};
11use arrow_array::{Array, ArrowPrimitiveType, UInt64Array};
12use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
13use num_traits::PrimInt;
14
15use crate::data::{
16    AllNullDataBlock, DataBlock, DictionaryDataBlock, FixedSizeListBlock, FixedWidthDataBlock,
17    NullableDataBlock, OpaqueBlock, StructDataBlock, VariableWidthBlock,
18};
19
/// The kinds of statistics that can be recorded for a `DataBlock`.
///
/// Both `Debug` and `Display` render a variant as its bare name (e.g. `BitWidth`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Stat {
    /// Number of bits needed to represent the values. `FixedWidthDataBlock` records one
    /// entry per chunk of 1024 values.
    BitWidth,
    /// Size of the block's data, as reported by the block's `data_size()`.
    DataSize,
    /// Estimated number of distinct values (computed with HyperLogLog++).
    Cardinality,
    // NOTE(review): no block in this file records this statistic; confirm where it is used.
    FixedSize,
    /// Number of null values. Reported by `AllNullDataBlock`.
    NullCount,
    /// Length of the largest single value. For fixed-width blocks this is
    /// `bits_per_value / 8`.
    MaxLength,
    /// Number of runs of consecutive equal values.
    RunCount,
}

impl fmt::Display for Stat {
    /// Writes the variant name, exactly as `Debug` does.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self, f)
    }
}
50
/// Computes the statistics of a data block.
///
/// The implementations in this file either store the computed statistics in the block's
/// `block_info` map or delegate to the blocks they wrap.
pub trait ComputeStat {
    /// Computes this block's statistics and records them on the block.
    fn compute_stat(&mut self);
}
54
55impl ComputeStat for DataBlock {
56    fn compute_stat(&mut self) {
57        match self {
58            Self::Empty() => {}
59            Self::Constant(_) => {}
60            Self::AllNull(_) => {}
61            Self::Nullable(data_block) => data_block.data.compute_stat(),
62            Self::FixedWidth(data_block) => data_block.compute_stat(),
63            Self::FixedSizeList(data_block) => data_block.compute_stat(),
64            Self::VariableWidth(data_block) => data_block.compute_stat(),
65            Self::Opaque(data_block) => data_block.compute_stat(),
66            Self::Struct(data_block) => data_block.compute_stat(),
67            Self::Dictionary(_) => {}
68        }
69    }
70}
71
72impl ComputeStat for VariableWidthBlock {
73    fn compute_stat(&mut self) {
74        if !self.block_info.0.read().unwrap().is_empty() {
75            panic!("compute_stat should only be called once during DataBlock construction");
76        }
77        let data_size = self.data_size();
78        let data_size_array = Arc::new(UInt64Array::from(vec![data_size]));
79
80        let cardinality_array = self.cardinality();
81
82        let max_length_array = self.max_length();
83
84        let mut info = self.block_info.0.write().unwrap();
85        info.insert(Stat::DataSize, data_size_array);
86        info.insert(Stat::Cardinality, cardinality_array);
87        info.insert(Stat::MaxLength, max_length_array);
88    }
89}
90
91impl ComputeStat for FixedWidthDataBlock {
92    fn compute_stat(&mut self) {
93        // compute this datablock's data_size
94        let data_size = self.data_size();
95        let data_size_array = Arc::new(UInt64Array::from(vec![data_size]));
96
97        // compute this datablock's max_bit_width
98        let max_bit_widths = self.max_bit_widths();
99
100        // the MaxLength of FixedWidthDataBlock is it's self.bits_per_value / 8
101        let max_len = self.bits_per_value / 8;
102        let max_len_array = Arc::new(UInt64Array::from(vec![max_len]));
103
104        let cardidinality_array = if self.bits_per_value == 128 {
105            Some(self.cardinality())
106        } else {
107            None
108        };
109
110        // compute run count
111        let run_count_array = self.run_count();
112
113        let mut info = self.block_info.0.write().unwrap();
114        info.insert(Stat::DataSize, data_size_array);
115        info.insert(Stat::BitWidth, max_bit_widths);
116        info.insert(Stat::MaxLength, max_len_array);
117        info.insert(Stat::RunCount, run_count_array);
118        if let Some(cardinality_array) = cardidinality_array {
119            info.insert(Stat::Cardinality, cardinality_array);
120        }
121    }
122}
123
impl ComputeStat for FixedSizeListBlock {
    /// Computes the statistics of the child block; nothing is recorded on the list itself.
    fn compute_stat(&mut self) {
        // We leave the child stats unchanged.  This may seem odd (e.g. should the bit width be
        // the bit width of the child * dimension?) but we use these stats to decide on
        // compression and we are currently only compressing the child data.
        //
        // There is a potential opportunity to do better here.  For example, a FSL of
        // 4 32-bit integers could be treated as a single 128-bit integer, or maybe even as
        // 4 columns of 32-bit integers.  This might yield better compression.
        self.child.compute_stat();
    }
}
136
137impl ComputeStat for OpaqueBlock {
138    fn compute_stat(&mut self) {
139        // compute this datablock's data_size
140        let data_size = self.data_size();
141        let data_size_array = Arc::new(UInt64Array::from(vec![data_size]));
142        let mut info = self.block_info.0.write().unwrap();
143        info.insert(Stat::DataSize, data_size_array);
144    }
145}
146
147pub trait GetStat: fmt::Debug {
148    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>>;
149
150    fn expect_stat(&self, stat: Stat) -> Arc<dyn Array> {
151        self.get_stat(stat)
152            .unwrap_or_else(|| panic!("{:?} DataBlock does not have `{}` statistics.", self, stat))
153    }
154
155    fn expect_single_stat<T: ArrowPrimitiveType>(&self, stat: Stat) -> T::Native {
156        let stat_value = self.expect_stat(stat);
157        let stat_value = stat_value.as_primitive::<T>();
158        if stat_value.len() != 1 {
159            panic!(
160                "{:?} DataBlock does not have exactly one value for `{} statistics.",
161                self, stat
162            );
163        }
164        stat_value.value(0)
165    }
166}
167
168impl GetStat for DataBlock {
169    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
170        match self {
171            Self::Empty() => None,
172            Self::Constant(_) => None,
173            Self::AllNull(data_block) => data_block.get_stat(stat),
174            Self::Nullable(data_block) => data_block.get_stat(stat),
175            Self::FixedWidth(data_block) => data_block.get_stat(stat),
176            Self::FixedSizeList(data_block) => data_block.get_stat(stat),
177            Self::VariableWidth(data_block) => data_block.get_stat(stat),
178            Self::Opaque(data_block) => data_block.get_stat(stat),
179            Self::Struct(data_block) => data_block.get_stat(stat),
180            Self::Dictionary(data_block) => data_block.get_stat(stat),
181        }
182    }
183}
184
// NullableDataBlock will be deprecated in Lance 2.1.
impl GetStat for NullableDataBlock {
    /// Returns the statistic of the wrapped `data` block.
    ///
    /// This is not accurate for the nullable block as a whole, since only the inner
    /// `DataBlock` is consulted, but `NullableDataBlock` is going to be deprecated in
    /// Lance 2.1 anyway.
    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
        self.data.get_stat(stat)
    }
}
193
194impl GetStat for VariableWidthBlock {
195    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
196        let block_info = self.block_info.0.read().unwrap();
197
198        if block_info.is_empty() {
199            panic!("get_stat should be called after statistics are computed.");
200        }
201        block_info.get(&stat).cloned()
202    }
203}
204
205impl GetStat for FixedSizeListBlock {
206    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
207        let child_stat = self.child.get_stat(stat);
208        match stat {
209            Stat::MaxLength => child_stat.map(|max_length| {
210                // this is conservative when working with variable length data as we shouldn't assume
211                // that we have a list of all max-length elements but it's cheap and easy to calculate
212                let max_length = max_length.as_primitive::<UInt64Type>().value(0);
213                Arc::new(UInt64Array::from(vec![max_length * self.dimension])) as Arc<dyn Array>
214            }),
215            _ => child_stat,
216        }
217    }
218}
219
220impl VariableWidthBlock {
221    // Caveat: the computation here assumes VariableWidthBlock.offsets maps directly to VariableWidthBlock.data
222    // without any adjustment(for example, no null_adjustment for offsets)
223    fn cardinality(&mut self) -> Arc<dyn Array> {
224        const PRECISION: u8 = 4;
225        // The default hasher (currently sip hash 1-3) does not seem to give good results
226        // with HLL.
227        //
228        // In particular, when using randomly generated 12-byte strings, the HLL count was
229        // suggested a cardinality of 500 (out of 1000 unique items and hashes) at least 10%
230        // of the time.
231        //
232        // Using xxhash3 consistently gives better results.
233        let mut hll: HyperLogLogPlus<&[u8], xxhash_rust::xxh3::Xxh3Builder> =
234            HyperLogLogPlus::new(PRECISION, xxhash_rust::xxh3::Xxh3Builder::default()).unwrap();
235
236        match self.bits_per_offset {
237            32 => {
238                let offsets_ref = self.offsets.borrow_to_typed_slice::<u32>();
239                let offsets: &[u32] = offsets_ref.as_ref();
240
241                offsets
242                    .iter()
243                    .zip(offsets.iter().skip(1))
244                    .for_each(|(&start, &end)| {
245                        hll.insert(&self.data[start as usize..end as usize]);
246                    });
247                let cardinality = hll.count() as u64;
248                Arc::new(UInt64Array::from(vec![cardinality]))
249            }
250            64 => {
251                let offsets_ref = self.offsets.borrow_to_typed_slice::<u64>();
252                let offsets: &[u64] = offsets_ref.as_ref();
253
254                offsets
255                    .iter()
256                    .zip(offsets.iter().skip(1))
257                    .for_each(|(&start, &end)| {
258                        hll.insert(&self.data[start as usize..end as usize]);
259                    });
260
261                let cardinality = hll.count() as u64;
262                Arc::new(UInt64Array::from(vec![cardinality]))
263            }
264            _ => {
265                unreachable!("the bits_per_offset of VariableWidthBlock can only be 32 or 64")
266            }
267        }
268    }
269
270    fn max_length(&mut self) -> Arc<dyn Array> {
271        match self.bits_per_offset {
272            32 => {
273                let offsets = self.offsets.borrow_to_typed_slice::<u32>();
274                let offsets = offsets.as_ref();
275                let max_len = offsets
276                    .windows(2)
277                    .map(|pair| pair[1] - pair[0])
278                    .max()
279                    .unwrap_or(0);
280                Arc::new(UInt64Array::from(vec![max_len as u64]))
281            }
282            64 => {
283                let offsets = self.offsets.borrow_to_typed_slice::<u64>();
284                let offsets = offsets.as_ref();
285                let max_len = offsets
286                    .windows(2)
287                    .map(|pair| pair[1] - pair[0])
288                    .max()
289                    .unwrap_or(0);
290                Arc::new(UInt64Array::from(vec![max_len]))
291            }
292            _ => {
293                unreachable!("the type of offsets in VariableWidth can only be u32 or u64");
294            }
295        }
296    }
297}
298
299impl GetStat for AllNullDataBlock {
300    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
301        match stat {
302            Stat::NullCount => {
303                let null_count = self.num_values;
304                Some(Arc::new(UInt64Array::from(vec![null_count])))
305            }
306            Stat::DataSize => Some(Arc::new(UInt64Array::from(vec![0]))),
307            _ => None,
308        }
309    }
310}
311
312impl GetStat for FixedWidthDataBlock {
313    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
314        let block_info = self.block_info.0.read().unwrap();
315
316        if block_info.is_empty() {
317            panic!("get_stat should be called after statistics are computed.");
318        }
319        block_info.get(&stat).cloned()
320    }
321}
322
323impl FixedWidthDataBlock {
324    fn max_bit_widths(&mut self) -> Arc<dyn Array> {
325        assert!(self.num_values > 0);
326
327        const CHUNK_SIZE: usize = 1024;
328
329        fn calculate_max_bit_width<T: PrimInt>(slice: &[T], bits_per_value: u64) -> Vec<u64> {
330            slice
331                .chunks(CHUNK_SIZE)
332                .map(|chunk| {
333                    let max_value = chunk.iter().fold(T::zero(), |acc, &x| acc | x);
334                    bits_per_value - max_value.leading_zeros() as u64
335                })
336                .collect()
337        }
338
339        match self.bits_per_value {
340            8 => {
341                let u8_slice = self.data.borrow_to_typed_slice::<u8>();
342                let u8_slice = u8_slice.as_ref();
343                Arc::new(UInt64Array::from(calculate_max_bit_width(
344                    u8_slice,
345                    self.bits_per_value,
346                )))
347            }
348            16 => {
349                let u16_slice = self.data.borrow_to_typed_slice::<u16>();
350                let u16_slice = u16_slice.as_ref();
351                Arc::new(UInt64Array::from(calculate_max_bit_width(
352                    u16_slice,
353                    self.bits_per_value,
354                )))
355            }
356            32 => {
357                let u32_slice = self.data.borrow_to_typed_slice::<u32>();
358                let u32_slice = u32_slice.as_ref();
359                Arc::new(UInt64Array::from(calculate_max_bit_width(
360                    u32_slice,
361                    self.bits_per_value,
362                )))
363            }
364            64 => {
365                let u64_slice = self.data.borrow_to_typed_slice::<u64>();
366                let u64_slice = u64_slice.as_ref();
367                Arc::new(UInt64Array::from(calculate_max_bit_width(
368                    u64_slice,
369                    self.bits_per_value,
370                )))
371            }
372            _ => Arc::new(UInt64Array::from(vec![self.bits_per_value])),
373        }
374    }
375
376    fn cardinality(&mut self) -> Arc<dyn Array> {
377        match self.bits_per_value {
378            128 => {
379                let u128_slice_ref = self.data.borrow_to_typed_slice::<u128>();
380                let u128_slice = u128_slice_ref.as_ref();
381
382                const PRECISION: u8 = 4;
383                let mut hll: HyperLogLogPlus<u128, RandomState> =
384                    HyperLogLogPlus::new(PRECISION, RandomState::new()).unwrap();
385                for val in u128_slice {
386                    hll.insert(val);
387                }
388                let cardinality = hll.count() as u64;
389                Arc::new(UInt64Array::from(vec![cardinality]))
390            }
391            _ => unreachable!(),
392        }
393    }
394
395    /// Counts the number of runs (consecutive sequences of equal values) in the data.
396    ///
397    /// A "run" is defined as a sequence of one or more consecutive equal values.
398    /// For example:
399    /// - `[1, 1, 2, 2, 2, 3]` has 3 runs: [1,1], [2,2,2], and [3]
400    /// - `[1, 2, 3, 4]` has 4 runs (each value is its own run)
401    /// - `[5, 5, 5, 5]` has 1 run
402    ///
403    /// This count is used to determine if RLE compression would be effective.
404    /// Fewer runs relative to the total number of values indicates better RLE compression potential.
405    fn run_count(&mut self) -> Arc<dyn Array> {
406        assert!(self.num_values > 0);
407
408        // Inner function to count runs in typed data
409        fn count_runs<T: PartialEq + Copy>(slice: &[T]) -> u64 {
410            if slice.is_empty() {
411                return 0;
412            }
413
414            // Start with 1 run (the first value)
415            let mut runs = 1u64;
416            let mut prev = slice[0];
417
418            // Count value transitions (each transition indicates a new run)
419            for &val in &slice[1..] {
420                if val != prev {
421                    runs += 1;
422                    prev = val;
423                }
424            }
425
426            runs
427        }
428
429        let run_count = match self.bits_per_value {
430            8 => {
431                let u8_slice = self.data.borrow_to_typed_slice::<u8>();
432                count_runs(u8_slice.as_ref())
433            }
434            16 => {
435                let u16_slice = self.data.borrow_to_typed_slice::<u16>();
436                count_runs(u16_slice.as_ref())
437            }
438            32 => {
439                let u32_slice = self.data.borrow_to_typed_slice::<u32>();
440                count_runs(u32_slice.as_ref())
441            }
442            64 => {
443                let u64_slice = self.data.borrow_to_typed_slice::<u64>();
444                count_runs(u64_slice.as_ref())
445            }
446            128 => {
447                let u128_slice = self.data.borrow_to_typed_slice::<u128>();
448                count_runs(u128_slice.as_ref())
449            }
450            _ => self.num_values, // For other bit widths, assume no runs
451        };
452
453        Arc::new(UInt64Array::from(vec![run_count]))
454    }
455}
456
457impl GetStat for OpaqueBlock {
458    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
459        let block_info = self.block_info.0.read().unwrap();
460
461        if block_info.is_empty() {
462            panic!("get_stat should be called after statistics are computed.");
463        }
464        block_info.get(&stat).cloned()
465    }
466}
467
impl GetStat for DictionaryDataBlock {
    /// Dictionary blocks provide no statistics: every lookup returns `None`.
    fn get_stat(&self, _stat: Stat) -> Option<Arc<dyn Array>> {
        None
    }
}
473
474impl GetStat for StructDataBlock {
475    fn get_stat(&self, stat: Stat) -> Option<Arc<dyn Array>> {
476        let block_info = self.block_info.0.read().unwrap();
477        if block_info.is_empty() {
478            panic!("get_stat should be called after statistics are computed.")
479        }
480        block_info.get(&stat).cloned()
481    }
482}
483
484impl ComputeStat for StructDataBlock {
485    fn compute_stat(&mut self) {
486        let data_size = self.data_size();
487        let data_size_array = Arc::new(UInt64Array::from(vec![data_size]));
488
489        let max_len = self
490            .children
491            .iter()
492            .map(|child| child.expect_single_stat::<UInt64Type>(Stat::MaxLength))
493            .sum::<u64>();
494        let max_len_array = Arc::new(UInt64Array::from(vec![max_len]));
495
496        let mut info = self.block_info.0.write().unwrap();
497        info.insert(Stat::DataSize, data_size_array);
498        info.insert(Stat::MaxLength, max_len_array);
499    }
500}
501
502#[cfg(test)]
503mod tests {
504    use std::sync::Arc;
505
506    use arrow_array::{
507        ArrayRef, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
508        UInt16Array, UInt32Array, UInt64Array, UInt8Array,
509    };
510    use arrow_schema::{DataType, Field};
511    use lance_arrow::DataTypeExt;
512    use lance_datagen::{array, ArrayGeneratorExt, RowCount, DEFAULT_SEED};
513    use rand::SeedableRng;
514
515    use crate::statistics::{GetStat, Stat};
516
517    use super::DataBlock;
518
519    use arrow::{
520        array::AsArray,
521        compute::concat,
522        datatypes::{Int32Type, UInt64Type},
523    };
524    use arrow_array::Array;
    /// Checks that the `DataSize` statistic equals the summed length of the source array's
    /// buffers for several data types, and that dictionary blocks report no `DataSize`.
    ///
    /// All arrays are drawn from a single seeded RNG, so the order of the `generate` calls
    /// determines the generated data.
    #[test]
    fn test_data_size_stat() {
        let mut rng = rand_xoshiro::Xoshiro256PlusPlus::seed_from_u64(DEFAULT_SEED.0);
        // presumably a null pattern with no nulls at all; verify against `with_nulls`
        let mut gen = array::rand::<Int32Type>().with_nulls(&[false, false, false]);
        let arr1 = gen.generate(RowCount::from(3), &mut rng).unwrap();
        let arr2 = gen.generate(RowCount::from(3), &mut rng).unwrap();
        let arr3 = gen.generate(RowCount::from(3), &mut rng).unwrap();
        // 9 looks like the total number of rows (3 arrays of 3 rows each); confirm
        let block = DataBlock::from_arrays(&[arr1.clone(), arr2.clone(), arr3.clone()], 9);

        // Concatenate the same arrays to obtain the reference buffers to compare against.
        let concatenated_array = concat(&[
            &*Arc::new(arr1.clone()) as &dyn Array,
            &*Arc::new(arr2.clone()) as &dyn Array,
            &*Arc::new(arr3.clone()) as &dyn Array,
        ])
        .unwrap();

        let data_size = block.expect_single_stat::<UInt64Type>(Stat::DataSize);

        // Expected size: the summed length of all buffers of the concatenated array.
        let total_buffer_size: usize = concatenated_array
            .to_data()
            .buffers()
            .iter()
            .map(|buffer| buffer.len())
            .sum();
        assert!(data_size == total_buffer_size as u64);

        // test DataType::Binary
        let mut gen = lance_datagen::array::rand_type(&DataType::Binary);
        let arr = gen.generate(RowCount::from(3), &mut rng).unwrap();
        let block = DataBlock::from_array(arr.clone());
        let data_size = block.expect_single_stat::<UInt64Type>(Stat::DataSize);

        let total_buffer_size: usize = arr
            .to_data()
            .buffers()
            .iter()
            .map(|buffer| buffer.len())
            .sum();
        assert!(data_size == total_buffer_size as u64);

        // test DataType::Struct
        let fields = vec![
            Arc::new(Field::new("int_field", DataType::Int32, false)),
            Arc::new(Field::new("float_field", DataType::Float32, false)),
        ]
        .into();

        let mut gen = lance_datagen::array::rand_type(&DataType::Struct(fields));
        let arr = gen.generate(RowCount::from(3), &mut rng).unwrap();
        let block = DataBlock::from_array(arr.clone());
        // The expected size of a struct is the sum over the buffers of its child arrays.
        let (_, arr_parts, _) = arr.as_struct().clone().into_parts();
        let total_buffer_size: usize = arr_parts
            .iter()
            .map(|arr| {
                arr.to_data()
                    .buffers()
                    .iter()
                    .map(|buffer| buffer.len())
                    .sum::<usize>()
            })
            .sum();
        let data_size = block.expect_single_stat::<UInt64Type>(Stat::DataSize);
        assert!(data_size == total_buffer_size as u64);

        // test DataType::Dictionary: no `DataSize` statistic is expected for this block
        let mut gen = array::rand_type(&DataType::Dictionary(
            Box::new(DataType::Int32),
            Box::new(DataType::Utf8),
        ));
        let arr = gen.generate(RowCount::from(3), &mut rng).unwrap();
        let block = DataBlock::from_array(arr.clone());
        assert!(block.get_stat(Stat::DataSize).is_none());

        // Int32 again, this time with a null pattern that contains a `true` entry
        // (presumably producing a null value; verify against `with_nulls`).
        let mut gen = array::rand::<Int32Type>().with_nulls(&[false, true, false]);
        let arr = gen.generate(RowCount::from(3), &mut rng).unwrap();
        let block = DataBlock::from_array(arr.clone());
        let data_size = block.expect_single_stat::<UInt64Type>(Stat::DataSize);
        let total_buffer_size: usize = arr
            .to_data()
            .buffers()
            .iter()
            .map(|buffer| buffer.len())
            .sum();

        assert!(data_size == total_buffer_size as u64);
    }
611
612    #[test]
613    fn test_bit_width_stat_for_integers() {
614        let int8_array = Int8Array::from(vec![1, 2, 3]);
615        let array_ref: ArrayRef = Arc::new(int8_array);
616        let block = DataBlock::from_array(array_ref);
617
618        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
619        let actual_bit_width = block.expect_stat(Stat::BitWidth);
620
621        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref(),);
622
623        let int8_array = Int8Array::from(vec![0x1, 0x2, 0x3, 0x7F]);
624        let array_ref: ArrayRef = Arc::new(int8_array);
625        let block = DataBlock::from_array(array_ref);
626
627        let expected_bit_width = Arc::new(UInt64Array::from(vec![7])) as ArrayRef;
628        let actual_bit_width = block.expect_stat(Stat::BitWidth);
629        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref(),);
630
631        let int8_array = Int8Array::from(vec![0x1, 0x2, 0x3, 0xF, 0x1F]);
632        let array_ref: ArrayRef = Arc::new(int8_array);
633        let block = DataBlock::from_array(array_ref);
634
635        let expected_bit_width = Arc::new(UInt64Array::from(vec![5])) as ArrayRef;
636        let actual_bit_width = block.expect_stat(Stat::BitWidth);
637        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref(),);
638
639        let int8_array = Int8Array::from(vec![-1, 2, 3]);
640        let array_ref: ArrayRef = Arc::new(int8_array);
641        let block = DataBlock::from_array(array_ref);
642
643        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
644        let actual_bit_width = block.expect_stat(Stat::BitWidth);
645        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
646
647        let int16_array = Int16Array::from(vec![1, 2, 3]);
648        let array_ref: ArrayRef = Arc::new(int16_array);
649        let block = DataBlock::from_array(array_ref);
650
651        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
652        let actual_bit_width = block.expect_stat(Stat::BitWidth);
653        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
654
655        let int16_array = Int16Array::from(vec![0x1, 0x2, 0x3, 0x7F]);
656        let array_ref: ArrayRef = Arc::new(int16_array);
657        let block = DataBlock::from_array(array_ref);
658
659        let expected_bit_width = Arc::new(UInt64Array::from(vec![7])) as ArrayRef;
660        let actual_bit_width = block.expect_stat(Stat::BitWidth);
661        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
662
663        let int16_array = Int16Array::from(vec![0x1, 0x2, 0x3, 0xFF]);
664        let array_ref: ArrayRef = Arc::new(int16_array);
665        let block = DataBlock::from_array(array_ref);
666
667        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
668        let actual_bit_width = block.expect_stat(Stat::BitWidth);
669        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
670
671        let int16_array = Int16Array::from(vec![0x1, 0x2, 0x3, 0x1FF]);
672        let array_ref: ArrayRef = Arc::new(int16_array);
673        let block = DataBlock::from_array(array_ref);
674
675        let expected_bit_width = Arc::new(UInt64Array::from(vec![9])) as ArrayRef;
676        let actual_bit_width = block.expect_stat(Stat::BitWidth);
677        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
678
679        let int16_array = Int16Array::from(vec![0x1, 0x2, 0x3, 0xF, 0x1F]);
680        let array_ref: ArrayRef = Arc::new(int16_array);
681        let block = DataBlock::from_array(array_ref);
682
683        let expected_bit_width = Arc::new(UInt64Array::from(vec![5])) as ArrayRef;
684        let actual_bit_width = block.expect_stat(Stat::BitWidth);
685        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
686
687        let int16_array = Int16Array::from(vec![-1, 2, 3]);
688        let array_ref: ArrayRef = Arc::new(int16_array);
689        let block = DataBlock::from_array(array_ref);
690
691        let expected_bit_width = Arc::new(UInt64Array::from(vec![16])) as ArrayRef;
692        let actual_bit_width = block.expect_stat(Stat::BitWidth);
693        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
694
695        let int32_array = Int32Array::from(vec![1, 2, 3]);
696        let array_ref: ArrayRef = Arc::new(int32_array);
697        let block = DataBlock::from_array(array_ref);
698
699        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
700        let actual_bit_width = block.expect_stat(Stat::BitWidth);
701        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
702
703        let int32_array = Int32Array::from(vec![0x1, 0x2, 0x3, 0xFF]);
704        let array_ref: ArrayRef = Arc::new(int32_array);
705        let block = DataBlock::from_array(array_ref);
706
707        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
708        let actual_bit_width = block.expect_stat(Stat::BitWidth);
709        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
710
711        let int32_array = Int32Array::from(vec![0x1, 0x2, 0x3, 0xFF, 0x1FF]);
712        let array_ref: ArrayRef = Arc::new(int32_array);
713        let block = DataBlock::from_array(array_ref);
714
715        let expected_bit_width = Arc::new(UInt64Array::from(vec![9])) as ArrayRef;
716        let actual_bit_width = block.expect_stat(Stat::BitWidth);
717        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
718
719        let int32_array = Int32Array::from(vec![-1, 2, 3]);
720        let array_ref: ArrayRef = Arc::new(int32_array);
721        let block = DataBlock::from_array(array_ref);
722
723        let expected_bit_width = Arc::new(UInt64Array::from(vec![32])) as ArrayRef;
724        let actual_bit_width = block.expect_stat(Stat::BitWidth);
725        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
726
727        let int32_array = Int32Array::from(vec![-1, 2, 3, -88]);
728        let array_ref: ArrayRef = Arc::new(int32_array);
729        let block = DataBlock::from_array(array_ref);
730
731        let expected_bit_width = Arc::new(UInt64Array::from(vec![32])) as ArrayRef;
732        let actual_bit_width = block.expect_stat(Stat::BitWidth);
733        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
734
735        let int64_array = Int64Array::from(vec![1, 2, 3]);
736        let array_ref: ArrayRef = Arc::new(int64_array);
737        let block = DataBlock::from_array(array_ref);
738
739        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
740        let actual_bit_width = block.expect_stat(Stat::BitWidth);
741        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
742
743        let int64_array = Int64Array::from(vec![0x1, 0x2, 0x3, 0xFF]);
744        let array_ref: ArrayRef = Arc::new(int64_array);
745        let block = DataBlock::from_array(array_ref);
746
747        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
748        let actual_bit_width = block.expect_stat(Stat::BitWidth);
749        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
750
751        let int64_array = Int64Array::from(vec![0x1, 0x2, 0x3, 0xFF, 0x1FF]);
752        let array_ref: ArrayRef = Arc::new(int64_array);
753        let block = DataBlock::from_array(array_ref);
754
755        let expected_bit_width = Arc::new(UInt64Array::from(vec![9])) as ArrayRef;
756        let actual_bit_width = block.expect_stat(Stat::BitWidth);
757        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
758
759        let int64_array = Int64Array::from(vec![-1, 2, 3]);
760        let array_ref: ArrayRef = Arc::new(int64_array);
761        let block = DataBlock::from_array(array_ref);
762
763        let expected_bit_width = Arc::new(UInt64Array::from(vec![64])) as ArrayRef;
764        let actual_bit_width = block.expect_stat(Stat::BitWidth);
765        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
766
767        let int64_array = Int64Array::from(vec![-1, 2, 3, -88]);
768        let array_ref: ArrayRef = Arc::new(int64_array);
769        let block = DataBlock::from_array(array_ref);
770
771        let expected_bit_width = Arc::new(UInt64Array::from(vec![64])) as ArrayRef;
772        let actual_bit_width = block.expect_stat(Stat::BitWidth);
773        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
774
775        let uint8_array = UInt8Array::from(vec![1, 2, 3]);
776        let array_ref: ArrayRef = Arc::new(uint8_array);
777        let block = DataBlock::from_array(array_ref);
778
779        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
780        let actual_bit_width = block.expect_stat(Stat::BitWidth);
781        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
782
783        let uint8_array = UInt8Array::from(vec![0x1, 0x2, 0x3, 0x7F]);
784        let array_ref: ArrayRef = Arc::new(uint8_array);
785        let block = DataBlock::from_array(array_ref);
786
787        let expected_bit_width = Arc::new(UInt64Array::from(vec![7])) as ArrayRef;
788        let actual_bit_width = block.expect_stat(Stat::BitWidth);
789        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
790
791        let uint8_array = UInt8Array::from(vec![0x1, 0x2, 0x3, 0xF, 0x1F]);
792        let array_ref: ArrayRef = Arc::new(uint8_array);
793        let block = DataBlock::from_array(array_ref);
794
795        let expected_bit_width = Arc::new(UInt64Array::from(vec![5])) as ArrayRef;
796        let actual_bit_width = block.expect_stat(Stat::BitWidth);
797        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
798
799        let uint8_array = UInt8Array::from(vec![1, 2, 3, 0xF]);
800        let array_ref: ArrayRef = Arc::new(uint8_array);
801        let block = DataBlock::from_array(array_ref);
802
803        let expected_bit_width = Arc::new(UInt64Array::from(vec![4])) as ArrayRef;
804        let actual_bit_width = block.expect_stat(Stat::BitWidth);
805        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
806
807        let uint16_array = UInt16Array::from(vec![1, 2, 3]);
808        let array_ref: ArrayRef = Arc::new(uint16_array);
809        let block = DataBlock::from_array(array_ref);
810
811        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
812        let actual_bit_width = block.expect_stat(Stat::BitWidth);
813        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
814
815        let uint16_array = UInt16Array::from(vec![0x1, 0x2, 0x3, 0x7F]);
816        let array_ref: ArrayRef = Arc::new(uint16_array);
817        let block = DataBlock::from_array(array_ref);
818
819        let expected_bit_width = Arc::new(UInt64Array::from(vec![7])) as ArrayRef;
820        let actual_bit_width = block.expect_stat(Stat::BitWidth);
821        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
822
823        let uint16_array = UInt16Array::from(vec![0x1, 0x2, 0x3, 0xFF]);
824        let array_ref: ArrayRef = Arc::new(uint16_array);
825        let block = DataBlock::from_array(array_ref);
826
827        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
828        let actual_bit_width = block.expect_stat(Stat::BitWidth);
829        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
830
831        let uint16_array = UInt16Array::from(vec![0x1, 0x2, 0x3, 0x1FF]);
832        let array_ref: ArrayRef = Arc::new(uint16_array);
833        let block = DataBlock::from_array(array_ref);
834
835        let expected_bit_width = Arc::new(UInt64Array::from(vec![9])) as ArrayRef;
836        let actual_bit_width = block.expect_stat(Stat::BitWidth);
837        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
838
839        let uint16_array = UInt16Array::from(vec![0x1, 0x2, 0x3, 0xF, 0x1F]);
840        let array_ref: ArrayRef = Arc::new(uint16_array);
841        let block = DataBlock::from_array(array_ref);
842
843        let expected_bit_width = Arc::new(UInt64Array::from(vec![5])) as ArrayRef;
844        let actual_bit_width = block.expect_stat(Stat::BitWidth);
845        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
846
847        let uint16_array = UInt16Array::from(vec![1, 2, 3, 0xFFFF]);
848        let array_ref: ArrayRef = Arc::new(uint16_array);
849        let block = DataBlock::from_array(array_ref);
850
851        let expected_bit_width = Arc::new(UInt64Array::from(vec![16])) as ArrayRef;
852        let actual_bit_width = block.expect_stat(Stat::BitWidth);
853        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
854
855        let uint32_array = UInt32Array::from(vec![1, 2, 3]);
856        let array_ref: ArrayRef = Arc::new(uint32_array);
857        let block = DataBlock::from_array(array_ref);
858
859        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
860        let actual_bit_width = block.expect_stat(Stat::BitWidth);
861        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
862
863        let uint32_array = UInt32Array::from(vec![0x1, 0x2, 0x3, 0xFF]);
864        let array_ref: ArrayRef = Arc::new(uint32_array);
865        let block = DataBlock::from_array(array_ref);
866
867        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
868        let actual_bit_width = block.expect_stat(Stat::BitWidth);
869        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref(),);
870
871        let uint32_array = UInt32Array::from(vec![0x1, 0x2, 0x3, 0xFF, 0x1FF]);
872        let array_ref: ArrayRef = Arc::new(uint32_array);
873        let block = DataBlock::from_array(array_ref);
874
875        let expected_bit_width = Arc::new(UInt64Array::from(vec![9])) as ArrayRef;
876        let actual_bit_width = block.expect_stat(Stat::BitWidth);
877        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
878
879        let uint32_array = UInt32Array::from(vec![1, 2, 3, 0xF]);
880        let array_ref: ArrayRef = Arc::new(uint32_array);
881        let block = DataBlock::from_array(array_ref);
882
883        let expected_bit_width = Arc::new(UInt64Array::from(vec![4])) as ArrayRef;
884        let actual_bit_width = block.expect_stat(Stat::BitWidth);
885        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
886
887        let uint32_array = UInt32Array::from(vec![1, 2, 3, 0x77]);
888        let array_ref: ArrayRef = Arc::new(uint32_array);
889        let block = DataBlock::from_array(array_ref);
890
891        let expected_bit_width = Arc::new(UInt64Array::from(vec![7])) as ArrayRef;
892        let actual_bit_width = block.expect_stat(Stat::BitWidth);
893        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
894
895        let uint64_array = UInt64Array::from(vec![1, 2, 3]);
896        let array_ref: ArrayRef = Arc::new(uint64_array);
897        let block = DataBlock::from_array(array_ref);
898
899        let expected_bit_width = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
900        let actual_bit_width = block.expect_stat(Stat::BitWidth);
901        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
902
903        let uint64_array = UInt64Array::from(vec![0x1, 0x2, 0x3, 0xFF]);
904        let array_ref: ArrayRef = Arc::new(uint64_array);
905        let block = DataBlock::from_array(array_ref);
906
907        let expected_bit_width = Arc::new(UInt64Array::from(vec![8])) as ArrayRef;
908        let actual_bit_width = block.expect_stat(Stat::BitWidth);
909        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
910
911        let uint64_array = UInt64Array::from(vec![0x1, 0x2, 0x3, 0xFF, 0x1FF]);
912        let array_ref: ArrayRef = Arc::new(uint64_array);
913        let block = DataBlock::from_array(array_ref);
914
915        let expected_bit_width = Arc::new(UInt64Array::from(vec![9])) as ArrayRef;
916        let actual_bit_width = block.expect_stat(Stat::BitWidth);
917        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
918
919        let uint64_array = UInt64Array::from(vec![0, 2, 3, 0xFFFF]);
920        let array_ref: ArrayRef = Arc::new(uint64_array);
921        let block = DataBlock::from_array(array_ref);
922
923        let expected_bit_width = Arc::new(UInt64Array::from(vec![16])) as ArrayRef;
924        let actual_bit_width = block.expect_stat(Stat::BitWidth);
925        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
926
927        let uint64_array = UInt64Array::from(vec![1, 2, 3, 0xFFFF_FFFF_FFFF_FFFF]);
928        let array_ref: ArrayRef = Arc::new(uint64_array);
929        let block = DataBlock::from_array(array_ref);
930
931        let expected_bit_width = Arc::new(UInt64Array::from(vec![64])) as ArrayRef;
932        let actual_bit_width = block.expect_stat(Stat::BitWidth);
933        assert_eq!(actual_bit_width.as_ref(), expected_bit_width.as_ref());
934    }
935
936    #[test]
937    fn test_bit_width_stat_more_than_1024() {
938        for data_type in [
939            DataType::Int8,
940            DataType::Int16,
941            DataType::Int32,
942            DataType::Int64,
943        ] {
944            let array1 = Int64Array::from(vec![3; 1024]);
945            let array2 = Int64Array::from(vec![8; 1024]);
946            let array3 = Int64Array::from(vec![-1; 10]);
947            let array1 = arrow_cast::cast(&array1, &data_type).unwrap();
948            let array2 = arrow_cast::cast(&array2, &data_type).unwrap();
949            let array3 = arrow_cast::cast(&array3, &data_type).unwrap();
950
951            let arrays: Vec<&dyn arrow::array::Array> =
952                vec![array1.as_ref(), array2.as_ref(), array3.as_ref()];
953            let concatenated = concat(&arrays).unwrap();
954            let block = DataBlock::from_array(concatenated.clone());
955
956            let expected_bit_width = Arc::new(UInt64Array::from(vec![
957                2,
958                4,
959                (data_type.byte_width() * 8) as u64,
960            ])) as ArrayRef;
961            let actual_bit_widths = block.expect_stat(Stat::BitWidth);
962            assert_eq!(actual_bit_widths.as_ref(), expected_bit_width.as_ref(),);
963        }
964    }
965
966    #[test]
967    fn test_bit_width_when_none() {
968        let mut rng = rand_xoshiro::Xoshiro256PlusPlus::seed_from_u64(DEFAULT_SEED.0);
969        let mut gen = lance_datagen::array::rand_type(&DataType::Binary);
970        let arr = gen.generate(RowCount::from(3), &mut rng).unwrap();
971        let block = DataBlock::from_array(arr.clone());
972        assert!(block.get_stat(Stat::BitWidth).is_none(),);
973    }
974
975    #[test]
976    fn test_cardinality_variable_width_datablock() {
977        let string_array = StringArray::from(vec![Some("hello"), Some("world")]);
978        let block = DataBlock::from_array(string_array);
979        let expected_cardinality = 2;
980        let actual_cardinality = block.expect_single_stat::<UInt64Type>(Stat::Cardinality);
981        assert_eq!(actual_cardinality, expected_cardinality,);
982
983        let string_array = StringArray::from(vec![
984            Some("to be named by variables"),
985            Some("to be passed as arguments to procedures"),
986            Some("to be returned as values of procedures"),
987        ]);
988        let block = DataBlock::from_array(string_array);
989        let expected_cardinality = 3;
990        let actual_cardinality = block.expect_single_stat::<UInt64Type>(Stat::Cardinality);
991
992        assert_eq!(actual_cardinality, expected_cardinality,);
993
994        let string_array = StringArray::from(vec![
995            Some("Samuel Eilenberg"),
996            Some("Saunders Mac Lane"),
997            Some("Samuel Eilenberg"),
998        ]);
999        let block = DataBlock::from_array(string_array);
1000        let expected_cardinality = 2;
1001        let actual_cardinality = block.expect_single_stat::<UInt64Type>(Stat::Cardinality);
1002        assert_eq!(actual_cardinality, expected_cardinality,);
1003
1004        let string_array = LargeStringArray::from(vec![Some("hello"), Some("world")]);
1005        let block = DataBlock::from_array(string_array);
1006        let expected_cardinality = 2;
1007        let actual_cardinality = block.expect_single_stat::<UInt64Type>(Stat::Cardinality);
1008        assert_eq!(actual_cardinality, expected_cardinality,);
1009
1010        let string_array = LargeStringArray::from(vec![
1011            Some("to be named by variables"),
1012            Some("to be passed as arguments to procedures"),
1013            Some("to be returned as values of procedures"),
1014        ]);
1015        let block = DataBlock::from_array(string_array);
1016        let expected_cardinality = 3;
1017        let actual_cardinality = block.expect_single_stat::<UInt64Type>(Stat::Cardinality);
1018        assert_eq!(actual_cardinality, expected_cardinality,);
1019
1020        let string_array = LargeStringArray::from(vec![
1021            Some("Samuel Eilenberg"),
1022            Some("Saunders Mac Lane"),
1023            Some("Samuel Eilenberg"),
1024        ]);
1025        let block = DataBlock::from_array(string_array);
1026        let expected_cardinality = 2;
1027        let actual_cardinality = block.expect_single_stat::<UInt64Type>(Stat::Cardinality);
1028        assert_eq!(actual_cardinality, expected_cardinality,);
1029    }
1030
1031    #[test]
1032    fn test_max_length_variable_width_datablock() {
1033        let string_array = StringArray::from(vec![Some("hello"), Some("world")]);
1034        let block = DataBlock::from_array(string_array.clone());
1035        let expected_max_length = string_array.value_length(0) as u64;
1036        let actual_max_length = block.expect_single_stat::<UInt64Type>(Stat::MaxLength);
1037        assert_eq!(actual_max_length, expected_max_length);
1038
1039        let string_array = StringArray::from(vec![
1040            Some("to be named by variables"),
1041            Some("to be passed as arguments to procedures"), // string that has max length
1042            Some("to be returned as values of procedures"),
1043        ]);
1044        let block = DataBlock::from_array(string_array.clone());
1045        let expected_max_length = string_array.value_length(1) as u64;
1046        let actual_max_length = block.expect_single_stat::<UInt64Type>(Stat::MaxLength);
1047        assert_eq!(actual_max_length, expected_max_length);
1048
1049        let string_array = StringArray::from(vec![
1050            Some("Samuel Eilenberg"),
1051            Some("Saunders Mac Lane"), // string that has max length
1052            Some("Samuel Eilenberg"),
1053        ]);
1054        let block = DataBlock::from_array(string_array.clone());
1055        let expected_max_length = string_array.value_length(1) as u64;
1056        let actual_max_length = block.expect_single_stat::<UInt64Type>(Stat::MaxLength);
1057        assert_eq!(actual_max_length, expected_max_length);
1058
1059        let string_array = LargeStringArray::from(vec![Some("hello"), Some("world")]);
1060        let block = DataBlock::from_array(string_array.clone());
1061        let expected_max_length = string_array.value_length(1) as u64;
1062        let actual_max_length = block.expect_single_stat::<UInt64Type>(Stat::MaxLength);
1063        assert_eq!(actual_max_length, expected_max_length);
1064
1065        let string_array = LargeStringArray::from(vec![
1066            Some("to be named by variables"),
1067            Some("to be passed as arguments to procedures"), // string that has max length
1068            Some("to be returned as values of procedures"),
1069        ]);
1070        let block = DataBlock::from_array(string_array.clone());
1071        let expected_max_length = string_array.value(1).len() as u64;
1072        let actual_max_length = block.expect_single_stat::<UInt64Type>(Stat::MaxLength);
1073
1074        assert_eq!(actual_max_length, expected_max_length);
1075    }
1076
1077    #[test]
1078    fn test_run_count_stat() {
1079        // Test with highly repetitive data
1080        let int32_array = Int32Array::from(vec![1, 1, 1, 2, 2, 2, 3, 3, 3]);
1081        let block = DataBlock::from_array(int32_array);
1082        let expected_run_count = 3;
1083        let actual_run_count = block.expect_single_stat::<UInt64Type>(Stat::RunCount);
1084        assert_eq!(actual_run_count, expected_run_count);
1085
1086        // Test with no repetition
1087        let int32_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
1088        let block = DataBlock::from_array(int32_array);
1089        let expected_run_count = 5;
1090        let actual_run_count = block.expect_single_stat::<UInt64Type>(Stat::RunCount);
1091        assert_eq!(actual_run_count, expected_run_count);
1092
1093        // Test with mixed pattern
1094        let int32_array = Int32Array::from(vec![1, 1, 2, 3, 3, 3, 4, 5, 5]);
1095        let block = DataBlock::from_array(int32_array);
1096        let expected_run_count = 5;
1097        let actual_run_count = block.expect_single_stat::<UInt64Type>(Stat::RunCount);
1098        assert_eq!(actual_run_count, expected_run_count);
1099
1100        // Test with single value
1101        let int32_array = Int32Array::from(vec![42, 42, 42, 42, 42]);
1102        let block = DataBlock::from_array(int32_array);
1103        let expected_run_count = 1;
1104        let actual_run_count = block.expect_single_stat::<UInt64Type>(Stat::RunCount);
1105        assert_eq!(actual_run_count, expected_run_count);
1106
1107        // Test with different data types
1108        let uint8_array = UInt8Array::from(vec![1, 1, 2, 2, 3, 3]);
1109        let block = DataBlock::from_array(uint8_array);
1110        let expected_run_count = 3;
1111        let actual_run_count = block.expect_single_stat::<UInt64Type>(Stat::RunCount);
1112        assert_eq!(actual_run_count, expected_run_count);
1113
1114        let int64_array = Int64Array::from(vec![100, 100, 200, 300, 300]);
1115        let block = DataBlock::from_array(int64_array);
1116        let expected_run_count = 3;
1117        let actual_run_count = block.expect_single_stat::<UInt64Type>(Stat::RunCount);
1118        assert_eq!(actual_run_count, expected_run_count);
1119    }
1120}