// entrenar/config/infer/stats.rs

/// Summary statistics for a single named column.
///
/// All fields are public and filled in by whoever builds the value; this type
/// only stores them. The derived [`Default`] yields an empty name, zero
/// counts, `false` flags, `None` options and no sample values.
// The struct carries four boolean flags, which trips clippy's
// `struct_excessive_bools`; the lint is silenced here rather than
// restructuring the public fields.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Default)]
pub struct ColumnStats {
    /// Name of the column these statistics describe.
    pub name: String,
    /// Number of values counted; the denominator of both ratio helpers.
    // NOTE(review): whether nulls are included in `count` is not visible here.
    pub count: usize,
    /// Number of distinct values; the numerator of the cardinality ratio.
    pub unique_count: usize,
    /// Number of null values; the numerator of the null ratio.
    pub null_count: usize,
    /// Flag recording that every value was an integer.
    // NOTE(review): presumably evaluated over non-null values only; confirm
    // against the code that populates this struct.
    pub all_integers: bool,
    /// Flag recording that every value was numeric.
    pub all_numeric: bool,
    /// Shortest string length seen, if one was recorded.
    // NOTE(review): unit (bytes vs. chars) is not visible here.
    pub min_str_len: Option<usize>,
    /// Longest string length seen, if one was recorded.
    pub max_str_len: Option<usize>,
    /// Mean string length, if one was recorded.
    pub avg_str_len: Option<f32>,
    /// Flag recording that the values look like date/time data.
    pub looks_like_datetime: bool,
    /// Flag recording that the column holds array values.
    pub is_array: bool,
    /// Array length, if one was recorded.
    // NOTE(review): presumably only meaningful when `is_array` is set; confirm.
    pub array_len: Option<usize>,
    /// Example values from the column, stored as strings.
    pub sample_values: Vec<String>,
}

35impl ColumnStats {
36 pub fn new(name: impl Into<String>) -> Self {
38 Self { name: name.into(), ..Default::default() }
39 }
40
41 pub fn cardinality_ratio(&self) -> f32 {
43 if self.count == 0 {
44 0.0
45 } else {
46 self.unique_count as f32 / self.count as f32
47 }
48 }
49
50 pub fn null_ratio(&self) -> f32 {
52 if self.count == 0 {
53 0.0
54 } else {
55 self.null_count as f32 / self.count as f32
56 }
57 }
58}