use super::table::Table;
/// Computes the τ₀ score of a table from the spread of its per-record field counts.
///
/// The score is `1 / (1 + 2σ)`, where `σ` is the population standard deviation of
/// `table.field_counts`. It equals `1.0` when every count is identical (`σ = 0`)
/// and shrinks towards `0.0` as the counts become more dispersed.
///
/// Returns `0.0` when `table.field_counts` is empty.
pub fn calculate_tau_0(table: &Table) -> f64 {
    let counts = &table.field_counts;
    if counts.is_empty() {
        return 0.0;
    }

    let spread = standard_deviation(counts);
    // Fused multiply-add keeps `2σ + 1` to a single rounding step.
    spread.mul_add(2.0, 1.0).recip()
}
/// Computes the τ₁ score of a table as a weighted blend of three sub-scores,
/// each in `[0, 1]`:
///
/// * **mode score** (weight 0.4): `table.modal_field_count_freq()` divided by the
///   number of entries in `table.field_counts`.
/// * **range score** (weight 0.3): `1 - (max - min) / max`, using
///   `table.max_field_count()` and `table.min_field_count()`; `0.0` when the
///   maximum is zero.
/// * **transition score** (weight 0.3): `1 - transitions / (n - 1)`, where a
///   transition is a pair of adjacent entries with different field counts.
///
/// Returns `0.0` when `table.field_counts` is empty and `1.0` when it holds a
/// single entry.
pub fn calculate_tau_1(table: &Table) -> f64 {
    const MODE_WEIGHT: f64 = 0.4;
    const RANGE_WEIGHT: f64 = 0.3;
    const TRANSITION_WEIGHT: f64 = 0.3;

    let counts = &table.field_counts;
    let n = counts.len();
    if n == 0 {
        return 0.0;
    }
    if n == 1 {
        // A single entry has no neighbours to disagree with.
        return 1.0;
    }

    let min_fc = table.min_field_count();
    let max_fc = table.max_field_count();
    let range = max_fc - min_fc;
    // NOTE(review): a table whose maximum field count is 0 scores 0.0 here even
    // though all of its counts are identical — confirm this is intended.
    let range_score = if max_fc == 0 {
        0.0
    } else {
        1.0 - (range as f64 / max_fc as f64).min(1.0)
    };

    // Count adjacent pairs whose field counts differ. `count()` yields a `usize`,
    // so the tally cannot overflow the way an inferred `i32` counter could on
    // very large tables.
    let transitions = counts
        .windows(2)
        .filter(|pair| pair[0] != pair[1])
        .count();
    // `n >= 2` here, so the divisor is never zero.
    let transition_score = 1.0 - (transitions as f64 / (n - 1) as f64);

    let mode_count = table.modal_field_count_freq();
    let mode_score = mode_count as f64 / n as f64;

    mode_score.mul_add(
        MODE_WEIGHT,
        range_score * RANGE_WEIGHT + transition_score * TRANSITION_WEIGHT,
    )
}
/// Combines the two τ scores into one value: the geometric mean of
/// [`calculate_tau_0`] and [`calculate_tau_1`] for `table`.
// NOTE(review): no caller is visible in this file; confirm the `allow` is still needed.
#[allow(dead_code)]
pub fn calculate_uniformity(table: &Table) -> f64 {
    let product = calculate_tau_0(table) * calculate_tau_1(table);
    product.sqrt()
}
/// Population standard deviation of `values` (the squared deviations are divided
/// by `n`, not `n - 1`).
///
/// Returns `0.0` for an empty slice.
fn standard_deviation(values: &[usize]) -> f64 {
    let len = values.len();
    if len == 0 {
        return 0.0;
    }
    let count = len as f64;

    let total: usize = values.iter().sum();
    let mean = total as f64 / count;

    // Accumulate squared deviations from the mean in slice order.
    let mut squared_error = 0.0_f64;
    for &value in values {
        let deviation = value as f64 - mean;
        squared_error += deviation * deviation;
    }

    (squared_error / count).sqrt()
}
/// Reports whether every entry of `table.field_counts` holds the same value.
///
/// An empty `field_counts` is considered uniform.
pub fn is_uniform(table: &Table) -> bool {
    match table.field_counts.split_first() {
        None => true,
        Some((head, tail)) => tail.iter().all(|count| count == head),
    }
}
/// Summary statistics over a table's field counts, as populated by
/// `FieldCountStats::from_table`.
// NOTE(review): no user of this type is visible in this file; confirm the
// `allow(dead_code)` is still needed.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FieldCountStats {
/// Value reported by `Table::min_field_count`.
pub min: usize,
/// Value reported by `Table::max_field_count`.
pub max: usize,
/// Value reported by `Table::modal_field_count`.
pub mode: usize,
/// Arithmetic mean of `Table::field_counts`; `0.0` when it is empty.
pub mean: f64,
/// Population standard deviation of `Table::field_counts`; `0.0` when it is empty.
pub std_dev: f64,
/// `true` when all entries of `Table::field_counts` are equal, or when it is empty.
pub is_uniform: bool,
}
impl FieldCountStats {
    /// Gathers the field-count statistics of `table` into a [`FieldCountStats`].
    ///
    /// `min`, `max` and `mode` come straight from the table's own accessors; the
    /// mean and standard deviation are computed from `table.field_counts`, and
    /// both are `0.0` when it is empty.
    #[allow(dead_code)]
    pub fn from_table(table: &Table) -> Self {
        let counts = &table.field_counts;
        Self {
            min: table.min_field_count(),
            max: table.max_field_count(),
            mode: table.modal_field_count(),
            mean: match counts.len() {
                0 => 0.0,
                len => counts.iter().sum::<usize>() as f64 / len as f64,
            },
            std_dev: standard_deviation(counts),
            is_uniform: is_uniform(table),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing a score against an expected value.
    const EPSILON: f64 = 0.001;

    /// Builds a table with the given field counts and refreshes its cached
    /// modal field count.
    fn table_with_counts(counts: &[usize]) -> Table {
        let mut table = Table::new();
        table.field_counts = counts.to_vec();
        table.update_modal_field_count();
        table
    }

    #[test]
    fn test_tau_0_uniform() {
        let table = table_with_counts(&[3, 3, 3, 3, 3]);
        let tau_0 = calculate_tau_0(&table);
        assert!((tau_0 - 1.0).abs() < EPSILON);
    }

    #[test]
    fn test_tau_0_varied() {
        let table = table_with_counts(&[3, 4, 3, 5, 3]);
        let tau_0 = calculate_tau_0(&table);
        assert!(tau_0 < 1.0);
        assert!(tau_0 > 0.0);
    }

    #[test]
    fn test_tau_1_uniform() {
        let table = table_with_counts(&[3, 3, 3, 3, 3]);
        let tau_1 = calculate_tau_1(&table);
        assert!((tau_1 - 1.0).abs() < EPSILON);
    }

    #[test]
    fn test_is_uniform() {
        let uniform_table = table_with_counts(&[3, 3, 3]);
        assert!(is_uniform(&uniform_table));

        let varied_table = table_with_counts(&[3, 4, 3]);
        assert!(!is_uniform(&varied_table));
    }
}