/// Scalar type inferred for a column of textual cell values.
///
/// Variants are listed from most to least specific; inference picks the first
/// one that every non-null sampled value satisfies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ColumnType {
    /// No non-null value was seen (every cell was blank, `~`, or `null`).
    Null,
    /// Every non-null value was the literal `true` or `false`.
    Bool,
    /// Every non-null value parsed as an `i64`.
    Int,
    /// Every non-null value parsed as an `f64` (and at least one was not an `i64`).
    Float,
    /// Fallback when no narrower type fits every non-null value.
    String,
}
/// Infers the narrowest [`ColumnType`] that fits every value of one column.
///
/// Each value is trimmed first. Blank cells and the markers `~` and `null`
/// count as nulls and never influence the result. The remaining values are
/// tested against three candidates:
///
/// * boolean: exactly `true` or `false`;
/// * integer: accepted by `str::parse::<i64>`;
/// * float: accepted by `str::parse::<f64>`.
///
/// The result is chosen with the precedence `Null` (no non-null value at
/// all), then `Bool`, `Int`, `Float`, and finally `String` as the fallback.
/// Iteration stops as soon as every candidate has been ruled out.
fn infer_column_type<'a, I>(values: I) -> ColumnType
where
    I: Iterator<Item = &'a str>,
{
    // Candidates that are still consistent with every non-null cell seen so far.
    let mut saw_value = false;
    let mut bool_ok = true;
    let mut int_ok = true;
    let mut float_ok = true;

    let non_null = values
        .map(str::trim)
        .filter(|cell| !matches!(*cell, "" | "~" | "null"));

    for cell in non_null {
        saw_value = true;
        bool_ok &= matches!(cell, "true" | "false");
        // Once a candidate is ruled out it stays out, so parsing is skipped.
        int_ok = int_ok && cell.parse::<i64>().is_ok();
        float_ok = float_ok && cell.parse::<f64>().is_ok();

        // Nothing narrower than a string can match any more; stop scanning.
        if !(bool_ok || int_ok || float_ok) {
            return ColumnType::String;
        }
    }

    match (saw_value, bool_ok, int_ok, float_ok) {
        (false, ..) => ColumnType::Null,
        (true, true, ..) => ColumnType::Bool,
        (true, false, true, _) => ColumnType::Int,
        (true, false, false, true) => ColumnType::Float,
        (true, false, false, false) => ColumnType::String,
    }
}
/// Infers one [`ColumnType`] per column from the leading rows of `records`.
///
/// * `records` - row-major table of cell strings.
/// * `sample_size` - maximum number of leading rows to inspect; values larger
///   than `records.len()` are clamped.
///
/// The number of columns is taken from the first row. Rows in the sample that
/// are too short to contain a given column are skipped for that column.
/// Returns an empty vector when `records` is empty; otherwise the result has
/// exactly one entry per column of the first row, each produced by
/// `infer_column_type` over that column's sampled cells.
pub(crate) fn infer_column_types(records: &[Vec<String>], sample_size: usize) -> Vec<ColumnType> {
    let width = match records.first() {
        Some(first_row) => first_row.len(),
        None => return Vec::new(),
    };

    // Clamp once so the per-column pass only ever walks the sampled rows.
    let sample = &records[..sample_size.min(records.len())];

    let mut types = Vec::with_capacity(width);
    for col_idx in 0..width {
        let cells = sample
            .iter()
            .filter_map(|row| row.get(col_idx))
            .map(std::string::String::as_str);
        types.push(infer_column_type(cells));
    }
    types
}