use arrow::record_batch::RecordBatch;
use samkhya_core::Result;
use samkhya_core::sketches::{BloomFilter, EquiDepthHistogram, HllSketch};
use crate::ingest::{
ingest_array_into_bloom, ingest_array_into_histogram_values, ingest_array_into_hll,
};
/// Builds one [`HllSketch`] for every column of `batch`.
///
/// Each sketch is created with `precision` and then fed the whole column via
/// `ingest_array_into_hll`. The returned vector is in column order, so entry
/// `i` belongs to `batch.column(i)`.
///
/// # Errors
///
/// Returns the error from `HllSketch::new` if a sketch cannot be created with
/// the given `precision`; no further columns are processed after a failure.
pub fn build_column_sketches(batch: &RecordBatch, precision: u8) -> Result<Vec<HllSketch>> {
    (0..batch.num_columns())
        .map(|idx| -> Result<HllSketch> {
            let mut sketch = HllSketch::new(precision)?;
            ingest_array_into_hll(batch.column(idx).as_ref(), &mut sketch);
            Ok(sketch)
        })
        // Collecting into `Result` stops at the first failed column.
        .collect()
}
/// Builds one [`BloomFilter`] for every column of `batch`.
///
/// Every filter is constructed with `batch.num_rows()` as its capacity
/// argument and `fp_rate` as its second argument (presumably the target
/// false-positive rate; confirm against `BloomFilter::new`), and is then fed
/// the whole column via `ingest_array_into_bloom`. The returned vector is in
/// column order.
///
/// # Errors
///
/// The body as written never produces an error; the `Result` return type is
/// part of the function's signature.
pub fn build_blooms(batch: &RecordBatch, fp_rate: f64) -> Result<Vec<BloomFilter>> {
    // The row count is the same for every column, so read it once.
    let expected_items = batch.num_rows();
    let filters: Vec<BloomFilter> = (0..batch.num_columns())
        .map(|idx| {
            let mut filter = BloomFilter::new(expected_items, fp_rate);
            ingest_array_into_bloom(batch.column(idx).as_ref(), &mut filter);
            filter
        })
        .collect();
    Ok(filters)
}
/// Builds an optional [`EquiDepthHistogram`] for every column of `batch`.
///
/// For each column, `ingest_array_into_histogram_values` is asked for the
/// column's values. When that succeeds, a histogram is built from the values
/// with `buckets` as the bucket argument and stored as `Some`. When it fails,
/// the error is discarded and the column's entry is `None`. The returned
/// vector is in column order and always has one entry per column.
///
/// # Errors
///
/// Returns the error from `EquiDepthHistogram::from_values` if a histogram
/// cannot be built from a column's extracted values; no further columns are
/// processed after a failure.
pub fn build_histograms(
    batch: &RecordBatch,
    buckets: usize,
) -> Result<Vec<Option<EquiDepthHistogram>>> {
    (0..batch.num_columns())
        .map(|idx| -> Result<Option<EquiDepthHistogram>> {
            let histogram = match ingest_array_into_histogram_values(batch.column(idx).as_ref()) {
                // Extraction failed for this column: record "no histogram"
                // instead of failing the whole batch.
                Err(_) => None,
                Ok(values) => Some(EquiDepthHistogram::from_values(&values, buckets)?),
            };
            Ok(histogram)
        })
        .collect()
}