pub mod bin_mapper;
pub mod builder;
pub use bin_mapper::BinMapper;
pub use builder::DatasetBuilder;
use serde::{Deserialize, Serialize};
/// Bin index `0` expressed as a `u16`.
///
/// It has the same numeric value as `Bin::MISSING` for both the `u8` and the
/// `u16` implementations in this module.
// NOTE(review): presumably this is the bin that missing feature values are
// mapped to by `BinMapper` — confirm against the bin-mapper implementation.
pub const MISSING_BIN: u16 = 0;
/// Abstraction over the integer types used to store bin indices.
///
/// This module implements it for `u8` and `u16`. Implementors must be cheap
/// to copy, comparable, and shareable across threads (see the supertrait
/// bounds).
pub trait Bin:
    Copy + PartialEq + PartialOrd + Send + Sync + 'static
{
    /// The value of this type that stands for the "missing" bin.
    const MISSING: Self;

    /// Converts the bin value to a `usize`.
    fn as_usize(self) -> usize;

    /// Builds a bin value from a `u16`.
    ///
    /// Whether values that do not fit are rejected, truncated, or otherwise
    /// handled is up to the implementor.
    fn from_u16(v: u16) -> Self;
}
/// 8-bit bin storage.
impl Bin for u8 {
    /// Bin `0` is the missing bin.
    const MISSING: Self = 0;

    /// Lossless widening of the bin value to `usize`.
    #[inline(always)]
    fn as_usize(self) -> usize {
        usize::from(self)
    }

    /// Keeps only the low 8 bits of `v`.
    ///
    /// Values above `u8::MAX` are NOT rejected: they wrap modulo 256, so for
    /// example `256` becomes `0` (which equals [`Bin::MISSING`]) and `300`
    /// becomes `44`. Callers must ensure `v` fits if that is not wanted.
    #[inline(always)]
    fn from_u16(v: u16) -> Self {
        // Masking makes the truncation explicit; the result is identical to
        // a plain `v as u8`.
        (v & 0x00FF) as u8
    }
}
/// 16-bit bin storage.
impl Bin for u16 {
    /// Bin `0` is the missing bin.
    const MISSING: Self = 0;

    /// Lossless widening of the bin value to `usize`.
    #[inline(always)]
    fn as_usize(self) -> usize {
        usize::from(self)
    }

    /// Identity conversion: a `u16` bin is stored as-is.
    #[inline(always)]
    fn from_u16(v: u16) -> Self {
        v
    }
}
/// Integer width used to store bin indices; mirrors the variants of `BinData`
/// and is what `BinData::width` / `Dataset::bin_width` return.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinWidth {
/// Bin indices are stored as `u8`.
U8,
/// Bin indices are stored as `u16`.
U16,
}
/// Binned feature values, stored at one of two integer widths.
///
/// The outer vector is indexed by feature and the inner vector by row
/// (`Dataset::feature_bin` reads `v[feat][row]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BinData {
/// One `Vec<u8>` of bin indices per feature.
U8(Vec<Vec<u8>>),
/// One `Vec<u16>` of bin indices per feature.
U16(Vec<Vec<u16>>),
}
impl BinData {
    /// Reports which integer width this value stores its bins in.
    pub fn width(&self) -> BinWidth {
        match self {
            Self::U8(..) => BinWidth::U8,
            Self::U16(..) => BinWidth::U16,
        }
    }

    /// Number of feature columns, i.e. the length of the outer vector.
    pub fn n_features(&self) -> usize {
        match self {
            Self::U8(columns) => columns.len(),
            Self::U16(columns) => columns.len(),
        }
    }
}
/// A binned dataset: per-feature bin columns, the mappers that belong to
/// them, and one `f32` label vector.
// NOTE(review): nothing in this file checks that `n_rows` / `n_features`
// agree with the dimensions of `bin_data`, `bin_mappers` or `labels`;
// presumably `DatasetBuilder` establishes that — confirm there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
/// Row count reported by `Dataset::n_rows`.
pub(crate) n_rows: usize,
/// Feature count reported by `Dataset::n_features`.
pub(crate) n_features: usize,
/// Binned values, indexed as `[feat][row]` by the accessors.
pub(crate) bin_data: BinData,
/// Bin mappers, indexed by feature in `Dataset::bin_mapper`.
pub(crate) bin_mappers: Vec<BinMapper>,
/// Label values exposed through `Dataset::labels`.
pub(crate) labels: Vec<f32>,
}
/// Read-only accessors for [`Dataset`].
impl Dataset {
    /// Returns the stored row count.
    pub fn n_rows(&self) -> usize {
        self.n_rows
    }

    /// Returns the stored feature count.
    pub fn n_features(&self) -> usize {
        self.n_features
    }

    /// Borrows the label values.
    pub fn labels(&self) -> &[f32] {
        self.labels.as_slice()
    }

    /// Reports whether bins are stored as `u8` or `u16`.
    pub fn bin_width(&self) -> BinWidth {
        self.bin_data.width()
    }

    /// Borrows the `u8` bin column of feature `feat`.
    ///
    /// # Panics
    ///
    /// Panics if the dataset stores `u16` bins, or if `feat` is out of range.
    pub fn feature_column_u8(&self, feat: usize) -> &[u8] {
        match &self.bin_data {
            BinData::U16(_) => panic!("feature_column_u8 called on a U16 dataset"),
            BinData::U8(columns) => &columns[feat],
        }
    }

    /// Borrows the `u16` bin column of feature `feat`.
    ///
    /// # Panics
    ///
    /// Panics if the dataset stores `u8` bins, or if `feat` is out of range.
    pub fn feature_column_u16(&self, feat: usize) -> &[u16] {
        match &self.bin_data {
            BinData::U8(_) => panic!("feature_column_u16 called on a U8 dataset"),
            BinData::U16(columns) => &columns[feat],
        }
    }

    /// Looks up the bin of feature `feat` at row `row`, widened to `u16`
    /// regardless of the storage width.
    ///
    /// # Panics
    ///
    /// Panics if `feat` or `row` is out of range.
    #[inline]
    pub fn feature_bin(&self, feat: usize, row: usize) -> u16 {
        match &self.bin_data {
            BinData::U16(columns) => columns[feat][row],
            // `u8 -> u16` is a lossless widening.
            BinData::U8(columns) => u16::from(columns[feat][row]),
        }
    }

    /// Borrows the bin mapper stored at index `feat`.
    ///
    /// # Panics
    ///
    /// Panics if `feat` is out of range.
    pub fn bin_mapper(&self, feat: usize) -> &BinMapper {
        &self.bin_mappers[feat]
    }

    /// Borrows all bin mappers.
    pub fn bin_mappers(&self) -> &[BinMapper] {
        self.bin_mappers.as_slice()
    }
}
/// Runs `$body` with `$cols` bound to the bin columns of the features listed
/// in `$feats`, at the dataset's native storage width.
///
/// * `$ds` — an expression on which `bin_width()`, `feature_column_u8(f)` and
///   `feature_column_u16(f)` can be called (a `Dataset` or a reference to
///   one). It is evaluated exactly once.
/// * `$feats` — an expression with an `.iter()` yielding references to
///   feature indices.
/// * `$cols` — the identifier `$body` uses for the collected columns; it is a
///   `Vec<&[u8]>` or a `Vec<&[u16]>` depending on the dataset's width.
///
/// `$body` is expanded once per width, so it must type-check for both
/// `Vec<&[u8]>` and `Vec<&[u16]>`; only the arm that matches the dataset's
/// width runs.
macro_rules! with_columns {
    ($ds:expr, $feats:expr, |$cols:ident| $body:block) => {{
        // Bind the dataset expression once. Expanding `$ds` both in the
        // scrutinee and inside the per-feature closure would re-evaluate a
        // non-trivial expression (and any side effects it has) repeatedly.
        // The binding is hygienic, so `$body` and `$feats` cannot observe it.
        let dataset = &$ds;
        match dataset.bin_width() {
            $crate::dataset::BinWidth::U8 => {
                let $cols: Vec<&[u8]> = $feats
                    .iter()
                    .map(|&f| dataset.feature_column_u8(f))
                    .collect();
                $body
            }
            $crate::dataset::BinWidth::U16 => {
                let $cols: Vec<&[u16]> = $feats
                    .iter()
                    .map(|&f| dataset.feature_column_u16(f))
                    .collect();
                $body
            }
        }
    }};
}
/// Runs `$body` with `$col` bound to the bin column of a single feature, at
/// the dataset's native storage width.
///
/// * `$ds` — an expression on which `bin_width()`, `feature_column_u8(f)` and
///   `feature_column_u16(f)` can be called (a `Dataset` or a reference to
///   one). It is evaluated exactly once.
/// * `$feat` — the feature index passed to the column accessor.
/// * `$col` — the identifier `$body` uses for the column; it is a `&[u8]` or
///   a `&[u16]` depending on the dataset's width.
///
/// `$body` is expanded once per width, so it must type-check for both
/// `&[u8]` and `&[u16]`; only the arm that matches the dataset's width runs.
macro_rules! with_column {
    ($ds:expr, $feat:expr, |$col:ident| $body:block) => {{
        // Bind the dataset expression once. Expanding `$ds` both in the
        // scrutinee and in the selected arm would evaluate a non-trivial
        // expression (and any side effects it has) twice. The binding is
        // hygienic, so `$body` and `$feat` cannot observe it.
        let dataset = &$ds;
        match dataset.bin_width() {
            $crate::dataset::BinWidth::U8 => {
                let $col: &[u8] = dataset.feature_column_u8($feat);
                $body
            }
            $crate::dataset::BinWidth::U16 => {
                let $col: &[u16] = dataset.feature_column_u16($feat);
                $body
            }
        }
    }};
}
pub(crate) use with_column;
pub(crate) use with_columns;