use std::collections::HashMap;
/// Scalar element types a column can hold.
///
/// Every variant has a same-named data variant in `LanceColumn` and a numeric
/// tag exposed through `type_tag` / `from_type_tag`. The enum is
/// `#[non_exhaustive]`, so matches written outside the defining crate need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum LanceDataType {
/// `f32` values (tag 0).
Float32,
/// `f64` values (tag 1).
Float64,
/// `i32` values (tag 2).
Int32,
/// `i64` values (tag 3).
Int64,
/// `u32` values (tag 4).
UInt32,
/// `u64` values (tag 5).
UInt64,
/// `String` values (tag 6).
Utf8,
/// `bool` values (tag 7).
Boolean,
}
impl LanceDataType {
pub fn type_tag(&self) -> u8 {
match self {
LanceDataType::Float32 => 0,
LanceDataType::Float64 => 1,
LanceDataType::Int32 => 2,
LanceDataType::Int64 => 3,
LanceDataType::UInt32 => 4,
LanceDataType::UInt64 => 5,
LanceDataType::Utf8 => 6,
LanceDataType::Boolean => 7,
}
}
pub fn from_type_tag(tag: u8) -> Option<Self> {
match tag {
0 => Some(LanceDataType::Float32),
1 => Some(LanceDataType::Float64),
2 => Some(LanceDataType::Int32),
3 => Some(LanceDataType::Int64),
4 => Some(LanceDataType::UInt32),
5 => Some(LanceDataType::UInt64),
6 => Some(LanceDataType::Utf8),
7 => Some(LanceDataType::Boolean),
_ => None,
}
}
}
/// A named, typed entry of a `LanceSchema`.
///
/// Marked `#[non_exhaustive]`: code outside the defining crate cannot build it
/// with a struct literal and should go through `LanceField::new` or
/// `LanceField::nullable`.
#[non_exhaustive]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LanceField {
/// Field name.
pub name: String,
/// Element type of the field.
pub dtype: LanceDataType,
/// Nullability flag: `LanceField::new` sets it to `false`,
/// `LanceField::nullable` sets it to `true`.
/// NOTE(review): presumably means the column may contain nulls; nothing in
/// this file enforces that, so confirm against the readers/writers.
pub nullable: bool,
}
impl LanceField {
pub fn new(name: impl Into<String>, dtype: LanceDataType) -> Self {
Self {
name: name.into(),
dtype,
nullable: false,
}
}
pub fn nullable(name: impl Into<String>, dtype: LanceDataType) -> Self {
Self {
name: name.into(),
dtype,
nullable: true,
}
}
}
/// An ordered list of fields plus free-form string metadata.
///
/// Marked `#[non_exhaustive]`: code outside the defining crate should build it
/// through `LanceSchema::new`, `LanceSchema::with_metadata` or `Default`.
#[non_exhaustive]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LanceSchema {
/// The schema's fields, in order.
pub fields: Vec<LanceField>,
/// String key/value metadata; `LanceSchema::new` leaves it empty.
pub metadata: HashMap<String, String>,
}
impl LanceSchema {
pub fn new(fields: Vec<LanceField>) -> Self {
Self {
fields,
metadata: HashMap::new(),
}
}
pub fn with_metadata(fields: Vec<LanceField>, metadata: HashMap<String, String>) -> Self {
Self { fields, metadata }
}
}
impl Default for LanceSchema {
fn default() -> Self {
Self::new(Vec::new())
}
}
/// In-memory values of a single column.
///
/// Each data variant owns a `Vec` of the matching Rust type; `Nullable` wraps
/// another column. The enum is `#[non_exhaustive]`, so matches written outside
/// the defining crate need a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum LanceColumn {
/// `f32` values.
Float32(Vec<f32>),
/// `f64` values.
Float64(Vec<f64>),
/// `i32` values.
Int32(Vec<i32>),
/// `i64` values.
Int64(Vec<i64>),
/// `u32` values.
UInt32(Vec<u32>),
/// `u64` values.
UInt64(Vec<u64>),
/// `String` values.
Utf8(Vec<String>),
/// `bool` values.
Boolean(Vec<bool>),
/// A wrapped column paired with a `Vec<bool>`. `len` and `data_type` are
/// answered by the wrapped column alone.
/// NOTE(review): the `Vec<bool>` is presumably a per-row validity/null mask;
/// its polarity and required length are not established in this file.
Nullable(Box<LanceColumn>, Vec<bool>),
}
impl LanceColumn {
    /// Number of values stored in the column.
    ///
    /// A `Nullable` column reports the length of the column it wraps; the
    /// accompanying `Vec<bool>` is not consulted.
    pub fn len(&self) -> usize {
        match self {
            Self::Nullable(wrapped, _) => wrapped.len(),
            Self::Float32(values) => values.len(),
            Self::Float64(values) => values.len(),
            Self::Int32(values) => values.len(),
            Self::Int64(values) => values.len(),
            Self::UInt32(values) => values.len(),
            Self::UInt64(values) => values.len(),
            Self::Utf8(values) => values.len(),
            Self::Boolean(values) => values.len(),
        }
    }

    /// Returns `true` when [`LanceColumn::len`] is zero.
    pub fn is_empty(&self) -> bool {
        let count = self.len();
        count == 0
    }

    /// Element type of the column.
    ///
    /// A `Nullable` column reports the type of the column it wraps.
    pub fn data_type(&self) -> LanceDataType {
        match self {
            Self::Nullable(wrapped, _) => wrapped.data_type(),
            Self::Float32(..) => LanceDataType::Float32,
            Self::Float64(..) => LanceDataType::Float64,
            Self::Int32(..) => LanceDataType::Int32,
            Self::Int64(..) => LanceDataType::Int64,
            Self::UInt32(..) => LanceDataType::UInt32,
            Self::UInt64(..) => LanceDataType::UInt64,
            Self::Utf8(..) => LanceDataType::Utf8,
            Self::Boolean(..) => LanceDataType::Boolean,
        }
    }
}
/// A schema together with its column data and a row count.
#[derive(Debug, Clone)]
pub struct LanceBatch {
/// Schema associated with the batch.
pub schema: LanceSchema,
/// Column data; `LanceBatch::empty` creates one column per schema field, in
/// field order.
pub columns: Vec<LanceColumn>,
/// Row count as supplied by the constructor; `LanceBatch::new` stores it
/// without checking it against the column lengths.
pub num_rows: usize,
}
impl LanceBatch {
pub fn new(schema: LanceSchema, columns: Vec<LanceColumn>, num_rows: usize) -> Self {
Self {
schema,
columns,
num_rows,
}
}
pub fn empty(schema: LanceSchema) -> Self {
let columns = schema
.fields
.iter()
.map(|f| empty_column_for(&f.dtype))
.collect();
Self {
schema,
columns,
num_rows: 0,
}
}
}
/// Returns an empty column whose variant corresponds to `dtype`.
///
/// The result is always one of the plain data variants, never
/// `LanceColumn::Nullable`.
fn empty_column_for(dtype: &LanceDataType) -> LanceColumn {
    match *dtype {
        LanceDataType::Float32 => LanceColumn::Float32(vec![]),
        LanceDataType::Float64 => LanceColumn::Float64(vec![]),
        LanceDataType::Int32 => LanceColumn::Int32(vec![]),
        LanceDataType::Int64 => LanceColumn::Int64(vec![]),
        LanceDataType::UInt32 => LanceColumn::UInt32(vec![]),
        LanceDataType::UInt64 => LanceColumn::UInt64(vec![]),
        LanceDataType::Utf8 => LanceColumn::Utf8(vec![]),
        LanceDataType::Boolean => LanceColumn::Boolean(vec![]),
    }
}