use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
/// Describes the high-level shape of a piece of data.
///
/// The optional counts stored in the variants are what `SemanticType::size_hint`
/// and `SemanticMeta::processing_strategy` consult; each of them may be `None`
/// when the value is not known.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SemanticType {
/// A sequence of numeric elements that all share one element type.
NumericArray {
/// Element type of the array.
dtype: NumericDType,
/// Number of elements, if known (`size_hint` multiplies it by the element size).
length: Option<usize>,
},
/// Time-series data.
TimeSeries {
/// Presumably the name of the field holding the timestamp — confirm with callers.
timestamp_field: String,
/// Presumably the names of the value-carrying fields — confirm with callers.
value_fields: SmallVec<[String; 4]>,
/// Optional interval; the `_ms` suffix suggests milliseconds (not enforced here).
interval_ms: Option<u64>,
},
/// Tabular data described by a list of columns.
Table {
/// Column descriptors, stored behind a `Box`.
columns: Box<SmallVec<[ColumnMeta; 16]>>,
/// Number of rows, if known.
row_count: Option<usize>,
},
/// Graph data.
Graph {
/// Free-form label for the node type (no format is enforced in this file).
node_type: String,
/// Free-form label for the edge type (no format is enforced in this file).
edge_type: String,
/// Number of nodes, if known.
node_count: Option<usize>,
},
/// Geospatial data.
Geospatial {
/// Free-form coordinate-system identifier (no format is enforced in this file).
coordinate_system: String,
/// Free-form geometry-type identifier (no format is enforced in this file).
geometry_type: String,
},
/// A multi-dimensional numeric block.
Matrix {
/// Extent of each dimension; `size_hint` multiplies them together.
dimensions: SmallVec<[usize; 4]>,
/// Element type of the matrix.
dtype: NumericDType,
},
/// Fallback variant that carries no additional information.
Generic,
}
/// Primitive numeric element types.
///
/// The per-element byte width of each variant is reported by `NumericDType::size`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum NumericDType {
/// Floating point, 8 bytes per element.
F64,
/// Floating point, 4 bytes per element.
F32,
/// Signed integer, 8 bytes per element.
I64,
/// Signed integer, 4 bytes per element.
I32,
/// Signed integer, 2 bytes per element.
I16,
/// Signed integer, 1 byte per element.
I8,
/// Unsigned integer, 8 bytes per element.
U64,
/// Unsigned integer, 4 bytes per element.
U32,
/// Unsigned integer, 2 bytes per element.
U16,
/// Unsigned integer, 1 byte per element.
U8,
}
/// Describes a single column; `SemanticType::Table` stores a list of these.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ColumnMeta {
/// Column name.
pub name: String,
/// Type of the values stored in the column.
pub dtype: ColumnType,
/// Whether the column is flagged as nullable.
pub nullable: bool,
}
/// Value type of a table column (see `ColumnMeta::dtype`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ColumnType {
/// Numeric column with the given element type.
Numeric(NumericDType),
/// String column.
String,
/// Boolean column.
Boolean,
/// Timestamp column (representation is not specified in this file).
Timestamp,
/// JSON column (representation is not specified in this file).
Json,
/// Array column; the boxed inner type is presumably the element type.
/// The `Box` provides the indirection a recursive type requires.
Array(Box<ColumnType>),
}
/// Semantic description of a piece of data together with processing hints.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SemanticMeta {
/// Primary type; this is the one `processing_strategy` inspects.
pub semantic_type: SemanticType,
/// Additional types appended through `with_secondary`.
/// Nothing in this file reads them when choosing a strategy.
pub secondary_types: SmallVec<[SemanticType; 2]>,
/// Caller preferences that steer `processing_strategy`.
pub hints: ProcessingHints,
}
/// Caller-supplied preferences about how the data should be processed.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProcessingHints {
/// Request SIMD; `SemanticMeta::processing_strategy` honours it only when the
/// primary type reports `is_simd_friendly()`.
pub prefer_simd: bool,
/// Request GPU; when set, `SemanticMeta::processing_strategy` returns `Gpu`
/// before looking at anything else.
pub prefer_gpu: bool,
/// Parallelism preference; not read anywhere in this file.
pub prefer_parallel: bool,
/// Expected access pattern; not read anywhere in this file.
pub access_pattern: AccessPattern,
/// Compression preference; not read anywhere in this file.
pub compression_hint: CompressionHint,
}
/// How the data is expected to be accessed (stored in `ProcessingHints`).
///
/// The variants are plain labels; this file attaches no behaviour to them.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum AccessPattern {
/// Sequential access; the value used by `ProcessingHints::default()`.
Sequential,
/// Random access.
Random,
/// Streaming access.
Streaming,
}
/// Compression preference (stored in `ProcessingHints`).
///
/// The variants are plain labels; this file attaches no behaviour to them.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum CompressionHint {
/// No compression requested.
None,
/// Presumably favours speed over ratio — confirm with the consumer of this hint.
Fast,
/// The value used by `ProcessingHints::default()`.
Balanced,
/// Presumably favours ratio over speed — confirm with the consumer of this hint.
Maximum,
}
impl SemanticType {
    /// Returns the element type carried by this value, if it has one.
    ///
    /// Only `NumericArray` and `Matrix` store a [`NumericDType`]; every other
    /// variant yields `None`.
    pub fn numeric_dtype(&self) -> Option<NumericDType> {
        match self {
            Self::NumericArray { dtype, .. } | Self::Matrix { dtype, .. } => Some(*dtype),
            Self::TimeSeries { .. }
            | Self::Table { .. }
            | Self::Graph { .. }
            | Self::Geospatial { .. }
            | Self::Generic => None,
        }
    }

    /// Returns `true` for the variants treated as SIMD candidates:
    /// `NumericArray` and `Matrix`.
    pub fn is_simd_friendly(&self) -> bool {
        matches!(self, Self::NumericArray { .. } | Self::Matrix { .. })
    }

    /// Returns `true` for the variants treated as column-oriented:
    /// `Table` and `TimeSeries`.
    pub fn is_columnar(&self) -> bool {
        matches!(self, Self::Table { .. } | Self::TimeSeries { .. })
    }

    /// Estimates the size of the described data in bytes.
    ///
    /// * `NumericArray` — `length * dtype.size()`; `None` when the length is unknown.
    /// * `Table` — `row_count * columns.len() * 8`, i.e. a flat 8 bytes per cell
    ///   regardless of the column types; `None` when the row count is unknown.
    /// * `Matrix` — product of all dimensions times `dtype.size()`. An empty
    ///   dimension list yields `dtype.size()`; any zero dimension yields `Some(0)`.
    /// * Every other variant — `None`.
    ///
    /// `None` is also returned when the estimate does not fit in a `usize`,
    /// instead of panicking (debug builds) or silently wrapping (release builds).
    pub fn size_hint(&self) -> Option<usize> {
        // Flat per-cell estimate used for tables; column types are not inspected.
        const ASSUMED_CELL_BYTES: usize = 8;

        match self {
            Self::NumericArray { dtype, length } => {
                length.and_then(|len| len.checked_mul(dtype.size()))
            }
            Self::Table { row_count, columns } => row_count
                .and_then(|rows| rows.checked_mul(columns.len()))
                .and_then(|cells| cells.checked_mul(ASSUMED_CELL_BYTES)),
            Self::Matrix { dimensions, dtype } => {
                // A zero extent makes the true product zero even if a partial
                // product of the other extents would overflow.
                if dimensions.contains(&0) {
                    return Some(0);
                }
                dimensions
                    .iter()
                    .try_fold(dtype.size(), |bytes, &extent| bytes.checked_mul(extent))
            }
            Self::TimeSeries { .. }
            | Self::Graph { .. }
            | Self::Geospatial { .. }
            | Self::Generic => None,
        }
    }
}
impl NumericDType {
    /// Width of a single element of this type, in bytes.
    pub fn size(self) -> usize {
        match self {
            Self::I8 | Self::U8 => 1,
            Self::I16 | Self::U16 => 2,
            Self::I32 | Self::U32 | Self::F32 => 4,
            Self::I64 | Self::U64 | Self::F64 => 8,
        }
    }

    /// `true` for the two floating-point variants, `F32` and `F64`.
    pub fn is_float(self) -> bool {
        [Self::F32, Self::F64].contains(&self)
    }

    /// `true` for every type that can hold negative values: the signed
    /// integers and both floats. Only the `U*` variants return `false`.
    pub fn is_signed(self) -> bool {
        !matches!(self, Self::U8 | Self::U16 | Self::U32 | Self::U64)
    }
}
impl Default for ProcessingHints {
fn default() -> Self {
Self {
prefer_simd: false,
prefer_gpu: false,
prefer_parallel: true,
access_pattern: AccessPattern::Sequential,
compression_hint: CompressionHint::Balanced,
}
}
}
impl SemanticMeta {
    /// Builds metadata for `semantic_type` with default hints and no
    /// secondary types.
    pub fn new(semantic_type: SemanticType) -> Self {
        Self::with_hints(semantic_type, ProcessingHints::default())
    }

    /// Builds metadata for `semantic_type` with caller-supplied `hints` and no
    /// secondary types.
    pub fn with_hints(semantic_type: SemanticType, hints: ProcessingHints) -> Self {
        let secondary_types = SmallVec::new();
        Self {
            semantic_type,
            secondary_types,
            hints,
        }
    }

    /// Builder-style helper: appends `secondary_type` and hands the value back.
    pub fn with_secondary(self, secondary_type: SemanticType) -> Self {
        let mut meta = self;
        meta.secondary_types.push(secondary_type);
        meta
    }

    /// Chooses a processing strategy from the hints and the primary type.
    ///
    /// Precedence, highest first:
    /// 1. `prefer_gpu` set → `Gpu`.
    /// 2. `prefer_simd` set and the primary type is SIMD-friendly → `Simd`.
    /// 3. Otherwise by shape: a numeric array with more than 1000 elements →
    ///    `Simd`; a table with more than 10000 rows → `Columnar`; any time
    ///    series → `Streaming`; everything else → `Generic`.
    pub fn processing_strategy(&self) -> ProcessingStrategy {
        // Size thresholds; both comparisons are strict.
        const SIMD_MIN_LEN: usize = 1000;
        const COLUMNAR_MIN_ROWS: usize = 10000;

        let hints = &self.hints;
        let primary = &self.semantic_type;

        if hints.prefer_gpu {
            ProcessingStrategy::Gpu
        } else if hints.prefer_simd && primary.is_simd_friendly() {
            ProcessingStrategy::Simd
        } else {
            match primary {
                SemanticType::NumericArray {
                    length: Some(elements),
                    ..
                } if *elements > SIMD_MIN_LEN => ProcessingStrategy::Simd,
                SemanticType::Table {
                    row_count: Some(row_total),
                    ..
                } if *row_total > COLUMNAR_MIN_ROWS => ProcessingStrategy::Columnar,
                SemanticType::TimeSeries { .. } => ProcessingStrategy::Streaming,
                _ => ProcessingStrategy::Generic,
            }
        }
    }
}
/// Strategy selected by `SemanticMeta::processing_strategy`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ProcessingStrategy {
/// Chosen when SIMD is requested for a SIMD-friendly type, or for a numeric
/// array with more than 1000 elements.
Simd,
/// Chosen whenever `ProcessingHints::prefer_gpu` is set.
Gpu,
/// Chosen for a table with more than 10000 rows.
Columnar,
/// Chosen for time-series data.
Streaming,
/// Fallback when no other rule applies.
Generic,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A known-length `f64` array is SIMD-friendly, exposes its dtype and
    /// reports `length * 8` bytes as its size hint.
    #[test]
    fn test_semantic_type_creation() {
        let dtype = NumericDType::F64;
        let array = SemanticType::NumericArray {
            dtype,
            length: Some(1000),
        };

        assert!(array.is_simd_friendly());
        assert_eq!(array.numeric_dtype(), Some(NumericDType::F64));
        // 1000 elements at 8 bytes each.
        assert_eq!(array.size_hint(), Some(8000));
    }

    /// With default hints, a numeric array longer than 1000 elements selects
    /// the SIMD strategy.
    #[test]
    fn test_processing_strategy() {
        let array = SemanticType::NumericArray {
            dtype: NumericDType::F32,
            length: Some(2000),
        };
        let strategy = SemanticMeta::new(array).processing_strategy();

        assert_eq!(strategy, ProcessingStrategy::Simd);
    }

    /// `ColumnMeta` is a plain record: fields read back as they were written.
    #[test]
    fn test_column_meta() {
        let value_column = ColumnMeta {
            name: "value".to_string(),
            dtype: ColumnType::Numeric(NumericDType::F64),
            nullable: false,
        };

        assert_eq!(value_column.name, "value");
        assert!(!value_column.nullable);
    }
}