1use serde::{Deserialize, Serialize};
4use smallvec::SmallVec;
5
/// Semantic classification of a piece of data, together with the shape
/// metadata that each category carries.
///
/// The helper methods on this type use the metadata to pick element types,
/// estimate byte sizes and classify the layout.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SemanticType {
    /// Array of numeric elements sharing a single element type.
    NumericArray {
        /// Element type of the array.
        dtype: NumericDType,
        /// Number of elements, or `None` when it is not known.
        length: Option<usize>,
    },

    /// Time-indexed data.
    TimeSeries {
        /// Presumably the name of the field holding the timestamp — confirm
        /// against the producers of this metadata.
        timestamp_field: String,
        /// Presumably the names of the value fields; up to four are stored
        /// inline before the `SmallVec` spills to the heap.
        value_fields: SmallVec<[String; 4]>,
        /// Presumably the sampling interval in milliseconds, if known — confirm.
        interval_ms: Option<u64>,
    },

    /// Tabular data described column by column.
    Table {
        /// Per-column metadata. Boxed, which keeps the 16-slot inline buffer
        /// of the `SmallVec` out of the enum's own footprint.
        columns: Box<SmallVec<[ColumnMeta; 16]>>,
        /// Number of rows, or `None` when it is not known.
        row_count: Option<usize>,
    },

    /// Graph-structured data.
    Graph {
        /// Free-form label for the node type (format not defined in this file).
        node_type: String,
        /// Free-form label for the edge type (format not defined in this file).
        edge_type: String,
        /// Number of nodes, or `None` when it is not known.
        node_count: Option<usize>,
    },

    /// Geospatial data.
    Geospatial {
        /// Free-form identifier of the coordinate system (format not defined
        /// in this file).
        coordinate_system: String,
        /// Free-form identifier of the geometry kind (format not defined in
        /// this file).
        geometry_type: String,
    },

    /// Multi-dimensional numeric data.
    Matrix {
        /// Extent of each dimension; the element count is their product.
        dimensions: SmallVec<[usize; 4]>,
        /// Element type of the matrix.
        dtype: NumericDType,
    },

    /// Fallback variant carrying no metadata.
    Generic,
}
64
/// Primitive numeric element types.
///
/// The variant name encodes the kind (`F` float, `I` signed integer,
/// `U` unsigned integer) followed by the width in bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum NumericDType {
    /// 64-bit floating point (8 bytes).
    F64,
    /// 32-bit floating point (4 bytes).
    F32,
    /// 64-bit signed integer (8 bytes).
    I64,
    /// 32-bit signed integer (4 bytes).
    I32,
    /// 16-bit signed integer (2 bytes).
    I16,
    /// 8-bit signed integer (1 byte).
    I8,
    /// 64-bit unsigned integer (8 bytes).
    U64,
    /// 32-bit unsigned integer (4 bytes).
    U32,
    /// 16-bit unsigned integer (2 bytes).
    U16,
    /// 8-bit unsigned integer (1 byte).
    U8,
}
89
/// Metadata describing a single column of a `SemanticType::Table`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ColumnMeta {
    /// Column name.
    pub name: String,
    /// Type of the values stored in the column.
    pub dtype: ColumnType,
    /// Whether the column is declared nullable.
    pub nullable: bool,
}
100
/// Value type of a table column.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ColumnType {
    /// Numeric column with the given primitive element type.
    Numeric(NumericDType),
    /// Text column.
    String,
    /// Boolean column.
    Boolean,
    /// Timestamp column (representation and unit are not defined in this file).
    Timestamp,
    /// JSON-typed column.
    Json,
    /// Array column whose element type is the boxed `ColumnType`; boxing is
    /// what allows the type to refer to itself.
    Array(Box<ColumnType>),
}
117
/// A primary semantic type plus optional secondary types and processing hints.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SemanticMeta {
    /// Primary classification; this is the type consulted when a processing
    /// strategy is selected.
    pub semantic_type: SemanticType,
    /// Additional classifications; up to two are stored inline before the
    /// `SmallVec` spills to the heap.
    pub secondary_types: SmallVec<[SemanticType; 2]>,
    /// Caller-supplied preferences for how the data should be processed.
    pub hints: ProcessingHints,
}
128
/// Preferences that steer how data is processed.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProcessingHints {
    /// Request SIMD processing; strategy selection honours it only when the
    /// semantic type is SIMD-friendly.
    pub prefer_simd: bool,
    /// Request GPU processing; strategy selection checks this flag first.
    pub prefer_gpu: bool,
    /// Request parallel processing (not read by any code in this file).
    pub prefer_parallel: bool,
    /// Expected access pattern (not read by any code in this file).
    pub access_pattern: AccessPattern,
    /// Requested compression level (not read by any code in this file).
    pub compression_hint: CompressionHint,
}
143
144#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
146pub enum AccessPattern {
147 Sequential,
149 Random,
151 Streaming,
153}
154
155#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
157pub enum CompressionHint {
158 None,
160 Fast,
162 Balanced,
164 Maximum,
166}
167
168impl SemanticType {
169 pub fn numeric_dtype(&self) -> Option<NumericDType> {
171 match self {
172 Self::NumericArray { dtype, .. } => Some(*dtype),
173 Self::Matrix { dtype, .. } => Some(*dtype),
174 _ => None,
175 }
176 }
177
178 pub fn is_simd_friendly(&self) -> bool {
180 matches!(self, Self::NumericArray { .. } | Self::Matrix { .. })
181 }
182
183 pub fn is_columnar(&self) -> bool {
185 matches!(self, Self::Table { .. } | Self::TimeSeries { .. })
186 }
187
188 pub fn size_hint(&self) -> Option<usize> {
190 match self {
191 Self::NumericArray {
192 dtype,
193 length: Some(len),
194 } => Some(len * dtype.size()),
195 Self::Table {
196 row_count: Some(rows),
197 columns,
198 } => {
199 Some(rows * columns.len() * 8) }
201 Self::Matrix { dimensions, dtype } => {
202 Some(dimensions.iter().product::<usize>() * dtype.size())
203 }
204 _ => None,
205 }
206 }
207}
208
209impl NumericDType {
210 pub fn size(self) -> usize {
212 match self {
213 Self::F64 | Self::I64 | Self::U64 => 8,
214 Self::F32 | Self::I32 | Self::U32 => 4,
215 Self::I16 | Self::U16 => 2,
216 Self::I8 | Self::U8 => 1,
217 }
218 }
219
220 pub fn is_float(self) -> bool {
222 matches!(self, Self::F32 | Self::F64)
223 }
224
225 pub fn is_signed(self) -> bool {
227 matches!(
228 self,
229 Self::I8 | Self::I16 | Self::I32 | Self::I64 | Self::F32 | Self::F64
230 )
231 }
232}
233
234impl Default for ProcessingHints {
235 fn default() -> Self {
236 Self {
237 prefer_simd: false,
238 prefer_gpu: false,
239 prefer_parallel: true,
240 access_pattern: AccessPattern::Sequential,
241 compression_hint: CompressionHint::Balanced,
242 }
243 }
244}
245
246impl SemanticMeta {
247 pub fn new(semantic_type: SemanticType) -> Self {
249 Self {
250 semantic_type,
251 secondary_types: SmallVec::new(),
252 hints: ProcessingHints::default(),
253 }
254 }
255
256 pub fn with_hints(semantic_type: SemanticType, hints: ProcessingHints) -> Self {
258 Self {
259 semantic_type,
260 secondary_types: SmallVec::new(),
261 hints,
262 }
263 }
264
265 pub fn with_secondary(mut self, secondary_type: SemanticType) -> Self {
267 self.secondary_types.push(secondary_type);
268 self
269 }
270
271 pub fn processing_strategy(&self) -> ProcessingStrategy {
273 if self.hints.prefer_gpu {
275 return ProcessingStrategy::Gpu;
276 }
277
278 if self.hints.prefer_simd && self.semantic_type.is_simd_friendly() {
279 return ProcessingStrategy::Simd;
280 }
281
282 match &self.semantic_type {
284 SemanticType::NumericArray {
285 length: Some(len), ..
286 } if *len > 1000 => ProcessingStrategy::Simd,
287 SemanticType::Table {
288 row_count: Some(rows),
289 ..
290 } if *rows > 10000 => ProcessingStrategy::Columnar,
291 SemanticType::TimeSeries { .. } => ProcessingStrategy::Streaming,
292 _ => ProcessingStrategy::Generic,
293 }
294 }
295}
296
/// Execution path selected by `SemanticMeta::processing_strategy`.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so strategies can be
/// used as keys in hash maps and sets (for example, dispatch tables).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ProcessingStrategy {
    /// Selected when SIMD is requested for a SIMD-friendly type, or for a
    /// numeric array with more than 1000 known elements.
    Simd,
    /// Selected whenever the hints request the GPU.
    Gpu,
    /// Selected for a table with more than 10000 known rows.
    Columnar,
    /// Selected for time series.
    Streaming,
    /// Fallback when no other strategy applies.
    Generic,
}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// A numeric array reports its dtype, is SIMD-friendly and sizes itself
    /// as element count times element width.
    #[test]
    fn test_semantic_type_creation() {
        let dtype = NumericDType::F64;
        let array = SemanticType::NumericArray {
            dtype,
            length: Some(1000),
        };

        assert!(array.is_simd_friendly());
        assert_eq!(array.numeric_dtype(), Some(dtype));
        // 1000 elements * 8 bytes per f64.
        assert_eq!(array.size_hint(), Some(8000));
    }

    /// With default hints, a numeric array of more than 1000 elements is
    /// routed to the SIMD strategy.
    #[test]
    fn test_processing_strategy() {
        let array = SemanticType::NumericArray {
            dtype: NumericDType::F32,
            length: Some(2000),
        };
        let strategy = SemanticMeta::new(array).processing_strategy();

        assert_eq!(strategy, ProcessingStrategy::Simd);
    }

    /// Column metadata keeps the values it was built with.
    #[test]
    fn test_column_meta() {
        let ColumnMeta { name, nullable, .. } = ColumnMeta {
            name: String::from("value"),
            dtype: ColumnType::Numeric(NumericDType::F64),
            nullable: false,
        };

        assert_eq!(name, "value");
        assert!(!nullable);
    }
}