Skip to main content

zelos_trace_types/
data_type.rs

1#[cfg(feature = "duckdb")]
2use anyhow::{Result, anyhow};
3#[cfg(feature = "duckdb")]
4use base64::prelude::*;
5#[cfg(feature = "duckdb")]
6use chrono::DateTime;
7#[cfg(feature = "datafusion")]
8use datafusion::arrow::datatypes::TimeUnit as ArrowTimeUnit;
9#[cfg(feature = "datafusion")]
10use datafusion::common::arrow::datatypes::DataType as ArrowDataType;
11#[cfg(feature = "duckdb")]
12use duckdb::ToSql;
13use serde_enum_str::{Deserialize_enum_str, Serialize_enum_str};
14#[cfg(feature = "duckdb")]
15use serde_json::Value;
16#[cfg(feature = "ts-rs")]
17use ts_rs::TS;
18
/// The set of value types a signal/column can carry.
///
/// Values of this enum are (de)serialized as strings via the `serde_enum_str`
/// derives. The string for each variant is its lowercase name
/// (`rename_all = "lowercase"`), unless a variant carries its own
/// `rename`/`alias` attribute, as noted on the variants below.
///
/// When the `ts-rs` feature is enabled the type additionally derives `TS`.
// Hash is required for the Python bindings
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize_enum_str, Serialize_enum_str)]
#[serde(rename_all = "lowercase")]
#[cfg_attr(feature = "ts-rs", derive(TS))]
pub enum DataType {
    /// 8-bit signed integer.
    Int8,
    /// 16-bit signed integer.
    Int16,
    /// 32-bit signed integer.
    Int32,
    /// 64-bit signed integer.
    Int64,
    /// 8-bit unsigned integer.
    UInt8,
    /// 16-bit unsigned integer.
    UInt16,
    /// 32-bit unsigned integer.
    UInt32,
    /// 64-bit unsigned integer.
    UInt64,
    /// 32-bit float; `"float"` is declared as an additional alias.
    #[serde(alias = "float")]
    Float32,
    /// 64-bit float; `"double"` is declared as an additional alias.
    #[serde(alias = "double")]
    Float64,
    /// Nanosecond-resolution timestamp, named `"timestamp[ns]"` on the wire.
    #[serde(rename = "timestamp[ns]")]
    TimestampNs,
    /// Binary, as base64-encoded string
    Binary,
    /// Text value.
    String,
    /// Boolean value, named `"bool"` on the wire.
    #[serde(rename = "bool")]
    Boolean,
}
44
45impl DataType {
46    pub fn is_numeric(&self) -> bool {
47        match self {
48            DataType::Int8 => true,
49            DataType::Int16 => true,
50            DataType::Int32 => true,
51            DataType::Int64 => true,
52            DataType::UInt8 => true,
53            DataType::UInt16 => true,
54            DataType::UInt32 => true,
55            DataType::UInt64 => true,
56            DataType::Float32 => true,
57            DataType::Float64 => true,
58            DataType::TimestampNs => false,
59            DataType::Binary => false,
60            DataType::String => false,
61            DataType::Boolean => true,
62        }
63    }
64}
65
#[cfg(feature = "datafusion")]
impl DataType {
    /// Returns the Arrow data type that corresponds to this type.
    ///
    /// Integer, float, binary and boolean variants map to the Arrow variant of
    /// the same name, [`DataType::String`] maps to `Utf8`, and
    /// [`DataType::TimestampNs`] maps to a nanosecond `Timestamp` tagged with
    /// the `"UTC"` time zone.
    pub fn as_arrow(&self) -> ArrowDataType {
        use ArrowDataType as Arrow;

        match *self {
            Self::Int8 => Arrow::Int8,
            Self::Int16 => Arrow::Int16,
            Self::Int32 => Arrow::Int32,
            Self::Int64 => Arrow::Int64,
            Self::UInt8 => Arrow::UInt8,
            Self::UInt16 => Arrow::UInt16,
            Self::UInt32 => Arrow::UInt32,
            Self::UInt64 => Arrow::UInt64,
            Self::Float32 => Arrow::Float32,
            Self::Float64 => Arrow::Float64,
            Self::TimestampNs => Arrow::Timestamp(ArrowTimeUnit::Nanosecond, Some("UTC".into())),
            Self::Binary => Arrow::Binary,
            Self::String => Arrow::Utf8,
            Self::Boolean => Arrow::Boolean,
        }
    }
}
89
#[cfg(feature = "duckdb")]
impl DataType {
    /// Maps a DuckDB column type name onto the matching [`DataType`].
    ///
    /// Only the exact upper-case names listed in the match below are
    /// recognised; this is the inverse of [`DataType::to_duckdb_type`].
    ///
    /// # Errors
    ///
    /// Returns an error that names the offending type when `value` is not one
    /// of the recognised names.
    // `&String` is kept (instead of `&str`) so the signature stays identical
    // for existing callers, including any that pass this function by path.
    #[allow(clippy::ptr_arg)]
    pub fn from_duckdb_type(value: &String) -> Result<DataType> {
        match value.as_str() {
            "TINYINT" => Ok(DataType::Int8),
            "SMALLINT" => Ok(DataType::Int16),
            "INTEGER" => Ok(DataType::Int32),
            "BIGINT" => Ok(DataType::Int64),
            "UTINYINT" => Ok(DataType::UInt8),
            "USMALLINT" => Ok(DataType::UInt16),
            "UINTEGER" => Ok(DataType::UInt32),
            "UBIGINT" => Ok(DataType::UInt64),
            "FLOAT" => Ok(DataType::Float32),
            "DOUBLE" => Ok(DataType::Float64),
            "TIMESTAMP_NS" => Ok(DataType::TimestampNs),
            "BLOB" => Ok(DataType::Binary),
            "VARCHAR" => Ok(DataType::String),
            "BOOLEAN" => Ok(DataType::Boolean),
            // Include the unknown name so the failure can be diagnosed.
            other => Err(anyhow!("Could not convert type: {other}")),
        }
    }

    /// Converts a JSON `value` into a boxed DuckDB SQL parameter, using `self`
    /// as the target column type.
    ///
    /// * JSON strings are base64-decoded into bytes for [`DataType::Binary`]
    ///   and passed through as text for every other type.
    /// * JSON numbers become a `DateTime` built from nanoseconds for
    ///   [`DataType::TimestampNs`]; for every other type they are passed as
    ///   their textual rendering.
    /// * JSON `null` becomes a DuckDB `Null`.
    /// * Any other JSON value (booleans, arrays, objects) is passed as its
    ///   textual rendering.
    ///
    /// # Errors
    ///
    /// Fails when a [`DataType::Binary`] string is not valid standard base64,
    /// or when a [`DataType::TimestampNs`] number cannot be represented as
    /// an `i64`.
    pub fn to_sql(&self, value: &Value) -> Result<Box<dyn ToSql>> {
        match (value, self) {
            (Value::String(encoded), DataType::Binary) => {
                let decoded = BASE64_STANDARD.decode(encoded)?;
                Ok(Box::new(decoded))
            }
            (Value::String(text), _) => Ok(Box::new(text.clone())),
            (Value::Number(n), DataType::TimestampNs) => {
                // `ok_or_else` so the error is only built on the failure path.
                let ts = n
                    .as_i64()
                    .ok_or_else(|| anyhow!("Could not convert time to i64"))?;
                Ok(Box::new(DateTime::from_timestamp_nanos(ts)))
            }
            (Value::Number(n), _) => Ok(Box::new(n.to_string())),
            (Value::Null, _) => Ok(Box::new(duckdb::types::Value::Null)),
            (other, _) => Ok(Box::new(other.to_string())),
        }
    }

    /// Returns the DuckDB column type name for this type.
    ///
    /// This is the inverse of [`DataType::from_duckdb_type`].
    pub fn to_duckdb_type(&self) -> &'static str {
        match self {
            DataType::Int8 => "TINYINT",
            DataType::Int16 => "SMALLINT",
            DataType::Int32 => "INTEGER",
            DataType::Int64 => "BIGINT",
            DataType::UInt8 => "UTINYINT",
            DataType::UInt16 => "USMALLINT",
            DataType::UInt32 => "UINTEGER",
            DataType::UInt64 => "UBIGINT",
            DataType::Float32 => "FLOAT",
            DataType::Float64 => "DOUBLE",
            DataType::TimestampNs => "TIMESTAMP_NS",
            DataType::Binary => "BLOB",
            DataType::String => "VARCHAR",
            DataType::Boolean => "BOOLEAN",
        }
    }
}