1use std::collections::HashMap;
2
3use super::{DataFrame, Value};
4
5pub trait GGData {
7 fn into_dataframe(self) -> DataFrame;
8}
9
/// Converts one polars cell into a [`Value`].
///
/// Mapping:
/// - floating point numbers become `Value::Float`;
/// - signed and unsigned integers become `Value::Integer`, except `u64`
///   values above `i64::MAX`, which fall back to `Value::Float` instead of
///   wrapping around to a negative integer;
/// - booleans become `Value::Bool`, strings become `Value::Str`, nulls become
///   `Value::Na`;
/// - `Date` (days since the epoch) and `Datetime` become `Value::DateTime`
///   holding whole seconds since the epoch; the time zone is ignored;
/// - `Duration` is passed through as its raw integer count;
/// - `Time` (nanoseconds since midnight) becomes whole seconds as
///   `Value::Integer`;
/// - every other variant is rendered with its `Debug` representation.
#[cfg(feature = "polars")]
fn polars_anyvalue_to_value(v: polars::datatypes::AnyValue) -> Value {
    use polars::datatypes::{AnyValue, TimeUnit};

    match v {
        AnyValue::Float64(f) => Value::Float(f),
        AnyValue::Float32(f) => Value::Float(f64::from(f)),
        AnyValue::Int64(i) => Value::Integer(i),
        AnyValue::Int32(i) => Value::Integer(i64::from(i)),
        AnyValue::Int16(i) => Value::Integer(i64::from(i)),
        AnyValue::Int8(i) => Value::Integer(i64::from(i)),
        // A plain `as i64` would wrap values above i64::MAX to negative
        // numbers; keep the magnitude by falling back to a float instead.
        AnyValue::UInt64(i) => i64::try_from(i).map_or(Value::Float(i as f64), Value::Integer),
        AnyValue::UInt32(i) => Value::Integer(i64::from(i)),
        AnyValue::UInt16(i) => Value::Integer(i64::from(i)),
        AnyValue::UInt8(i) => Value::Integer(i64::from(i)),
        AnyValue::Boolean(b) => Value::Bool(b),
        AnyValue::String(s) => Value::Str(s.to_string()),
        AnyValue::StringOwned(s) => Value::Str(s.to_string()),
        AnyValue::Null => Value::Na,
        AnyValue::Date(days) => Value::DateTime(i64::from(days) * 86_400),
        AnyValue::Datetime(ticks, unit, _) => {
            // The raw count is expressed in the column's own time unit, so the
            // divisor has to follow it rather than assume microseconds.
            let ticks_per_second: i64 = match unit {
                TimeUnit::Nanoseconds => 1_000_000_000,
                TimeUnit::Microseconds => 1_000_000,
                TimeUnit::Milliseconds => 1_000,
            };
            // Floor division keeps pre-1970 instants in the correct second.
            Value::DateTime(ticks.div_euclid(ticks_per_second))
        }
        // NOTE(review): the count is forwarded in the column's own time unit
        // (the second field is ignored) - confirm callers expect that.
        AnyValue::Duration(count, _) => Value::Integer(count),
        AnyValue::Time(ns) => Value::Integer(ns / 1_000_000_000),
        other => Value::Str(format!("{:?}", other)),
    }
}
36
/// Copies a polars frame column by column, converting every cell with
/// `polars_anyvalue_to_value`.
#[cfg(feature = "polars")]
impl GGData for polars::frame::DataFrame {
    fn into_dataframe(self) -> DataFrame {
        let mut out = DataFrame::new();
        for series in self.get_columns() {
            let height = series.len();
            let mut cells: Vec<Value> = Vec::with_capacity(height);
            for row in 0..height {
                // `row` is always below `height`, so the lookup is in bounds.
                cells.push(polars_anyvalue_to_value(series.get(row).unwrap()));
            }
            out.add_column(series.name().to_string(), cells);
        }
        out
    }
}
52
/// Converts an arrow array into one [`Value`] per element.
///
/// Null slots become `Value::Na`. Floats, integers, booleans and UTF-8 strings
/// map to the matching `Value` variant. `u64` values above `i64::MAX` fall
/// back to `Value::Float` instead of wrapping to a negative integer.
/// `Date32`, `Date64` and `Timestamp` arrays become `Value::DateTime` holding
/// whole seconds since the epoch (floor division; the timestamp time zone is
/// ignored). Any other data type is rendered through
/// `arrow::util::display::array_value_to_string`, with an empty string if
/// formatting fails.
#[cfg(feature = "arrow")]
fn arrow_array_to_values(array: &dyn arrow::array::Array) -> Vec<Value> {
    use arrow::array::{
        Array, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
        Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray,
        TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
        TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
    };
    use arrow::datatypes::{DataType, TimeUnit};

    let n = array.len();

    // Downcasts `array` to the concrete type `$ty` and maps every non-null
    // element through `$f`.
    macro_rules! map_col {
        ($ty:ty, $f:expr) => {{
            let typed = array
                .as_any()
                .downcast_ref::<$ty>()
                .expect("concrete array type must match data_type()");
            (0..n)
                .map(|i| {
                    if typed.is_null(i) {
                        Value::Na
                    } else {
                        $f(typed.value(i))
                    }
                })
                .collect()
        }};
    }

    match array.data_type() {
        DataType::Float64 => map_col!(Float64Array, Value::Float),
        DataType::Float32 => map_col!(Float32Array, |v: f32| Value::Float(f64::from(v))),
        DataType::Int64 => map_col!(Int64Array, Value::Integer),
        DataType::Int32 => map_col!(Int32Array, |v: i32| Value::Integer(i64::from(v))),
        DataType::Int16 => map_col!(Int16Array, |v: i16| Value::Integer(i64::from(v))),
        DataType::Int8 => map_col!(Int8Array, |v: i8| Value::Integer(i64::from(v))),
        // A plain `as i64` would wrap values above i64::MAX to negative
        // numbers; keep the magnitude by falling back to a float instead.
        DataType::UInt64 => map_col!(UInt64Array, |v: u64| i64::try_from(v)
            .map_or(Value::Float(v as f64), Value::Integer)),
        DataType::UInt32 => map_col!(UInt32Array, |v: u32| Value::Integer(i64::from(v))),
        DataType::UInt16 => map_col!(UInt16Array, |v: u16| Value::Integer(i64::from(v))),
        DataType::UInt8 => map_col!(UInt8Array, |v: u8| Value::Integer(i64::from(v))),
        DataType::Boolean => map_col!(BooleanArray, Value::Bool),
        DataType::Utf8 => map_col!(StringArray, |v: &str| Value::Str(v.to_string())),
        DataType::LargeUtf8 => map_col!(LargeStringArray, |v: &str| Value::Str(v.to_string())),
        // Date32 counts days since the epoch.
        DataType::Date32 => {
            map_col!(Date32Array, |v: i32| Value::DateTime(i64::from(v) * 86_400))
        }
        // Date64 counts milliseconds since the epoch. Floor division keeps
        // pre-1970 instants in the correct second.
        DataType::Date64 => {
            map_col!(Date64Array, |v: i64| Value::DateTime(v.div_euclid(1_000)))
        }
        DataType::Timestamp(unit, _) => match unit {
            TimeUnit::Second => map_col!(TimestampSecondArray, Value::DateTime),
            TimeUnit::Millisecond => {
                map_col!(TimestampMillisecondArray, |v: i64| Value::DateTime(
                    v.div_euclid(1_000)
                ))
            }
            TimeUnit::Microsecond => {
                map_col!(TimestampMicrosecondArray, |v: i64| Value::DateTime(
                    v.div_euclid(1_000_000)
                ))
            }
            TimeUnit::Nanosecond => {
                map_col!(TimestampNanosecondArray, |v: i64| Value::DateTime(
                    v.div_euclid(1_000_000_000)
                ))
            }
        },
        // Unsupported types are kept as their display string.
        _ => (0..n)
            .map(|i| {
                if array.is_null(i) {
                    Value::Na
                } else {
                    Value::Str(
                        arrow::util::display::array_value_to_string(array, i).unwrap_or_default(),
                    )
                }
            })
            .collect(),
    }
}
133
/// Copies a record batch column by column, naming each column after its
/// schema field and converting its cells with `arrow_array_to_values`.
#[cfg(feature = "arrow")]
impl GGData for arrow::record_batch::RecordBatch {
    fn into_dataframe(self) -> DataFrame {
        let schema = self.schema();
        let mut out = DataFrame::new();
        // Fields and columns are stored in the same order, so pair them up
        // instead of indexing.
        for (field, array) in schema.fields().iter().zip(self.columns()) {
            let cells = arrow_array_to_values(array.as_ref());
            out.add_column(field.name().to_string(), cells);
        }
        out
    }
}
150
151impl GGData for Vec<HashMap<String, Value>> {
153 fn into_dataframe(self) -> DataFrame {
154 if self.is_empty() {
155 return DataFrame::new();
156 }
157
158 let mut col_names: Vec<String> = Vec::new();
160 for row in &self {
161 for key in row.keys() {
162 if !col_names.contains(key) {
163 col_names.push(key.clone());
164 }
165 }
166 }
167
168 let mut df = DataFrame::new();
169 for name in &col_names {
170 let values: Vec<Value> = self
171 .iter()
172 .map(|row| row.get(name).cloned().unwrap_or(Value::Na))
173 .collect();
174 df.add_column(name.clone(), values);
175 }
176 df
177 }
178}
179
180impl GGData for Vec<(String, Vec<Value>)> {
182 fn into_dataframe(self) -> DataFrame {
183 let mut df = DataFrame::new();
184 for (name, values) in self {
185 df.add_column(name, values);
186 }
187 df
188 }
189}
190
191impl GGData for DataFrame {
193 fn into_dataframe(self) -> DataFrame {
194 self
195 }
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    /// Two row maps with keys `x` and `y` give a two-row frame with both columns.
    #[test]
    fn test_from_hashmap_vec() {
        let point = |x: f64, y: f64| -> HashMap<String, Value> {
            HashMap::from([
                ("x".to_string(), Value::Float(x)),
                ("y".to_string(), Value::Float(y)),
            ])
        };
        let rows = vec![point(1.0, 2.0), point(3.0, 4.0)];

        let frame = rows.into_dataframe();
        assert_eq!(frame.nrows(), 2);
        for name in ["x", "y"] {
            assert!(frame.has_column(name));
        }
    }

    /// A three-column record batch keeps its shape, maps nulls to `Value::Na`
    /// and converts integers and strings to the matching variants.
    #[cfg(feature = "arrow")]
    #[test]
    fn test_from_arrow_record_batch() {
        use arrow::array::{Float64Array, Int64Array, StringArray};
        use arrow::record_batch::RecordBatch;
        use std::sync::Arc;

        let xs = Float64Array::from(vec![Some(1.0), None, Some(3.0)]);
        let ns = Int64Array::from(vec![10, 20, 30]);
        let gs = StringArray::from(vec!["a", "b", "c"]);

        let batch = RecordBatch::try_from_iter(vec![
            ("x", Arc::new(xs) as _),
            ("n", Arc::new(ns) as _),
            ("g", Arc::new(gs) as _),
        ])
        .unwrap();

        let frame = batch.into_dataframe();
        assert_eq!(frame.nrows(), 3);
        assert_eq!(frame.ncols(), 3);
        for name in ["x", "n", "g"] {
            assert!(frame.has_column(name));
        }
        assert_eq!(frame.column("x").unwrap()[1], Value::Na);
        assert_eq!(frame.column("n").unwrap()[0], Value::Integer(10));
        assert_eq!(frame.column("g").unwrap()[2], Value::Str("c".to_string()));
    }

    /// Two named columns of two cells each give a 2x2 frame.
    #[test]
    fn test_from_column_oriented() {
        let float_column = |name: &str, a: f64, b: f64| -> (String, Vec<Value>) {
            (name.to_string(), vec![Value::Float(a), Value::Float(b)])
        };
        let columns = vec![float_column("x", 1.0, 2.0), float_column("y", 3.0, 4.0)];

        let frame = columns.into_dataframe();
        assert_eq!(frame.nrows(), 2);
        assert_eq!(frame.ncols(), 2);
    }
}