anyvalue_dataframe/
lib.rs

1#![doc(html_root_url = "https://docs.rs/anyvalue-dataframe/0.1.2")]
2//! anyvalue dataframe
3//!
4
5use std::error::Error;
6use polars::prelude::{DataFrame, AnyValue, Schema, Field, DataType};
7
/// Extracts the primitive payload of an `AnyValue` for a given `DataType`.
///
/// `from_any!(value, DataType::X)` expands to a `match` on `value` that
/// yields the payload of `AnyValue::X`, or a type-appropriate default
/// (`0`, `0.0`, `""`, `false` or an empty `Vec<u8>`) when `value` holds
/// any other variant.
///
/// - The dtype must be written literally as `DataType::X`; it is matched
///   as tokens, not evaluated.
/// - A dtype without a dedicated arm (including `Null` and `Unknown`)
///   expands to `0i64` and the value expression is not evaluated.
/// - `AnyValue` is resolved at the call site, so it must be in scope there.
///
/// ```ignore
/// let i = from_any!(AnyValue::Int64(7), DataType::Int64); // 7
/// let s = from_any!(AnyValue::Null, DataType::Utf8); // ""
/// ```
#[macro_export]
macro_rules! from_any {
  ($a: expr, DataType::Int64) => {
    match $a { AnyValue::Int64(i) => i, _ => 0 }
  };
  ($a: expr, DataType::Int32) => {
    match $a { AnyValue::Int32(i) => i, _ => 0 }
  };
  ($a: expr, DataType::Int16) => {
    match $a { AnyValue::Int16(i) => i, _ => 0 }
  };
  ($a: expr, DataType::Int8) => {
    match $a { AnyValue::Int8(i) => i, _ => 0 }
  };
  ($a: expr, DataType::UInt64) => {
    match $a { AnyValue::UInt64(u) => u, _ => 0 }
  };
  ($a: expr, DataType::UInt32) => {
    match $a { AnyValue::UInt32(u) => u, _ => 0 }
  };
  ($a: expr, DataType::UInt16) => {
    match $a { AnyValue::UInt16(u) => u, _ => 0 }
  };
  ($a: expr, DataType::UInt8) => {
    match $a { AnyValue::UInt8(u) => u, _ => 0 }
  };
  ($a: expr, DataType::Float64) => {
    match $a { AnyValue::Float64(f) => f, _ => 0.0 }
  };
  ($a: expr, DataType::Float32) => {
    match $a { AnyValue::Float32(f) => f, _ => 0.0 }
  };
  ($a: expr, DataType::Utf8) => { // polars 0.25.1
    match $a { AnyValue::Utf8(s) => s, _ => "" }
  };
  ($a: expr, DataType::String) => { // polars latest
    // The fallback must have the same type as the matched payload `s`;
    // a `String` here would not unify with a borrowed `&str` payload.
    // NOTE(review): assumes AnyValue::String carries `&str` - confirm
    // against the pinned polars version.
    match $a { AnyValue::String(s) => s, _ => "" }
  };
  ($a: expr, DataType::Boolean) => {
    match $a { AnyValue::Boolean(b) => b, _ => false }
  };
  // The binary arms match on a reference so that the owned and the
  // borrowed variant can both be copied out as a `Vec<u8>`.
  ($a: expr, DataType::BinaryOwned) => {
    match &$a {
    AnyValue::BinaryOwned(u) => u.clone(),
    AnyValue::Binary(u) => u.to_vec(),
    _ => vec![]
    }
  };
  ($a: expr, DataType::Binary) => {
    match &$a {
    AnyValue::Binary(u) => u.to_vec(),
    AnyValue::BinaryOwned(u) => u.clone(),
    _ => vec![]
    }
  };
  ($a: expr, DataType::Null) => { 0i64 }; // or None must check later
  ($a: expr, DataType::Unknown) => { 0i64 }; // or None must check later
  // catch-all for every dtype without a dedicated arm above
  ($a: expr, DataType:: $t: ident) => { 0i64 } // or None must check later
}
68// pub from_any;
69
/// Wraps a primitive value into the `AnyValue` variant named by a `DataType`.
///
/// `to_any!(value, DataType::X)` expands to `AnyValue::X(value)`.
/// `DataType::Null` expands to `AnyValue::Null`; the value expression is
/// dropped without being evaluated. `AnyValue` must be in scope at the
/// call site.
///
/// ```ignore
/// let a = to_any!(3, DataType::UInt64);
/// let b = to_any!("X", DataType::Utf8);
/// ```
#[macro_export]
macro_rules! to_any {
  // Null has no payload, so it needs its own arm ahead of the generic one
  ($value: expr, DataType::Null) => { AnyValue::Null };
  // Not handled specially here:
  // Date: feature dtype-date
  // Time: feature dtype-date
  // DataType:: DateTime, Duration, Categorical, List, Object, Struct
  //   feature dtype-datetime -duration -categorical -array
  // AnyValue:: Enum, Array, Decimal, xxxOwned, etc
  ($value: expr, DataType::$variant: ident) => { AnyValue::$variant($value) };
}
83// pub to_any;
84
85/// row schema from vec AnyValue (column names are column_0, column_1, ...)
86/// - let schema = Schema::from(&row);
87pub fn row_schema(row: Vec<AnyValue<'_>>) -> polars::frame::row::Row {
88  polars::frame::row::Row::new(row)
89}
90
91/// row fields from vec (&amp;str, DataType) (set with column names)
92/// - let schema = Schema::from_iter(fields);
93pub fn row_fields(desc: Vec<(&str, DataType)>) -> Vec<Field> {
94  desc.into_iter().map(|(s, t)| Field::new(s, t)).collect()
95}
96
97/// named fields from DataFrame
98pub fn named_fields(df: &DataFrame, n: Vec<&str>) -> Vec<Field> {
99  let t = df.dtypes();
100  row_fields(n.into_iter().enumerate().map(|(i, s)|
101    (s, t[i].clone())).collect())
102}
103
104/// named schema from DataFrame
105/// - same as df.schema() after column names are set by df.set_column_names()
106/// - otherwise df.schema() returns names as column_0, column_1, ...
107pub fn named_schema(df: &DataFrame, n: Vec<&str>) -> Schema {
108  Schema::from_iter(named_fields(&df, n))
109}
110
111/// DataFrame from Vec&lt;polars::frame::row::Row&gt; and field names
112pub fn df_from_vec(rows: &Vec<polars::frame::row::Row>, n: &Vec<&str>) ->
113  Result<DataFrame, Box<dyn Error>> {
114  let schema = Schema::from(&rows[0]);
115  let mut df = DataFrame::from_rows_iter_and_schema(rows.iter(), &schema)?;
116  df.set_column_names(&n)?;
117  Ok(df)
118}
119
/// tests
#[cfg(test)]
mod tests {
  use super::*;

  // Builds a value with to_any! and checks both the resulting variant
  // and the dtype it reports.
  macro_rules! check_to_any {
    ($v: expr, $t: ident) => {{
      let any = to_any!($v, DataType::$t);
      assert_eq!(any, AnyValue::$t($v));
      assert_eq!(any.dtype(), DataType::$t);
    }};
  }

  /// [-- --nocapture] [-- --show-output]
  #[test]
  fn test_dataframe() {
    check_to_any!(3, UInt64);
    check_to_any!("A", Utf8);
    check_to_any!(4, Int8);
    check_to_any!(1.5, Float64);
    check_to_any!(true, Boolean);
    check_to_any!(&[255, 0], Binary);
  }
}