trs_dataframe/
dataframe.rs

1use column_store::sorted_df::SortedDataFrame;
2use data_value::{DataValue, Extract};
3use halfbrown::HashMap;
4use ndarray::{Array1, Array2, ArrayView1};
5use std::fmt;
6pub mod column_store;
7pub mod index;
8pub mod join;
9pub mod key;
10use crate::{error::Error, CandidateData};
11#[cfg(feature = "python")]
12pub mod python;
13
14#[cfg(feature = "python")]
15use pyo3::prelude::*;
16
17use crate::{
18    dataframe::{column_store::ColumnFrame, join::JoinRelation, key::Key},
19    MLChefMap,
20};
21
/// A count attached to one of two ends, `First` or `Last`.
///
/// NOTE(review): nothing visible in this file consumes `TopN`; the variant meanings below are
/// inferred from the names only — confirm against the call sites.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TopN {
    /// Presumably "take the first `n` entries", where `n` is the wrapped count.
    First(usize),
    /// Presumably "take the last `n` entries", where `n` is the wrapped count.
    Last(usize),
}
27
28/// DataFrame holds information about [`ColumnFrame`].
29/// This is used to store the data and the metadata for the candidates.
30#[derive(Debug, Clone, PartialEq, Default, serde::Serialize, serde::Deserialize)]
31#[cfg_attr(feature = "python", pyclass)]
32pub struct DataFrame {
33    /// Constants for the dataframe - mikro optimization for the data
34    /// Values which is constant for the whole dataframe are stored here
35    pub constants: HashMap<Key, DataValue>,
36    /// Dataframe with the candidates
37    //pub dataframe: Candidates<CandidateItem>,
38    pub dataframe: ColumnFrame,
39    /// Metadata for the dataframe. Here you can store the information about the dataframe
40    pub metadata: HashMap<String, DataValue>,
41}
42
43impl fmt::Display for DataFrame {
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        self.dataframe.fmt(f)
46    }
47}
48
49impl DataFrame {
50    pub fn new<C: Into<ColumnFrame>>(dataframe: C) -> Self {
51        Self {
52            constants: HashMap::new(),
53            dataframe: dataframe.into(),
54            metadata: HashMap::new(),
55        }
56    }
57
58    pub fn shrink(&mut self) {
59        self.dataframe.shrink();
60    }
61
62    pub fn add_metadata(&mut self, key: String, value: DataValue) {
63        self.metadata.insert(key, value);
64    }
65
66    pub fn get_metadata(&self, key: &str) -> Option<&DataValue> {
67        self.metadata.get(key)
68    }
69
70    pub fn join(&mut self, other: Self, join_type: &JoinRelation) -> Result<(), Error> {
71        other.constants.into_iter().for_each(|(key, value)| {
72            self.constants.insert(key, value);
73        });
74        self.dataframe.join(other.dataframe, join_type)
75    }
76
77    pub fn apply_function<F>(&mut self, keys: &[Key], mut func: F) -> Result<(), Error>
78    where
79        F: FnMut(&[Key], &mut ColumnFrame) -> Result<(), Error>,
80    {
81        self.dataframe.apply_function(keys, &mut func)
82    }
83
84    pub fn select(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
85        Ok(self.dataframe.select(keys))
86    }
87
88    pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>> {
89        self.dataframe.select_transposed_typed::<D>(keys)
90    }
91
92    pub fn select_column(&self, key: Key) -> Option<ndarray::ArrayView1<DataValue>> {
93        self.dataframe.select_column(&key)
94    }
95
96    pub fn select_transposed(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
97        self.dataframe.select_transposed(keys)
98    }
99
100    pub fn insert_constant(&mut self, key: Key, value: DataValue) {
101        self.constants.insert(key, value);
102    }
103
104    pub fn push<C: CandidateData>(&mut self, item: C) -> Result<(), Error> {
105        self.dataframe.push(item)
106    }
107
108    pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error> {
109        self.dataframe.remove_column(keys).map(|x| x.into())
110    }
111
112    pub fn extend(&mut self, items: Self) -> Result<(), Error> {
113        self.dataframe.extend(items.dataframe)
114    }
115
116    pub fn len(&self) -> usize {
117        self.dataframe.len()
118    }
119
120    pub fn is_empty(&self) -> bool {
121        self.dataframe.is_empty()
122    }
123
124    pub fn add_single_column<K: Into<Key>>(
125        &mut self,
126        key: K,
127        values: Array1<DataValue>,
128    ) -> Result<(), Error> {
129        self.dataframe.add_single_column(key, values)
130    }
131
132    pub fn get_single_column(&self, key: &Key) -> Option<ArrayView1<DataValue>> {
133        self.dataframe.get_single_column(key)
134    }
135
136    pub fn sorted(&self, key: &Key) -> Result<SortedDataFrame<'_>, Error> {
137        self.dataframe.sorted(key)
138    }
139
140    pub fn filter(&self, filter: &crate::filter::FilterRules) -> Result<Self, Error> {
141        let filtered_df = self.dataframe.filter(filter)?;
142        Ok(Self {
143            constants: self.constants.clone(),
144            dataframe: filtered_df,
145            metadata: self.metadata.clone(),
146        })
147    }
148
149    #[cfg(feature = "polars-df")]
150    pub fn as_polars(&self) -> Result<polars::prelude::DataFrame, Error> {
151        let mut columns = vec![];
152        for key in self.dataframe.keys() {
153            columns.push(polars::prelude::Column::new(
154                key.name().into(),
155                self.dataframe
156                    .get_single_column(key)
157                    .ok_or_else(|| Error::NotFound(key.clone()))?
158                    .into_iter()
159                    .map(|x| into_polars_value(x.clone()))
160                    .collect::<Vec<_>>(),
161            ));
162        }
163
164        Ok(polars::prelude::DataFrame::new(columns)?)
165    }
166}
167
/// Converts a [`DataValue`] into an owned polars `AnyValue`.
///
/// Each scalar variant maps to the polars variant of the same type (`U8` → `UInt8`,
/// `I32` → `Int32`, …), so that [`from_polars_value`] turns the result back into the same
/// `DataValue` variant. `DataValue::Vec` becomes a `List` holding a series named `"v"` whose
/// elements are converted recursively.
///
/// # Panics
/// Panics for `DataValue::EnumNumber`, `DataValue::U128` and `DataValue::Map`, which have no
/// conversion implemented here.
#[cfg(feature = "polars-df")]
pub fn into_polars_value(dv: DataValue) -> polars::prelude::AnyValue<'static> {
    use polars::prelude::AnyValue::*;
    use polars::prelude::NamedFrom;
    match dv {
        DataValue::String(smart_string) => StringOwned(smart_string.as_str().into()),
        DataValue::Bytes(items) => BinaryOwned(items),
        // `U8` must stay 8 bits wide: `from_polars_value` maps `UInt8` back to `U8`, whereas
        // `UInt16` would come back as `U32` and break the round trip.
        DataValue::U8(x) => UInt8(x),
        DataValue::Bool(x) => Boolean(x),
        DataValue::I32(x) => Int32(x),
        DataValue::U32(x) => UInt32(x),
        DataValue::I64(x) => Int64(x),
        DataValue::U64(x) => UInt64(x),
        DataValue::I128(x) => Int128(x),
        DataValue::F32(x) => Float32(x),
        DataValue::F64(x) => Float64(x),
        DataValue::Null => Null,
        DataValue::Vec(data_values) => List(polars::series::Series::new(
            "v".into(),
            data_values
                .into_iter()
                .map(into_polars_value)
                .collect::<Vec<_>>(),
        )),
        DataValue::EnumNumber(_) => {
            unimplemented!("DataValue::EnumNumber is not supported by into_polars_value")
        }
        DataValue::U128(_) => {
            unimplemented!("DataValue::U128 is not supported by into_polars_value")
        }
        DataValue::Map(_) => {
            unimplemented!("DataValue::Map is not supported by into_polars_value")
        }
    }
}
197
/// Converts a polars `AnyValue` into a [`DataValue`].
///
/// `UInt16` is widened to `DataValue::U32`, and `Int8`/`Int16` are widened to `i32` before
/// conversion. Any variant without an arm below is logged with `tracing::warn!` and
/// converted to `DataValue::Null`.
#[cfg(feature = "polars-df")]
pub fn from_polars_value(dv: polars::prelude::AnyValue<'_>) -> DataValue {
    use polars::prelude::AnyValue::*;
    match dv {
        Null => DataValue::Null,

        // Scalars that convert directly through `Into<DataValue>`.
        Boolean(v) => v.into(),
        UInt32(v) => v.into(),
        UInt64(v) => v.into(),
        Int32(v) => v.into(),
        Int64(v) => v.into(),
        Int128(v) => v.into(),
        Float32(v) => v.into(),
        Float64(v) => v.into(),

        // Narrow integers: `UInt8` keeps its width, the others are widened losslessly.
        UInt8(v) => DataValue::U8(v),
        UInt16(v) => DataValue::U32(u32::from(v)),
        Int8(v) => i32::from(v).into(),
        Int16(v) => i32::from(v).into(),

        // Text and binary payloads, borrowed or owned.
        String(v) => DataValue::String(v.into()),
        StringOwned(v) => DataValue::String(v.as_str().into()),
        Binary(v) => DataValue::Bytes(v.to_owned()),
        BinaryOwned(v) => DataValue::Bytes(v),

        // Lists are converted element by element.
        // NOTE: there is no arm for `Array(series, _)`; it falls through to the catch-all.
        List(series) => DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>()),

        e => {
            tracing::warn!("Unsupported polars value: {e:?}");
            DataValue::Null
        }
    }
}
229
230impl From<ColumnFrame> for DataFrame {
231    fn from(dataframe: ColumnFrame) -> Self {
232        Self::new(dataframe)
233    }
234}
235
236impl From<Vec<std::collections::HashMap<Key, DataValue>>> for DataFrame {
237    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
238        Self::new(ColumnFrame::from(dataframe))
239    }
240}
241
242impl From<Vec<HashMap<Key, DataValue>>> for DataFrame {
243    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
244        Self::new(ColumnFrame::from(dataframe))
245    }
246}
247
248impl From<std::collections::HashMap<String, Vec<DataValue>>> for DataFrame {
249    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
250        Self::new(ColumnFrame::from(dataframe))
251    }
252}
253
254impl From<MLChefMap> for DataFrame {
255    fn from(dataframe: MLChefMap) -> Self {
256        Self::new(ColumnFrame::from(dataframe))
257    }
258}
259impl From<Vec<(Key, Vec<DataValue>)>> for DataFrame {
260    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
261        Self::new(ColumnFrame::from(dataframe))
262    }
263}
264
265impl From<std::collections::HashMap<String, Array1<DataValue>>> for DataFrame {
266    fn from(dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
267        Self::new(ColumnFrame::from(dataframe))
268    }
269}
270
/// Builds a `DataFrame` from a polars `DataFrame` by first converting it into a
/// [`ColumnFrame`]. Only available with the `polars-df` feature.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for DataFrame {
    fn from(polars_frame: polars::prelude::DataFrame) -> Self {
        let frame: ColumnFrame = polars_frame.into();
        Self::new(frame)
    }
}
277#[cfg(test)]
278mod test {
279    use crate::filter::FilterRules;
280
281    use super::*;
282    use halfbrown::hashmap;
283    #[cfg(feature = "polars-df")]
284    use polars::prelude::NamedFrom as _;
285    use rstest::*;
286    use tracing_test::traced_test;
287    #[fixture]
288    fn dummy_candidates() -> ColumnFrame {
289        ColumnFrame::from(vec![
290            hashmap! {
291                "key1".into() => 1.into(),
292                "key2".into() => "a".into(),
293            },
294            hashmap! {
295                "key1".into() => 2.into(),
296                "key2".into() => "b".into(),
297            },
298        ])
299    }
300
301    #[rstest]
302    fn test_serde() {
303        let df = crate::df! {
304            "a" => [1u64, 2u64, 3u64],
305            "b" => [4u64, 5u64, 6u64],
306            "c" => [7u64, 8u64, 9u64]
307        };
308
309        let serialized = serde_json::to_string(&df).expect("BUG: Unable to serialize dataframe");
310
311        let deserialized =
312            serde_json::from_str(&serialized).expect("BUG: Unable to deserialize dataframe");
313
314        assert_eq!(df, deserialized);
315    }
316
317    #[cfg(feature = "polars-df")]
318    #[rstest]
319    fn test_polars() {
320        let expected = crate::df! {
321            "a" => [1u64, 2u64, 3u64],
322            "b" => [4f64, 5f64, 6f64],
323            "c" => [7i64, 8i64, 9i64]
324        };
325
326        let polars_df = polars::df!(
327            "a" => [1u64, 2u64, 3u64],
328            "b" => [4f64, 5f64, 6f64],
329            "c" => [7i64, 8i64, 9i64]
330        )
331        .expect("BUG: should be ok");
332        let as_df: DataFrame = polars_df.into();
333        let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into()];
334        assert_eq!(
335            as_df.select(Some(keys.as_slice())),
336            expected.select(Some(keys.as_slice()))
337        );
338    }
339
340    #[cfg(feature = "polars-df")]
341    #[rstest]
342    #[case::str(DataValue::String("test".into()), polars::prelude::AnyValue::String("test".into()))]
343    #[case::u32(DataValue::U32(u32::MAX), polars::prelude::AnyValue::UInt32(u32::MAX))]
344    #[case::i32(DataValue::I32(i32::MIN), polars::prelude::AnyValue::Int32(i32::MIN))]
345    #[case::i64(DataValue::I64(i64::MIN), polars::prelude::AnyValue::Int64(i64::MIN))]
346    #[case::u64(DataValue::U64(u64::MIN), polars::prelude::AnyValue::UInt64(u64::MIN))]
347    #[case::f32(DataValue::F32(f32::MIN), polars::prelude::AnyValue::Float32(f32::MIN))]
348    #[case::f64(DataValue::F64(f64::MIN), polars::prelude::AnyValue::Float64(f64::MIN))]
349    #[case::null(DataValue::Null, polars::prelude::AnyValue::Null)]
350    #[case::i128(
351        DataValue::I128(i128::MIN),
352        polars::prelude::AnyValue::Int128(i128::MIN)
353    )]
354    #[case::u8(DataValue::U8(255), polars::prelude::AnyValue::UInt8(255))]
355    #[case::bool(DataValue::Bool(true), polars::prelude::AnyValue::Boolean(true))]
356    #[case::bytes(DataValue::Bytes("aaaaa".as_bytes().to_vec()), polars::prelude::AnyValue::BinaryOwned("aaaaa".as_bytes().to_vec()))]
357    #[case::vec_uints(DataValue::Vec(vec![DataValue::U32(0), DataValue::U32(1)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::UInt32(1)])))]
358    // polars converts all by first element type
359    //#[case::vec_diff_int(DataValue::Vec(vec![ DataValue::I32(1), DataValue::U32(0)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::Int32(1i32), polars::prelude::AnyValue::UInt32(0u32)])))]
360    //#[case::vec_int_str(DataValue::Vec(vec![DataValue::U32(0), DataValue::String("1".into())]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::StringOwned("1".into())])))]
361    fn into_polars_value_test(
362        #[case] input: DataValue,
363        #[case] output: polars::prelude::AnyValue<'static>,
364    ) {
365        assert_eq!(into_polars_value(input.clone()), output);
366        assert_eq!(from_polars_value(output), input);
367    }
368
369    #[rstest]
370    #[case(
371        DataFrame::new(crate::column_frame! {
372            "a" => [1f64, 2f64, 3f64],
373            "b" => [4i64, 5i64, 6i64],
374            "c" => [7i64, 8i64, 9i64]
375        }),
376        DataFrame::new(crate::column_frame! {
377            "a" => [1f64, 2f64],
378            "b" => [4i64, 5i64],
379            "c" => [7i64, 8i64]
380        }),
381        FilterRules::try_from("a >= 1f64 && (b <= 5 || c <= 8) && b >= 4").expect("BUG: cannot create filter rules"),
382    )]
383    #[traced_test]
384    fn filter_test(
385        #[case] df: DataFrame,
386        #[case] expected: DataFrame,
387        #[case] filter: FilterRules,
388    ) {
389        let filtered = df.filter(&filter).expect("BUG: cannot filter");
390        assert_eq!(filtered, expected);
391    }
392
393    #[rstest]
394    fn test_serde_complex() {
395        let simple = r#"
396{
397    "constants": {},
398    "dataframe": {
399        "index": {
400            "keys": [
401                {
402                    "key": 3162770485,
403                    "name": "a",
404                    "ctype": "U32"
405                },
406                {
407                    "key": 2279056742,
408                    "name": "b",
409                    "ctype": "F64"
410                },
411                {
412                    "key": 2994984227,
413                    "name": "c",
414                    "ctype": "U64"
415                },
416                {
417                    "key": 3319645144,
418                    "name": "d",
419                    "ctype": "F64"
420                },
421                {
422                    "key": 1291847470,
423                    "name": "e",
424                    "ctype": "U32"
425                },
426                {
427                    "key": 874241070,
428                    "name": "f",
429                    "ctype": "Bool"
430                }
431            ],
432            "indexes": {
433                "a": 0,
434                "b": 1,
435                "c": 2,
436                "d": 3,
437                "e": 4,
438                "f": 5
439            },
440            "alias": {}
441        },
442        "data_frame": {
443            "v": 1,
444            "dim": [
445                2,
446                6
447            ],
448            "data": [
449                253780,
450                0.009369421750307085,
451                1633222860381359,
452                8,
453                5,
454                true,
455                64512,
456                0.003391335718333721,
457                1633222860810557,
458                8,
459                5,
460                null
461            ]
462        }
463    },
464    "metadata": {}
465}
466        "#;
467
468        let simple_deserialized: DataFrame =
469            serde_json::from_str(simple).expect("BUG: Unable to deserialize dataframe");
470
471        println!("deserialized: {simple_deserialized:?}");
472        let array = format!("[{}, {}, {}]", simple, simple, simple);
473        let deserialized: Vec<DataFrame> =
474            serde_json::from_str(&array).expect("BUG: Unable to deserialize dataframe");
475
476        println!("deserialized: {deserialized:?}");
477        assert_eq!(deserialized.len(), 3);
478        assert_eq!(simple_deserialized, deserialized[0]);
479    }
480
481    #[rstest]
482    #[case(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
483    #[case(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
484    #[case(vec![hashmap! {
485        "key1".into() => 1.into(),
486        "key2".into() => "a".into(),
487    },
488    hashmap! {
489        "key1".into() => 2.into(),
490    },])]
491    #[case(vec![data_value::stdhashmap! {
492        "key1" => DataValue::from(1),
493        "key2" => DataValue::from("a"),
494    },data_value::stdhashmap! {
495        "key1" => DataValue::from(2),
496    },])]
497    #[case(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(),
498    vec![DataValue::from("a"), DataValue::Null])])]
499    fn test_select_column<T: Into<DataFrame>>(#[case] input: T) {
500        let df: DataFrame = input.into();
501        assert_eq!(
502            df,
503            DataFrame {
504                constants: HashMap::new(),
505                dataframe: ColumnFrame::from(vec![
506                    hashmap! {
507                        "key1".into() => 1.into(),
508                        "key2".into() => "a".into(),
509                    },
510                    hashmap! {
511                        "key1".into() => 2.into(),
512                    },
513                ]),
514                metadata: HashMap::new(),
515            }
516        );
517        let selected_transposed = df.select_column("key1".into());
518        assert!(selected_transposed.is_some());
519        let selected_transposed = selected_transposed.unwrap();
520        assert_eq!(selected_transposed.len(), 2);
521        assert_eq!(selected_transposed, ndarray::array![1.into(), 2.into()]);
522    }
523
524    #[rstest]
525    #[case::hhm(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
526    #[case::stdhm(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
527    #[case::hm({
528        let hm: std::collections::HashMap<String, Array1<DataValue>> = data_value::stdhashmap!("key1".to_string() => Array1::from_vec(vec![DataValue::from(1), DataValue::from(2)]), "key2".to_string() => Array1::from_vec(vec![DataValue::from("a"), DataValue::Null]));
529        hm
530    })]
531    #[case::vec_hhm(vec![hashmap! {
532        "key1".into() => 1.into(),
533        "key2".into() => "a".into(),
534    },
535    hashmap! {
536        "key1".into() => 2.into(),
537    },])]
538    #[case::vec_hme(vec![data_value::stdhashmap! {
539        "key1" => DataValue::from(1),
540        "key2" => DataValue::from("a"),
541    },data_value::stdhashmap! {
542        "key1" => DataValue::from(2),
543    },])]
544    #[case::vec_vec(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(), vec![DataValue::from("a"), DataValue::Null])])]
545    fn test_from_conversion<T: Into<DataFrame>>(#[case] input: T) {
546        let df: DataFrame = input.into();
547        let expected: DataFrame = DataFrame {
548            constants: HashMap::new(),
549            dataframe: ColumnFrame::from(vec![
550                hashmap! {
551                    "key1".into() => 1.into(),
552                    "key2".into() => "a".into(),
553                },
554                hashmap! {
555                    "key1".into() => 2.into(),
556                },
557            ]),
558            metadata: HashMap::new(),
559        };
560        assert_eq!(
561            df.select(Some(&["key1".into(), "key2".into()])),
562            expected.select(Some(&["key1".into(), "key2".into()])),
563            "{df} vs {expected}"
564        );
565        let selected_transposed = df.select_transposed_typed::<i32>(&["key1".into()]);
566        assert_eq!(selected_transposed.len(), 2);
567        println!("{:?}", selected_transposed);
568        assert_eq!(selected_transposed, vec![vec![1], vec![2]]);
569    }
570    #[rstest]
571    fn test_dataframe(dummy_candidates: ColumnFrame) {
572        let mut dataframe: DataFrame = DataFrame::default();
573        assert!(dataframe.is_empty());
574        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
575        assert_eq!(dataframe.len(), 2);
576
577        let candidate = hashmap! {
578            "key1".into() => 3.into(),
579            "key2".into() => "c".into(),
580        };
581
582        assert!(dataframe.push(candidate).is_ok());
583        assert_eq!(dataframe.len(), 3);
584        assert!(!dataframe.is_empty());
585
586        dataframe.insert_constant("key3".into(), 4.into());
587        assert_eq!(dataframe.constants.len(), 1);
588        assert!(dataframe
589            .apply_function(&["key1".into()], |keys, df| {
590                let key = keys[0].clone();
591                let s = df
592                    .get_single_column(&key)
593                    .expect("BUG: Cannot get column")
594                    .to_owned();
595                let s = s.mapv(|x| x + DataValue::from(1));
596                df.add_single_column("key5", s)?;
597                Ok(())
598            })
599            .is_ok());
600        let original = dataframe.clone();
601        dataframe.shrink();
602        let remove_df = dataframe.remove_column(&["key1".into()]);
603        assert!(remove_df.is_ok());
604        let mut remove_df = remove_df.unwrap();
605        assert_eq!(remove_df.len(), 3);
606        let selected = dataframe.select(Some(&["key2".into()]));
607        assert!(selected.is_ok());
608        let selected = selected.unwrap();
609        println!("{:?}", selected);
610        assert_eq!(selected.len(), 3);
611
612        // fixme later
613        let joined_result =
614            remove_df.join(dataframe, &JoinRelation::new(crate::JoinBy::AddColumns));
615        assert!(joined_result.is_ok(), "{:?}", joined_result);
616        assert_eq!(original, remove_df);
617    }
618
619    #[rstest]
620    fn test_metadata(dummy_candidates: ColumnFrame) {
621        let mut dataframe: DataFrame = DataFrame::default();
622        assert!(dataframe.is_empty());
623        println!("{:?}", dataframe);
624        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
625        println!("{:?}", dataframe);
626        assert_eq!(dataframe.len(), 2);
627
628        dataframe.add_metadata("test".into(), 1.into());
629        assert_eq!(dataframe.get_metadata("test"), Some(&1.into()));
630        let dataframe = DataFrame::new(ColumnFrame::from(vec![
631            hashmap! {
632                "key1".into() => 1.into(),
633                "key2".into() => "a".into(),
634            },
635            hashmap! {
636                "key1".into() => 2.into(),
637                "key2".into() => "b".into(),
638            },
639        ]));
640        assert_eq!(dataframe.get_metadata("test"), None);
641        let tt = dataframe.select_transposed(None);
642        assert!(tt.is_ok());
643        let tt = tt.unwrap();
644        assert_eq!(tt.shape(), [2, 2]);
645        assert_eq!(
646            tt,
647            Array2::from_shape_vec((2, 2), vec![1.into(), 2.into(), "a".into(), "b".into()])
648                .unwrap()
649        );
650    }
651
652    #[rstest]
653    #[traced_test]
654    fn add_single_column_test() {
655        let mut dataframe = DataFrame::default();
656        let values = Array1::from(vec![1.into(), 2.into(), 3.into()]);
657        let r = dataframe.add_single_column("key1", values);
658        assert!(r.is_ok(), "{r:?}");
659        let selected = dataframe.select(None);
660        assert!(selected.is_ok());
661        let selected = selected.unwrap();
662        assert_eq!(selected.shape(), [3, 1]);
663        assert_eq!(
664            selected,
665            Array2::from_shape_vec((3, 1), vec![1.into(), 2.into(), 3.into()]).unwrap()
666        );
667        let values = Array1::from(vec![1.into(), 2.into()]);
668        assert!(dataframe.add_single_column("key1", values).is_err());
669        let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
670        assert!(dataframe.add_single_column("key2", values).is_ok());
671        let values = Array1::from(vec![3.into()]);
672        assert!(dataframe.add_single_column("key3", values).is_err());
673    }
674
675    #[rstest]
676    #[traced_test]
677    fn add_single_column_empty_test() {
678        let mut dataframe = DataFrame::default();
679        let values = Array1::from(vec![]);
680        let r = dataframe.add_single_column("key1", values);
681        assert!(r.is_ok(), "{r:?}");
682        let selected = dataframe.select(None);
683        assert!(selected.is_ok());
684        let selected = selected.unwrap();
685        assert_eq!(selected.shape(), [0, 1]);
686        assert_eq!(selected, Array2::from_shape_vec((0, 1), vec![]).unwrap());
687        let values = Array1::from(vec![1.into(), 2.into()]);
688        assert!(dataframe.add_single_column("key1", values).is_err());
689        let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
690        assert!(dataframe.add_single_column("key2", values).is_ok());
691        let values = Array1::from(vec![3.into(), 4.into()]);
692        assert!(dataframe.add_single_column("key3", values).is_err());
693        let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
694        assert!(dataframe.add_single_column("key3", values).is_ok());
695
696        assert_eq!(
697            dataframe
698                .select_column("key1".into())
699                .expect("BUG: has to exists"),
700            ndarray::arr1(&[DataValue::Null, DataValue::Null, DataValue::Null]),
701        );
702        assert_eq!(
703            dataframe
704                .select_column("key2".into())
705                .expect("BUG: has to exists"),
706            ndarray::arr1(&[3.into(), 4.into(), 5.into()]),
707        );
708        assert_eq!(
709            dataframe.select(None).expect("BUG: cannot get data"),
710            ndarray::arr2(&[
711                [DataValue::Null, 3.into(), 3.into()],
712                [DataValue::Null, 4.into(), 4.into()],
713                [DataValue::Null, 5.into(), 5.into()],
714            ])
715        );
716    }
717
718    #[rstest]
719    #[case(
720        DataFrame::new(ColumnFrame::from(vec![
721            hashmap! {
722                "k".into() => 1.into(),
723                "k2".into() => 2.into(),
724                "k3".into() => 2.2.into(),
725            },
726            hashmap! {
727                "k".into() => 11.into(),
728                "k2".into() => 3.into(),
729            },
730            hashmap! {
731                "k".into() => 4.into(),
732                "k2".into() => 5.into(),
733                "k3".into() => 2.3.into(),
734            },
735            hashmap! {
736                "k".into() => 4.into(),
737                "k2".into() => 5.into(),
738                "k3".into() => 2.4.into(),
739            },
740        ])),
741        vec!["k".into(), "k2".into()],
742        Array2::from_shape_vec((4, 2), vec![1.into(), 2.into(), 11.into(), 3.into(), 4.into(), 5.into(), 4.into(), 5.into()]).unwrap()
743    )]
744    #[case(
745        DataFrame::new(ColumnFrame::from(vec![
746            hashmap! {
747                "k".into() => 1.into(),
748                "k2".into() => 2.into(),
749                "k3".into() => 2.2.into(),
750            },
751            hashmap! {
752                "k".into() => 11.into(),
753                "k2".into() => 3.into(),
754            },
755            hashmap! {
756                "k".into() => 4.into(),
757                "k2".into() => 5.into(),
758                "k3".into() => 2.3.into(),
759            },
760            hashmap! {
761                "k".into() => 4.into(),
762                "k2".into() => 5.into(),
763                "k3".into() => 2.4.into(),
764            },
765        ])),
766        vec!["k2".into(), "k3".into(), "nonexist1".into(), "nonexists2".into(), "k".into()],
767        Array2::from_shape_vec((4, 5), vec![
768            2.into(), 2.2.into(), DataValue::Null, DataValue::Null, 1.into(),
769            3.into(), DataValue::Null, DataValue::Null, DataValue::Null, 11.into(),
770            5.into(), 2.3.into(),  DataValue::Null, DataValue::Null, 4.into(),
771            5.into(), 2.4.into(), DataValue::Null, DataValue::Null, 4.into()]).unwrap()
772    )]
773    #[traced_test]
774    fn select_multiple(
775        #[case] input: DataFrame,
776        #[case] columns: Vec<Key>,
777        #[case] expected: Array2<DataValue>,
778    ) {
779        let selected = input.select(Some(&columns));
780        assert!(selected.is_ok());
781        let selected = selected.unwrap();
782
783        assert_eq!(selected, expected);
784    }
785
786    #[rstest]
787    #[case(
788        DataFrame::new(ColumnFrame::from(vec![
789            hashmap! {
790                "k".into() => 1.into(),
791                "k2".into() => 2.into(),
792                "k3".into() => 2.2.into(),
793            },
794            hashmap! {
795                "k".into() => 11.into(),
796                "k2".into() => 3.into(),
797            },
798            hashmap! {
799                "k".into() => 4.into(),
800                "k2".into() => 5.into(),
801                "k3".into() => 2.3.into(),
802            },
803            hashmap! {
804                "k".into() => 4.into(),
805                "k2".into() => 5.into(),
806                "k3".into() => 2.4.into(),
807            },
808        ])),
809        "k".into(),
810        Array2::from_shape_vec((4, 3), vec![
811            1.into(), 2.into(), 2.2.into(),
812            4.into(), 5.into(), 2.3.into(),
813            4.into(), 5.into(), 2.4.into(),
814            11.into(), 3.into(), DataValue::Null,
815            ]
816        ).unwrap(),
817        vec!["k".into(), "k2".into(), "k3".into()],
818    )]
819    #[rstest]
820    #[case(
821        DataFrame::new(ColumnFrame::from(vec![
822            hashmap! {
823                "k".into() => 1.into(),
824                "k2".into() => 2.into(),
825                "k3".into() => 2.2.into(),
826            },
827            hashmap! {
828                "k".into() => 11.into(),
829                "k2".into() => 3.into(),
830            },
831            hashmap! {
832                "k".into() => 4.into(),
833                "k2".into() => 5.into(),
834                "k3".into() => 2.3.into(),
835            },
836            hashmap! {
837                "k".into() => 4.into(),
838                "k2".into() => 5.into(),
839                "k3".into() => 2.4.into(),
840            },
841        ])),
842        "k3".into(),
843        Array2::from_shape_vec((4, 3), vec![
844            11.into(), 3.into(), DataValue::Null,
845            1.into(), 2.into(), 2.2.into(),
846            4.into(), 5.into(), 2.3.into(),
847            4.into(), 5.into(), 2.4.into(),
848            ]
849        ).unwrap(),
850        vec!["k".into(), "k2".into(), "k3".into()],
851    )]
852    #[case(
853        DataFrame::new(ColumnFrame::from(vec![
854            hashmap! {
855                "k".into() => 2.into(),
856                "k2".into() => 0.000001.into(),
857            },
858            hashmap! {
859                "k".into() => 1.into(),
860                "k2".into() =>0.0000001.into(),
861            },
862            hashmap! {
863                "k".into() => 3.into(),
864                "k2".into() => 0.00001.into(),
865            },
866            hashmap! {
867                "k".into() => 4.into(),
868                "k2".into() => 0.001.into(),
869            },
870        ])),
871        "k2".into(),
872        Array2::from_shape_vec((4, 2), vec![
873            1.into(), 0.0000001.into(),
874            2.into(), 0.000001.into(),
875            3.into(), 0.00001.into(),
876            4.into(), 0.001.into(),
877            ]
878        ).unwrap(),
879        vec!["k".into(), "k2".into()],
880    )]
881    #[case(
882        DataFrame::new(ColumnFrame::from(vec![
883            hashmap! {
884                "k".into() => 2.into(),
885                "k2".into() => "b".into(),
886            },
887            hashmap! {
888                "k".into() => 1.into(),
889                "k2".into() =>"a".into(),
890            },
891            hashmap! {
892                "k".into() => 3.into(),
893                "k2".into() =>"c".into(),
894            },
895            hashmap! {
896                "k".into() => 4.into(),
897                "k2".into() =>"z".into(),
898            },
899        ])),
900        "k2".into(),
901        Array2::from_shape_vec((4, 2), vec![
902            1.into(),"a".into(),
903            2.into(), "b".into(),
904            3.into(), "c".into(),
905            4.into(), "z".into(),
906            ]
907        ).unwrap(),
908        vec!["k".into(), "k2".into()],
909    )]
910    #[traced_test]
911    fn sort_by(
912        #[case] input: DataFrame,
913        #[case] column: Key,
914        #[case] expected: Array2<DataValue>,
915        #[case] columns: Vec<Key>,
916    ) {
917        let result = input.sorted(&column);
918        assert!(result.is_ok(), "{result:?}");
919        let result = result.unwrap().get_sorted();
920        let selected = result.select(Some(&columns));
921
922        assert_eq!(selected, expected);
923    }
924    #[rstest]
925    #[case(
926        DataFrame::new(ColumnFrame::from(vec![
927            hashmap! {
928                "k".into() => 2.into(),
929                "k2".into() => 0.000001.into(),
930            },
931            hashmap! {
932                "k".into() => 1.into(),
933                "k2".into() =>0.0000001.into(),
934            },
935            hashmap! {
936                "k".into() => 3.into(),
937                "k2".into() => 0.00001.into(),
938            },
939            hashmap! {
940                "k".into() => 4.into(),
941                "k2".into() => 0.001.into(),
942            },
943        ])),
944        "k2".into(),
945        TopN::Last(1),
946        Array2::from_shape_vec((1, 2), vec![
947            4.into(), 0.001.into(),
948            ]
949        ).unwrap(),
950        vec!["k".into(), "k2".into()],
951    )]
952    #[case(
953        DataFrame::new(ColumnFrame::from(vec![
954            hashmap! {
955                "k".into() => 2.into(),
956                "k2".into() => 0.000001.into(),
957            },
958            hashmap! {
959                "k".into() => 1.into(),
960                "k2".into() =>0.0000001.into(),
961            },
962            hashmap! {
963                "k".into() => 3.into(),
964                "k2".into() => 0.00001.into(),
965            },
966            hashmap! {
967                "k".into() => 4.into(),
968                "k2".into() => 0.001.into(),
969            },
970        ])),
971        "k2".into(),
972        TopN::Last(2),
973        Array2::from_shape_vec((2, 2), vec![
974            4.into(), 0.001.into(),
975            3.into(), 0.00001.into(),
976            ]
977        ).unwrap(),
978        vec!["k".into(), "k2".into()],
979    )]
980    #[case(
981        DataFrame::new(ColumnFrame::from(vec![
982            hashmap! {
983                "k".into() => 2.into(),
984                "k2".into() => "b".into(),
985            },
986            hashmap! {
987                "k".into() => 1.into(),
988                "k2".into() =>"a".into(),
989            },
990            hashmap! {
991                "k".into() => 3.into(),
992                "k2".into() =>"c".into(),
993            },
994            hashmap! {
995                "k".into() => 4.into(),
996                "k2".into() =>"z".into(),
997            },
998        ])),
999        "k2".into(),
1000        TopN::First(1),
1001        Array2::from_shape_vec((1, 2), vec![
1002            1.into(),"a".into(),
1003            ]
1004        ).unwrap(),
1005        vec!["k".into(), "k2".into()],
1006    )]
1007    #[case(
1008        DataFrame::new(ColumnFrame::from(vec![
1009            hashmap! {
1010                "k".into() => 2.into(),
1011                "k2".into() => "b".into(),
1012            },
1013            hashmap! {
1014                "k".into() => 1.into(),
1015                "k2".into() =>"a".into(),
1016            },
1017            hashmap! {
1018                "k".into() => 3.into(),
1019                "k2".into() =>"c".into(),
1020            },
1021            hashmap! {
1022                "k".into() => 4.into(),
1023                "k2".into() =>"z".into(),
1024            },
1025        ])),
1026        "k2".into(),
1027        TopN::First(2),
1028        Array2::from_shape_vec((2, 2), vec![
1029            1.into(),"a".into(),
1030            2.into(),"b".into(),
1031            ]
1032        ).unwrap(),
1033        vec!["k".into(), "k2".into()],
1034    )]
1035    #[traced_test]
1036    fn top_n(
1037        #[case] input: DataFrame,
1038        #[case] column: Key,
1039        #[case] topn: TopN,
1040        #[case] expected: Array2<DataValue>,
1041        #[case] columns: Vec<Key>,
1042    ) {
1043        let result = input.sorted(&column);
1044        assert!(result.is_ok(), "{result:?}");
1045        let result = result.unwrap();
1046        let first = result.topn(topn).unwrap();
1047        let selected = first.select(Some(&columns));
1048        assert_eq!(selected, expected);
1049    }
1050}