1use std::collections::HashMap;
2
3use crate::{DataFrame, DataValue, JoinRelation, Key};
4use data_value::Extract as _;
5use ndarray::Array1;
6use numpy::{IntoPyArray, PyArray2};
7use pyo3::{
8 exceptions::PyTypeError,
9 prelude::*,
10 types::{PyBytes, PyList},
11 IntoPyObjectExt,
12};
13use tracing::trace;
14
15impl DataFrame {
16 fn select_data(
17 &self,
18 keys: Option<Vec<String>>,
19 transposed: Option<bool>,
20 ) -> Result<ndarray::Array2<DataValue>, crate::error::Error> {
21 let keys = keys
22 .map(|x| x.into_iter().map(Key::from).collect::<Vec<Key>>())
23 .unwrap_or(self.keys());
24 if transposed.unwrap_or(false) {
25 self.select(Some(keys.as_slice()))
26 } else {
27 self.select_transposed(Some(keys.as_slice()))
28 }
29 }
30}
31
32enum DfOrDict {
33 DataFrame(DataFrame),
34 Dict(HashMap<String, DataValue>),
35}
36
37impl DfOrDict {
38 pub fn new(object: Bound<'_, PyAny>) -> Result<DfOrDict, PyErr> {
39 if let Ok(df) = object.extract::<DataFrame>() {
40 Ok(DfOrDict::DataFrame(df))
41 } else {
42 let dict: HashMap<String, DataValue> = object.extract()?;
43 Ok(DfOrDict::Dict(dict))
44 }
45 }
46}
47
48#[pymethods]
49impl DataFrame {
50 #[new]
52 pub fn init() -> Self {
53 Self::default()
54 }
55
56 #[cfg(feature = "polars-df")]
63 #[staticmethod]
64 pub fn from_polars(df: pyo3_polars::PyDataFrame) -> Self {
65 df.0.into()
66 }
67
68 #[staticmethod]
73 pub fn from_dict(df: HashMap<String, Vec<DataValue>>) -> Self {
74 let mut result_df: Vec<(Key, Vec<DataValue>)> = Vec::new();
75 for (key, value) in df.into_iter() {
76 let dtype = crate::detect_dtype_arr(&value);
77 let key = Key::new(key.as_str(), dtype);
78 result_df.push((key, value));
79 }
80
81 result_df.into()
82 }
83
84 pub fn keys(&self) -> Vec<Key> {
86 self.dataframe.keys().to_vec()
87 }
88
89 pub fn set_dtype_for_column(&mut self, key: String, dtype: crate::DataType) -> PyResult<()> {
95 self.dataframe
96 .enforce_dtype_for_column(key.as_str(), dtype)
97 .map_err(|e| {
98 PyErr::new::<PyTypeError, _>(format!("Cannot set dtype for columnĀ {key}: {e}"))
99 })
100 }
101
102 #[cfg(feature = "polars-df")]
112 #[pyo3(name = "as_polars")]
113 pub fn py_as_polars(&self) -> PyResult<pyo3_polars::PyDataFrame> {
114 let df = self
115 .as_polars()
116 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot prepare polars DF: {e}")))?;
117 Ok(pyo3_polars::PyDataFrame(df))
118 }
119
120 pub fn apply(&mut self, function: Bound<'_, PyAny>) -> Result<(), PyErr> {
130 let df: DataFrame = pyo3::Python::attach(|py| {
131 let self_ = self
132 .clone()
133 .into_pyobject(py)
134 .expect("BUG: cannot convert to PyObject");
135 let result = function.call1((self_,)).expect("BUG: cannot call function");
136 result
137 .extract::<Bound<DataFrame>>()
138 .expect("BUG: cannot extract data frame")
139 .unbind()
140 .extract(py)
141 .expect("BUG: cannot extract data frame")
142 });
143 self.dataframe = df.dataframe;
144 Ok(())
145 }
146
147 #[pyo3(signature = (keys=None, transposed=None))]
159 pub fn as_numpy_u32<'py>(
160 &self,
161 keys: Option<Vec<String>>,
162 transposed: Option<bool>,
163 py: Python<'py>,
164 ) -> PyResult<Bound<'py, numpy::PyArray2<u32>>> {
165 let data = self
166 .select_data(keys, transposed)
167 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
168 Ok(PyArray2::from_array(py, &data.mapv(|x| u32::extract(&x))))
169 }
170
171 #[pyo3(signature = (keys=None, transposed=None))]
183 pub fn as_numpy_u64<'py>(
184 &self,
185 keys: Option<Vec<String>>,
186 transposed: Option<bool>,
187 py: Python<'py>,
188 ) -> PyResult<Bound<'py, numpy::PyArray2<u64>>> {
189 let data = self
190 .select_data(keys, transposed)
191 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
192 Ok(PyArray2::from_array(py, &data.mapv(|x| u64::extract(&x))))
193 }
194
195 #[pyo3(signature = (keys=None, transposed=None))]
207 pub fn as_numpy_i32<'py>(
208 &self,
209 keys: Option<Vec<String>>,
210 transposed: Option<bool>,
211 py: Python<'py>,
212 ) -> PyResult<Bound<'py, numpy::PyArray2<i32>>> {
213 let data = self
214 .select_data(keys, transposed)
215 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
216 Ok(PyArray2::from_array(py, &data.mapv(|x| i32::extract(&x))))
217 }
218
219 #[pyo3(signature = (keys=None, transposed=None))]
231 pub fn as_numpy_i64<'py>(
232 &self,
233 keys: Option<Vec<String>>,
234 transposed: Option<bool>,
235 py: Python<'py>,
236 ) -> PyResult<Bound<'py, numpy::PyArray2<i64>>> {
237 let data = self
238 .select_data(keys, transposed)
239 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
240 Ok(PyArray2::from_array(py, &data.mapv(|x| i64::extract(&x))))
241 }
242
243 #[pyo3(signature = (keys=None, transposed=None))]
255 pub fn as_numpy_f32<'py>(
256 &self,
257 keys: Option<Vec<String>>,
258 transposed: Option<bool>,
259 py: Python<'py>,
260 ) -> PyResult<Bound<'py, numpy::PyArray2<f32>>> {
261 let data = self
262 .select_data(keys, transposed)
263 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
264 Ok(PyArray2::from_array(py, &data.mapv(|x| f32::extract(&x))))
265 }
266
267 #[pyo3(signature = (keys=None, transposed=None))]
279 pub fn as_numpy_f64<'py>(
280 &self,
281 keys: Option<Vec<String>>,
282 transposed: Option<bool>,
283 py: Python<'py>,
284 ) -> PyResult<Bound<'py, numpy::PyArray2<f64>>> {
285 let data = self
286 .select_data(keys, transposed)
287 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
288 Ok(PyArray2::from_array(py, &data.mapv(|x| f64::extract(&x))))
289 }
290
291 #[pyo3(signature = (keys=None, transposed=None))]
303 pub fn as_numpy_str<'py>(
304 &self,
305 keys: Option<Vec<String>>,
306 transposed: Option<bool>,
307 py: Python<'py>,
308 ) -> PyResult<Bound<'py, numpy::PyArray2<Py<PyAny>>>> {
309 self.as_numpy(keys, transposed, py)
310 }
311
312 #[pyo3(signature = (keys=None, transposed=None))]
324 pub fn as_numpy<'py>(
325 &self,
326 keys: Option<Vec<String>>,
327 transposed: Option<bool>,
328 py: Python<'py>,
329 ) -> PyResult<Bound<'py, numpy::PyArray2<Py<PyAny>>>> {
330 let data = self
331 .select_data(keys, transposed)
332 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?;
333 let data = data.mapv(|x| {
334 String::extract(&x)
335 .into_py_any(py)
336 .expect("cannot convert string to py object")
337 });
338 Ok(data.into_pyarray(py))
339 }
340
341 #[pyo3(name = "shrink")]
342 pub fn py_shrink(&mut self) {
343 self.dataframe.shrink();
344 }
345
346 #[pyo3(name = "add_metadata")]
347 pub fn py_add_metadata(&mut self, key: String, value: DataValue) {
348 self.metadata.insert(key, value);
349 }
350
351 #[pyo3(name = "get_metadata")]
352 pub fn py_get_metadata(&self, key: &str) -> Option<DataValue> {
353 self.metadata.get(key).cloned()
354 }
355
356 #[pyo3(name = "rename_key")]
357 pub fn py_rename_key(&mut self, key: &str, new_name: &str) -> Result<(), PyErr> {
358 self.dataframe
360 .rename_key(key, new_name.into())
361 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("{e}")))
362 }
363
364 #[pyo3(name = "add_alias")]
365 pub fn py_add_alias(&mut self, key: &str, new_name: &str) -> Result<(), PyErr> {
366 self.dataframe
367 .add_alias(key, new_name)
368 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("{e}")))
369 }
370
371 #[pyo3(name = "select", signature = (keys=None, transposed=None))]
384 pub fn py_select<'py>(
385 &self,
386 py: Python<'py>,
387 keys: Option<Vec<String>>,
388 transposed: Option<bool>,
389 ) -> Result<Bound<'py, PyList>, PyErr> {
390 let keys = keys
391 .map(|x| x.into_iter().map(Key::from).collect::<Vec<Key>>())
392 .unwrap_or(self.keys());
393
394 let selected = if transposed.unwrap_or_default() {
395 self.select_transposed(Some(keys.as_slice()))
396 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?
397 } else {
398 self.select(Some(keys.as_slice()))
399 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot select data: {e}")))?
400 };
401
402 let list = PyList::empty(py);
403 for rows in selected.rows() {
404 let row = PyList::empty(py);
405 for value in rows.iter() {
406 row.append(value.clone())
407 .expect("BUG: cannot append to list");
408 }
409 list.append(row).expect("BUG: cannot append to list");
410 }
411 Ok(list)
412 }
413
414 #[pyo3(name = "select_column")]
429 pub fn py_select_column<'py>(
430 &self,
431 py: Python<'py>,
432 key: String,
433 ) -> Result<Bound<'py, PyList>, PyErr> {
434 let selected = self
435 .select_column(Key::from(key))
436 .ok_or_else(|| PyErr::new::<PyTypeError, _>("Cannot select column"))?;
437
438 let list = PyList::empty(py);
439 for x in selected.to_vec().into_iter() {
440 list.append(x)?;
441 }
442
443 Ok(list)
444 }
445
446 #[pyo3(name = "join")]
461 pub fn py_join(&mut self, other: DataFrame, join_type: JoinRelation) -> Result<(), PyErr> {
462 self.dataframe
463 .join(other.dataframe, &join_type)
464 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot join data: {e}")))?;
465
466 Ok(())
467 }
468
469 #[pyo3(name = "push")]
478 pub fn py_push(&mut self, data: HashMap<Key, DataValue>) -> Result<(), PyErr> {
479 self.dataframe
480 .push(data)
481 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot join data: {e}")))?;
482 Ok(())
483 }
484
485 #[pyo3(name = "add_column")]
497 pub fn py_add_column(&mut self, key: Key, data: Vec<DataValue>) -> Result<(), PyErr> {
498 self.dataframe
499 .add_single_column(key, Array1::from_vec(data))
500 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot join data: {e}")))?;
501 Ok(())
502 }
503
504 pub fn add_constant(&mut self, key: Key, feature: DataValue) -> Result<(), PyErr> {
505 self.constants.insert(key, feature);
506 Ok(())
507 }
508
509 pub fn filter_by_expression(&mut self, expression: String) -> Result<Self, PyErr> {
521 let filter = crate::filter::FilterRules::try_from(expression.as_str())
522 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot parse expression: {e}")))?;
523 self.filter(&filter)
524 .map_err(|e| PyErr::new::<PyTypeError, _>(format!("Cannot filter data: {e}")))
525 }
526
527 fn __repr__(&self) -> String {
528 self.to_string()
529 }
530
531 fn __str__(&self) -> String {
532 self.to_string()
533 }
534
535 pub fn __iadd__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr> {
536 trace!("{object:?}");
537 let df_or_dict = DfOrDict::new(object)?;
538 match df_or_dict {
539 DfOrDict::DataFrame(df) => {
540 self.dataframe += df.dataframe;
541 }
542 DfOrDict::Dict(dict) => {
543 self.dataframe += dict;
544 }
545 }
546 Ok(())
547 }
548
549 pub fn __isub__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr> {
550 trace!("{object:?}");
551
552 let df_or_dict = DfOrDict::new(object)?;
553 match df_or_dict {
554 DfOrDict::DataFrame(df) => {
555 self.dataframe -= df.dataframe;
556 }
557 DfOrDict::Dict(dict) => {
558 self.dataframe -= dict;
559 }
560 }
561 Ok(())
562 }
563
564 pub fn __imul__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr> {
565 trace!("{object:?}");
566 let df_or_dict = DfOrDict::new(object)?;
567 match df_or_dict {
568 DfOrDict::DataFrame(df) => {
569 self.dataframe *= df.dataframe;
570 }
571 DfOrDict::Dict(dict) => {
572 self.dataframe *= dict;
573 }
574 }
575 Ok(())
576 }
577
578 pub fn __itruediv__(&mut self, object: Bound<'_, PyAny>) -> Result<(), PyErr> {
579 trace!("{object:?}");
580 let df_or_dict = DfOrDict::new(object)?;
581 match df_or_dict {
582 DfOrDict::DataFrame(df) => {
583 self.dataframe /= df.dataframe;
584 }
585 DfOrDict::Dict(dict) => {
586 self.dataframe /= dict;
587 }
588 }
589 Ok(())
590 }
591
592 pub fn __len__(&mut self) -> Result<usize, PyErr> {
593 Ok(self.dataframe.len())
594 }
595
596 pub fn serialize_to_json_string(&self) -> String {
597 serde_json::to_string(self).expect("Cannot serialize to strinng")
598 }
599
600 #[staticmethod]
601 pub fn deserialize_from_json_string(json_df: String) -> Self {
602 let mut df: DataFrame =
603 serde_json::from_str(json_df.as_str()).expect("Cannot deserialize from str");
604 let _ = df.dataframe.try_fix_dtype();
605
606 df
607 }
608
609 pub fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
611 let s: DataFrame = rmp_serde::decode::from_slice(state.as_bytes()).map_err(|e| {
612 pyo3::PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!(
613 "Cannot deserialize object {e}"
614 ))
615 })?;
616 *self = s;
617 self.dataframe.try_fix_dtype().map_err(|e| {
618 pyo3::PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!(
619 "Cannot deserialize object {e}"
620 ))
621 })?;
622 Ok(())
623 }
624 pub fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
625 let buf = rmp_serde::encode::to_vec(self).map_err(|e| {
626 pyo3::PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!(
628 "Cannot deserialize object {e}"
629 ))
630 })?;
631 Ok(PyBytes::new(py, &buf))
632 }
633
634 pub fn __del__(&mut self) {
635 self.dataframe = Default::default();
636 }
637
638 }
648
649#[cfg(test)]
650mod test {
651
652 use super::*;
653 use crate::DataType;
654 use data_value::{stdhashmap, DataValue};
655 use halfbrown::hashmap;
656 use pyo3::ffi::c_str;
657 use rstest::*;
658 use tracing_test::traced_test;
659
660 #[fixture]
661 fn df() -> DataFrame {
662 let mut df = DataFrame::init();
663 assert!(df
664 .push(hashmap! {
665 Key::new("key1", DataType::U32) => DataValue::U32(1),
666 Key::new("key2", DataType::U32) => DataValue::U32(2),
667 })
668 .is_ok());
669 assert!(df
670 .push(hashmap! {
671 Key::from("key1") => DataValue::U32(11),
672 Key::from("key2") => DataValue::U32(21),
673 })
674 .is_ok());
675 df
676 }
677
678 #[fixture]
679 fn hm() -> HashMap<String, DataValue> {
680 stdhashmap!(
681 "key1".to_string() => DataValue::U32(2),
682 "key2".to_string() => DataValue::U32(3),
683 )
684 }
685
686 #[rstest]
687 fn serde_py(df: DataFrame) {
688 let str_df = df.serialize_to_json_string();
689 assert!(!str_df.is_empty());
690
691 let loaded = DataFrame::deserialize_from_json_string(str_df);
692
693 assert_eq!(loaded, df);
694 }
695 #[cfg(feature = "python")]
696 #[rstest]
697 fn pickle_py(df: DataFrame) {
698 pyo3::Python::attach(|py| {
699 let bytes = df.__getstate__(py);
700 assert!(bytes.is_ok());
701
702 let mut deser = DataFrame::default();
703 assert!(deser.__setstate__(bytes.unwrap().into()).is_ok());
704 assert_eq!(deser, df);
705 });
706 }
707 #[rstest]
708 fn test_select_data(df: DataFrame) {
709 let data = df.select_data(Some(vec!["key1".into(), "key2".into()]), Some(false));
710 assert!(data.is_ok());
711 assert_eq!(
712 data.unwrap(),
713 ndarray::array![[1u32.into(), 11u32.into()], [2u32.into(), 21u32.into()]]
714 );
715
716 let data = df.select_data(Some(vec!["key1".into(), "key2".into()]), Some(true));
717 assert!(data.is_ok());
718 assert_eq!(
719 data.unwrap(),
720 ndarray::array![[1u32.into(), 2u32.into()], [11u32.into(), 21u32.into()]]
721 );
722 }
723
724 #[cfg(feature = "python")]
725 #[rstest]
726 fn test_from_create() {
727 pyo3::Python::attach(|_py| {
728 let mut hm: HashMap<String, Vec<DataValue>> = Default::default();
729 let value: Vec<DataValue> = vec![1i32.into(), 22i32.into()];
730 hm.insert("a".into(), value);
731
732 let mut df = DataFrame::from_dict(hm);
733 assert_eq!(
734 df.select(Some(&["a".into()])),
735 Ok(ndarray::array![
736 [DataValue::from(1i32)],
737 [DataValue::from(22i32)]
738 ]),
739 );
740 assert!(df.set_dtype_for_column("a".into(), DataType::U32).is_ok());
741 assert_eq!(
742 df.select(Some(&["a".into()])),
743 Ok(ndarray::array![
744 [DataValue::from(1u32)],
745 [DataValue::from(22u32)]
746 ]),
747 );
748 });
749 #[cfg(feature = "polars-df")]
750 {
751 let pdf = polars::df!(
752 "a" => [1u64, 2u64, 3u64],
753 "b" => [4f64, 5f64, 6f64],
754 "c" => [7i64, 8i64, 9i64]
755 )
756 .expect("BUG: should be ok");
757 let df = DataFrame::from_polars(pyo3_polars::PyDataFrame(pdf));
758 assert_eq!(
759 df.select(Some(&["a".into(), "b".into(), "c".into()])),
760 crate::df! {
761 "a" => [1u64, 2u64, 3u64],
762 "b" => [4f64, 5f64, 6f64],
763 "c" => [7i64, 8i64, 9i64]
764 }
765 .select(Some(&["a".into(), "b".into(), "c".into()])),
766 );
767 }
768 }
769
770 #[rstest]
771 #[traced_test]
772 fn basic_ops_add(mut df: DataFrame, hm: HashMap<String, DataValue>) {
773 let mut df_expect = df.clone();
774 let df2 = df.clone();
775 let exec = Python::attach(|py| -> PyResult<()> {
776 df.__iadd__(df.clone().into_pyobject(py)?.into_any())?;
777 df_expect.dataframe += df2.dataframe;
778 tracing::trace!("{} vs {}", df, df_expect);
779 assert_eq!(df.dataframe, df_expect.dataframe);
780
781 df.__iadd__(hm.clone().into_pyobject(py)?.into_any())?;
782 df_expect.dataframe += hm;
783 tracing::trace!("{} vs {}", df, df_expect);
784 assert_eq!(df.dataframe, df_expect.dataframe);
785
786 Ok(())
787 });
788
789 assert!(exec.is_ok(), "{:?}", exec);
790 }
791
792 #[rstest]
793 #[traced_test]
794 fn basic_ops_sub(mut df: DataFrame, hm: HashMap<String, DataValue>) {
795 let mut df_expect = df.clone();
796 let df2 = df.clone();
797 let exec = Python::attach(|py| -> PyResult<()> {
798 df.__isub__(df.clone().into_pyobject(py)?.into_any())?;
799 df_expect.dataframe -= df2.dataframe;
800 tracing::trace!("{} vs {}", df, df_expect);
801 assert_eq!(df.dataframe, df_expect.dataframe);
802
803 df.__isub__(hm.clone().into_pyobject(py)?.into_any())?;
804 df_expect.dataframe -= hm;
805 tracing::trace!("{} vs {}", df, df_expect);
806 assert_eq!(df.dataframe, df_expect.dataframe);
807
808 Ok(())
809 });
810
811 assert!(exec.is_ok(), "{:?}", exec);
812 }
813
814 #[rstest]
815 #[traced_test]
816 fn basic_ops_mul(mut df: DataFrame, hm: HashMap<String, DataValue>) {
817 let mut df_expect = df.clone();
818 let df2 = df.clone();
819 let exec = Python::attach(|py| -> PyResult<()> {
820 df.__imul__(df.clone().into_pyobject(py)?.into_any())?;
821 df_expect.dataframe *= df2.dataframe;
822 tracing::trace!("{} vs {}", df, df_expect);
823 assert_eq!(df.dataframe, df_expect.dataframe);
824
825 df.__imul__(hm.clone().into_pyobject(py)?.into_any())?;
826 df_expect.dataframe *= hm;
827 tracing::trace!("{} vs {}", df, df_expect);
828 assert_eq!(df.dataframe, df_expect.dataframe);
829 Ok(())
830 });
831
832 assert!(exec.is_ok(), "{:?}", exec);
833 }
834
835 #[rstest]
836 #[traced_test]
837 fn basic_ops_div(mut df: DataFrame, hm: HashMap<String, DataValue>) {
838 let mut df_expect = df.clone();
839 let df2 = df.clone();
840 let exec = Python::attach(|py| -> PyResult<()> {
841 df.__itruediv__(df.clone().into_pyobject(py)?.into_any())?;
842 df_expect.dataframe /= df2.dataframe;
843 tracing::trace!("{} vs {}", df, df_expect);
844 assert_eq!(df.dataframe, df_expect.dataframe);
845
846 df.__itruediv__(hm.clone().into_pyobject(py)?.into_any())?;
847 df_expect.dataframe /= hm;
848 tracing::trace!("{} vs {}", df, df_expect);
849 assert_eq!(df.dataframe, df_expect.dataframe);
850 Ok(())
851 });
852
853 assert!(exec.is_ok(), "{:?}", exec);
854 }
855
856 #[rstest]
857 #[traced_test]
858 #[rstest]
859 fn test_numpy(mut df: DataFrame) {
860 let exec = Python::attach(|py| -> PyResult<()> {
861 let code = c_str!(
862 r#"
863def example(df):
864 import numpy as np
865 a_np = df.as_numpy_f32(['key1', 'key2'])
866 print(a_np)
867 b_np = df.as_numpy_u32(['key1', 'key'])
868 print(b_np)
869 b_np = df.as_numpy_i32(['key1', 'key'])
870 print(b_np)
871 b_np = df.as_numpy_i64(['key1', 'key'])
872 print(b_np)
873 b_np = df.as_numpy_u64(['key1', 'key'])
874 print(b_np)
875 b_np = df.as_numpy_f64(['key1', 'key'])
876 print(b_np)
877 b_np = df.as_numpy_f64(['key1', 'key'], transposed=True)
878 print(b_np)
879 b_np = df.as_numpy(['key1', 'key'], transposed=True)
880 print(b_np)
881 b_np = df.as_numpy_str(['key1', 'key'], transposed=True)
882 print(b_np)
883 return df
884 "#
885 );
886 let fun: Py<PyAny> = PyModule::from_code(py, code, c_str!(""), c_str!(""))?
887 .getattr("example")?
888 .into();
889 let result = fun.call1(py, (df.clone(),));
890 assert!(df.py_join(df.clone(), JoinRelation::default()).is_ok());
891 if py.import("numpy").is_ok() {
894 assert!(result.is_ok(), "{:?}", result);
895 } else {
896 assert!(result.is_err(), "{:?}", result);
897 }
898 Ok(())
899 });
900 assert!(exec.is_ok(), "{:?}", exec);
901 }
902
903 #[rstest]
904 #[traced_test]
905 #[rstest]
906 fn test_fill_from_python(df: DataFrame) {
907 let exec = Python::attach(|_py| -> PyResult<()> {
908 let hm = stdhashmap!(
909 Key::from("key1") => DataValue::U32(1),
910 Key::from("key2") => DataValue::U32(2),
911 );
912 let mut df2 = DataFrame::init();
913 assert!(df2.py_push(hm).is_ok());
914 assert!(df2
915 .py_push(stdhashmap!(
916 Key::from("key1") => DataValue::U32(11),
917 Key::from("key2") => DataValue::U32(21),
918 ))
919 .is_ok());
920
921 assert_eq!(df, df2);
922
923 let mut df2 = DataFrame::init();
924 assert!(df2
925 .py_add_column(
926 Key::from("key1"),
927 vec![DataValue::U32(1), DataValue::U32(11)]
928 )
929 .is_ok());
930 assert!(df2
931 .py_add_column(
932 Key::from("key2"),
933 vec![DataValue::U32(2), DataValue::U32(21)]
934 )
935 .is_ok());
936
937 assert_eq!(df, df2);
938 Ok(())
939 });
940 assert!(exec.is_ok(), "{:?}", exec);
941 }
942
943 #[rstest]
944 fn basic_python_dataframe(mut df: DataFrame) {
945 let exec = Python::attach(|py| -> PyResult<()> {
946 let fun: Py<PyAny> = PyModule::from_code(
947 py,
948 c_str!(
949 "
950def example(df):
951 print(df)
952 df.shrink()
953 assert len(df) == 2
954 df.add_alias('key1', 'key1-alias')
955 a = df.select(['key1', 'key2'])
956 print(a)
957 b = df.select(['key1-alias', 'key2'])
958 print(b)
959 df.rename_key('key1', 'key1new')
960 df.rename_key('key1new', 'key1')
961 assert a == [[1, 2], [11, 21]]
962 assert a == b
963 df.add_metadata('test', 1)
964 m = df.get_metadata('test')
965 assert m == 1
966 b = df.select_transposed(['key1', 'key2'])
967 print(b)
968 assert b == [[1, 11], [2, 21]]
969 c = df.select_column('key1')
970 print(c)
971 assert c == [1, 11]
972
973 a += b
974 print(a)
975 assert a == [[2, 13], [4, 23]]
976 a -= b
977 print(a)
978 assert e == a
979 f = e * b
980 print(f)
981 assert f == [[1, 22], [44, 441]]
982 g = f / b
983 print(g)
984 assert g == e
985
986 "
987 ),
988 c_str!(""),
989 c_str!(""),
990 )?
991 .getattr("example")?
992 .into();
993 let _ = fun.call1(py, (df.clone(),));
994 assert!(df.py_join(df.clone(), JoinRelation::default()).is_ok());
995 Ok(())
996 });
997 assert!(exec.is_ok(), "{:?}", exec);
998 }
999
1000 #[rstest]
1001 fn dummy_test_apply(mut df: DataFrame) {
1002 let exec = Python::attach(|py| -> PyResult<()> {
1003 let fun: Py<PyAny> = PyModule::from_code(
1004 py,
1005 c_str!(
1006 r#"
1007def multiply_by_ten(x):
1008 print(x)
1009 x *= {"key1": 10}
1010 print(x)
1011 return x
1012
1013def example(df):
1014 print(df)
1015 df.apply(multiply_by_ten)
1016 "#
1017 ),
1018 c_str!(""),
1019 c_str!(""),
1020 )?
1021 .getattr("example")?
1022 .into();
1023 let _ = fun.call1(py, (df.clone(),));
1024 assert!(df.py_join(df.clone(), JoinRelation::default()).is_ok());
1025 Ok(())
1026 });
1027 assert!(exec.is_ok(), "{:?}", exec);
1028 }
1029}