1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
pub mod gather;
pub mod join;
#[cfg(feature = "pivot")]
pub mod unpivot;
pub use join::*;
use polars_core::prelude::*;
#[cfg(feature = "to_dummies")]
use polars_core::runtime::RAYON;
#[cfg(feature = "to_dummies")]
use polars_core::utils::accumulate_dataframes_horizontal;
#[cfg(feature = "to_dummies")]
use rayon::prelude::*;
/// Access to a borrowed [`DataFrame`].
///
/// Any type implementing this trait automatically receives the methods of
/// [`DataFrameOps`] through the blanket implementation in this module.
pub trait IntoDf {
/// Returns a shared reference to the [`DataFrame`] this value represents.
fn to_df(&self) -> &DataFrame;
}
impl IntoDf for DataFrame {
// A `DataFrame` is already the frame being asked for, so hand back `self`.
fn to_df(&self) -> &DataFrame {
self
}
}
// Blanket implementation: every `IntoDf` type gets the `DataFrameOps` methods.
// The body is empty because the trait's methods all carry default bodies.
impl<T: IntoDf> DataFrameOps for T {}
/// Extra operations available on anything that can be viewed as a
/// [`DataFrame`] (see [`IntoDf`]).
pub trait DataFrameOps: IntoDf {
    /// Create dummy variables for every column of the frame.
    ///
    /// Each column is handed to the per-series `ToDummies::to_dummies`
    /// conversion and the resulting frames are stacked horizontally.
    ///
    /// `separator`, `drop_first` and `drop_nulls` are forwarded unchanged to
    /// that per-series conversion; consult it for their exact semantics. In
    /// the example below, passing `None` as separator produces names such as
    /// `id_1`.
    ///
    /// # Errors
    ///
    /// Fails if converting any column fails or if the converted frames cannot
    /// be combined horizontally.
    ///
    /// # Example
    ///
    /// ```ignore
    ///
    /// # #[macro_use] extern crate polars_core;
    /// # fn main() {
    ///
    /// use polars_core::prelude::*;
    ///
    /// let df = df! {
    ///     "id" => &[1, 2, 3, 1, 2, 3, 1, 1],
    ///     "type" => &["A", "B", "B", "B", "C", "C", "C", "B"],
    ///     "code" => &["X1", "X2", "X3", "X3", "X2", "X2", "X1", "X1"]
    /// }.unwrap();
    ///
    /// let dummies = df.to_dummies(None, false, false).unwrap();
    /// println!("{}", dummies);
    /// # }
    /// ```
    /// Outputs:
    /// ```text
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | id_1 | id_3 | id_2 | type_A | type_B | type_C | code_X1 | code_X2 | code_X3 |
    /// | ---  | ---  | ---  | ---    | ---    | ---    | ---     | ---     | ---     |
    /// | u8   | u8   | u8   | u8     | u8     | u8     | u8      | u8      | u8      |
    /// +======+======+======+========+========+========+=========+=========+=========+
    /// | 1    | 0    | 0    | 1      | 0      | 0      | 1       | 0       | 0       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 0    | 0    | 1    | 0      | 1      | 0      | 0       | 1       | 0       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 0    | 1    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 1    | 0    | 0    | 0      | 1      | 0      | 0       | 0       | 1       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 0    | 0    | 1    | 0      | 0      | 1      | 0       | 1       | 0       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 0    | 1    | 0    | 0      | 0      | 1      | 0       | 1       | 0       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 1    | 0    | 0    | 0      | 0      | 1      | 1       | 0       | 0       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// | 1    | 0    | 0    | 0      | 1      | 0      | 1       | 0       | 0       |
    /// +------+------+------+--------+--------+--------+---------+---------+---------+
    /// ```
    #[cfg(feature = "to_dummies")]
    fn to_dummies(
        &self,
        separator: Option<&str>,
        drop_first: bool,
        drop_nulls: bool,
    ) -> PolarsResult<DataFrame> {
        // `None` means "no explicit selection": every column is converted.
        self._to_dummies(None, separator, drop_first, drop_nulls)
    }

    /// Create dummy variables for the named columns only.
    ///
    /// Columns whose name appears in `columns` are converted exactly as in
    /// [`to_dummies`](Self::to_dummies); all other columns are carried over
    /// unchanged. Names in `columns` that match no column of the frame have
    /// no effect.
    ///
    /// `separator`, `drop_first` and `drop_nulls` are forwarded unchanged to
    /// the per-series `ToDummies::to_dummies` conversion.
    ///
    /// # Errors
    ///
    /// Fails if converting any selected column fails or if the resulting
    /// frames cannot be combined horizontally.
    #[cfg(feature = "to_dummies")]
    fn columns_to_dummies(
        &self,
        columns: Vec<&str>,
        separator: Option<&str>,
        drop_first: bool,
        drop_nulls: bool,
    ) -> PolarsResult<DataFrame> {
        self._to_dummies(Some(columns), separator, drop_first, drop_nulls)
    }

    /// Shared implementation behind [`to_dummies`](Self::to_dummies) and
    /// [`columns_to_dummies`](Self::columns_to_dummies).
    ///
    /// With `columns == None` every column of the frame is selected for
    /// conversion; otherwise only the columns whose names are listed are.
    /// Unselected columns are cloned into single-column frames as they are.
    #[cfg(feature = "to_dummies")]
    fn _to_dummies(
        &self,
        columns: Option<Vec<&str>>,
        separator: Option<&str>,
        drop_first: bool,
        drop_nulls: bool,
    ) -> PolarsResult<DataFrame> {
        use crate::series::ToDummies;

        let frame = self.to_df();

        // Names of the columns that must be expanded into dummy columns.
        let selected: PlHashSet<&str> = match columns {
            Some(names) => names.into_iter().collect(),
            None => frame
                .columns()
                .iter()
                .map(|column| column.name().as_str())
                .collect(),
        };

        // Convert the columns in parallel on the `RAYON` pool; every column
        // yields one frame (its dummies, or the untouched column itself).
        let pieces = RAYON.install(|| {
            frame
                .columns()
                .par_iter()
                .map(|column| {
                    if selected.contains(column.name().as_str()) {
                        column
                            .as_materialized_series()
                            .to_dummies(separator, drop_first, drop_nulls)
                    } else {
                        Ok(column.clone().into_frame())
                    }
                })
                .collect::<PolarsResult<Vec<_>>>()
        })?;

        accumulate_dataframes_horizontal(pieces)
    }
}