1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
use crate::prelude::*;
use crate::series::IsSorted;
use crate::utils::{concat_df_unchecked, slice_offsets, CustomIterTools, NoNull};
use crate::POOL;
/// Resolves the optional `slice` against `total_rows` and asks `inner` to build
/// the take indices for the resulting half-open row range.
///
/// * Without a slice the range is `0..total_rows`.
/// * With a slice, `slice_offsets` turns `(offset, len)` into a start position
///   and a length; `inner` then receives `start` and `start + len`.
///
/// `n_rows_right` is forwarded to `inner` unchanged as its third argument.
fn slice_take(
    total_rows: IdxSize,
    n_rows_right: IdxSize,
    slice: Option<(i64, usize)>,
    inner: fn(IdxSize, IdxSize, IdxSize) -> IdxCa,
) -> IdxCa {
    // Work out the (start, exclusive end) pair first, then call `inner` once.
    let (start, end) = match slice {
        Some((offset, len)) => {
            let (start, len) = slice_offsets(offset, len, total_rows as usize);
            (start as IdxSize, (len + start) as IdxSize)
        }
        None => (0, total_rows),
    };
    inner(start, end, n_rows_right)
}
/// Builds the row indices into the left frame for a cross join.
///
/// Each position `i` of the (optionally sliced) output maps to left row
/// `i / n_rows_right`, so every left row is repeated once per right row,
/// e.g. `0 0 0 1 1 1` for two left rows and three right rows.
fn take_left(total_rows: IdxSize, n_rows_right: IdxSize, slice: Option<(i64, usize)>) -> IdxCa {
    fn left_indices(start: IdxSize, end: IdxSize, n_rows_right: IdxSize) -> IdxCa {
        let left_row_of = |flat_idx: IdxSize| flat_idx / n_rows_right;
        let mut indices: NoNull<IdxCa> = (start..end).map(left_row_of).collect_trusted();
        // Integer division over an increasing range never decreases, so the
        // result can be flagged as sorted ascending.
        indices.set_sorted2(IsSorted::Ascending);
        indices.into_inner()
    }

    slice_take(total_rows, n_rows_right, slice, left_indices)
}
/// Builds the row indices into the right frame for a cross join.
///
/// Each position `i` of the (optionally sliced) output maps to right row
/// `i % n_rows_right`, so the right rows cycle, e.g. `0 1 2 0 1 2` for two
/// left rows and three right rows.
fn take_right(total_rows: IdxSize, n_rows_right: IdxSize, slice: Option<(i64, usize)>) -> IdxCa {
    fn right_indices(start: IdxSize, end: IdxSize, n_rows_right: IdxSize) -> IdxCa {
        let right_row_of = |flat_idx: IdxSize| flat_idx % n_rows_right;
        let indices: NoNull<IdxCa> = (start..end).map(right_row_of).collect_trusted();
        indices.into_inner()
    }

    slice_take(total_rows, n_rows_right, slice, right_indices)
}
impl DataFrame {
pub(crate) fn cross_join(
&self,
other: &DataFrame,
suffix: Option<String>,
slice: Option<(i64, usize)>,
) -> PolarsResult<DataFrame> {
let n_rows_left = self.height() as IdxSize;
let n_rows_right = other.height() as IdxSize;
let total_rows = n_rows_right * n_rows_left;
let create_left_df = || {
unsafe { self.take_unchecked(&take_left(total_rows, n_rows_right, slice)) }
};
let create_right_df = || {
if n_rows_left > 100 || slice.is_some() {
unsafe { other.take_unchecked(&take_right(total_rows, n_rows_right, slice)) }
} else {
let iter = (0..n_rows_left).map(|_| other);
concat_df_unchecked(iter)
}
};
let (l_df, r_df) = POOL.install(|| rayon::join(create_left_df, create_right_df));
self.finish_join(l_df, r_df, suffix)
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::df;

    /// Crossing a 2-row frame with a 3-row frame yields 6 rows: each left row
    /// is repeated for every right row, and the clashing right-hand column
    /// shows up as `b_right`.
    #[test]
    fn test_cross_join() -> PolarsResult<()> {
        let left = df![
            "a" => [1, 2],
            "b" => ["foo", "spam"]
        ]?;
        let right = df![
            "b" => ["a", "b", "c"]
        ]?;

        let joined = left.cross_join(&right, None, None)?;

        let want = df![
            "a" => [1, 1, 1, 2, 2, 2],
            "b" => ["foo", "foo", "foo", "spam", "spam", "spam"],
            "b_right" => ["a", "b", "c", "a", "b", "c"]
        ]?;
        assert!(joined.frame_equal(&want));
        Ok(())
    }
}