Function polars_core::utils::concat_df_unchecked
source · pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
where
    I: IntoIterator<Item = &'a DataFrame>,
Expand description
Concatenate the given DataFrames into a single DataFrame.
Examples found in repository?
src/frame/mod.rs (line 2546)
2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553
/// Summarize every column of this frame in a single statistics table.
///
/// The result has one row per statistic, in this order: `count`, `mean`,
/// `std`, `min`, one row per requested percentile, and finally `max`
/// (the same ordering pandas uses). A leading `"describe"` column holds the
/// statistic labels; every other column is cast to `Float64`.
///
/// `percentiles` defaults to `[0.25, 0.5, 0.75]` when `None`. Each percentile
/// is computed with linear interpolation and labelled as `p * 100` followed
/// by `%`.
///
/// # Panics
///
/// Panics if a column cannot be cast to `Float64`, if a quantile computation
/// returns an error, or if the label column cannot be inserted.
pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
    /// Cast every column of `df` to `Float64`.
    fn describe_cast(df: &DataFrame) -> DataFrame {
        let as_float: Vec<Series> = df
            .columns
            .iter()
            .map(|col| col.cast(&DataType::Float64).expect("cast to float failed"))
            .collect();
        DataFrame::new(as_float).unwrap()
    }

    /// Build a one-row frame holding each column's length under that column's name.
    fn count(df: &DataFrame) -> DataFrame {
        let lengths =
            df.apply_columns_par(&|col| Series::new(col.name(), [col.len() as IdxSize]));
        DataFrame::new_no_checks(lengths)
    }

    let quantiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

    // Four leading statistics + one row per percentile + the trailing `max`.
    let n_rows = quantiles.len() + 5;
    let mut labels: Vec<String> = Vec::with_capacity(n_rows);
    let mut stats: Vec<DataFrame> = Vec::with_capacity(n_rows);

    labels.extend(
        ["count", "mean", "std", "min"]
            .iter()
            .map(|label| label.to_string()),
    );
    stats.push(describe_cast(&count(self)));
    stats.push(describe_cast(&self.mean()));
    stats.push(describe_cast(&self.std(1)));
    stats.push(describe_cast(&self.min()));

    for &p in quantiles {
        let quantile_row = self
            .quantile(p, QuantileInterpolOptions::Linear)
            .expect("quantile failed");
        stats.push(describe_cast(&quantile_row));
        labels.push(format!("{}%", p * 100.0));
    }

    // Keep order same as pandas: `max` goes after the percentiles.
    stats.push(describe_cast(&self.max()));
    labels.push("max".to_string());

    let mut summary = concat_df_unchecked(&stats);
    summary
        .insert_at_idx(0, Series::new("describe", labels))
        .expect("insert of header failed");
    summary
}
More examples
src/frame/cross_join.rs (line 80)
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
/// Build the left and right halves of a cross join between `self` and `other`.
///
/// Every row of `self` is paired with every row of `other`; the two returned
/// frames hold the left-hand and right-hand rows of those pairs respectively.
///
/// * `other` - the right-hand frame of the join.
/// * `slice` - forwarded to the index builders `take_left` / `take_right`;
///   when it is `Some`, the right side is always built by taking indices.
///   NOTE(review): presumably an `(offset, length)` pair; confirm against callers.
/// * `parallel` - when `true`, both halves are built concurrently via
///   `rayon::join` inside `POOL`; otherwise they are built one after the other.
///
/// # Errors
///
/// Returns `PolarsError::ComputeError` when the number of result rows
/// (`self.height() * other.height()`) overflows `IdxSize`.
fn cross_join_dfs(
    &self,
    other: &DataFrame,
    slice: Option<(i64, usize)>,
    parallel: bool,
) -> PolarsResult<(DataFrame, DataFrame)> {
    let n_rows_left = self.height() as IdxSize;
    let n_rows_right = other.height() as IdxSize;
    let Some(total_rows) = n_rows_left.checked_mul(n_rows_right) else {
        return Err(PolarsError::ComputeError(
            "Cross joins would produce more rows than fits into 2^32.\n\
             Consider compiling with polars-big-idx feature, or set 'streaming'."
                .into(),
        ));
    };

    // the left side has the Nth row combined with every row from right.
    // So let's say we have the following no. of rows
    // left: 3
    // right: 4
    //
    // left take idx:  000011112222
    // right take idx: 012301230123

    let create_left_df = || {
        // SAFETY: take left is in bounds
        unsafe { self.take_unchecked(&take_left(total_rows, n_rows_right, slice)) }
    };

    let create_right_df = || {
        // concatenation of dataframes is very expensive if we need to make the series mutable
        // many times, these are atomic operations
        // so we choose a different strategy at > 100 rows (arbitrarily small number)
        if n_rows_left > 100 || slice.is_some() {
            // SAFETY: take right is in bounds
            unsafe { other.take_unchecked(&take_right(total_rows, n_rows_right, slice)) }
        } else {
            // Repeat `other` once per left row and stack the copies.
            // NOTE(review): when `n_rows_left` is 0 this iterator is empty; confirm
            // that `concat_df_unchecked` accepts an empty input.
            let iter = (0..n_rows_left).map(|_| other);
            concat_df_unchecked(iter)
        }
    };

    let (l_df, r_df) = if parallel {
        POOL.install(|| rayon::join(create_left_df, create_right_df))
    } else {
        (create_left_df(), create_right_df())
    };
    Ok((l_df, r_df))
}