pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
where
    I: IntoIterator<Item = &'a DataFrame>,
Expand description

Concatenate the DataFrames into a single DataFrame.

Examples found in repository?
src/frame/mod.rs (line 2546)
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
    /// Build a summary-statistics frame for this `DataFrame`.
    ///
    /// The result has one row per statistic, in this order: `count`, `mean`,
    /// `std`, `min`, one row per requested percentile, and finally `max`.
    /// A leading string column named `"describe"` labels each row.
    ///
    /// `percentiles` defaults to `[0.25, 0.5, 0.75]` when `None` is passed.
    /// Each percentile `p` is labelled as `p * 100` followed by `%`.
    ///
    /// # Panics
    ///
    /// Panics if a column cannot be cast to `Float64`, if a quantile cannot be
    /// computed, or if the header column cannot be inserted.
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        /// Cast every column of `df` to `Float64` so the per-statistic frames
        /// share one dtype before they are stacked.
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let columns: Vec<Series> = df
                .columns
                .iter()
                .map(|s| s.cast(&DataType::Float64).expect("cast to float failed"))
                .collect();

            DataFrame::new(columns).unwrap()
        }

        /// Build a frame holding, for each column, a one-element series with
        /// that column's length.
        fn count(df: &DataFrame) -> DataFrame {
            let columns = df.apply_columns_par(&|s| Series::new(s.name(), [s.len() as IdxSize]));
            DataFrame::new_no_checks(columns)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        // Row labels; kept in lock-step with `frames` below.
        let mut headers: Vec<String> = ["count", "mean", "std", "min"]
            .iter()
            .map(|label| label.to_string())
            .collect();

        // NOTE(review): the argument to `std` is presumably ddof = 1 (sample
        // standard deviation) — confirm against `DataFrame::std`.
        let mut frames: Vec<DataFrame> = vec![
            describe_cast(&count(self)),
            describe_cast(&self.mean()),
            describe_cast(&self.std(1)),
            describe_cast(&self.min()),
        ];

        for &p in percentiles {
            let quantile_frame = self
                .quantile(p, QuantileInterpolOptions::Linear)
                .expect("quantile failed");
            frames.push(describe_cast(&quantile_frame));
            headers.push(format!("{}%", p * 100.0));
        }

        // `max` goes last so the row order matches pandas.
        frames.push(describe_cast(&self.max()));
        headers.push("max".to_string());

        let mut summary = concat_df_unchecked(&frames);
        let header_column = Series::new("describe", headers);

        summary
            .insert_at_idx(0, header_column)
            .expect("insert of header failed");

        summary
    }
More examples
Hide additional examples
src/frame/cross_join.rs (line 80)
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
    /// Materialize both sides of a cross join between `self` and `other`.
    ///
    /// Returns a `(left, right)` pair of frames. Row `i` of the left frame is
    /// meant to be paired with row `i` of the right frame: every left row is
    /// repeated once per right row, and the right rows cycle.
    ///
    /// * `other` - the right-hand frame.
    /// * `slice` - optional `(offset, length)` forwarded to the index
    ///   builders `take_left` / `take_right`.
    /// * `parallel` - when `true`, both sides are built concurrently with
    ///   `rayon::join` inside `POOL`; otherwise left is built first, then right.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::ComputeError` when the product of the two heights
    /// overflows `IdxSize`.
    fn cross_join_dfs(
        &self,
        other: &DataFrame,
        slice: Option<(i64, usize)>,
        parallel: bool,
    ) -> PolarsResult<(DataFrame, DataFrame)> {
        let n_rows_left = self.height() as IdxSize;
        let n_rows_right = other.height() as IdxSize;
        let Some(total_rows) = n_rows_left.checked_mul(n_rows_right) else {
            return Err(PolarsError::ComputeError("Cross joins would produce more rows than fits into 2^32.\n\
            Consider compiling with polars-big-idx feature, or set 'streaming'.".into()))
        };

        // the left side has the Nth row combined with every row from right.
        // So let's say we have the following no. of rows
        // left: 3
        // right: 4
        //
        // left take idx:   000011112222
        // right take idx:  012301230123

        let create_left_df = || {
            // SAFETY:
            // take left is in bounds
            unsafe { self.take_unchecked(&take_left(total_rows, n_rows_right, slice)) }
        };

        let create_right_df = || {
            // concatenation of dataframes is very expensive if we need to make the series mutable
            // many times, these are atomic operations
            // so we choose a different strategy at > 100 rows (arbitrarily small number)
            if n_rows_left > 100 || slice.is_some() {
                // SAFETY:
                // take right is in bounds
                unsafe { other.take_unchecked(&take_right(total_rows, n_rows_right, slice)) }
            } else {
                // Small left side and no slice: stack `other` once per left row.
                // NOTE(review): when `n_rows_left == 0` this iterator is empty —
                // confirm `concat_df_unchecked` accepts an empty input.
                let iter = (0..n_rows_left).map(|_| other);
                concat_df_unchecked(iter)
            }
        };
        let (l_df, r_df) = if parallel {
            POOL.install(|| rayon::join(create_left_df, create_right_df))
        } else {
            (create_left_df(), create_right_df())
        };
        Ok((l_df, r_df))
    }