Function polars_core::utils::concat_df_unchecked

source ·

pub fn concat_df_unchecked<'a, I>(dfs: I) -> DataFrame
where
    I: IntoIterator<Item = &'a DataFrame>,

Expand description

Concatenate the DataFrames into a single DataFrame.

Examples found in repository ?

src/frame/mod.rs (line 2546)

    /// Build a table of summary statistics for this frame.
    ///
    /// The statistics are stacked in this order: `count`, `mean`, `std`, `min`, one entry
    /// per requested percentile (labelled `"{p * 100}%"`), and finally `max`. Every
    /// statistic is cast to `Float64`, and a leading `describe` column holds the labels.
    ///
    /// `percentiles` falls back to `[0.25, 0.5, 0.75]` when `None` is passed.
    ///
    /// # Panics
    ///
    /// Panics if a column cannot be cast to `Float64`, if building an intermediate frame
    /// fails, if a quantile computation fails, or if the label column cannot be inserted.
    pub fn describe(&self, percentiles: Option<&[f64]>) -> Self {
        // Cast every column of a statistics frame to `Float64` so the pieces share a dtype
        // before they are concatenated.
        fn describe_cast(df: &DataFrame) -> DataFrame {
            let as_floats: Vec<Series> = df
                .columns
                .iter()
                .map(|col| col.cast(&DataType::Float64).expect("cast to float failed"))
                .collect();

            DataFrame::new(as_floats).unwrap()
        }

        // One single-value series per column, holding that column's length.
        fn count(df: &DataFrame) -> DataFrame {
            let lengths =
                df.apply_columns_par(&|col| Series::new(col.name(), [col.len() as IdxSize]));
            DataFrame::new_no_checks(lengths)
        }

        let percentiles = percentiles.unwrap_or(&[0.25, 0.5, 0.75]);

        // `labels[i]` names the statistic stored in `stats[i]`; the two must stay in step.
        let mut labels: Vec<String> = ["count", "mean", "std", "min"]
            .iter()
            .map(|label| label.to_string())
            .collect();

        let mut stats: Vec<DataFrame> = Vec::new();
        stats.push(describe_cast(&count(self)));
        stats.push(describe_cast(&self.mean()));
        stats.push(describe_cast(&self.std(1)));
        stats.push(describe_cast(&self.min()));

        for &p in percentiles {
            let quantile_df = self
                .quantile(p, QuantileInterpolOptions::Linear)
                .expect("quantile failed");
            stats.push(describe_cast(&quantile_df));
            labels.push(format!("{}%", p * 100.0));
        }

        // `max` goes last to keep the same ordering as pandas.
        stats.push(describe_cast(&self.max()));
        labels.push("max".to_string());

        let mut summary = concat_df_unchecked(&stats);

        // Prepend the label column so each statistic can be identified.
        summary
            .insert_at_idx(0, Series::new("describe", labels))
            .expect("insert of header failed");

        summary
    }

More examples

Hide additional examples

src/frame/cross_join.rs (line 80)

    /// Build the left and right halves of a cross join between `self` and `other`.
    ///
    /// Returns `(left, right)`: two frames meant to be combined side by side, where each
    /// row of `self` is paired with every row of `other`.
    ///
    /// * `other` - the right-hand frame of the join.
    /// * `slice` - optional `(offset, length)` forwarded to the take-index helpers.
    /// * `parallel` - when `true`, both halves are built concurrently on `POOL`.
    ///
    /// # Errors
    ///
    /// Returns `PolarsError::ComputeError` when `self.height() * other.height()` does not
    /// fit in `IdxSize`.
    fn cross_join_dfs(
        &self,
        other: &DataFrame,
        slice: Option<(i64, usize)>,
        parallel: bool,
    ) -> PolarsResult<(DataFrame, DataFrame)> {
        let n_rows_left = self.height() as IdxSize;
        let n_rows_right = other.height() as IdxSize;
        // Guard against overflow of the index type before building any take indices.
        let Some(total_rows) = n_rows_left.checked_mul(n_rows_right) else {
            return Err(PolarsError::ComputeError("Cross joins would produce more rows than fits into 2^32.\n\
            Consider compiling with polars-big-idx feature, or set 'streaming'.".into()));
        };

        // the left side has the Nth row combined with every row from right.
        // So let's say we have the following no. of rows
        // left: 3
        // right: 4
        //
        // left take idx:   000011112222
        // right take idx:  012301230123

        let create_left_df = || {
            // SAFETY:
            // take left is in bounds
            unsafe { self.take_unchecked(&take_left(total_rows, n_rows_right, slice)) }
        };

        let create_right_df = || {
            // concatenation of dataframes is very expensive if we need to make the series mutable
            // many times, these are atomic operations
            // so we choose a different strategy at > 100 rows (arbitrarily small number)
            if n_rows_left > 100 || slice.is_some() {
                // SAFETY:
                // take right is in bounds
                unsafe { other.take_unchecked(&take_right(total_rows, n_rows_right, slice)) }
            } else {
                // Few left rows and no slice: repeat `other` once per left row.
                let iter = (0..n_rows_left).map(|_| other);
                concat_df_unchecked(iter)
            }
        };
        let (l_df, r_df) = if parallel {
            POOL.install(|| rayon::join(create_left_df, create_right_df))
        } else {
            (create_left_df(), create_right_df())
        };
        Ok((l_df, r_df))
    }

Function polars_core::utils::concat_df_unchecked

Examples found in repository?

Examples found in repository ?