use polars::prelude::*;
/// Removes every column of `df` that holds no non-null value.
///
/// A column is kept when at least one of its values is non-null. Columns that are
/// entirely null are dropped, and so are zero-length columns, since they contain no
/// non-null value either. Kept columns are selected in the order in which they appear
/// in `df`.
///
/// The names of the dropped columns are reported through a `tracing` debug event.
///
/// # Errors
///
/// Returns any error produced by [`DataFrame::select`] while building the result.
pub fn remove_null_columns(df: DataFrame) -> PolarsResult<DataFrame> {
    // `null_count() < len()` is true exactly when a non-null value exists, and it avoids
    // materialising a boolean mask per column just to ask `any()` of it.
    let (cols_to_keep, cols_to_remove): (Vec<&Column>, Vec<&Column>) = df
        .columns()
        .iter()
        .partition(|col| col.null_count() < col.len());

    let columns_to_keep: Vec<PlSmallStr> =
        cols_to_keep.iter().map(|col| col.name().clone()).collect();
    let columns_to_remove: Vec<&PlSmallStr> =
        cols_to_remove.iter().map(|col| col.name()).collect();

    tracing::debug!(
        removed_columns = ?columns_to_remove,
        "{} columns removed for being fully null",
        columns_to_remove.len()
    );

    df.select(columns_to_keep)
}
#[cfg(test)]
mod tests_remove_null_columns {
    //! Unit tests for `remove_null_columns`.
    //!
    //! Each test builds an input frame, runs the function, and compares the result with a
    //! hand-built expected frame. The assertion messages print both frames on failure.

    use super::*;

    /// Fully-null columns are dropped while partially-null columns are kept.
    #[test]
    fn remove_some_all_null_columns() -> PolarsResult<()> {
        let all_null_int = Series::full_null("all_null_int".into(), 3, &DataType::Int32);
        let all_null_str = Series::full_null("all_null_str".into(), 3, &DataType::String);

        let df_input = df!(
            "col_a" => &[Some(1), None, Some(3)],
            "col_b" => &all_null_int,
            "col_c" => &[None, Some("hello"), None],
            "col_d" => &all_null_str,
            "col_e" => &[Some(1.1), Some(2.2), None]
        )?;
        let df_expected = df!(
            "col_a" => &[Some(1), None, Some(3)],
            "col_c" => &[None, Some("hello"), None],
            "col_e" => &[Some(1.1), Some(2.2), None]
        )?;

        let df_output = remove_null_columns(df_input)?;

        // `equals_missing` is used because the kept columns still contain nulls.
        assert!(
            df_output.equals_missing(&df_expected),
            "Failed removing some null columns.\nOutput:\n{df_output:?}\nExpected:\n{df_expected:?}"
        );
        Ok(())
    }

    /// A frame in which every column has at least one non-null value is returned unchanged.
    #[test]
    fn no_null_columns_to_remove() -> PolarsResult<()> {
        let df_input = df!(
            "col_a" => &[Some(1), Some(2)],
            "col_b" => &[None, Some(true)],
            "col_c" => &[Some("a"), Some("b")]
        )?;
        let df_expected = df_input.clone();

        let df_output = remove_null_columns(df_input)?;

        assert!(
            df_output.equals_missing(&df_expected),
            "Failed when no columns should be removed.\nOutput:\n{df_output:?}\nExpected:\n{df_expected:?}"
        );
        Ok(())
    }

    /// A default (empty) frame yields a default frame.
    #[test]
    fn empty_dataframe_input() -> PolarsResult<()> {
        let df_input = DataFrame::default();
        let df_expected = DataFrame::default();

        let df_output = remove_null_columns(df_input)?;

        assert!(
            df_output.equals(&df_expected),
            "Failed for empty input DataFrame.\nOutput:\n{df_output:?}\nExpected:\n{df_expected:?}"
        );
        Ok(())
    }

    /// When every column is fully null, all columns are dropped and the shape is `(2, 0)`.
    #[test]
    fn all_columns_are_all_null() -> PolarsResult<()> {
        let height = 2;
        let all_null_col1 = Column::full_null("all_null_1".into(), height, &DataType::Float64);
        let all_null_col2 = Column::full_null("all_null_2".into(), height, &DataType::Boolean);
        let df_input = DataFrame::new(height, vec![all_null_col1, all_null_col2])?;
        // Selecting no columns from the input gives the expected column-less frame.
        let df_expected = df_input.select(Vec::<PlSmallStr>::new())?;

        let df_output = remove_null_columns(df_input)?;

        assert!(
            df_output.equals(&df_expected),
            "Failed when all columns are null.\nOutput:\n{df_output:?}\nExpected:\n{df_expected:?}"
        );
        assert_eq!(df_output.shape(), (2, 0), "Output shape mismatch");
        assert_eq!(df_expected.shape(), (2, 0), "Expected shape mismatch");
        Ok(())
    }

    /// Zero-row columns contain no non-null value, so all of them are dropped.
    #[test]
    fn dataframe_with_zero_rows() -> PolarsResult<()> {
        let df_input = df!(
            "col_a" => &Vec::<Option<i32>>::new(),
            "col_b" => &Vec::<Option<String>>::new()
        )?;
        let df_expected = DataFrame::default();

        let df_output = remove_null_columns(df_input)?;

        assert!(
            df_output.equals(&df_expected),
            "Failed for DataFrame with zero rows.\nOutput:\n{df_output:?}\nExpected:\n{df_expected:?}"
        );
        Ok(())
    }
}