Skip to main content

nexcore_dataframe/
select.rs

//! Column selection operations on DataFrames.
2
3use crate::column::Column;
4use crate::dataframe::DataFrame;
5use crate::error::DataFrameError;
6
7impl DataFrame {
8    /// Select a subset of columns by name. Returns an error if any column is not found.
9    pub fn select(&self, names: &[&str]) -> Result<Self, DataFrameError> {
10        let mut columns = Vec::with_capacity(names.len());
11        for name in names {
12            let col = self.column(name)?;
13            columns.push(col.clone());
14        }
15        Ok(Self::from_columns_unchecked(columns))
16    }
17
18    /// Drop columns by name. Silently ignores names not found.
19    #[must_use]
20    pub fn drop_columns(&self, names: &[&str]) -> Self {
21        let columns: Vec<Column> = self
22            .columns()
23            .iter()
24            .filter(|c| !names.contains(&c.name()))
25            .cloned()
26            .collect();
27        Self::from_columns_unchecked(columns)
28    }
29
30    /// Rename a column. Returns error if the column is not found.
31    pub fn rename_column(&self, old: &str, new: &str) -> Result<Self, DataFrameError> {
32        // Verify column exists — propagate error if not found
33        self.column(old)?;
34        let columns: Vec<Column> = self
35            .columns()
36            .iter()
37            .map(|c| {
38                if c.name() == old {
39                    c.rename(new)
40                } else {
41                    c.clone()
42                }
43            })
44            .collect();
45        Ok(Self::from_columns_unchecked(columns))
46    }
47}
48
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scalar::Scalar;

    /// Build a fixture frame; the fixtures used in this module are expected
    /// to construct successfully.
    fn frame(columns: Vec<Column>) -> DataFrame {
        DataFrame::new(columns).unwrap_or_else(|_| unreachable!())
    }

    /// Selecting a subset keeps exactly the requested columns, in order.
    #[test]
    fn select_columns() {
        let df = frame(vec![
            Column::from_i64s("a", vec![1, 2]),
            Column::from_i64s("b", vec![3, 4]),
            Column::from_i64s("c", vec![5, 6]),
        ]);

        let selected = df.select(&["a", "c"]).unwrap_or_else(|_| unreachable!());

        assert_eq!(selected.width(), 2);
        assert_eq!(selected.column_names(), vec!["a", "c"]);
    }

    /// Selecting an unknown name fails even when other names are valid.
    #[test]
    fn select_missing_column() {
        let df = frame(vec![Column::from_i64s("a", vec![1])]);

        let outcome = df.select(&["a", "missing"]);

        assert!(outcome.is_err());
    }

    /// Dropping a column removes it and leaves the others in place.
    #[test]
    fn drop_columns_basic() {
        let df = frame(vec![
            Column::from_i64s("a", vec![1]),
            Column::from_i64s("b", vec![2]),
            Column::from_i64s("c", vec![3]),
        ]);

        let dropped = df.drop_columns(&["b"]);

        assert_eq!(dropped.width(), 2);
        assert_eq!(dropped.column_names(), vec!["a", "c"]);
    }

    /// Dropping a name that matches nothing leaves the frame's width unchanged.
    #[test]
    fn drop_columns_nonexistent_ignored() {
        let df = frame(vec![Column::from_i64s("a", vec![1])]);

        let dropped = df.drop_columns(&["missing"]);

        assert_eq!(dropped.width(), 1);
    }

    /// Renaming exposes the data under the new name and retires the old one.
    #[test]
    fn rename_column_basic() {
        let df = frame(vec![
            Column::from_i64s("old_name", vec![1, 2]),
            Column::from_i64s("keep", vec![3, 4]),
        ]);

        let renamed = df
            .rename_column("old_name", "new_name")
            .unwrap_or_else(|_| unreachable!());

        assert!(renamed.column("new_name").is_ok());
        assert!(renamed.column("old_name").is_err());

        let new_col = renamed
            .column("new_name")
            .unwrap_or_else(|_| unreachable!());
        assert_eq!(new_col.get(0), Some(Scalar::Int64(1)));
    }

    /// Renaming a column that does not exist is reported as an error.
    #[test]
    fn rename_column_not_found() {
        let df = frame(vec![Column::from_i64s("a", vec![1])]);

        let outcome = df.rename_column("missing", "new");

        assert!(outcome.is_err());
    }
}