pandrs/column/
boolean_column.rs

1use std::sync::Arc;
2use std::any::Any;
3
4use crate::column::common::{Column, ColumnTrait, ColumnType};
5use crate::core::column::BitMask;
6use crate::error::{Error, Result};
7
8/// Structure representing a boolean column (optimized with BitMask)
9#[derive(Debug, Clone)]
10pub struct BooleanColumn {
11    pub(crate) data: BitMask,
12    pub(crate) null_mask: Option<Arc<[u8]>>,
13    pub(crate) name: Option<String>,
14    pub(crate) length: usize,
15}
16
17impl BooleanColumn {
18    /// Create a new BooleanColumn from a vector of booleans
19    pub fn new(data: Vec<bool>) -> Self {
20        let length = data.len();
21        let bitmask = BitMask::from_bools(&data);
22        
23        Self {
24            data: bitmask,
25            null_mask: None,
26            name: None,
27            length,
28        }
29    }
30    
31    /// Create a named BooleanColumn
32    pub fn with_name(data: Vec<bool>, name: impl Into<String>) -> Self {
33        let length = data.len();
34        let bitmask = BitMask::from_bools(&data);
35        
36        Self {
37            data: bitmask,
38            null_mask: None,
39            name: Some(name.into()),
40            length,
41        }
42    }
43    
44    /// Create a BooleanColumn with NULL values
45    pub fn with_nulls(data: Vec<bool>, nulls: Vec<bool>) -> Self {
46        let null_mask = if nulls.iter().any(|&is_null| is_null) {
47            Some(crate::column::common::utils::create_bitmask(&nulls))
48        } else {
49            None
50        };
51        
52        let length = data.len();
53        let bitmask = BitMask::from_bools(&data);
54        
55        Self {
56            data: bitmask,
57            null_mask,
58            name: None,
59            length,
60        }
61    }
62    
63    /// Set the name
64    pub fn set_name(&mut self, name: impl Into<String>) {
65        self.name = Some(name.into());
66    }
67    
68    /// Get the name
69    pub fn get_name(&self) -> Option<&str> {
70        self.name.as_deref()
71    }
72    
73    /// Get a boolean value by index
74    pub fn get(&self, index: usize) -> Result<Option<bool>> {
75        if index >= self.length {
76            return Err(Error::IndexOutOfBounds {
77                index,
78                size: self.length,
79            });
80        }
81        
82        // Check for NULL value
83        if let Some(ref mask) = self.null_mask {
84            let byte_idx = index / 8;
85            let bit_idx = index % 8;
86            if byte_idx < mask.len() && (mask[byte_idx] & (1 << bit_idx)) != 0 {
87                return Ok(None);
88            }
89        }
90        
91        self.data.get(index).map(Some)
92    }
93    
94    /// Get all boolean values in the column
95    pub fn to_bools(&self) -> Vec<Option<bool>> {
96        let mut result = Vec::with_capacity(self.length);
97        
98        for i in 0..self.length {
99            result.push(self.get(i).unwrap_or(None));
100        }
101        
102        result
103    }
104    
105    /// Count the number of true values
106    pub fn count_true(&self) -> usize {
107        let mut count = 0;
108        
109        for i in 0..self.length {
110            if let Ok(Some(true)) = self.get(i) {
111                count += 1;
112            }
113        }
114        
115        count
116    }
117    
118    /// Count the number of false values
119    pub fn count_false(&self) -> usize {
120        let mut count = 0;
121        
122        for i in 0..self.length {
123            if let Ok(Some(false)) = self.get(i) {
124                count += 1;
125            }
126        }
127        
128        count
129    }
130    
131    /// Create a new column by applying a mapping function
132    pub fn map<F>(&self, f: F) -> Self 
133    where
134        F: Fn(bool) -> bool
135    {
136        let mut mapped_data = Vec::with_capacity(self.length);
137        let mut has_nulls = false;
138        
139        for i in 0..self.length {
140            match self.get(i) {
141                Ok(Some(b)) => mapped_data.push(f(b)),
142                Ok(None) => {
143                    has_nulls = true;
144                    mapped_data.push(false); // dummy value
145                },
146                Err(_) => {
147                    has_nulls = true;
148                    mapped_data.push(false); // dummy value
149                },
150            }
151        }
152        
153        if has_nulls {
154            let nulls = (0..self.length)
155                .map(|i| self.get(i).map(|opt| opt.is_none()).unwrap_or(true))
156                .collect();
157            
158            Self::with_nulls(mapped_data, nulls)
159        } else {
160            Self::new(mapped_data)
161        }
162    }
163    
164    /// Create a new column by applying logical NOT operation
165    pub fn logical_not(&self) -> Self {
166        self.map(|b| !b)
167    }
168    
169    /// Create a new column based on filtering conditions
170    pub fn filter<F>(&self, predicate: F) -> Self
171    where
172        F: Fn(Option<bool>) -> bool
173    {
174        let mut filtered_data = Vec::new();
175        let mut filtered_nulls = Vec::new();
176        let has_nulls = self.null_mask.is_some();
177        
178        for i in 0..self.length {
179            let value = self.get(i).unwrap_or(None);
180            if predicate(value) {
181                filtered_data.push(value.unwrap_or(false));
182                if has_nulls {
183                    filtered_nulls.push(value.is_none());
184                }
185            }
186        }
187        
188        if has_nulls {
189            Self::with_nulls(filtered_data, filtered_nulls)
190        } else {
191            Self::new(filtered_data)
192        }
193    }
194}
195
196impl ColumnTrait for BooleanColumn {
197    fn len(&self) -> usize {
198        self.length
199    }
200    
201    fn column_type(&self) -> ColumnType {
202        ColumnType::Boolean
203    }
204    
205    fn name(&self) -> Option<&str> {
206        self.name.as_deref()
207    }
208    
209    fn clone_column(&self) -> crate::core::column::Column {
210        // Convert the legacy Column type to the core Column type
211        let legacy_column = Column::Boolean(self.clone());
212        // This is a temporary workaround - in a complete solution,
213        // we would implement proper conversion between column types
214        crate::core::column::Column::from_any(Box::new(legacy_column))
215    }
216
217    fn as_any(&self) -> &dyn Any {
218        self
219    }
220}