Skip to main content

u_insight/
dataframe.rs

1//! Column-major DataFrame for tabular data.
2//!
3//! The [`DataFrame`] stores data in column-major order with typed columns
4//! and a compact validity bitmap for tracking missing values.
5//!
6//! # Column Types
7//!
8//! | Type | Storage | Use case |
9//! |------|---------|----------|
10//! | [`Numeric`](Column::Numeric) | `Vec<f64>` + bitmap | Continuous/integer values |
11//! | [`Boolean`](Column::Boolean) | `Vec<bool>` + bitmap | True/false values |
//! | [`Categorical`](Column::Categorical) | Dictionary + `Vec<u32>` + bitmap | Low-cardinality strings |
13//! | [`Text`](Column::Text) | `Vec<String>` + bitmap | High-cardinality strings |
14//!
15//! # Example
16//!
17//! ```
18//! use u_insight::dataframe::{DataFrame, Column, ValidityBitmap};
19//!
20//! let mut df = DataFrame::new();
21//! df.add_column(
22//!     "temperature".to_string(),
23//!     Column::numeric(vec![20.5, 21.3, 19.8], ValidityBitmap::all_valid(3)),
24//! ).unwrap();
25//! assert_eq!(df.row_count(), 3);
26//! assert_eq!(df.column_count(), 1);
27//! ```
28
29use crate::error::InsightError;
30
31// ── ValidityBitmap ────────────────────────────────────────────────────
32
/// Bit-packed validity bitmap using `Vec<u64>`.
///
/// Each bit indicates whether the corresponding row is valid (1) or
/// missing/null (0). Uses 1 bit per row instead of 1 byte, yielding
/// 8× memory savings over `Vec<bool>`.
///
/// Row `i` lives in word `i / 64` at bit `i % 64`. Padding bits past `len`
/// in the last word are kept at zero so that population counts and the
/// derived `PartialEq` only ever reflect tracked rows.
#[derive(Debug, Clone, PartialEq)]
pub struct ValidityBitmap {
    bits: Vec<u64>,
    len: usize,
}

impl ValidityBitmap {
    /// Number of rows tracked by one backing word (lossless `u32` → `usize`).
    const WORD_BITS: usize = u64::BITS as usize;

    /// Splits a row index into its backing word index and single-bit mask.
    #[inline]
    fn locate(idx: usize) -> (usize, u64) {
        (idx / Self::WORD_BITS, 1u64 << (idx % Self::WORD_BITS))
    }

    /// Creates a bitmap where all `len` positions are valid.
    pub fn all_valid(len: usize) -> Self {
        let mut bits = vec![u64::MAX; len.div_ceil(Self::WORD_BITS)];
        let trailing = len % Self::WORD_BITS;
        if trailing != 0 {
            // Clear the padding bits of the partially used last word so they
            // are never counted as valid rows.
            if let Some(last) = bits.last_mut() {
                *last = (1u64 << trailing) - 1;
            }
        }
        Self { bits, len }
    }

    /// Creates a bitmap where all `len` positions are invalid (null).
    pub fn all_invalid(len: usize) -> Self {
        Self {
            bits: vec![0u64; len.div_ceil(Self::WORD_BITS)],
            len,
        }
    }

    /// Creates an empty bitmap with no rows.
    pub fn empty() -> Self {
        Self {
            bits: Vec::new(),
            len: 0,
        }
    }

    /// Returns `true` if the value at `idx` is valid (not null).
    ///
    /// # Panics
    ///
    /// Panics in debug builds when `idx >= len`, and in every build when
    /// `idx` falls outside the backing words.
    #[inline]
    pub fn is_valid(&self, idx: usize) -> bool {
        debug_assert!(
            idx < self.len,
            "index {idx} out of bounds (len={})",
            self.len
        );
        let (word, mask) = Self::locate(idx);
        self.bits[word] & mask != 0
    }

    /// Marks position `idx` as valid.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= len`. The check is unconditional: setting a padding
    /// bit would silently inflate the valid count and make `null_count`
    /// underflow.
    #[inline]
    pub fn set_valid(&mut self, idx: usize) {
        assert!(
            idx < self.len,
            "index {idx} out of bounds (len={})",
            self.len
        );
        let (word, mask) = Self::locate(idx);
        self.bits[word] |= mask;
    }

    /// Marks position `idx` as invalid (null).
    ///
    /// # Panics
    ///
    /// Panics if `idx >= len` (checked in every build, mirroring
    /// [`set_valid`](Self::set_valid)).
    #[inline]
    pub fn set_invalid(&mut self, idx: usize) {
        assert!(
            idx < self.len,
            "index {idx} out of bounds (len={})",
            self.len
        );
        let (word, mask) = Self::locate(idx);
        self.bits[word] &= !mask;
    }

    /// Appends a new position (valid or invalid).
    pub fn push(&mut self, valid: bool) {
        let (word, mask) = Self::locate(self.len);
        // Grow by one zeroed word when the new row starts a fresh word.
        if word >= self.bits.len() {
            self.bits.push(0);
        }
        self.len += 1;
        if valid {
            self.bits[word] |= mask;
        }
    }

    /// Returns the total number of tracked positions.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if the bitmap tracks zero positions.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Counts the number of null (invalid) positions.
    pub fn null_count(&self) -> usize {
        self.len - self.valid_count()
    }

    /// Counts the number of valid (non-null) positions.
    ///
    /// Sums `u64::count_ones` over the backing words; padding bits are zero,
    /// so only tracked rows contribute.
    pub fn valid_count(&self) -> usize {
        self.bits.iter().map(|word| word.count_ones() as usize).sum()
    }

    /// Returns `true` if any position is null.
    pub fn has_nulls(&self) -> bool {
        self.valid_count() < self.len
    }

    /// Returns an iterator over indices of valid positions, in ascending order.
    pub fn valid_indices(&self) -> ValidIndicesIter<'_> {
        ValidIndicesIter {
            bitmap: self,
            current: 0,
        }
    }
}

/// Iterator over valid indices in a [`ValidityBitmap`].
///
/// Created by [`ValidityBitmap::valid_indices`]; yields row indices in
/// ascending order.
#[derive(Debug, Clone)]
pub struct ValidIndicesIter<'a> {
    bitmap: &'a ValidityBitmap,
    /// Next row index to examine.
    current: usize,
}

impl<'a> Iterator for ValidIndicesIter<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<usize> {
        let end = self.bitmap.len;
        let found = (self.current..end).find(|&idx| self.bitmap.is_valid(idx));
        // Resume just past the hit, or park at the end once exhausted.
        self.current = found.map_or(end, |idx| idx + 1);
        found
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // Every remaining row may be null, so only the upper bound is known.
        (0, Some(self.bitmap.len - self.current))
    }
}
186
187// ── DataType ──────────────────────────────────────────────────────────
188
/// Semantic data type inferred for a column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataType {
    /// Continuous or integer numeric values (stored as `f64`).
    Numeric,
    /// Boolean (true/false) values.
    Boolean,
    /// Low-cardinality strings (dictionary-encoded).
    Categorical,
    /// High-cardinality or free-form text.
    Text,
}

/// Renders the variant name verbatim (`"Numeric"`, `"Boolean"`, ...).
impl std::fmt::Display for DataType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Self::Numeric => "Numeric",
            Self::Boolean => "Boolean",
            Self::Categorical => "Categorical",
            Self::Text => "Text",
        };
        f.write_str(label)
    }
}
212
213// ── Column ────────────────────────────────────────────────────────────
214
215/// A typed column with validity bitmap for missing values.
216///
217/// All variants store values in a dense array alongside a
218/// [`ValidityBitmap`]. Invalid positions hold a default value
219/// (0.0, false, empty string, or index 0) that should be ignored.
220#[derive(Debug, Clone, PartialEq)]
221pub enum Column {
222    /// Dense `f64` values. Null positions hold `0.0`.
223    Numeric {
224        values: Vec<f64>,
225        validity: ValidityBitmap,
226    },
227    /// Boolean values. Null positions hold `false`.
228    Boolean {
229        values: Vec<bool>,
230        validity: ValidityBitmap,
231    },
232    /// Dictionary-encoded categorical column.
233    ///
234    /// `dictionary` contains unique string values.
235    /// `indices` maps each row to a dictionary index.
236    /// Null positions have index `0` (ignored via validity bit).
237    Categorical {
238        dictionary: Vec<String>,
239        indices: Vec<u32>,
240        validity: ValidityBitmap,
241    },
242    /// Free-form text column. Null positions hold an empty string.
243    Text {
244        values: Vec<String>,
245        validity: ValidityBitmap,
246    },
247}
248
249impl Column {
250    /// Creates a numeric column.
251    pub fn numeric(values: Vec<f64>, validity: ValidityBitmap) -> Self {
252        Self::Numeric { values, validity }
253    }
254
255    /// Creates a boolean column.
256    pub fn boolean(values: Vec<bool>, validity: ValidityBitmap) -> Self {
257        Self::Boolean { values, validity }
258    }
259
260    /// Creates a categorical column from a dictionary and indices.
261    pub fn categorical(
262        dictionary: Vec<String>,
263        indices: Vec<u32>,
264        validity: ValidityBitmap,
265    ) -> Self {
266        Self::Categorical {
267            dictionary,
268            indices,
269            validity,
270        }
271    }
272
273    /// Creates a text column.
274    pub fn text(values: Vec<String>, validity: ValidityBitmap) -> Self {
275        Self::Text { values, validity }
276    }
277
278    /// Returns the data type of this column.
279    pub fn data_type(&self) -> DataType {
280        match self {
281            Self::Numeric { .. } => DataType::Numeric,
282            Self::Boolean { .. } => DataType::Boolean,
283            Self::Categorical { .. } => DataType::Categorical,
284            Self::Text { .. } => DataType::Text,
285        }
286    }
287
288    /// Returns the number of rows in this column.
289    pub fn len(&self) -> usize {
290        self.validity().len()
291    }
292
293    /// Returns `true` if the column has no rows.
294    pub fn is_empty(&self) -> bool {
295        self.len() == 0
296    }
297
298    /// Returns a reference to the validity bitmap.
299    pub fn validity(&self) -> &ValidityBitmap {
300        match self {
301            Self::Numeric { validity, .. }
302            | Self::Boolean { validity, .. }
303            | Self::Categorical { validity, .. }
304            | Self::Text { validity, .. } => validity,
305        }
306    }
307
308    /// Returns the number of null values.
309    pub fn null_count(&self) -> usize {
310        self.validity().null_count()
311    }
312
313    /// Returns the number of valid (non-null) values.
314    pub fn valid_count(&self) -> usize {
315        self.validity().valid_count()
316    }
317
318    /// Returns `true` if the value at `idx` is valid (not null).
319    pub fn is_valid(&self, idx: usize) -> bool {
320        self.validity().is_valid(idx)
321    }
322
323    /// Returns the numeric values, or `None` if not a numeric column.
324    pub fn as_numeric(&self) -> Option<&[f64]> {
325        match self {
326            Self::Numeric { values, .. } => Some(values),
327            _ => None,
328        }
329    }
330
331    /// Returns the boolean values, or `None` if not a boolean column.
332    pub fn as_boolean(&self) -> Option<&[bool]> {
333        match self {
334            Self::Boolean { values, .. } => Some(values),
335            _ => None,
336        }
337    }
338
339    /// Returns valid numeric values (nulls excluded) as a new `Vec<f64>`.
340    pub fn valid_numeric_values(&self) -> Option<Vec<f64>> {
341        match self {
342            Self::Numeric { values, validity } => {
343                let result: Vec<f64> = validity.valid_indices().map(|i| values[i]).collect();
344                Some(result)
345            }
346            _ => None,
347        }
348    }
349
350    /// Returns the category string for a given row index in a categorical column.
351    pub fn category_at(&self, idx: usize) -> Option<&str> {
352        match self {
353            Self::Categorical {
354                dictionary,
355                indices,
356                validity,
357            } => {
358                if validity.is_valid(idx) {
359                    dictionary.get(indices[idx] as usize).map(|s| s.as_str())
360                } else {
361                    None
362                }
363            }
364            _ => None,
365        }
366    }
367
368    /// Returns the text value for a given row index in a text column.
369    pub fn text_at(&self, idx: usize) -> Option<&str> {
370        match self {
371            Self::Text { values, validity } => {
372                if validity.is_valid(idx) {
373                    Some(&values[idx])
374                } else {
375                    None
376                }
377            }
378            _ => None,
379        }
380    }
381}
382
383// ── DataFrame ─────────────────────────────────────────────────────────
384
385/// Column-major tabular data structure.
386///
387/// Stores named columns of typed data. All columns must have the same
388/// number of rows. Supports numeric, boolean, categorical, and text
389/// column types.
390///
391/// # Example
392///
393/// ```
394/// use u_insight::dataframe::{DataFrame, Column, ValidityBitmap};
395///
396/// let mut df = DataFrame::new();
397/// df.add_column(
398///     "x".to_string(),
399///     Column::numeric(vec![1.0, 2.0, 3.0], ValidityBitmap::all_valid(3)),
400/// ).unwrap();
401/// df.add_column(
402///     "label".to_string(),
403///     Column::text(
404///         vec!["a".into(), "b".into(), "c".into()],
405///         ValidityBitmap::all_valid(3),
406///     ),
407/// ).unwrap();
408/// assert_eq!(df.row_count(), 3);
409/// assert_eq!(df.column_count(), 2);
410/// ```
411#[derive(Debug, Clone)]
412pub struct DataFrame {
413    names: Vec<String>,
414    columns: Vec<Column>,
415    row_count: usize,
416}
417
418impl DataFrame {
419    /// Creates an empty DataFrame with no columns or rows.
420    pub fn new() -> Self {
421        Self {
422            names: Vec::new(),
423            columns: Vec::new(),
424            row_count: 0,
425        }
426    }
427
428    /// Adds a named column to the DataFrame.
429    ///
430    /// Returns an error if the column length doesn't match the existing
431    /// row count (unless this is the first column).
432    pub fn add_column(&mut self, name: String, column: Column) -> Result<(), InsightError> {
433        let col_len = column.len();
434        if self.columns.is_empty() {
435            self.row_count = col_len;
436        } else if col_len != self.row_count {
437            return Err(InsightError::DimensionMismatch {
438                expected: self.row_count,
439                actual: col_len,
440            });
441        }
442        self.names.push(name);
443        self.columns.push(column);
444        Ok(())
445    }
446
447    /// Returns the number of rows.
448    #[inline]
449    pub fn row_count(&self) -> usize {
450        self.row_count
451    }
452
453    /// Returns the number of columns.
454    #[inline]
455    pub fn column_count(&self) -> usize {
456        self.columns.len()
457    }
458
459    /// Returns `true` if the DataFrame has no columns.
460    pub fn is_empty(&self) -> bool {
461        self.columns.is_empty()
462    }
463
464    /// Returns column names.
465    pub fn column_names(&self) -> &[String] {
466        &self.names
467    }
468
469    /// Returns a reference to the column at `index`.
470    pub fn column(&self, index: usize) -> Option<&Column> {
471        self.columns.get(index)
472    }
473
474    /// Returns a reference to the column with the given `name`.
475    pub fn column_by_name(&self, name: &str) -> Option<&Column> {
476        self.column_index(name).map(|i| &self.columns[i])
477    }
478
479    /// Returns the index of the column with the given `name`.
480    pub fn column_index(&self, name: &str) -> Option<usize> {
481        self.names.iter().position(|n| n == name)
482    }
483
484    /// Returns an iterator over (name, column) pairs.
485    pub fn iter(&self) -> impl Iterator<Item = (&str, &Column)> {
486        self.names
487            .iter()
488            .map(|s| s.as_str())
489            .zip(self.columns.iter())
490    }
491
492    /// Returns a summary of column data types.
493    pub fn schema(&self) -> Vec<(&str, DataType)> {
494        self.names
495            .iter()
496            .zip(self.columns.iter())
497            .map(|(name, col)| (name.as_str(), col.data_type()))
498            .collect()
499    }
500
501    /// Returns the total number of null values across all columns.
502    pub fn total_null_count(&self) -> usize {
503        self.columns.iter().map(|c| c.null_count()).sum()
504    }
505}
506
507impl Default for DataFrame {
508    fn default() -> Self {
509        Self::new()
510    }
511}
512
513// ── Tests ─────────────────────────────────────────────────────────────
514
// Unit tests for the bitmap, column, and DataFrame types defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // ── ValidityBitmap tests ──────────────────────────────────────

    // A fresh all-valid bitmap reports no nulls and every row as valid.
    #[test]
    fn bitmap_all_valid() {
        let bm = ValidityBitmap::all_valid(100);
        assert_eq!(bm.len(), 100);
        assert_eq!(bm.null_count(), 0);
        assert_eq!(bm.valid_count(), 100);
        for i in 0..100 {
            assert!(bm.is_valid(i));
        }
    }

    // A fresh all-invalid bitmap reports every row as null.
    #[test]
    fn bitmap_all_invalid() {
        let bm = ValidityBitmap::all_invalid(100);
        assert_eq!(bm.null_count(), 100);
        assert_eq!(bm.valid_count(), 0);
        for i in 0..100 {
            assert!(!bm.is_valid(i));
        }
    }

    // Clearing and re-setting individual bits updates both the per-row
    // answer and the aggregate null count.
    #[test]
    fn bitmap_set_operations() {
        let mut bm = ValidityBitmap::all_valid(10);
        bm.set_invalid(3);
        bm.set_invalid(7);
        assert_eq!(bm.null_count(), 2);
        assert!(!bm.is_valid(3));
        assert!(!bm.is_valid(7));
        assert!(bm.is_valid(0));
        assert!(bm.is_valid(9));

        bm.set_valid(3);
        assert!(bm.is_valid(3));
        assert_eq!(bm.null_count(), 1);
    }

    // Pushing onto an empty bitmap grows it one row at a time.
    #[test]
    fn bitmap_push() {
        let mut bm = ValidityBitmap::empty();
        bm.push(true);
        bm.push(false);
        bm.push(true);
        assert_eq!(bm.len(), 3);
        assert!(bm.is_valid(0));
        assert!(!bm.is_valid(1));
        assert!(bm.is_valid(2));
        assert_eq!(bm.null_count(), 1);
    }

    // 64 rows fit in exactly one backing word; the 65th row needs a second.
    #[test]
    fn bitmap_boundary_64() {
        let bm = ValidityBitmap::all_valid(64);
        assert_eq!(bm.bits.len(), 1);
        assert_eq!(bm.null_count(), 0);

        let bm65 = ValidityBitmap::all_valid(65);
        assert_eq!(bm65.bits.len(), 2);
        assert_eq!(bm65.null_count(), 0);
        assert!(bm65.is_valid(64));
    }

    // Pushing 128 rows spans two backing words; the null count must match
    // the pattern that was pushed.
    #[test]
    fn bitmap_push_across_word_boundary() {
        let mut bm = ValidityBitmap::empty();
        for i in 0..128 {
            bm.push(i % 3 != 0); // every 3rd is null
        }
        assert_eq!(bm.len(), 128);
        let expected_nulls = (0..128).filter(|i| i % 3 == 0).count();
        assert_eq!(bm.null_count(), expected_nulls);
    }

    // The valid-index iterator skips null rows and yields ascending indices.
    #[test]
    fn bitmap_valid_indices() {
        let mut bm = ValidityBitmap::all_valid(5);
        bm.set_invalid(1);
        bm.set_invalid(3);
        let indices: Vec<usize> = bm.valid_indices().collect();
        assert_eq!(indices, vec![0, 2, 4]);
    }

    // ── Column tests ─────────────────────────────────────────────

    // A numeric column exposes its type, length, and raw value slice.
    #[test]
    fn numeric_column_basics() {
        let col = Column::numeric(vec![1.0, 2.0, 3.0], ValidityBitmap::all_valid(3));
        assert_eq!(col.data_type(), DataType::Numeric);
        assert_eq!(col.len(), 3);
        assert_eq!(col.null_count(), 0);
        assert_eq!(col.as_numeric(), Some(&[1.0, 2.0, 3.0][..]));
    }

    // Null rows are counted and excluded from `valid_numeric_values`.
    #[test]
    fn numeric_column_with_nulls() {
        let mut validity = ValidityBitmap::all_valid(4);
        validity.set_invalid(1);
        validity.set_invalid(3);
        let col = Column::numeric(vec![1.0, 0.0, 3.0, 0.0], validity);
        assert_eq!(col.null_count(), 2);
        assert_eq!(col.valid_count(), 2);
        assert!(col.is_valid(0));
        assert!(!col.is_valid(1));
        let valid = col.valid_numeric_values().expect("numeric column");
        assert_eq!(valid, vec![1.0, 3.0]);
    }

    // A boolean column exposes its type and raw value slice.
    #[test]
    fn boolean_column() {
        let col = Column::boolean(vec![true, false, true], ValidityBitmap::all_valid(3));
        assert_eq!(col.data_type(), DataType::Boolean);
        assert_eq!(col.as_boolean(), Some(&[true, false, true][..]));
    }

    // Categorical rows resolve through the dictionary.
    #[test]
    fn categorical_column() {
        let dict = vec!["low".into(), "med".into(), "high".into()];
        let indices = vec![0, 1, 2, 1, 0];
        let col = Column::categorical(dict, indices, ValidityBitmap::all_valid(5));
        assert_eq!(col.data_type(), DataType::Categorical);
        assert_eq!(col.category_at(0), Some("low"));
        assert_eq!(col.category_at(1), Some("med"));
        assert_eq!(col.category_at(2), Some("high"));
        assert_eq!(col.category_at(3), Some("med"));
    }

    // A null categorical row yields `None` even though its stored index
    // points at a real dictionary entry.
    #[test]
    fn categorical_column_with_null() {
        let dict = vec!["a".into(), "b".into()];
        let indices = vec![0, 0, 1];
        let mut validity = ValidityBitmap::all_valid(3);
        validity.set_invalid(1);
        let col = Column::categorical(dict, indices, validity);
        assert_eq!(col.category_at(0), Some("a"));
        assert_eq!(col.category_at(1), None);
        assert_eq!(col.category_at(2), Some("b"));
    }

    // Text rows are returned as string slices.
    #[test]
    fn text_column() {
        let col = Column::text(
            vec!["hello".into(), "world".into()],
            ValidityBitmap::all_valid(2),
        );
        assert_eq!(col.data_type(), DataType::Text);
        assert_eq!(col.text_at(0), Some("hello"));
        assert_eq!(col.text_at(1), Some("world"));
    }

    // A null text row yields `None` rather than its placeholder string.
    #[test]
    fn text_column_with_null() {
        let mut validity = ValidityBitmap::all_valid(2);
        validity.set_invalid(0);
        let col = Column::text(vec![String::new(), "world".into()], validity);
        assert_eq!(col.text_at(0), None);
        assert_eq!(col.text_at(1), Some("world"));
    }

    // ── DataFrame tests ──────────────────────────────────────────

    // A new DataFrame has no rows and no columns.
    #[test]
    fn empty_dataframe() {
        let df = DataFrame::new();
        assert_eq!(df.row_count(), 0);
        assert_eq!(df.column_count(), 0);
        assert!(df.is_empty());
    }

    // Columns of equal length are accepted and keep their insertion order.
    #[test]
    fn add_columns() {
        let mut df = DataFrame::new();
        df.add_column(
            "x".to_string(),
            Column::numeric(vec![1.0, 2.0, 3.0], ValidityBitmap::all_valid(3)),
        )
        .expect("first column");

        df.add_column(
            "y".to_string(),
            Column::numeric(vec![4.0, 5.0, 6.0], ValidityBitmap::all_valid(3)),
        )
        .expect("second column");

        assert_eq!(df.row_count(), 3);
        assert_eq!(df.column_count(), 2);
        assert_eq!(df.column_names(), &["x", "y"]);
    }

    // A second column with a different row count is rejected.
    #[test]
    fn column_length_mismatch() {
        let mut df = DataFrame::new();
        df.add_column(
            "x".to_string(),
            Column::numeric(vec![1.0, 2.0], ValidityBitmap::all_valid(2)),
        )
        .unwrap();

        let result = df.add_column(
            "y".to_string(),
            Column::numeric(vec![1.0, 2.0, 3.0], ValidityBitmap::all_valid(3)),
        );
        assert!(result.is_err());
    }

    // Name lookup finds an existing column and returns `None` for an unknown name.
    #[test]
    fn column_by_name_lookup() {
        let mut df = DataFrame::new();
        df.add_column(
            "temp".to_string(),
            Column::numeric(vec![20.5, 21.3], ValidityBitmap::all_valid(2)),
        )
        .unwrap();

        let col = df.column_by_name("temp").expect("found");
        assert_eq!(col.data_type(), DataType::Numeric);

        assert!(df.column_by_name("missing").is_none());
    }

    // The schema lists (name, type) pairs in insertion order.
    #[test]
    fn dataframe_schema() {
        let mut df = DataFrame::new();
        df.add_column(
            "x".to_string(),
            Column::numeric(vec![1.0], ValidityBitmap::all_valid(1)),
        )
        .unwrap();
        df.add_column(
            "ok".to_string(),
            Column::boolean(vec![true], ValidityBitmap::all_valid(1)),
        )
        .unwrap();
        df.add_column(
            "label".to_string(),
            Column::text(vec!["a".into()], ValidityBitmap::all_valid(1)),
        )
        .unwrap();

        let schema = df.schema();
        assert_eq!(schema[0], ("x", DataType::Numeric));
        assert_eq!(schema[1], ("ok", DataType::Boolean));
        assert_eq!(schema[2], ("label", DataType::Text));
    }

    // Null counts are summed across columns (1 null in "a" + 2 in "b").
    #[test]
    fn total_null_count() {
        let mut df = DataFrame::new();
        let mut v1 = ValidityBitmap::all_valid(3);
        v1.set_invalid(1);
        let mut v2 = ValidityBitmap::all_valid(3);
        v2.set_invalid(0);
        v2.set_invalid(2);
        df.add_column("a".into(), Column::numeric(vec![1.0, 0.0, 3.0], v1))
            .unwrap();
        df.add_column("b".into(), Column::numeric(vec![0.0, 5.0, 0.0], v2))
            .unwrap();
        assert_eq!(df.total_null_count(), 3);
    }

    // Iteration yields (name, column) pairs in insertion order.
    #[test]
    fn dataframe_iter() {
        let mut df = DataFrame::new();
        df.add_column(
            "x".into(),
            Column::numeric(vec![1.0], ValidityBitmap::all_valid(1)),
        )
        .unwrap();
        df.add_column(
            "y".into(),
            Column::numeric(vec![2.0], ValidityBitmap::all_valid(1)),
        )
        .unwrap();

        let pairs: Vec<(&str, DataType)> = df.iter().map(|(n, c)| (n, c.data_type())).collect();
        assert_eq!(
            pairs,
            vec![("x", DataType::Numeric), ("y", DataType::Numeric)]
        );
    }
}