1use ndarray::{Array, Array1, Array2, ArrayView1, ArrayView2, Axis, ShapeBuilder};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
5use crate::dataframe::column_store::typed_array::{TypedData, TypedDataArray};
6use crate::error::Error;
7use crate::{dataframe::index::Index, CandidateData, JoinBy, JoinRelation, Key};
8use data_value::{DataValue, Extract};
9use tracing::*;
10mod from;
11mod key_index;
12mod ops;
13pub mod sorted_df;
15pub use key_index::KeyIndex;
16pub mod filter_df;
18pub mod typed_array;
20
/// Column-oriented data frame: a key index paired with one typed array per column.
#[derive(Debug, Clone, Default, PartialEq, Serialize)]
pub struct ColumnFrame {
    /// Column keys together with the key-to-column-position lookup.
    pub index: KeyIndex,
    /// Column storage; `nrows()` reports the length of the first entry and
    /// `ncolumns()` the number of entries.
    pub data_frame: Vec<TypedDataArray>,
}
36
37impl<'de> Deserialize<'de> for ColumnFrame {
41 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
42 where
43 D: serde::Deserializer<'de>,
44 {
45 #[derive(Debug, Deserialize)]
48 #[serde(untagged)]
49 enum WireDataframe {
50 V3(Vec<TypedDataArray>),
51 V2(Vec<TypedData>),
52 V1(Array2<DataValue>),
53 }
54 #[derive(Debug, Deserialize)]
55 struct WireData {
56 index: KeyIndex,
57 data_frame: WireDataframe,
58 }
59
60 let helper = WireData::deserialize(deserializer)?;
61 match helper.data_frame {
62 WireDataframe::V1(data_frame) => {
63 let ncols = data_frame.ncols();
64 let data_frame: Vec<TypedDataArray> = (0..ncols)
65 .map(|i| {
66 let values: Vec<DataValue> = data_frame.column(i).iter().cloned().collect();
67 let dtype = helper
68 .index
69 .get_keys()
70 .get(i)
71 .map(|k| k.ctype)
72 .unwrap_or(crate::DataType::Unknown);
73 TypedDataArray::new(dtype, values)
74 })
75 .collect();
76 Ok(ColumnFrame {
77 index: helper.index,
78 data_frame,
79 })
80 }
81 WireDataframe::V2(data_frame) => {
82 let data_frame = data_frame.into_iter().map(TypedDataArray::from).collect();
83 Ok(ColumnFrame {
84 index: helper.index,
85 data_frame,
86 })
87 }
88 WireDataframe::V3(data_frame) => Ok(ColumnFrame {
89 index: helper.index,
90 data_frame,
91 }),
92 }
93 }
94}
95
/// Signals whether a multi-step frame operation should keep going or is
/// already finished.
enum Continue {
    /// More work remains for the caller.
    Continue,
    /// Nothing left to do; the caller should return.
    End,
}

impl Continue {
    /// Returns `true` only for [`Continue::End`].
    pub fn should_end(&self) -> bool {
        match self {
            Self::End => true,
            Self::Continue => false,
        }
    }
}
106
107use std::fmt;
108
109impl fmt::Display for ColumnFrame {
110 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111 write!(f, "\n|")?;
113
114 for key in &self.index.keys {
115 write!(f, " {key} |")?;
116 }
117
118 if self.index.is_empty() {
119 writeln!(f, "|")?;
120 }
121
122 write!(f, "\n|")?;
124 for value in self.index.keys.iter() {
125 write!(f, " {:10?} |", value.ctype)?;
127 }
128 writeln!(f)?;
129
130 writeln!(f, "---")?;
131
132 for row_idx in 0..std::cmp::min(self.nrows(), 257) {
134 write!(f, "|")?;
135 for col in self.data_frame.iter() {
136 write!(f, " {} |", col.get(row_idx).unwrap_or_default())?;
137 }
138 writeln!(f)?;
139 if row_idx >= 256 {
140 writeln!(f, "... (dataframe is too long)")?;
141 break;
142 }
143 }
144
145 writeln!(f, "---")
146 }
147}
148pub fn convert_data_value(item: DataValue, dtype: crate::DataType) -> DataValue {
153 let x = &item;
154 match dtype {
155 crate::DataType::Bool => DataValue::Bool(bool::extract(x)),
156 crate::DataType::U32 => DataValue::U32(u32::extract(x)),
157 crate::DataType::I32 => DataValue::I32(i32::extract(x)),
158 crate::DataType::U64 => DataValue::U64(u64::extract(x)),
159 crate::DataType::I64 => DataValue::I64(i64::extract(x)),
160 crate::DataType::F32 => DataValue::F32(f32::extract(x)),
161 crate::DataType::U128 => DataValue::U128(u128::extract(x)),
162 crate::DataType::I128 => DataValue::I128(i128::extract(x)),
163 crate::DataType::F64 => DataValue::F64(f64::extract(x)),
164 crate::DataType::U8 => DataValue::U8(u8::extract(x)),
165 crate::DataType::String => DataValue::String(String::extract(x).into()),
166 crate::DataType::Bytes => item,
167 crate::DataType::Map => item,
168 crate::DataType::Vec => item,
169 crate::DataType::Unknown => {
170 if matches!(item, DataValue::Null) {
171 return item;
172 }
173 let dtype = crate::detect_dtype(&item);
174 if matches!(dtype, crate::DataType::Unknown) {
176 tracing::error!("Unknown datatype {dtype:?} - {item:?}");
177 return item;
178 }
179 convert_data_value(item, dtype)
180 }
181 }
182}
183pub fn convert_dv_to_dtype(key: &Key, item: DataValue) -> DataValue {
187 convert_data_value(item, key.ctype)
188}
189
190pub enum MaybeView<'v> {
199 View(ArrayView2<'v, DataValue>),
201 Array(Array2<DataValue>),
203}
204impl MaybeView<'_> {
205 pub fn row_view(&self) -> ArrayView2<'_, DataValue> {
210 match self {
211 Self::View(v) => v.view(),
212 Self::Array(a) => a.t(),
213 }
214 }
215}
216impl ColumnFrame {
217 pub fn new<K, V>(index: K, data_frame: Vec<V>) -> Self
233 where
234 K: Into<KeyIndex>,
235 V: Into<TypedDataArray>,
236 {
237 let index = index.into();
238 let data_frame = data_frame.into_iter().map(Into::into).collect();
239 Self { data_frame, index }
240 }
241
242 pub fn new_coerced<K, V>(index: K, data_frame: Vec<V>) -> Self
249 where
250 K: Into<KeyIndex>,
251 V: Into<TypedDataArray>,
252 {
253 let index = index.into();
254 let data_frame = data_frame
255 .into_iter()
256 .zip(index.keys.iter())
257 .map(|(value, key)| {
258 let mut col: TypedDataArray = value.into();
259 if !matches!(key.ctype, crate::DataType::Unknown) && col.data_type() != key.ctype {
260 let _ = col.try_convert_to_dtype(key.ctype);
261 }
262 col
263 })
264 .collect();
265 Self { data_frame, index }
266 }
267
268 pub fn keys(&self) -> &[Key] {
270 self.index.get_keys()
271 }
272
273 pub fn nrows(&self) -> usize {
275 self.data_frame.first().map(|x| x.len()).unwrap_or_default()
276 }
277 pub fn ncolumns(&self) -> usize {
279 self.data_frame.len()
280 }
281
282 pub fn is_empty(&self) -> bool {
284 self.nrows() == 0
285 }
286
    /// Currently a no-op: the body is empty.
    pub fn shrink(&mut self) {}
290
291 pub fn try_fix_dtype_for_keys(&mut self, force: bool) -> Result<(), Error> {
295 for i in 0..self.index.keys.len() {
296 let should_fix = force || matches!(self.index.keys[i].ctype, crate::DataType::Unknown);
297
298 if should_fix {
299 let column = self
300 .get_column(&self.index.keys[i])
301 .map_err(|_| Error::EmptyData)?;
302 let first = column.get(0).ok_or(Error::EmptyData)?;
303 let dtype = crate::detect_dtype(&first);
304 self.index.keys[i].ctype = dtype;
305 }
306 }
307
308 Ok(())
309 }
310 pub fn try_fix_dtype(&mut self) -> Result<(), Error> {
315 let mut errors = vec![];
316 let keys = self.index.keys.clone();
317 for key in keys {
318 tracing::trace!("key: {key:?}- {:?}", key.ctype);
319 if let Err(e) = self.try_fix_column_by_key(&key) {
320 errors.push((key, e.to_string()));
321 }
322 }
323 if errors.is_empty() {
324 Ok(())
325 } else {
326 Err(Error::CastFailed(errors))
327 }
328 }
329
330 pub fn get_column(&self, key: &Key) -> Result<&TypedDataArray, Error> {
332 let idx = self
333 .index
334 .get_column_index(key)
335 .ok_or(Error::MissingField(format!("{key}").into()))?;
336 self.get_column_by_idx(idx)
337 }
338
339 pub fn get_column_mut(&mut self, key: &Key) -> Result<&mut TypedDataArray, Error> {
341 let idx = self
342 .index
343 .get_column_index(key)
344 .ok_or(Error::MissingField(format!("{key}").into()))?;
345 self.get_column_by_idx_mut(idx)
346 }
347 pub fn get_column_by_idx(&self, idx: usize) -> Result<&TypedDataArray, Error> {
349 self.data_frame
350 .get(idx)
351 .ok_or_else(|| Error::IndexOutOfRange(idx, self.nrows()))
352 }
353
354 pub fn get_column_by_idx_mut(&mut self, idx: usize) -> Result<&mut TypedDataArray, Error> {
356 let n_cols = self.ncolumns();
357 self.data_frame
358 .get_mut(idx)
359 .ok_or_else(|| Error::IndexOutOfRange(idx, n_cols))
360 }
361
362 pub fn get_row(&self, idx: usize) -> Result<Vec<DataValue>, Error> {
364 let mut row = Vec::with_capacity(self.ncolumns());
365 for col in self.data_frame.iter() {
366 row.push(col.get(idx).unwrap_or_default())
367 }
368 Ok(row)
369 }
370
371 fn finish(&self) -> Result<Array2<DataValue>, Error> {
374 let ncols = self.ncolumns();
375 if ncols == 0 {
376 return Ok(Array2::default((0, 0)));
377 }
378 let nrows = self.nrows();
379 let mut data = Vec::with_capacity(nrows * ncols);
380 let mut selected_cols: Vec<Box<dyn Iterator<Item = DataValue>>> = self
381 .index
382 .indexes()
383 .iter()
384 .map(|col_idx| {
385 self.get_column_by_idx(*col_idx)
386 .expect("Cannot get column on index")
387 .iter_values()
388 })
389 .collect::<Vec<_>>();
390
391 for _ in 0..nrows {
392 for col in selected_cols.iter_mut() {
393 data.push(col.next().unwrap_or(DataValue::Null));
394 }
395 }
396
397 Array2::from_shape_vec((nrows, ncols), data)
398 .map_err(|e| Error::UnknownError(format!("finish reshape: {e}")))
399 }
400
401 fn push_row(&mut self, values: Vec<DataValue>) -> Result<(), Error> {
402 let n_cols = self.ncolumns();
403 for (idx, value) in values.into_iter().enumerate() {
404 let current_ptr = self
405 .data_frame
406 .get_mut(idx)
407 .ok_or(Error::IndexOutOfRange(idx, n_cols))?;
408 current_ptr.push(value)?;
409 }
410 Ok(())
411 }
412
413 pub fn try_fix_column_by_key(&mut self, key: &Key) -> Result<(), Error> {
417 let col = self.get_column_mut(key)?;
418 col.try_convert_to_dtype(key.ctype)?;
419 Ok(())
420 }
421
422 pub fn enforce_dtype_for_column(
427 &mut self,
428 key: &str,
429 dtype: crate::DataType,
430 ) -> Result<(), Error> {
431 if let Some(idx) = self.index.get_column_index_by_name(key) {
432 let new_key = Key::new(key, dtype);
433 let col = self.get_column_by_idx_mut(idx)?;
434 col.try_convert_to_dtype(new_key.ctype)?;
435 self.index.rename_key(key, new_key)?;
436 Ok(())
437 } else {
438 Err(Error::NotFound(Key::new(key, crate::DataType::Unknown)))
439 }
440 }
441
442 pub fn rename_key(&mut self, old: &str, new: Key) -> Result<(), Error> {
447 self.index.rename_key(old, new)
448 }
449
450 pub fn add_alias(&mut self, key: &str, alias: &str) -> Result<(), Error> {
454 self.index.add_alias(key, alias)
455 }
456
457 pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>> {
464 let selected = self.select(Some(keys));
465 let mut result = Vec::with_capacity(selected.nrows());
466 for row in selected.rows() {
467 let mut r = Vec::with_capacity(selected.ncols());
468 for value in row.iter() {
469 r.push(D::extract(value));
470 }
471 result.push(r);
472 }
473 result
474 }
475
476 pub fn select_transposed(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
487 let keys = keys.unwrap_or_else(|| self.index.get_keys());
488 let key_indexes = self.index.select(keys);
489 if key_indexes.is_empty() {
490 return Ok(Array2::default((0, 0)));
491 }
492 let data_vec: Result<Vec<Array1<DataValue>>, Error> = key_indexes
493 .indexes()
494 .iter()
495 .map(|x| self.get_column_by_idx(*x).map(|col| col.as_generic_array()))
496 .collect();
497 let data_vec = data_vec?;
498 let views: Vec<ArrayView1<DataValue>> = data_vec.iter().map(|a| a.view()).collect();
499 Ok(ndarray::stack(Axis(0), &views)?)
500 }
501
502 #[deprecated(note = "allocates O(n); use get_column() for zero-copy typed access")]
509 pub fn select_column(&self, key: &Key) -> Option<Array1<DataValue>> {
510 self.index
511 .get_column_index(key)
512 .and_then(|x| self.get_column_by_idx(x).ok())
513 .map(|col| col.as_generic_array())
514 }
515
516 pub fn apply_function<F>(&mut self, keys: &[Key], mut func: F) -> Result<(), Error>
521 where
522 F: FnMut(&[Key], &mut ColumnFrame) -> Result<(), Error>,
523 {
524 func(keys, self)
525 }
526
527 pub fn validate_entry_access(&self, column: &Key, row_index: usize) -> Result<usize, Error> {
534 if row_index >= self.nrows() {
535 return Err(Error::IndexOutOfRange(row_index, self.nrows()));
536 }
537 let Some(column_index) = self.index.get_column_index(column) else {
538 return Err(Error::NotFound(column.clone()));
539 };
540 Ok(column_index)
541 }
542
543 pub fn get_by_row_index(&self, column: &Key, row_index: usize) -> Option<DataValue> {
550 trace!(
551 "Column: {column} row_index: {row_index} data_frame: cols:{}-rows:{}",
552 self.ncolumns(),
553 self.nrows()
554 );
555 trace!("{:?}", self.data_frame);
556 match self.validate_entry_access(column, row_index) {
557 Ok(column_index) => self.data_frame.get(column_index)?.get(row_index),
558 Err(e) => {
559 trace!("Error: {e}");
560 None
561 }
562 }
563 }
564
565 pub fn set_by_row_index(
571 &mut self,
572 column: &Key,
573 row_index: usize,
574 value: DataValue,
575 ) -> Result<(), Error> {
576 let column_index = self.validate_entry_access(column, row_index)?;
577 let ncols = self.ncolumns();
578 self.data_frame
579 .get_mut(column_index)
580 .ok_or(Error::IndexOutOfRange(column_index, ncols))?
581 .set(row_index, value)
582 }
583
584 pub fn select_as_map(&self, keys: Option<&[Key]>) -> HashMap<Key, Vec<DataValue>> {
590 let keys = keys.unwrap_or_else(|| self.index.get_keys());
591 let indexes = self.index.select(keys);
592 if indexes.is_empty() {
593 return Default::default();
594 }
595
596 let mut new_data_frame = HashMap::with_capacity(keys.len());
597
598 for key in keys.iter() {
599 if let Some(column_index_in_source) = indexes.get_column_index(key) {
600 let column = self
601 .data_frame
602 .get(column_index_in_source)
603 .map(|x| x.to_vec())
604 .unwrap_or_else(|| vec![DataValue::Null; self.nrows()]);
605 new_data_frame.insert(key.clone(), column);
606 }
607 }
608
609 new_data_frame
610 }
611
612 pub fn select(&self, keys: Option<&[Key]>) -> Array2<DataValue> {
625 if keys.is_none() && !self.is_empty() {
626 return self.finish().expect("BUG: There has to be some data");
627 }
628 let keys = keys.unwrap_or_else(|| self.index.get_keys());
629 let indexes = self.index.select(keys);
630 if indexes.is_empty() || keys.is_empty() {
631 return Array2::default((0, 0));
632 }
633
634 let nrows = self.nrows();
635 let ncols = keys.len();
636
637 let mut data = Vec::with_capacity(nrows * ncols);
641 let mut selected_cols: Vec<Box<dyn Iterator<Item = DataValue> + '_>> = keys
642 .iter()
643 .map(|key| match indexes.get_column_index(key) {
644 Some(col_idx) => self
645 .get_column_by_idx(col_idx)
646 .expect("Cannot get column on index")
647 .iter_values(),
648 None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = DataValue> + '_>,
649 })
650 .collect();
651
652 for _ in 0..nrows {
653 for col in selected_cols.iter_mut() {
654 data.push(col.next().unwrap_or(DataValue::Null));
655 }
656 }
657
658 Array::from_shape_vec((nrows, ncols), data).unwrap_or_else(|_| Array2::default((0, 0)))
659 }
660
661 pub fn select_vec_view(
684 &self,
685 keys: Option<&[Key]>,
686 ) -> Result<Vec<Option<&TypedDataArray>>, Error> {
687 if keys.is_none() && !self.is_empty() {
688 return Ok(self.data_frame.iter().map(Some).collect());
689 }
690 let keys = keys.unwrap_or_else(|| self.index.get_keys());
691 let indexes = self.index.select(keys);
692 if indexes.is_empty() || keys.is_empty() {
693 return Err(Error::EmptyData);
694 }
695 let ncols = keys.len();
696
697 let mut views = Vec::with_capacity(ncols);
698 for col_key in keys {
699 if let Some(col_idx) = self.index.get_column_index(col_key) {
700 views.push(Some(self.get_column_by_idx(col_idx)?));
701 } else {
702 views.push(None);
703 }
704 }
705 Ok(views)
706 }
707
708 pub fn select_typed_columns(&self, keys: Option<&[Key]>) -> Result<Vec<TypedDataArray>, Error> {
711 if keys.is_none() && !self.is_empty() {
712 return Ok(self.data_frame.clone());
713 }
714 let keys = keys.unwrap_or_else(|| self.index.get_keys());
715 let indexes = self.index.select(keys);
716 if indexes.is_empty() || keys.is_empty() {
717 return Err(Error::EmptyData);
718 }
719 let mut out = Vec::with_capacity(keys.len());
720 for col_key in keys {
721 match indexes.get_column_index(col_key) {
722 Some(col_idx) => out.push(self.get_column_by_idx(col_idx)?.clone()),
723 None => out.push(TypedDataArray::default_init(col_key, self.nrows())),
724 }
725 }
726 Ok(out)
727 }
728
729 pub fn select_view(&self, keys: Option<&[Key]>) -> Result<MaybeView<'_>, Error> {
755 let keys = keys.unwrap_or_else(|| self.index.get_keys());
756 let indexes = self.index.select(keys);
757 if indexes.is_empty() || keys.is_empty() {
758 return Err(Error::EmptyData);
759 }
760 let ncols = keys.len();
761 let mut owned_cols: Vec<Array1<DataValue>> = Vec::with_capacity(ncols);
762 for col_idx in indexes.indexes() {
763 owned_cols.push(self.get_column_by_idx(col_idx)?.as_generic_array());
764 }
765 let views: Vec<ArrayView1<DataValue>> = owned_cols.iter().map(|a| a.view()).collect();
766 Ok(MaybeView::Array(ndarray::stack(Axis(0), &views)?))
767 }
768
769 pub fn select_typed<T: Extract + Clone>(&self, keys: Option<&[Key]>) -> Array2<T> {
783 let keys = keys.unwrap_or_else(|| self.index.get_keys());
784 let indexes = self.index.select(keys);
785 if indexes.is_empty() || keys.is_empty() {
786 return Array2::from_shape_vec((0, 0), vec![]).unwrap();
787 }
788
789 let nrows = self.nrows();
790 let ncols = keys.len();
791 let null_default = T::extract(&DataValue::Null);
792 let mut data = Vec::with_capacity(nrows * ncols);
793
794 for key in keys {
795 match indexes.get_column_index(key) {
796 Some(col_idx) => {
797 let col = self
798 .get_column_by_idx(col_idx)
799 .expect("Cannot get column on index");
800 for v in col.iter_values() {
801 data.push(T::extract(&v));
802 }
803 }
804 None => {
805 data.resize(data.len() + nrows, null_default.clone());
806 }
807 }
808 }
809
810 Array2::from_shape_vec((nrows, ncols).f(), data)
811 .unwrap_or_else(|_| Array2::from_shape_vec((0, 0), vec![]).unwrap())
812 }
813
814 fn extend_dataframe_for_column(&mut self, key: Key) -> Result<(), Error> {
815 let len = self.nrows();
816 let column = TypedDataArray::default_init(&key, len);
817 self.index.store_key(key);
818 self.data_frame.push(column);
819 Ok(())
820 }
821
822 pub fn push<C: CandidateData>(&mut self, row_candidate: C) -> Result<(), Error> {
828 let num_keys = self.index.len();
830 let candidate_keys = row_candidate.keys();
831 let mut arr = Vec::with_capacity(num_keys.max(candidate_keys.len()));
832
833 for key in &candidate_keys {
835 if self.index.get_column_index(key).is_none() {
836 self.extend_dataframe_for_column(key.clone())?;
837 }
838 }
839
840 arr.reserve(self.index.len());
842 for index in self.index.get_keys() {
843 arr.push(
844 row_candidate
845 .get_value_ref(index)
846 .cloned()
847 .unwrap_or(DataValue::Null),
848 );
849 }
850
851 self.push_row(arr)?;
852 Ok(())
853 }
854
855 pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error> {
859 let mut removed_index = KeyIndex::default();
860 let removed_data = self.select_typed_columns(Some(keys))?;
861
862 let mut indices_to_remove: Vec<usize> = keys
864 .iter()
865 .filter_map(|key| self.index.get_column_index(key))
866 .collect();
867
868 for key in keys {
870 if let Some((current, _)) = self.index.remove_key(key) {
871 removed_index.store_key(current);
872 }
873 }
874
875 indices_to_remove.sort_unstable();
877 indices_to_remove.dedup();
878 for idx in indices_to_remove.into_iter().rev() {
879 self.data_frame.remove(idx);
880 }
881
882 let remaining_keys = self.index.get_keys().to_vec();
884 self.index = KeyIndex::new(remaining_keys);
885
886 Ok(Self::new(removed_index, removed_data))
887 }
888
889 fn check_or_init_frame(&mut self, other: &Self) -> Result<Continue, Error> {
890 if self.index.is_empty() {
891 self.index = other.index.clone();
892 self.data_frame = other.data_frame.clone();
893 return Ok(Continue::End);
894 }
895 if other.index.is_empty() {
896 return Ok(Continue::End);
897 }
898 if self.is_empty() {
899 let n = other.nrows();
900 self.data_frame = self
901 .index
902 .get_keys()
903 .iter()
904 .map(|k| TypedDataArray::default_init(k, n))
905 .collect();
906 }
907
908 Ok(Continue::Continue)
909 }
910
911 fn extend_columns_from_other(&mut self, other: &Self) -> Result<(), Error> {
912 let missing_keys: Vec<Key> = other
913 .index
914 .get_keys()
915 .iter()
916 .filter(|key| self.index.get_column_index(key).is_none())
917 .cloned()
918 .collect();
919
920 if missing_keys.is_empty() {
921 return Ok(());
922 }
923
924 let nrows = self.nrows();
925 for key in missing_keys {
926 let column = TypedDataArray::default_init(&key, nrows);
927 self.data_frame.push(column);
928 self.index.store_key(key);
929 }
930
931 Ok(())
932 }
933
934 fn try_extend(&mut self, mut other: Self) -> Result<(), Error> {
935 let mut joined_keys = self.index.clone();
936 for key in other.keys() {
937 if self.index.get_column_index(key).is_none() {
938 joined_keys.store_key(key.clone());
939 }
940 }
941
942 let self_nrows = self.nrows();
943 let other_nrows = other.nrows();
944 let mut new_data: Vec<TypedDataArray> = Vec::with_capacity(joined_keys.len());
945
946 for key in joined_keys.get_keys() {
947 let self_col = self
948 .index
949 .get_column_index(key)
950 .map(|i| std::mem::take(&mut self.data_frame[i]));
951 let other_col = other
952 .index
953 .get_column_index(key)
954 .map(|i| std::mem::take(&mut other.data_frame[i]));
955
956 let col = match (self_col, other_col) {
957 (Some(mut s), Some(o)) => {
958 s.extend_from(&o);
959 s
960 }
961 (Some(mut s), None) => {
962 let filler = TypedDataArray::default_init(key, other_nrows);
963 s.extend_from(&filler);
964 s
965 }
966 (None, Some(o)) => {
967 let mut base = TypedDataArray::default_init(key, self_nrows);
968 base.extend_from(&o);
969 base
970 }
971 (None, None) => TypedDataArray::default_init(key, self_nrows + other_nrows),
972 };
973 new_data.push(col);
974 }
975
976 *self = ColumnFrame {
977 index: joined_keys,
978 data_frame: new_data,
979 };
980 Ok(())
981 }
982
983 pub fn extend(&mut self, mut other: Self) -> Result<(), Error> {
991 if self.check_or_init_frame(&other)?.should_end() {
992 return Ok(());
993 }
994
995 if self.index.check_order_of_indexes(&other.index).is_err() {
996 return self.try_extend(other);
997 }
998
999 trace!(
1000 "Extend columns from other {:?} vs {:?}",
1001 other.index.get_keys(),
1002 self.index.get_keys()
1003 );
1004
1005 self.extend_columns_from_other(&other)?;
1007 other.extend_columns_from_other(self)?;
1008
1009 let keys = self.index.get_keys().to_vec();
1011 for key in &keys {
1012 let self_idx = self.index.get_column_index(key).unwrap();
1013 let other_idx = other.index.get_column_index(key).unwrap();
1014 let other_col = std::mem::take(&mut other.data_frame[other_idx]);
1015 let self_col = &mut self.data_frame[self_idx];
1016 self_col.extend_from(&other_col);
1017 }
1018
1019 Ok(())
1020 }
1021
1022 pub fn replace(&mut self, other: Self) -> Result<(), Error> {
1028 if self.check_or_init_frame(&other)?.should_end() {
1029 return Ok(());
1030 }
1031
1032 if self.nrows() > other.nrows() {
1033 return Err(Error::DataSetSizeDoesntMatch(self.nrows(), other.nrows()));
1034 }
1035
1036 self.index = other.index;
1037 self.data_frame = other.data_frame;
1038
1039 Ok(())
1040 }
1041
1042 pub fn join_by_id_inner(&mut self, right: Self, keys: &[Key]) -> Result<(), Error> {
1046 if self.check_or_init_frame(&right)?.should_end() {
1047 return Ok(());
1048 }
1049
1050 let timer = std::time::Instant::now();
1051 let new_columns = right.index.get_complement_keys(self.index.get_keys());
1052
1053 self.extend_columns_from_other(&right)?;
1055 tracing::debug!("Extend took {}ns", timer.elapsed().as_nanos());
1056
1057 let column_mappings: Vec<(usize, usize)> = new_columns
1059 .iter()
1060 .filter_map(|key| {
1061 let left_idx = self.index.get_column_index(key)?;
1062 let right_idx = right.index.get_column_index(key)?;
1063 Some((left_idx, right_idx))
1064 })
1065 .collect();
1066
1067 let timer = std::time::Instant::now();
1069 let index = Index::new(keys.to_vec(), self);
1070 tracing::debug!("Left index build took: {}ns", timer.elapsed().as_nanos());
1071 tracing::trace!("Index {index:?}");
1072
1073 let timer = std::time::Instant::now();
1074 let right_index = Index::new(keys.to_vec(), &right);
1075 let joined_idx = index.join(right_index);
1076 tracing::debug!(
1077 "Right index build and join took: {}ns",
1078 timer.elapsed().as_nanos()
1079 );
1080
1081 let timer = std::time::Instant::now();
1084 let joined_idx_len = joined_idx.len();
1085
1086 for (left_col_idx, right_col_idx) in &column_mappings {
1087 let right_col = right.get_column_by_idx(*right_col_idx)?;
1088 let left_col = self.get_column_by_idx_mut(*left_col_idx)?;
1089
1090 for (left_indices, right_indices) in &joined_idx {
1091 for right_row_idx in right_indices {
1092 let value = right_col.get_or_null(*right_row_idx);
1093 for left_idx in left_indices {
1094 let _ = left_col.set(*left_idx, value.clone());
1095 }
1096 }
1097 }
1098 }
1099
1100 let elapsed = timer.elapsed();
1101 tracing::debug!(
1102 "Filled {} rows in {}ms|{}s",
1103 joined_idx_len,
1104 elapsed.as_millis(),
1105 elapsed.as_secs()
1106 );
1107
1108 Ok(())
1109 }
1110
1111 pub fn add_single_column<K, V>(&mut self, key: K, column: V) -> Result<(), Error>
1124 where
1125 K: Into<Key>,
1126 V: Into<TypedDataArray>,
1127 {
1128 let key = key.into();
1129 let mut column: TypedDataArray = column.into();
1130 if self.index.get_column_index(&key).is_some() {
1131 return Err(Error::ColumnAlreadyExists(key));
1132 }
1133 if self.nrows() != column.len() && !self.is_empty() {
1134 return Err(Error::DataSetSizeDoesntMatch(self.nrows(), column.len()));
1135 }
1136
1137 if self.is_empty() && !column.is_empty() {
1139 let new_len = column.len();
1140 let keys = self.index.get_keys().to_vec();
1141 for (i, existing_col) in self.data_frame.iter_mut().enumerate() {
1142 *existing_col = TypedDataArray::default_init(&keys[i], new_len);
1143 }
1144 }
1145
1146 if matches!(column.data_type(), crate::DataType::Unknown)
1147 && !matches!(key.ctype, crate::DataType::Unknown)
1148 {
1149 let _ = column.try_convert_to_dtype(key.ctype);
1150 }
1151
1152 self.index.store_key(key);
1153 self.data_frame.push(column);
1154 Ok(())
1155 }
1156 pub fn add_columns(&mut self, other: Self) -> Result<(), Error> {
1160 if self.check_or_init_frame(&other)?.should_end() {
1161 return Ok(());
1162 }
1163
1164 self.extend_columns_from_other(&other)?;
1165 let nrows = self.nrows();
1166 for (idx, key) in other.index.get_keys().iter().enumerate() {
1167 if let Some(index) = self.index.get_column_index(key) {
1168 let arr = match other.get_column_by_idx(idx) {
1169 Ok(arr) => arr.clone(),
1170 Err(_) => continue,
1171 };
1172 trace!(
1173 "Adding column {key:?} at index {idx} vs {index} datasize: self:{} vs other:{}",
1174 nrows,
1175 arr.len()
1176 );
1177 let dst = self.get_column_by_idx_mut(index)?;
1178 if arr.len() != nrows {
1179 dst.fill(DataValue::Null);
1180 } else {
1181 dst.assign(&arr);
1182 }
1183 }
1184 }
1185 Ok(())
1186 }
1187
1188 pub fn broadcast(&mut self, other: Self) -> Result<(), Error> {
1193 if self.check_or_init_frame(&other)?.should_end() {
1194 return Ok(());
1195 }
1196 if other.nrows() != 1 {
1197 return Err(Error::CannotBroadcast);
1198 }
1199
1200 let other_keys: Vec<_> = other
1202 .index
1203 .get_keys()
1204 .iter()
1205 .filter(|k| self.index.get_column_index(k).is_none())
1206 .cloned()
1207 .collect();
1208
1209 let nrows = self.nrows();
1210 for key in &other_keys {
1212 self.index.store_key(key.clone());
1213 let value = other.get_column(key)?;
1214 let first = value.get_or_null(0);
1215 let mut new_col = TypedDataArray::default_init(key, nrows);
1216 new_col.fill(first);
1217 self.data_frame.push(new_col);
1218 }
1219
1220 Ok(())
1221 }
1222
1223 pub fn cartesian_product(&mut self, other: Self) -> Result<(), Error> {
1229 if self.check_or_init_frame(&other)?.should_end() {
1230 return Ok(());
1231 }
1232
1233 let self_nrows = self.nrows();
1234 let other_nrows = other.nrows();
1235 let max_rows = self_nrows * other_nrows;
1236
1237 for other_key in other.keys() {
1239 if self.index.get_column_index(other_key).is_none() {
1240 self.index.store_key(other_key.clone());
1241 } else {
1242 self.index.store_key(Key::new(
1243 format!("{}-{}", other_key, other_key.id()).as_str(),
1244 other_key.ctype,
1245 ));
1246 }
1247 }
1248
1249 let mut df: Vec<TypedDataArray> = Vec::with_capacity(self.index.len());
1250 std::mem::swap(&mut df, &mut self.data_frame);
1252 for col in df.into_iter() {
1254 let mut new_col = vec![DataValue::Null; max_rows];
1255 for (idx, value) in col.iter_values().enumerate() {
1256 for self_idx in 0..other_nrows {
1257 new_col[self_idx + (idx * other_nrows)] = value.clone();
1258 }
1259 }
1260 self.data_frame
1261 .push(TypedDataArray::new(col.data_type(), new_col));
1262 }
1263
1264 for col in other.data_frame.into_iter() {
1266 let mut new_col: Vec<DataValue> = Vec::with_capacity(max_rows);
1267 let tile = col.to_vec();
1268 for _ in 0..self_nrows {
1269 new_col.extend(tile.iter().cloned());
1270 }
1271 self.data_frame
1272 .push(TypedDataArray::new(col.data_type(), new_col));
1273 }
1274
1275 Ok(())
1276 }
1277
1278 pub fn join(&mut self, right: Self, join_type: &JoinRelation) -> Result<(), Error> {
1286 use JoinBy::*;
1287 match &join_type.join_type {
1288 AddColumns => self.add_columns(right),
1289 Replace => self.replace(right),
1290 Extend => self.extend(right),
1291 Broadcast => self.broadcast(right),
1292 CartesianProduct => self.cartesian_product(right),
1293 JoinById(join) => self.join_by_id_inner(right, &join.keys),
1294 }
1295 }
1296
1297 #[deprecated(note = "allocates O(n); use get_column() for zero-copy typed access")]
1303 pub fn get_single_column(&self, key: &Key) -> Option<Array1<DataValue>> {
1304 self.index
1305 .get_column_index(key)
1306 .and_then(|x| self.get_column_by_idx(x).ok())
1307 .map(|col| col.as_generic_array())
1308 }
1309
1310 pub fn get_single_column_typed<T: Extract>(&self, key: &Key) -> Option<Array1<T>> {
1321 self.index
1322 .get_column_index(key)
1323 .and_then(|x| self.get_column_by_idx(x).ok())
1324 .map(|col| col.to_typed_array())
1325 }
1326
1327 pub fn sorted(&self, key: &Key) -> Result<sorted_df::SortedDataFrame<'_>, Error> {
1333 let index = self
1334 .index
1335 .get_column_index(key)
1336 .ok_or(Error::NotFound(key.clone()))?;
1337 let column = self.get_column_by_idx(index)?;
1338 let values: Vec<DataValue> = column.to_vec();
1339 let mut data_with_index: Vec<(usize, &DataValue)> = values.iter().enumerate().collect();
1340 tracing::trace!("Sorting by key: {key:?} vals {data_with_index:?}");
1341 data_with_index.sort_by(
1342 |(a_idx, a_val), (b_idx, b_val)| match a_val.partial_cmp(b_val) {
1343 Some(ordering) => ordering.then_with(|| a_idx.cmp(b_idx)),
1344 None => {
1345 let a_null = matches!(a_val, DataValue::Null);
1346 let b_null = matches!(b_val, DataValue::Null);
1347 match (a_null, b_null) {
1348 (true, true) => std::cmp::Ordering::Equal.then_with(|| a_idx.cmp(b_idx)),
1349 (true, false) => std::cmp::Ordering::Greater.then_with(|| a_idx.cmp(b_idx)),
1350 (false, true) => std::cmp::Ordering::Less.then_with(|| a_idx.cmp(b_idx)),
1351 (false, false) => std::cmp::Ordering::Equal.then_with(|| a_idx.cmp(b_idx)),
1352 }
1353 }
1354 },
1355 );
1356
1357 tracing::trace!("Sorted by key: {key:?} vals {data_with_index:?}");
1358 let indicies = data_with_index
1359 .into_iter()
1360 .map(|(idx, _)| idx)
1361 .collect::<Vec<_>>();
1362
1363 Ok(sorted_df::SortedDataFrame::new(self, indicies))
1364 }
1365
1366 pub fn filter(&self, filter: &crate::filter::FilterRules) -> Result<Self, Error> {
1371 let mut final_indices = Vec::new();
1372 let filter_df = filter_df::ColumnFrameFiltering { column_frame: self };
1373 for rule in &filter.rules {
1374 final_indices.extend(crate::filter::filter_combination(&filter_df, rule)?);
1375 }
1376
1377 final_indices.sort_unstable();
1378 final_indices.dedup();
1379
1380 let new_data: Vec<Vec<DataValue>> = self
1388 .data_frame
1389 .iter()
1390 .map(|col| {
1391 final_indices
1392 .iter()
1393 .map(|&idx| col.get_or_null(idx))
1394 .collect::<Vec<DataValue>>()
1395 })
1396 .collect();
1397
1398 Ok(ColumnFrame::new(self.index.clone(), new_data))
1399 }
1400}
1401
/// Builds a `DataFrame` by passing all tokens to `column_frame!` and wrapping
/// the result with `DataFrame::new`.
#[macro_export]
macro_rules! df {
    ($($tokens:tt)*) => {
        $crate::DataFrame::new($crate::column_frame!($($tokens)*))
    };
}
1408
/// Builds a `ColumnFrame` from `key => values` pairs.
///
/// Accepted forms:
/// * `key => vec![a, b, ...]` — forwarded to the bracket form;
/// * `key => [a, b, ...]` — one column per key with the listed values;
/// * `key => value` — one single-value column per key.
///
/// Every key and value is converted with `.into()`.
#[macro_export]
macro_rules! column_frame {
    // `vec![..]` literals: strip the `vec!` and reuse the bracket form.
    ($($name:expr => vec![$($cell:expr),*]),*) => {
        $crate::column_frame!($($name => [$($cell),*]),*)
    };
    // Bracketed value lists: one column per key.
    ($($name:expr => [$($cell:expr),*]),*) => {{
        let data: ::std::vec::Vec<::std::vec::Vec<$crate::data_value::DataValue>> = vec![
            $(vec![$($cell.into(),)*],)*
        ];

        let _keys = vec![$($name.into(),)*];

        $crate::ColumnFrame::new($crate::KeyIndex::new(_keys), data)
    }};
    // Scalar values: every column holds exactly one value.
    ($($name:expr => $cell:expr $(,)?)*) => {{
        let _data: ::std::vec::Vec<::std::vec::Vec<$crate::data_value::DataValue>> =
            vec![$(vec![$cell.into()],)*];
        let _keys = vec![$($name.into(),)*];
        tracing::trace!("{_keys:?}, {_data:?}");
        $crate::ColumnFrame::new($crate::KeyIndex::new(_keys), _data)
    }};
}
1447
1448#[cfg(test)]
1449mod test {
1450 use crate::JoinById;
1451
1452 use super::*;
1453 use data_value::stdhashmap;
1454 use rstest::*;
1455 use tracing_test::traced_test;
1456
1457 #[rstest]
1458 #[traced_test]
1459 fn test_macro() {
1460 let df = column_frame! {
1461 "a" => 1,
1462 "b" => 2,
1463 "c" => 3,
1464 "d" => 4,
1465 };
1466
1467 assert_eq!(df.nrows(), 1);
1468 assert_eq!(df.keys(), &["a".into(), "b".into(), "c".into(), "d".into()]);
1469 let f = Array2::from_shape_vec((1, 4), vec![1.into(), 2.into(), 3.into(), 4.into()])
1470 .expect("BUG: cannot create array");
1471 assert_eq!(df.select(None), f);
1472
1473 let df = column_frame! {
1474 "a" => [1, 2, 3],
1475 "b" => [4, 5, 6],
1476 "c" => [7, 8, 9]
1477 };
1478
1479 assert_eq!(df.nrows(), 3);
1480 assert_eq!(df.keys(), &["a".into(), "b".into(), "c".into()]);
1481 let f = Array2::from_shape_vec(
1482 (3, 3),
1483 vec![
1484 1.into(),
1485 4.into(),
1486 7.into(),
1487 2.into(),
1488 5.into(),
1489 8.into(),
1490 3.into(),
1491 6.into(),
1492 9.into(),
1493 ],
1494 )
1495 .expect("BUG: cannot create array");
1496 let selected = df.select(None);
1497 trace!("{selected:?}");
1498 assert_eq!(selected, f);
1499
1500 let df1 = df! {
1501 "a" => [1, 2, 3],
1502 "b" => [4, 5, 6],
1503 "c" => [7, 8, 9]
1504 };
1505
1506 let formatted = format!("{}", df);
1508 debug!("{}", formatted);
1509
1510 assert_eq!(df1, crate::DataFrame::from(df));
1511 }
1512
1513 #[rstest]
1514 #[case(
1515 column_frame! {
1516 "a" => [1, 2, 3],
1517 "b" => [4, 5, 6],
1518 "c" => [7, 8, 9]
1519 },
1520 column_frame! {
1521 "a_new" => [1, 2, 3],
1522 "b" => [4, 5, 6],
1523 "c" => [7, 8, 9]
1524 },
1525 vec!["a_new", "b", "c"].into_iter().map(|x| x.into()).collect(),
1526 vec![("a", "a_new".into())]
1527 )]
1528 #[traced_test]
1529 fn rename_test(
1530 #[case] df: ColumnFrame,
1531 #[case] expected: ColumnFrame,
1532 #[case] keys: Vec<Key>,
1533 #[case] renames: Vec<(&str, Key)>,
1534 ) {
1535 let mut df = df;
1536 for (old, new) in renames {
1537 df.rename_key(old, new).expect("BUG: cannot rename key");
1538 }
1539 assert_eq!(df, expected);
1540 assert_eq!(df.keys(), keys.as_slice());
1541 }
1542
1543 #[rstest]
1544 #[case(
1545 column_frame!("a" => [1, 2, 3]),
1546 Key::new("a", crate::DataType::I32),
1547 column_frame!("a" => [1i32, 2i32, 3i32])
1548 )]
1549 #[case(
1550 column_frame!("a" => [1, 2, 3]),
1551 Key::new("a", crate::DataType::U32),
1552 column_frame!("a" => [1u32, 2u32, 3u32])
1553 )]
1554 #[case(
1555 column_frame!("a" => [1, 2, 3]),
1556 Key::new("a", crate::DataType::I64),
1557 column_frame!("a" => [1i64, 2i64, 3i64])
1558 )]
1559 #[case(
1560 column_frame!("a" => [1, 2, 3]),
1561 Key::new("a", crate::DataType::U64),
1562 column_frame!("a" => [1u64, 2u64, 3u64])
1563 )]
1564 #[case(
1565 column_frame!("a" => [1, 2, 3]),
1566 Key::new("a", crate::DataType::F64),
1567 column_frame!("a" => [1f64, 2f64, 3f64])
1568 )]
1569 #[case(
1570 column_frame!("a" => [1, 2, 3]),
1571 Key::new("a", crate::DataType::F32),
1572 column_frame!("a" => [1f32, 2f32, 3f32])
1573 )]
1574 fn test_try_fix_dtype(
1580 #[case] mut df: ColumnFrame,
1581 #[case] key: Key,
1582 #[case] expected: ColumnFrame,
1583 ) {
1584 assert!(df.try_fix_column_by_key(&key).is_ok());
1585 assert_eq!(
1586 df.select(Some(&[key.clone()])),
1587 expected.select(Some(&[key.clone()]))
1588 );
1589 }
1590
1591 #[fixture]
1592 fn unknown_df() -> ColumnFrame {
1593 let mut hm: HashMap<String, Vec<DataValue>> = HashMap::new();
1594
1595 hm.insert("a".into(), vec![1u32.into()]);
1596 hm.insert("b".into(), vec![3i64.into()]);
1597 hm.insert("c".into(), vec![1f64.into()]);
1598 hm.insert("d".into(), vec![1u64.into()]);
1599
1600 hm.into()
1601 }
1602 #[rstest]
1603 #[case(stdhashmap!(
1604 "a" => crate::DataType::U32,
1605 "b" => crate::DataType::I64,
1606 "c" => crate::DataType::F64,
1607 "d" => crate::DataType::U64)
1608 )]
1609 fn test_try_fix_dtype_unknown(
1610 mut unknown_df: ColumnFrame,
1611 #[case] dtypes: HashMap<String, crate::DataType>,
1612 ) {
1613 for dtype in dtypes.iter() {
1614 let t: &Key = unknown_df
1615 .keys()
1616 .iter()
1617 .find(|x| x.name() == dtype.0)
1618 .unwrap();
1619 assert_ne!(t.ctype, crate::DataType::Unknown);
1620 }
1621 assert!(unknown_df.try_fix_dtype_for_keys(false).is_ok());
1622 for dtype in dtypes.iter() {
1623 let t: &Key = unknown_df
1624 .keys()
1625 .iter()
1626 .find(|x| x.name() == dtype.0)
1627 .unwrap();
1628 assert_eq!(t.ctype, *dtype.1);
1629 assert!(unknown_df.try_fix_dtype_for_keys(false).is_ok());
1630 }
1631 assert!(unknown_df.try_fix_dtype_for_keys(true).is_ok());
1632 }
1633
1634 #[rstest]
1635 #[case(
1636 column_frame!(Key::new("a", crate::DataType::F32) => [1, 2, 3]),
1637 Key::new("a", crate::DataType::F32),
1638 column_frame!("a" => [1f32, 2f32, 3f32])
1639 )]
1640 #[traced_test]
1641 fn test_try_fix(#[case] mut df: ColumnFrame, #[case] key: Key, #[case] expected: ColumnFrame) {
1642 assert!(df.try_fix_dtype().is_ok());
1643 assert_eq!(
1644 df.select(Some(&[key.clone()])),
1645 expected.select(Some(&[key]))
1646 )
1647 }
1648
1649 #[rstest]
1650 #[traced_test]
1651 fn test_not_key_fix() {
1652 let mut cf = column_frame!("a" => [1]);
1653 let non_existing = Key::new("b", crate::DataType::I32);
1654 assert!(cf.try_fix_column_by_key(&non_existing).is_err());
1655 }
1656
1657 #[rstest]
1658 #[case(
1659 column_frame! {
1660 "a" => [1, 2, 3],
1661 "b" => [4, 5, 6],
1662 "c" => [7, 8, 9]
1663 },
1664 vec!["a_alias", "b", "c"].into_iter().map(|x| x.into()).collect(),
1665 vec![("a", "a_alias")]
1666 )]
1667 #[traced_test]
1668 fn alias_test(
1669 #[case] df: ColumnFrame,
1670 #[case] keys: Vec<Key>,
1671 #[case] aliases: Vec<(&str, &str)>,
1672 ) {
1673 let mut df = df;
1674 for (old, new) in aliases {
1675 df.add_alias(old, new).expect("BUG: cannot rename key");
1676 }
1677 let origin_keys = df.keys().to_vec();
1678 let selected_aliases = df.select(Some(keys.as_slice()));
1679 let selected = df.select(Some(origin_keys.as_slice()));
1680 assert_eq!(selected, selected_aliases);
1681 }
1682
1683 #[rstest]
1684 #[traced_test]
1685 fn test_mut_view() {
1686 let keys: Vec<Key> = vec!["a".into(), "b".into()];
1689 let index = KeyIndex::new(keys.clone());
1690 let data_frame = vec![
1691 Array1::from_vec(vec![
1692 DataValue::from(1f64),
1693 DataValue::from(2f64),
1694 DataValue::from(f64::NAN),
1695 ]),
1696 Array1::from_vec(vec![
1697 DataValue::from(4f32),
1698 DataValue::from(f32::NAN),
1699 DataValue::from(f32::INFINITY),
1700 ]),
1701 ];
1702 let mut df = ColumnFrame::new(index.clone(), data_frame);
1703 for col in &mut df.data_frame {
1704 col.mapv_inplace(|x| match x {
1705 DataValue::F32(f) if f.is_infinite() || f.is_nan() => DataValue::F32(0f32),
1706 DataValue::F64(f) if f.is_infinite() || f.is_nan() => DataValue::F64(0f64),
1707 e => e,
1708 });
1709 }
1710 let expected = ColumnFrame::new(
1711 index,
1712 vec![
1713 Array1::from_vec(vec![
1714 DataValue::from(1f64),
1715 DataValue::from(2f64),
1716 DataValue::from(0f64),
1717 ]),
1718 Array1::from_vec(vec![
1719 DataValue::from(4f32),
1720 DataValue::from(0f32),
1721 DataValue::from(0f32),
1722 ]),
1723 ],
1724 );
1725 assert_eq!(df, expected);
1726 }
1727
1728 #[rstest]
1729 #[traced_test]
1730 fn dummy_test() {
1731 let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into(), "d".into()];
1732 let index = KeyIndex::new(keys.clone());
1733 let data_frame = vec![
1734 Array1::from_vec(vec![DataValue::U32(1)]),
1735 Array1::from_vec(vec![DataValue::I32(2)]),
1736 Array1::from_vec(vec![DataValue::I64(3)]),
1737 Array1::from_vec(vec![DataValue::U64(4)]),
1738 ];
1739
1740 let frame = ColumnFrame::new(index, data_frame);
1741 assert_eq!(
1742 frame.get_by_row_index(&"a".into(), 0),
1743 Some(DataValue::U32(1))
1744 );
1745 assert_eq!(frame.get_by_row_index(&"aa".into(), 0), None);
1746 assert_eq!(frame.get_by_row_index(&"a".into(), 1), None);
1747 assert_eq!(
1748 frame.select(Some(&["a".into(), "b".into()])),
1749 Array2::from_shape_vec((1, 2), vec![DataValue::U32(1), DataValue::I32(2)])
1750 .expect("BUG: cannot create array")
1751 );
1752 }
1753
1754 #[rstest]
1755 #[traced_test]
1756 fn dummy_test_multiple_rows() {
1757 let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into(), "d".into()];
1760 let index = KeyIndex::new(keys.clone());
1761 let data_frame = vec![
1762 Array1::from_vec(vec![DataValue::U32(1), DataValue::U32(12)]),
1763 Array1::from_vec(vec![DataValue::I32(2), DataValue::I32(22)]),
1764 Array1::from_vec(vec![DataValue::I64(3), DataValue::I64(32)]),
1765 Array1::from_vec(vec![DataValue::U64(4), DataValue::U64(42)]),
1766 ];
1767
1768 let frame = ColumnFrame::new(index, data_frame);
1769 assert_eq!(
1770 frame.get_by_row_index(&"a".into(), 0),
1771 Some(DataValue::U32(1))
1772 );
1773 assert_eq!(frame.get_by_row_index(&"aa".into(), 0), None);
1774 assert_eq!(frame.get_by_row_index(&"a".into(), 3), None);
1775 let arr = Array2::from_shape_vec(
1776 (2, 2),
1777 vec![
1778 DataValue::U32(1),
1779 DataValue::I32(2),
1780 DataValue::U32(12),
1781 DataValue::I32(22),
1782 ],
1783 )
1784 .expect("BUG: cannot create array");
1785 trace!("{arr:?}");
1786 assert_eq!(frame.select(Some(&["a".into(), "b".into()])), arr);
1787 }
1788
1789 #[rstest]
1790 #[traced_test]
1791 fn dummy_test_multiple_rows_push() {
1792 let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into(), "d".into()];
1793 let index = KeyIndex::new(keys.clone());
1794 let data_frame = vec![
1795 Array1::from_vec(vec![DataValue::U32(1), DataValue::U32(12)]),
1796 Array1::from_vec(vec![DataValue::I32(2), DataValue::I32(22)]),
1797 Array1::from_vec(vec![DataValue::I64(3), DataValue::I64(32)]),
1798 Array1::from_vec(vec![DataValue::U64(4), DataValue::U64(42)]),
1799 ];
1800
1801 let mut frame = ColumnFrame::new(index, data_frame);
1802 assert!(frame
1803 .push(data_value::stdhashmap!(
1804 "a" => DataValue::U32(2),
1805 "b" => DataValue::I32(3),
1806 "c" => DataValue::I64(4),
1807 "d" => DataValue::U64(5)
1808 ))
1809 .is_ok());
1810 let arr = Array2::from_shape_vec(
1811 (3, 2),
1812 vec![
1813 DataValue::U32(1),
1814 DataValue::I32(2),
1815 DataValue::U32(12),
1816 DataValue::I32(22),
1817 DataValue::U32(2),
1818 DataValue::I32(3),
1819 ],
1820 )
1821 .expect("BUG: cannot create array");
1822 trace!("{arr:?}");
1823 assert_eq!(frame.select(Some(&["a".into(), "b".into()])), arr);
1824 let result = frame.push(data_value::stdhashmap!(
1825 "a" => DataValue::U32(34),
1826 "b" => DataValue::I32(44),
1827 "c" => DataValue::I64(54),
1828 "e" => DataValue::F32(6f32)
1829 ));
1830 assert!(result.is_ok(), "{result:?}");
1831 let arr = Array2::from_shape_vec(
1832 (4, 2),
1833 vec![
1834 DataValue::U64(4),
1835 DataValue::Null,
1836 DataValue::U64(42),
1837 DataValue::Null,
1838 DataValue::U64(5),
1839 DataValue::Null,
1840 DataValue::Null,
1841 DataValue::F32(6f32),
1842 ],
1843 )
1844 .expect("BUG: cannot create array");
1845 trace!("{arr:?}");
1846 assert_eq!(frame.select(Some(&["d".into(), "e".into()])), arr);
1847 }
1848
1849 #[rstest]
1850 #[case(
1851 column_frame! {
1852 "group_id" => vec![1, 2],
1853 "feed_tag" => vec![3, 4]
1854 },
1855 Some(vec![Key::from("group_id")]),
1856 ndarray::array!([1.into()], [2.into()])
1857 )]
1858 #[case(
1859 column_frame! {
1860 "group_id" => vec![1, 2],
1861 "feed_tag" => vec![3, 4]
1862 },
1863 Some(vec!["group_id".into(), "feed_tag".into()]),
1864 ndarray::array!([1.into(), 3.into()], [2.into(), 4.into()])
1865 )]
1866 #[case(
1867 column_frame! {
1868 "group_id" => vec![1, 2],
1869 "feed_tag" => vec![3, DataValue::Null]
1870 },
1871 Some(vec!["feed_tag".into()]),
1872 ndarray::array![[3.into()], [DataValue::Null]]
1873 )]
1874 #[case(
1875 column_frame! {
1876 "group_id" => vec![1, 2],
1877 "feed_tag" => vec![1, DataValue::Null]
1878 },
1879 Some(vec!["feed_tag2".into()]),
1880 Array2::<DataValue>::default((0, 0))
1881 )]
1882 #[traced_test]
1883 fn test_select(
1884 #[case] input: ColumnFrame,
1885 #[case] keys: Option<Vec<Key>>,
1886 #[case] expected: Array2<DataValue>,
1887 ) {
1888 trace!("input={input:?}");
1889 let keys_slice = keys.as_deref();
1890 let selected = input.select(keys_slice);
1891 trace!("selected={selected:?}");
1892 assert_eq!(selected, expected);
1893 let selected = input.select_transposed(keys_slice);
1894 trace!("selected_transposed={selected:?}");
1895 assert!(selected.is_ok());
1896 assert_eq!(selected.unwrap(), expected.t());
1897 }
1898
1899 #[rstest]
1900 #[case(
1901 column_frame! {
1902 "group_id" => vec![1, 2],
1903 "feed_tag" => vec![3, 4]
1904 },
1905 Key::from("group_id"),
1906 Some(ndarray::array!(1.into(), 2.into()))
1907 )]
1908 #[case(
1909 column_frame! {
1910 "group_id" => vec![1, 2, 5, 6],
1911 "feed_tag" => vec![3, 4, 7, 8]
1912 },
1913 Key::from("group_id"),
1914 Some(ndarray::array!(1.into(), 2.into(), 5.into(), 6.into()))
1915 )]
1916 #[case(
1917 column_frame! {
1918 "group_id" => vec![1, 2],
1919 "feed_tag" => vec![1, 1]
1920 },
1921 Key::from("feed_tag1"),
1922 None
1923 )]
1924 #[traced_test]
1925 #[allow(deprecated)]
1926 fn test_select_column(
1927 #[case] input: ColumnFrame,
1928 #[case] key: Key,
1929 #[case] expected: Option<Array1<DataValue>>,
1930 ) {
1931 #[allow(deprecated)]
1932 let selected = input.select_column(&key);
1933 trace!("selected={selected:?}");
1934 match expected {
1935 Some(expected) => {
1936 assert!(selected.is_some());
1937 assert_eq!(selected.expect("BUG: checked above"), expected);
1938 }
1939 None => assert!(selected.is_none()),
1940 }
1941 }
1942
1943 #[test]
1944 #[traced_test]
1945 fn empty_join_test() {
1946 let join = JoinRelation::add_columns();
1947 let mut column_frame = ColumnFrame::default();
1948 column_frame
1949 .add_single_column("group_id", Vec::<DataValue>::new())
1950 .expect("BUG: cannot add column");
1951 let column_frame2 = column_frame! {
1952 "group_id" => vec![2, 1, 3],
1953 "feed_tag" => vec![1, 1, 1],
1954 "clicks" => vec![100, 10, 10],
1955 "imps" => vec![1000, 200, 200]
1956 };
1957 assert!(column_frame.join(ColumnFrame::default(), &join).is_ok());
1958
1959 let joined = column_frame.join(column_frame2, &join);
1960 assert!(joined.is_ok(), "{joined:?}");
1961
1962 trace!("{column_frame:?}");
1963 assert_eq!(
1964 column_frame.select(Some(&[
1965 "group_id".into(),
1966 "feed_tag".into(),
1967 "clicks".into(),
1968 "imps".into()
1969 ])),
1970 ndarray::array!(
1971 [2.into(), 1.into(), 100.into(), 1000.into()],
1972 [1.into(), 1.into(), 10.into(), 200.into()],
1973 [3.into(), 1.into(), 10.into(), 200.into()],
1974 )
1975 );
1976
1977 let mut column_frame2 = column_frame! {
1978 "feed_tag" => vec![1, 1, 1],
1979 "clicks" => vec![100, 10, 10],
1980 "imps" => vec![1000, 200, 200]
1981 };
1982 let mut column_frame = ColumnFrame::default();
1983 column_frame
1984 .add_single_column("group_id", Array1::from_vec(Vec::<DataValue>::new()))
1985 .expect("BUG: cannot add column");
1986 let joined = column_frame2.join(column_frame, &join);
1987 assert!(joined.is_ok(), "{joined:?}");
1988
1989 trace!("{column_frame2:?}");
1990 assert_eq!(
1991 column_frame2.select(Some(&[
1992 "group_id".into(),
1993 "feed_tag".into(),
1994 "clicks".into(),
1995 "imps".into()
1996 ])),
1997 ndarray::array!(
1998 [DataValue::Null, 1.into(), 100.into(), 1000.into()],
1999 [DataValue::Null, 1.into(), 10.into(), 200.into()],
2000 [DataValue::Null, 1.into(), 10.into(), 200.into()],
2001 )
2002 );
2003
2004 let mut column_frame = ColumnFrame::default();
2005 column_frame.index = KeyIndex::new(vec!["group_id2".into()]);
2006 let joined = column_frame2.join(column_frame, &join);
2007 assert!(joined.is_ok(), "{joined:?}");
2008
2009 trace!("{column_frame2:?}");
2010 assert_eq!(
2011 column_frame2.select(Some(&[
2012 "group_id2".into(),
2013 "feed_tag".into(),
2014 "clicks".into(),
2015 "imps".into()
2016 ])),
2017 ndarray::array!(
2018 [DataValue::Null, 1.into(), 100.into(), 1000.into()],
2019 [DataValue::Null, 1.into(), 10.into(), 200.into()],
2020 [DataValue::Null, 1.into(), 10.into(), 200.into()],
2021 )
2022 );
2023 }
2024
2025 #[test]
2026 #[traced_test]
2027 fn join_test_multiple() {
2028 let join = JoinRelation::new(JoinBy::JoinById(JoinById::new(vec!["group_id".into()])));
2029 let mut column_frame = column_frame! {
2030 "group_id" => vec![1, 1, 3]
2031 };
2032 let column_frame2 = column_frame! {
2033 "group_id" => vec![2, 1, 1],
2034 "clicks" => vec![100, 10, 10],
2035 "imps" => vec![1000, 200, 200]
2036 };
2037
2038 let joined = column_frame.join(column_frame2, &join);
2039 assert!(joined.is_ok(), "{joined:?}");
2040
2041 trace!("{column_frame:?}");
2042 assert_eq!(
2043 column_frame.select(Some(&["group_id".into(), "clicks".into(), "imps".into(),])),
2044 ndarray::array!(
2045 [1.into(), 10.into(), 200.into()],
2046 [1.into(), 10.into(), 200.into()],
2047 [3.into(), DataValue::Null, DataValue::Null],
2048 )
2049 )
2050 }
2051
2052 #[test]
2053 #[traced_test]
2054 fn join_test_no_matches() {
2055 let join = JoinRelation::new(JoinBy::JoinById(JoinById::new(vec!["group_id".into()])));
2056 let mut column_frame = column_frame! {
2057 "group_id" => vec![DataValue::I32(1), DataValue::I32(2), DataValue::I32(3)]
2058 };
2059 let column_frame2 = column_frame! {
2060 "group_id" => vec![DataValue::I32(4), DataValue::I32(5), DataValue::I32(6)],
2061 "clicks" => vec![DataValue::I32(100), DataValue::I32(200), DataValue::I32(300)],
2062 };
2063
2064 let joined = column_frame.join(column_frame2, &join);
2065 assert!(joined.is_ok(), "{joined:?}");
2066
2067 trace!("{column_frame:?}");
2068 assert_eq!(
2069 column_frame.select(Some(&["group_id".into(), "clicks".into()])),
2070 ndarray::array!(
2071 [DataValue::I32(1), DataValue::Null],
2072 [DataValue::I32(2), DataValue::Null],
2073 [DataValue::I32(3), DataValue::Null],
2074 )
2075 )
2076 }
2077 #[test]
2078 #[traced_test]
2079 fn join_test() {
2080 let join = JoinRelation::new(JoinBy::JoinById(JoinById::new(vec![
2081 "group_id".into(),
2082 "feed_tag".into(),
2083 ])));
2084 let mut column_frame = column_frame! {
2085 "group_id" => vec![1, 2, 8],
2086 "feed_tag" => vec![1, 1, 10]
2087 };
2088 let column_frame2 = column_frame! {
2089 "group_id" => vec![2, 1, 3],
2090 "feed_tag" => vec![1, 1, 1],
2091 "clicks" => vec![100, 10, 10],
2092 "imps" => vec![1000, 200, 200]
2093 };
2094 assert!(column_frame.join(ColumnFrame::default(), &join).is_ok());
2095
2096 let joined = column_frame.join(column_frame2, &join);
2097 assert!(joined.is_ok(), "{joined:?}");
2098
2099 trace!("{column_frame:?}");
2100 assert_eq!(
2101 column_frame.select(Some(&[
2102 "group_id".into(),
2103 "feed_tag".into(),
2104 "clicks".into(),
2105 "imps".into()
2106 ])),
2107 ndarray::array!(
2108 [1.into(), 1.into(), 10.into(), 200.into()],
2109 [2.into(), 1.into(), 100.into(), 1000.into()],
2110 [8.into(), 10.into(), DataValue::Null, DataValue::Null]
2111 ),
2112 "DF {column_frame:?}"
2113 );
2114 assert_eq!(
2115 column_frame
2116 .select_view(Some(&[
2117 "group_id".into(),
2118 "feed_tag".into(),
2119 "clicks".into(),
2120 "imps".into()
2121 ]))
2122 .unwrap()
2123 .row_view(),
2124 ndarray::array!(
2125 [1.into(), 1.into(), 10.into(), 200.into()],
2126 [2.into(), 1.into(), 100.into(), 1000.into()],
2127 [8.into(), 10.into(), DataValue::Null, DataValue::Null]
2128 )
2129 .view(),
2130 "DF {column_frame:?}"
2131 )
2132 }
2133
2134 #[test]
2135 #[traced_test]
2136 fn join_test_with_additional() {
2137 let join = JoinRelation::new(JoinBy::JoinById(JoinById::new(vec![
2138 "group_id".into(),
2139 "feed_tag".into(),
2140 ])));
2141 let mut column_frame = column_frame! {
2142 "group_id" => vec![1, 2, 8],
2143 "feed_tag" => vec![1, 1, 10],
2144 "clicked" => vec![0, 0, 1]
2145 };
2146 let column_frame2 = column_frame! {
2147 "group_id" => vec![2, 1, 3],
2148 "feed_tag" => vec![1, 1, 1],
2149 "clicks" => vec![100, 10, 10],
2150 "imps" => vec![1000, 200, 200]
2151 };
2152 assert!(column_frame.join(ColumnFrame::default(), &join).is_ok());
2153
2154 let joined = column_frame.join(column_frame2, &join);
2155 assert!(joined.is_ok(), "{joined:?}");
2156
2157 trace!("{column_frame:?}");
2158 assert_eq!(
2159 column_frame.select(Some(&[
2160 "group_id".into(),
2161 "feed_tag".into(),
2162 "clicks".into(),
2163 "imps".into(),
2164 "clicked".into()
2165 ])),
2166 ndarray::array!(
2167 [1.into(), 1.into(), 10.into(), 200.into(), 0.into()],
2168 [2.into(), 1.into(), 100.into(), 1000.into(), 0.into()],
2169 [
2170 8.into(),
2171 10.into(),
2172 DataValue::Null,
2173 DataValue::Null,
2174 1.into()
2175 ]
2176 )
2177 );
2178 assert_eq!(
2179 column_frame
2180 .select_view(Some(&[
2181 "group_id".into(),
2182 "feed_tag".into(),
2183 "clicks".into(),
2184 "imps".into(),
2185 "clicked".into()
2186 ]))
2187 .unwrap()
2188 .row_view(),
2189 ndarray::array!(
2190 [1.into(), 1.into(), 10.into(), 200.into(), 0.into()],
2191 [2.into(), 1.into(), 100.into(), 1000.into(), 0.into()],
2192 [
2193 8.into(),
2194 10.into(),
2195 DataValue::Null,
2196 DataValue::Null,
2197 1.into()
2198 ]
2199 )
2200 .view(),
2201 "DF {column_frame:?}"
2202 );
2203 }
2204
2205 #[test]
2206 #[traced_test]
2207 fn join_test_with_additional_single() {
2208 let join = JoinRelation::new(JoinBy::JoinById(JoinById::new(vec![
2209 "group_id".into(),
2210 "feed_tag".into(),
2211 ])));
2212 let mut column_frame = column_frame! {
2213 "group_id" => vec![1, 2, 8],
2214 "feed_tag" => vec![1, 1, 10],
2215 "clicked" => vec![0, 0, 1]
2216 };
2217 let column_frame2 = column_frame! {
2218 "a" => vec![1],
2219 "group_id" => vec![2],
2220 "feed_tag" => vec![1],
2221 "clicks" => vec![10],
2222 "imps" => vec![200]
2223 };
2224 assert!(column_frame.join(ColumnFrame::default(), &join).is_ok());
2225
2226 let joined = column_frame.join(column_frame2, &join);
2227 assert!(joined.is_ok(), "{joined:?}");
2228
2229 trace!("{column_frame:?}");
2230 assert_eq!(
2231 column_frame.select(Some(&[
2232 "group_id".into(),
2233 "feed_tag".into(),
2234 "clicks".into(),
2235 "imps".into(),
2236 "clicked".into()
2237 ])),
2238 ndarray::array!(
2239 [
2240 1.into(),
2241 1.into(),
2242 DataValue::Null,
2243 DataValue::Null,
2244 0.into(),
2245 ],
2246 [2.into(), 1.into(), 10.into(), 200.into(), 0.into()],
2247 [
2248 8.into(),
2249 10.into(),
2250 DataValue::Null,
2251 DataValue::Null,
2252 1.into()
2253 ]
2254 )
2255 )
2256 }
2257
2258 #[rstest]
2259 #[traced_test]
2260 fn cartesian_product_join() {
2261 let mut df = column_frame! {
2262 "group_id" => vec![1, 2, 3],
2263 "feed_tag" => vec![1, 2, 3]
2264 };
2265 let df2 = column_frame! {
2266 "zone_id" => vec![111111, 111133],
2267 "zone_avg_ctr" => vec![0.1, 0.001]
2268 };
2269 assert!(df
2270 .join(
2271 ColumnFrame::default(),
2272 &JoinRelation::new(JoinBy::CartesianProduct)
2273 )
2274 .is_ok());
2275 let join = JoinRelation::new(JoinBy::CartesianProduct);
2276 let result = df.join(df2, &join);
2277 assert!(result.is_ok(), "{result:?}");
2278 let selected = df.select(None);
2279 trace!("{selected:?}");
2280 assert_eq!(
2281 selected,
2282 ndarray::array!(
2283 [1.into(), 1.into(), 111111.into(), 0.1.into()],
2284 [1.into(), 1.into(), 111133.into(), 0.001.into()],
2285 [2.into(), 2.into(), 111111.into(), 0.1.into()],
2286 [2.into(), 2.into(), 111133.into(), 0.001.into()],
2287 [3.into(), 3.into(), 111111.into(), 0.1.into()],
2288 [3.into(), 3.into(), 111133.into(), 0.001.into()],
2289 )
2290 );
2291
2292 let df2 = column_frame! {
2293 "zone_id" => vec![111]
2294 };
2295 let result = df.join(df2, &join);
2296 assert!(result.is_ok(), "{result:?}");
2297 let selected = df.select(None);
2298 trace!("{selected:?}");
2299 assert_eq!(
2300 selected,
2301 ndarray::array!(
2302 [1.into(), 1.into(), 111111.into(), 0.1.into(), 111.into()],
2303 [1.into(), 1.into(), 111133.into(), 0.001.into(), 111.into()],
2304 [2.into(), 2.into(), 111111.into(), 0.1.into(), 111.into()],
2305 [2.into(), 2.into(), 111133.into(), 0.001.into(), 111.into()],
2306 [3.into(), 3.into(), 111111.into(), 0.1.into(), 111.into()],
2307 [3.into(), 3.into(), 111133.into(), 0.001.into(), 111.into()],
2308 )
2309 );
2310 }
2311
2312 #[rstest]
2313 #[traced_test]
2314 fn broadcast_join() {
2315 let mut df = column_frame! {
2316 "group_id" => vec![1, 2, 3],
2317 "feed_tag" => vec![1, 2, 3]
2318 };
2319 let df2 = column_frame! {
2320 "zone_id" => vec![111111]
2321 };
2322 assert!(df
2323 .join(
2324 ColumnFrame::default(),
2325 &JoinRelation::new(JoinBy::Broadcast)
2326 )
2327 .is_ok());
2328 let join = JoinRelation::new(JoinBy::Broadcast);
2329 assert!(df.join(df2, &join).is_ok());
2330 let selected = df.select(None);
2331 trace!("{selected:?}");
2332 assert_eq!(
2333 selected,
2334 ndarray::array!(
2335 [1.into(), 1.into(), 111111.into()],
2336 [2.into(), 2.into(), 111111.into()],
2337 [3.into(), 3.into(), 111111.into()]
2338 )
2339 );
2340 }
2341 #[rstest]
2342 #[traced_test]
2343 fn merge_test() {
2344 let mut df = column_frame! {
2345 "group_id" => vec![1, 2, 3],
2346 "feed_tag" => vec![1, 2, 3]
2347 };
2348 let df2 = column_frame! {
2349 "group_id" => vec![11, 21, 31],
2350 "feed_tag" => vec![12, 22, 32]
2351 };
2352
2353 let join = JoinRelation::new(JoinBy::Replace);
2354 assert!(df.join(df2, &join).is_ok());
2355 let selected = df.select(None);
2356 trace!("{selected:?}");
2357 assert_eq!(
2358 selected,
2359 ndarray::array!(
2360 [11.into(), 12.into()],
2361 [21.into(), 22.into()],
2362 [31.into(), 32.into()]
2363 )
2364 );
2365 }
2366
2367 #[rstest]
2368 #[traced_test]
2369 fn extend_test() {
2370 let mut df = column_frame! {
2371 "group_id" => vec![1, 2, 3],
2372 "feed_tag" => vec![1, 2, 3]
2373 };
2374 let df2 = column_frame! {
2375 "group_id" => vec![11, 21, 31],
2376 "feed_tag" => vec![5, 6, 7]
2377 };
2378 assert!(df
2379 .join(ColumnFrame::default(), &JoinRelation::new(JoinBy::Extend))
2380 .is_ok());
2381
2382 let join = JoinRelation::new(JoinBy::Extend);
2383 assert!(df.join(df2, &join).is_ok());
2384 let selected = df.select(Some(&["feed_tag".into(), "group_id".into()]));
2385 trace!("{selected:?}");
2386 assert_eq!(
2387 selected,
2388 ndarray::array!(
2389 [1.into(), 1.into()],
2390 [2.into(), 2.into()],
2391 [3.into(), 3.into()],
2392 [5.into(), 11.into()],
2393 [6.into(), 21.into()],
2394 [7.into(), 31.into()]
2395 )
2396 );
2397 let as_map = df.select_as_map(Some(&["feed_tag".into(), "group_id".into()]));
2398 trace!("{as_map:?}");
2399 assert_eq!(
2400 as_map,
2401 stdhashmap!(
2402 "feed_tag" => vec![1, 2, 3, 5, 6, 7],
2403 "group_id" => vec![1, 2, 3, 11, 21, 31]
2404 )
2405 );
2406
2407 let as_map = df.select_as_map(Some(&["feed_tag1".into()]));
2408 trace!("{as_map:?}");
2409 assert_eq!(as_map, HashMap::default());
2410 }
2411
2412 #[rstest]
2413 #[traced_test]
2414 fn extend_test_with_non_existing_cols() {
2415 let mut df = column_frame! {
2416 "group_id" => vec![1, 2, 3],
2417 "feed_tag" => vec![1, 2, 3]
2418 };
2419 let mut df2 = column_frame! {
2420 "group_id" => vec![11, 21, 31],
2421 "feed_tag" => vec![5, 6, 7],
2422 "clicks" => vec![100, 200, 300],
2423 "impressions" => vec![1000, 2000, 3000]
2424 };
2425 let df_bckp = df.clone();
2426 let join = JoinRelation::new(JoinBy::Extend);
2427 assert!(df.join(df2.clone(), &join).is_ok());
2428 let selected = df.select(None);
2429 trace!("{selected:?}");
2430 assert_eq!(
2431 selected,
2432 ndarray::array!(
2433 [1.into(), 1.into(), DataValue::Null, DataValue::Null],
2434 [2.into(), 2.into(), DataValue::Null, DataValue::Null],
2435 [3.into(), 3.into(), DataValue::Null, DataValue::Null],
2436 [11.into(), 5.into(), 100.into(), 1000.into()],
2437 [21.into(), 6.into(), 200.into(), 2000.into()],
2438 [31.into(), 7.into(), 300.into(), 3000.into()]
2439 )
2440 );
2441 let join = JoinRelation::new(JoinBy::Extend);
2442 let r = df2.join(df_bckp, &join);
2443 assert!(r.is_ok(), "{r:?}");
2444 let selected = df2.select(None);
2445 trace!("{selected:?}");
2446 assert_eq!(
2447 selected,
2448 ndarray::array!(
2449 [11.into(), 5.into(), 100.into(), 1000.into()],
2450 [21.into(), 6.into(), 200.into(), 2000.into()],
2451 [31.into(), 7.into(), 300.into(), 3000.into()],
2452 [1.into(), 1.into(), DataValue::Null, DataValue::Null],
2453 [2.into(), 2.into(), DataValue::Null, DataValue::Null],
2454 [3.into(), 3.into(), DataValue::Null, DataValue::Null]
2455 )
2456 );
2457 }
2458
2459 #[rstest]
2460 #[traced_test]
2461 fn extend_test_with_non_existing_cols_wrong_order() {
2462 let mut df = column_frame! {
2463 "group_id" => vec![1, 2, 3],
2464 "feed_tag" => vec![1, 2, 3]
2465 };
2466 let df2 = column_frame! {
2467 "feed_tag" => vec![5, 6, 7],
2468 "group_id" => vec![11, 21, 31]
2469 };
2470 let join = JoinRelation::new(JoinBy::Extend);
2471 let err = df.join(df2, &join);
2472 assert!(err.is_ok(), "{err:?}");
2473
2474 assert_eq!(df.nrows(), 6);
2475 assert_eq!(
2476 df.select(Some(&["group_id".into(), "feed_tag".into()])),
2477 ndarray::array![
2478 [1.into(), 1.into()],
2479 [2.into(), 2.into()],
2480 [3.into(), 3.into()],
2481 [11.into(), 5.into()],
2482 [21.into(), 6.into()],
2483 [31.into(), 7.into()],
2484 ]
2485 );
2486 }
2487
2488 #[rstest]
2489 #[traced_test]
2490 fn extend_test_wrong_order_with_extra_columns() {
2491 let mut df = column_frame! {
2492 "a" => vec![1, 2],
2493 "b" => vec![10, 20]
2494 };
2495 let df2 = column_frame! {
2496 "c" => vec![100, 200],
2497 "a" => vec![3, 4]
2498 };
2499 let join = JoinRelation::new(JoinBy::Extend);
2500 assert!(df.join(df2, &join).is_ok());
2501
2502 assert_eq!(df.nrows(), 4);
2503 assert_eq!(
2504 df.select(Some(&["a".into(), "b".into(), "c".into()])),
2505 ndarray::array![
2506 [1.into(), 10.into(), DataValue::Null],
2507 [2.into(), 20.into(), DataValue::Null],
2508 [3.into(), DataValue::Null, 100.into()],
2509 [4.into(), DataValue::Null, 200.into()],
2510 ]
2511 );
2512 }
2513
2514 #[rstest]
2515 #[traced_test]
2516 fn test_replace_not_compatible() {
2517 let mut df = column_frame! {
2518 "group_id" => vec![1, 2, 3],
2519 "feed_tag" => vec![1, 2, 3]
2520 };
2521 let df2 = column_frame! {
2522 "feed_tag" => vec![5, 6],
2523 "group_id" => vec![11, 21]
2524 };
2525 let join = JoinRelation::new(JoinBy::Replace);
2526 let err = df.join(df2, &join);
2527 assert!(err.is_err(), "{err:?}");
2528 let empty = ColumnFrame::default();
2529 let err = df.join(empty, &join);
2530 assert!(err.is_ok(), "{err:?}");
2531 }
2532
2533 #[rstest]
2534 #[traced_test]
2535 fn test_different_data() {
2536 let mut df = column_frame! {
2537 "group_id" => vec![1, 2, 3],
2538 "feed_tag" => vec![1, 2, 3]
2539 };
2540 let df2 = column_frame! {
2541 "group_id" => vec![11, 21],
2542 "a" => vec![5, 6]
2543 };
2544 let join = JoinRelation::new(JoinBy::Extend);
2545 let err = df.join(df2, &join);
2546 assert!(err.is_ok(), "{err:?}");
2547 println!("{df:?}");
2548 let expected_df = ColumnFrame::new(
2549 KeyIndex::from(vec!["group_id".into(), "feed_tag".into(), "a".into()]),
2550 vec![
2551 Array1::from_vec(vec![1.into(), 2.into(), 3.into(), 11.into(), 21.into()]),
2552 Array1::from_vec(vec![
2553 1.into(),
2554 2.into(),
2555 3.into(),
2556 DataValue::Null,
2557 DataValue::Null,
2558 ]),
2559 Array1::from_vec(vec![
2560 DataValue::Null,
2561 DataValue::Null,
2562 DataValue::Null,
2563 5.into(),
2564 6.into(),
2565 ]),
2566 ],
2567 );
2568 assert_eq!(df, expected_df)
2569 }
2570
2571 #[rstest]
2572 #[traced_test]
2573 fn serde_column_frame() {
2574 let df = column_frame! {
2575 "group_id" => vec![1u64, 2u64, 3u64],
2576 "feed_tag" => vec![1u64, 2u64, 3u64]
2577 };
2578 let key_idx = df.index.clone();
2579 let serialized = serde_json::to_string(&key_idx).expect("BUG: cannot serialize");
2580 let deserialized: KeyIndex =
2581 serde_json::from_str(&serialized).expect("BUG: cannot deserialize");
2582 assert_eq!(key_idx, deserialized);
2583 assert!(key_idx.get_key(0).is_some_and(|x| x == "group_id".into()));
2584 let serialized = serde_json::to_string(&df).expect("BUG: cannot serialize");
2585 let deserialized: ColumnFrame =
2586 serde_json::from_str(&serialized).expect("BUG: cannot deserialize");
2587 assert_eq!(df, deserialized);
2588 }
2589
2590 #[rstest]
2591 #[traced_test]
2592 fn update_value() {
2593 let mut df = column_frame! {
2594 "group_id" => vec![1, 2, 3],
2595 "feed_tag" => vec![1, 2, 3]
2596 };
2597 let group_id: Key = "group_id".into();
2598 let v = df.get_by_row_index(&group_id, 1);
2599 assert_eq!(v, Some(DataValue::I32(2)));
2600 df.set_by_row_index(&group_id, 1, DataValue::I32(22))
2601 .expect("set must succeed");
2602 let v = df.get_by_row_index(&group_id, 1);
2603 assert_eq!(v, Some(DataValue::I32(22)));
2604
2605 assert!(df
2606 .set_by_row_index(&"group_id2".into(), 1, DataValue::Null)
2607 .is_err());
2608 }
2609
2610 #[rstest]
2611 fn get_single_column_typed_f64() {
2612 let df = column_frame! {
2613 "a" => [1i32, 2i32, 3i32],
2614 "b" => [10u64, 20u64, 30u64]
2615 };
2616 let key: Key = "a".into();
2617 let col = df.get_single_column_typed::<f64>(&key).unwrap();
2618 assert_eq!(col, ndarray::arr1(&[1.0, 2.0, 3.0]));
2619 }
2620
2621 #[rstest]
2622 fn get_single_column_typed_i64() {
2623 let df = column_frame! {
2624 "x" => [10u32, 20u32, 30u32]
2625 };
2626 let key: Key = "x".into();
2627 let col = df.get_single_column_typed::<i64>(&key).unwrap();
2628 assert_eq!(col, ndarray::arr1(&[10i64, 20i64, 30i64]));
2629 }
2630
2631 #[rstest]
2632 fn get_single_column_typed_string() {
2633 let df = column_frame! {
2634 "name" => ["alice", "bob", "carol"]
2635 };
2636 let key: Key = "name".into();
2637 let col = df.get_single_column_typed::<String>(&key).unwrap();
2638 assert_eq!(
2639 col,
2640 ndarray::arr1(&["alice".to_string(), "bob".to_string(), "carol".to_string()])
2641 );
2642 }
2643
2644 #[rstest]
2645 fn get_single_column_typed_bool() {
2646 let df = column_frame! {
2647 "flag" => [1i32, 0i32, 1i32]
2648 };
2649 let key: Key = "flag".into();
2650 let col = df.get_single_column_typed::<bool>(&key).unwrap();
2651 assert_eq!(col, ndarray::arr1(&[true, false, true]));
2652 }
2653
2654 #[rstest]
2655 fn get_single_column_typed_missing_key_returns_none() {
2656 let df = column_frame! {
2657 "a" => [1, 2, 3]
2658 };
2659 let missing: Key = "nonexistent".into();
2660 assert!(df.get_single_column_typed::<f64>(&missing).is_none());
2661 }
2662
2663 #[rstest]
2664 fn get_single_column_typed_numeric_coercion_from_mixed() {
2665 let df = column_frame! {
2666 "vals" => [1.5f64, 2.7f64, 3.9f64]
2667 };
2668 let key: Key = "vals".into();
2669 let col = df.get_single_column_typed::<i32>(&key).unwrap();
2671 assert_eq!(col, ndarray::arr1(&[1i32, 2i32, 3i32]));
2672 }
2673
2674 #[rstest]
2675 fn get_single_column_typed_selects_correct_column() {
2676 let df = column_frame! {
2677 "x" => [1, 2, 3],
2678 "y" => [10, 20, 30],
2679 "z" => [100, 200, 300]
2680 };
2681 let key: Key = "y".into();
2682 let col = df.get_single_column_typed::<i64>(&key).unwrap();
2683 assert_eq!(col, ndarray::arr1(&[10i64, 20i64, 30i64]));
2684 }
2685
2686 #[rstest]
2687 fn get_single_column_typed_u64_identity() {
2688 let df = column_frame! {
2689 "id" => [100u64, 200u64, 300u64]
2690 };
2691 let key: Key = "id".into();
2692 let col = df.get_single_column_typed::<u64>(&key).unwrap();
2693 assert_eq!(col, ndarray::arr1(&[100u64, 200u64, 300u64]));
2694 }
2695
2696 #[rstest]
2697 fn get_single_column_typed_single_row() {
2698 let df = column_frame! {
2699 "x" => [42i32]
2700 };
2701 let key: Key = "x".into();
2702 let col = df.get_single_column_typed::<f64>(&key).unwrap();
2703 assert_eq!(col, ndarray::arr1(&[42.0f64]));
2704 }
2705
2706 #[rstest]
2707 fn get_single_column_typed_empty_frame() {
2708 let df = ColumnFrame::default();
2709 let key: Key = "x".into();
2710 let col = df.get_single_column_typed::<f64>(&key);
2711 assert!(col.is_none());
2712 }
2713
2714 #[rstest]
2715 fn select_typed_all_columns() {
2716 let df = column_frame! {
2717 "a" => [1i32, 2i32],
2718 "b" => [3i32, 4i32]
2719 };
2720 let result = df.select_typed::<f64>(None);
2721 assert_eq!(result.nrows(), 2);
2722 assert_eq!(result.ncols(), 2);
2723 assert_eq!(result[[0, 0]], 1.0);
2724 assert_eq!(result[[0, 1]], 3.0);
2725 assert_eq!(result[[1, 0]], 2.0);
2726 assert_eq!(result[[1, 1]], 4.0);
2727 }
2728
2729 #[rstest]
2730 fn select_typed_subset_of_columns() {
2731 let df = column_frame! {
2732 "a" => [10u64, 20u64],
2733 "b" => [30u64, 40u64],
2734 "c" => [50u64, 60u64]
2735 };
2736 let keys: Vec<Key> = vec!["a".into(), "c".into()];
2737 let result = df.select_typed::<i64>(Some(&keys));
2738 assert_eq!(result.nrows(), 2);
2739 assert_eq!(result.ncols(), 2);
2740 assert_eq!(result[[0, 0]], 10i64);
2741 assert_eq!(result[[0, 1]], 50i64);
2742 assert_eq!(result[[1, 0]], 20i64);
2743 assert_eq!(result[[1, 1]], 60i64);
2744 }
2745
2746 #[rstest]
2747 fn select_typed_nonexistent_keys_returns_empty() {
2748 let df = column_frame! {
2749 "a" => [1i32, 2i32]
2750 };
2751 let keys: Vec<Key> = vec!["z".into()];
2752 let result = df.select_typed::<f64>(Some(&keys));
2753 assert_eq!(result.shape(), &[0, 0]);
2754 }
2755
2756 #[rstest]
2757 fn select_typed_string_extraction() {
2758 let df = column_frame! {
2759 "name" => ["hello", "world"]
2760 };
2761 let result = df.select_typed::<String>(None);
2762 assert_eq!(result[[0, 0]], "hello");
2763 assert_eq!(result[[1, 0]], "world");
2764 }
2765
2766 #[rstest]
2767 fn select_typed_matches_manual_mapv() {
2768 let df = column_frame! {
2769 "x" => [1i32, 2i32, 3i32],
2770 "y" => [4i32, 5i32, 6i32]
2771 };
2772 let typed = df.select_typed::<f64>(None);
2773 let manual = df.select(None).mapv(|v| f64::extract(&v));
2774 assert_eq!(typed, manual);
2775 }
2776
2777 #[rstest]
2781 #[traced_test]
2782 fn select_vec_view_all_columns() {
2783 let cf = column_frame! {
2784 "a" => [1i32, 2i32, 3i32],
2785 "b" => [4i32, 5i32, 6i32]
2786 };
2787 let cols = cf.select_vec_view(None).expect("should succeed");
2788 assert_eq!(cols.len(), 2, "must return one borrow per column");
2789 let a = cols[0].expect("column 'a' present");
2790 let b = cols[1].expect("column 'b' present");
2791 assert_eq!(a.len(), 3, "each column has 3 rows");
2792 assert_eq!(b.len(), 3);
2793 assert_eq!(
2795 a.to_vec(),
2796 vec![
2797 DataValue::from(1i32),
2798 DataValue::from(2i32),
2799 DataValue::from(3i32),
2800 ]
2801 );
2802 assert_eq!(
2803 b.to_vec(),
2804 vec![
2805 DataValue::from(4i32),
2806 DataValue::from(5i32),
2807 DataValue::from(6i32),
2808 ]
2809 );
2810 }
2811
2812 #[rstest]
2814 #[traced_test]
2815 fn select_vec_view_subset_in_order() {
2816 let cf = column_frame! {
2817 "a" => [10i32, 20i32],
2818 "b" => [30i32, 40i32],
2819 "c" => [50i32, 60i32]
2820 };
2821 let cols = cf
2823 .select_vec_view(Some(&["c".into(), "a".into()]))
2824 .expect("should succeed");
2825 assert_eq!(cols.len(), 2);
2826 assert_eq!(
2827 cols[0].expect("first col is 'c'").to_vec(),
2828 vec![DataValue::from(50i32), DataValue::from(60i32)],
2829 );
2830 assert_eq!(
2831 cols[1].expect("second col is 'a'").to_vec(),
2832 vec![DataValue::from(10i32), DataValue::from(20i32)],
2833 );
2834 }
2835
2836 #[rstest]
2838 #[traced_test]
2839 fn select_vec_view_single_column() {
2840 let cf = column_frame! {
2841 "x" => [7i32, 8i32, 9i32],
2842 "y" => [1i32, 2i32, 3i32]
2843 };
2844 let cols = cf
2845 .select_vec_view(Some(&["x".into()]))
2846 .expect("should succeed");
2847 assert_eq!(cols.len(), 1);
2848 assert_eq!(
2849 cols[0].expect("column 'x' present").to_vec(),
2850 vec![
2851 DataValue::from(7i32),
2852 DataValue::from(8i32),
2853 DataValue::from(9i32),
2854 ]
2855 );
2856 }
2857
2858 #[rstest]
2860 #[traced_test]
2861 fn select_vec_view_unknown_keys_returns_err() {
2862 let cf = column_frame! {
2863 "a" => [1i32, 2i32]
2864 };
2865 let result = cf.select_vec_view(Some(&["nonexistent".into()]));
2866 assert!(
2867 result.is_err(),
2868 "should error on unknown key, got {result:?}"
2869 );
2870 }
2871
2872 #[rstest]
2874 #[traced_test]
2875 fn select_vec_view_empty_keys_returns_err() {
2876 let cf = column_frame! {
2877 "a" => [1i32, 2i32]
2878 };
2879 let result = cf.select_vec_view(Some(&[]));
2880 assert!(result.is_err(), "empty slice should return an error");
2881 }
2882
2883 #[rstest]
2885 #[traced_test]
2886 fn select_vec_view_preserves_nulls() {
2887 let cf = column_frame! {
2888 "v" => [DataValue::Null, DataValue::from(42i32), DataValue::Null]
2889 };
2890 let cols = cf.select_vec_view(None).expect("should succeed");
2891 assert_eq!(cols.len(), 1);
2892 assert_eq!(
2893 cols[0].expect("column 'v' present").to_vec(),
2894 vec![DataValue::Null, DataValue::from(42i32), DataValue::Null]
2895 );
2896 }
2897
2898 #[rstest]
2902 #[traced_test]
2903 fn select_view_all_columns_shape() {
2904 let cf = column_frame! {
2905 "a" => [1i32, 2i32, 3i32],
2906 "b" => [4i32, 5i32, 6i32]
2907 };
2908 let mv = cf.select_view(None).expect("should succeed");
2909 let rv = mv.row_view();
2911 assert_eq!(rv.nrows(), 3, "nrows should be 3");
2912 assert_eq!(rv.ncols(), 2, "ncols should be 2");
2913 }
2914
2915 #[rstest]
2917 #[traced_test]
2918 fn select_view_subset_shape() {
2919 let cf = column_frame! {
2920 "a" => [10i32, 20i32],
2921 "b" => [30i32, 40i32],
2922 "c" => [50i32, 60i32]
2923 };
2924 let mv = cf
2925 .select_view(Some(&["a".into(), "c".into()]))
2926 .expect("should succeed");
2927 let rv = mv.row_view();
2928 assert_eq!(rv.nrows(), 2);
2929 assert_eq!(rv.ncols(), 2);
2930 }
2931
2932 #[rstest]
2938 #[traced_test]
2939 fn select_view_data_matches_select() {
2940 let cf = column_frame! {
2941 "p" => [1i32, 2i32],
2942 "q" => [3i32, 4i32]
2943 };
2944 let keys: &[Key] = &["p".into(), "q".into()];
2945 let mv = cf.select_view(Some(keys)).expect("should succeed");
2946 let view_data = mv.row_view().to_owned();
2947 let select_data = cf.select(Some(keys));
2948 assert_eq!(view_data, select_data);
2949 }
2950
2951 #[rstest]
2953 #[traced_test]
2954 fn select_view_unknown_keys_returns_err() {
2955 let cf = column_frame! {
2956 "a" => [1i32, 2i32]
2957 };
2958 let result = cf.select_view(Some(&["does_not_exist".into()]));
2959 assert!(result.is_err(), "unknown key should return an error");
2960 }
2961
2962 #[rstest]
2964 #[traced_test]
2965 fn select_view_empty_keys_returns_err() {
2966 let cf = column_frame! {
2967 "a" => [1i32, 2i32]
2968 };
2969 let result = cf.select_view(Some(&[]));
2970 assert!(result.is_err(), "empty slice should return an error");
2971 }
2972
2973 #[rstest]
2978 #[traced_test]
2979 fn select_view_single_column() {
2980 let cf = column_frame! {
2981 "only" => [5i32, 6i32, 7i32, 8i32]
2982 };
2983 let mv = cf
2984 .select_view(Some(&["only".into()]))
2985 .expect("should succeed");
2986 let rv = mv.row_view();
2987 assert_eq!(rv.nrows(), 4, "four rows after transposing single column");
2988 assert_eq!(rv.ncols(), 1, "one column");
2989 }
2990
2991 #[test]
2996 fn serde_v1_array2_roundtrip() {
2997 #[derive(Serialize)]
2998 struct WireV1 {
2999 index: KeyIndex,
3000 data_frame: Array2<DataValue>,
3001 }
3002
3003 let index = KeyIndex::new(vec![
3004 Key::new("a", crate::DataType::I32),
3005 Key::new("b", crate::DataType::String),
3006 ]);
3007 let data_frame = Array2::from_shape_vec(
3008 (2, 2),
3009 vec![
3010 DataValue::I32(1),
3011 DataValue::String("x".into()),
3012 DataValue::I32(2),
3013 DataValue::String("y".into()),
3014 ],
3015 )
3016 .unwrap();
3017 let wire = WireV1 { index, data_frame };
3018 let bytes = rmp_serde::to_vec(&wire).unwrap();
3019 let deserialized: ColumnFrame = rmp_serde::from_slice(&bytes).unwrap();
3020
3021 assert_eq!(deserialized.nrows(), 2);
3022 assert_eq!(deserialized.ncolumns(), 2);
3023 assert_eq!(
3024 deserialized.get_column(&"a".into()).unwrap().get(0),
3025 Some(DataValue::I32(1))
3026 );
3027 assert_eq!(
3028 deserialized.get_column(&"a".into()).unwrap().get(1),
3029 Some(DataValue::I32(2))
3030 );
3031 assert_eq!(
3032 deserialized.get_column(&"b".into()).unwrap().get(0),
3033 Some(DataValue::String("x".into()))
3034 );
3035 }
3036
3037 #[test]
3038 fn serde_v2_vec_typed_data_roundtrip() {
3039 #[derive(Serialize)]
3040 struct WireV2 {
3041 index: KeyIndex,
3042 data_frame: Vec<TypedData>,
3043 }
3044
3045 let index = KeyIndex::new(vec![
3046 Key::new("c", crate::DataType::F64),
3047 Key::new("d", crate::DataType::Bool),
3048 ]);
3049 let data_frame = vec![
3050 TypedData::from(vec![1.5f64, 2.5]),
3051 TypedData::from(vec![true, false]),
3052 ];
3053 let wire = WireV2 { index, data_frame };
3054 let bytes = rmp_serde::to_vec(&wire).unwrap();
3055 let deserialized: ColumnFrame = rmp_serde::from_slice(&bytes).unwrap();
3056
3057 assert_eq!(deserialized.nrows(), 2);
3058 assert_eq!(deserialized.ncolumns(), 2);
3059 assert_eq!(
3060 deserialized.get_column(&"c".into()).unwrap().get(0),
3061 Some(DataValue::F64(1.5))
3062 );
3063 assert_eq!(
3064 deserialized.get_column(&"d".into()).unwrap().get(1),
3065 Some(DataValue::Bool(false))
3066 );
3067 }
3068
3069 #[test]
3070 fn serde_v3_current_format_roundtrip() {
3071 let cf = column_frame! {
3072 "x" => [100i32, 200i32, 300i32],
3073 "y" => [true, false, true]
3074 };
3075 let bytes = rmp_serde::to_vec(&cf).unwrap();
3076 let deserialized: ColumnFrame = rmp_serde::from_slice(&bytes).unwrap();
3077
3078 assert_eq!(deserialized.nrows(), 3);
3079 assert_eq!(deserialized.ncolumns(), 2);
3080 let col_x = deserialized.get_column(&"x".into()).unwrap();
3081 assert_eq!(col_x.len(), 3);
3082 let col_y = deserialized.get_column(&"y".into()).unwrap();
3083 assert_eq!(col_y.get(0), Some(DataValue::Bool(true)));
3084 assert_eq!(col_y.get(1), Some(DataValue::Bool(false)));
3085 }
3086}