Skip to main content

trs_dataframe/
dataframe.rs

1use column_store::sorted_df::SortedDataFrame;
2use data_value::{DataValue, Extract};
3use halfbrown::HashMap;
4use ndarray::{Array1, Array2};
5use std::fmt;
6/// Column-oriented storage, typed arrays, and frame operations.
7pub mod column_store;
8/// Row-level hashing and indexing utilities.
9pub mod index;
10/// Join strategies and relation descriptors.
11pub mod join;
12/// Column key type with name, id, and data-type metadata.
13pub mod key;
14use crate::{error::Error, CandidateData};
15#[cfg(feature = "python")]
16pub mod python;
17
18#[cfg(feature = "python")]
19use pyo3::prelude::*;
20
21use crate::{
22    dataframe::{
23        column_store::typed_array::TypedDataArray, column_store::ColumnFrame,
24        column_store::MaybeView, join::JoinRelation, key::Key,
25    },
26    MLChefMap,
27};
28
/// Selects which end of a sorted dataframe to take rows from, and how many.
///
/// Passed to [`SortedDataFrame::topn`] to pull a fixed number of rows off
/// the start or the end of a sorted result.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TopN {
    /// Keep the leading `n` rows (smallest values).
    First(usize),
    /// Keep the trailing `n` rows (largest values).
    Last(usize),
}
40
41/// User-facing dataframe: a [`ColumnFrame`] with attached constants and
42/// metadata.
43///
44/// # Storage
45/// The underlying [`ColumnFrame`] is column-oriented — each column lives in
46/// its own [`crate::TypedData`] variant. Materialize a 2-D view
47/// with [`DataFrame::select`] (row-major) or
48/// [`DataFrame::select_view`] (zero-copy where possible).
49///
50/// # Example
51/// ```
52/// use trs_dataframe::{DataFrame, column_frame};
53///
54/// let df = DataFrame::new(column_frame! {
55///     "a" => [1, 2, 3],
56///     "b" => [4, 5, 6]
57/// });
58///
59/// // Materialize all columns as a row-major 2-D array (rows × columns).
60/// let all_data = df.select(None);
61///
62/// // Materialize a specific subset of columns.
63/// let keys = vec!["a".into(), "b".into()];
64/// let selected = df.select(Some(&keys));
65/// ```
66#[derive(Debug, Clone, PartialEq, Default, serde::Serialize, serde::Deserialize)]
67#[cfg_attr(feature = "python", pyclass)]
68pub struct DataFrame {
69    /// Whole-frame constants — values that logically apply to every row but
70    /// are not stored per-row. Useful for shared metadata that joins should
71    /// preserve. They do not appear in [`select`](Self::select) results.
72    pub constants: HashMap<Key, DataValue>,
73    /// Column-oriented storage backing this dataframe.
74    pub dataframe: ColumnFrame,
75    /// Free-form user metadata. Does not participate in any data operation.
76    pub metadata: HashMap<String, DataValue>,
77}
78
79impl fmt::Display for DataFrame {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        self.dataframe.fmt(f)
82    }
83}
84
85impl DataFrame {
86    /// Creates a new [`DataFrame`] from anything that can be converted into a [`ColumnFrame`].
87    ///
88    /// # Examples
89    ///
90    /// ```
91    /// use trs_dataframe::{DataFrame, column_frame};
92    ///
93    /// let df = DataFrame::new(column_frame! {
94    ///     "a" => [1, 2, 3],
95    ///     "b" => [4, 5, 6]
96    /// });
97    /// assert_eq!(df.n_rows(), 3);
98    /// assert_eq!(df.n_columns(), 2);
99    /// ```
100    pub fn new<C: Into<ColumnFrame>>(dataframe: C) -> Self {
101        Self {
102            constants: HashMap::new(),
103            dataframe: dataframe.into(),
104            metadata: HashMap::new(),
105        }
106    }
107
108    /// Returns the number of columns which dataframe contains.
109    pub fn n_columns(&self) -> usize {
110        self.dataframe.ncolumns()
111    }
112
113    /// Returns the number of rows which dataframe contains.
114    pub fn n_rows(&self) -> usize {
115        self.dataframe.nrows()
116    }
117
118    /// Compacts the internal storage to reclaim memory after row deletions or
119    /// filter operations that may leave excess capacity allocated.
120    pub fn shrink(&mut self) {
121        self.dataframe.shrink();
122    }
123
124    /// Attaches a key-value metadata entry to this dataframe.
125    ///
126    /// Metadata does not participate in data operations (select, join, filter, etc.)
127    /// and is intended for user-defined annotations such as source info or timestamps.
128    pub fn add_metadata(&mut self, key: String, value: DataValue) {
129        self.metadata.insert(key, value);
130    }
131
132    /// Returns a reference to the metadata value for the given key, or `None` if absent.
133    pub fn get_metadata(&self, key: &str) -> Option<&DataValue> {
134        self.metadata.get(key)
135    }
136
137    /// Joins another dataframe into this one according to the given [`JoinRelation`].
138    ///
139    /// The join strategy is determined by the variant inside `join_type`:
140    /// - [`crate::dataframe::join::JoinBy::AddColumns`] — adds non-existing columns from `other`
141    /// - [`crate::dataframe::join::JoinBy::Replace`] — replaces the entire frame with `other`
142    /// - [`crate::dataframe::join::JoinBy::Extend`] — appends rows from `other`
143    /// - [`crate::dataframe::join::JoinBy::Broadcast`] — replicates a single-row `other` across all rows
144    /// - [`crate::dataframe::join::JoinBy::CartesianProduct`] — produces all row combinations
145    /// - [`crate::dataframe::join::JoinBy::JoinById`] — hash-based join on shared key columns
146    ///
147    /// Constants from `other` are merged into this dataframe's constants map.
148    pub fn join(&mut self, other: Self, join_type: &JoinRelation) -> Result<(), Error> {
149        for (key, value) in other.constants {
150            self.constants.insert(key, value);
151        }
152        self.dataframe.join(other.dataframe, join_type)
153    }
154
155    /// Applies a user-defined function to the underlying [`ColumnFrame`].
156    ///
157    /// The closure receives the provided `keys` and a mutable reference to the
158    /// internal [`ColumnFrame`], allowing arbitrary in-place transformations.
159    pub fn apply_function<F>(&mut self, keys: &[Key], mut func: F) -> Result<(), Error>
160    where
161        F: FnMut(&[Key], &mut ColumnFrame) -> Result<(), Error>,
162    {
163        self.dataframe.apply_function(keys, &mut func)
164    }
165
166    /// Selects columns and returns their data as a 2D array of [`DataValue`] in row-major order.
167    ///
168    /// If `keys` is `None`, all columns are returned. If a requested key does not
169    /// exist, its cells are filled with [`DataValue::Null`].
170    ///
171    /// # Examples
172    ///
173    /// ```
174    /// use trs_dataframe::{df, Key};
175    ///
176    /// let df = df! { "a" => [1, 2], "b" => [3, 4] };
177    /// let data = df.select(None).unwrap();
178    /// assert_eq!(data.nrows(), 2);
179    /// ```
180    pub fn select(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
181        Ok(self.dataframe.select(keys))
182    }
183
184    /// Returns selected columns as a typed 2D array, converting each [`DataValue`]
185    /// via the [`Extract`] trait.
186    ///
187    /// This is the typed counterpart of [`select`](Self::select). If `keys` is `None`,
188    /// all columns are returned. The data is in row-major order (rows × columns).
189    ///
190    /// # Type coercion
191    ///
192    /// The [`Extract`] trait performs best-effort numeric coercion (e.g. `I32 -> f64`).
193    /// Values that cannot be meaningfully converted yield the type's default
194    /// (0 for numbers, `false` for bool, empty string for `String`).
195    ///
196    /// # Examples
197    ///
198    /// ```
199    /// use trs_dataframe::{df, Key};
200    ///
201    /// let df = df! {
202    ///     "a" => [1i32, 2i32, 3i32],
203    ///     "b" => [4i32, 5i32, 6i32]
204    /// };
205    /// let keys = vec![Key::from("a"), Key::from("b")];
206    /// let arr = df.select_typed::<f64>(Some(&keys)).unwrap();
207    /// assert_eq!(arr[[0, 0]], 1.0);
208    /// assert_eq!(arr[[1, 1]], 5.0);
209    /// ```
210    pub fn select_typed<T: Extract + Clone>(
211        &self,
212        keys: Option<&[Key]>,
213    ) -> Result<Array2<T>, Error> {
214        Ok(self.dataframe.select_typed(keys))
215    }
216
217    /// Returns selected columns wrapped in a [`MaybeView`].
218    ///
219    /// View-oriented counterpart of [`select`](Self::select). The selected
220    /// columns are stacked into an owned [`Array2`] of shape
221    /// `(ncols, nrows)`. When `keys` is `None`, every column from the
222    /// underlying [`crate::KeyIndex`] is included.
223    ///
224    /// Call [`MaybeView::row_view`] on the result to obtain a uniform
225    /// `(nrows, ncols)` read-only view regardless of which variant was
226    /// produced.
227    ///
228    /// # Errors
229    ///
230    /// Returns an error when `keys` resolves to an empty or entirely unknown
231    /// key set.
232    ///
233    /// # Examples
234    ///
235    /// ```
236    /// use trs_dataframe::{df, Key};
237    ///
238    /// let frame = df! { "a" => [1i32, 2i32], "b" => [3i32, 4i32] };
239    /// let keys = vec![Key::from("a"), Key::from("b")];
240    /// let view = frame.select_view(Some(&keys)).unwrap();
241    /// // row_view() yields a (nrows, ncols) ArrayView2.
242    /// assert_eq!(view.row_view().nrows(), 2);
243    /// ```
244    pub fn select_view(&self, keys: Option<&[Key]>) -> Result<MaybeView<'_>, Error> {
245        self.dataframe.select_view(keys)
246    }
247
248    /// Returns selected columns as borrowed [`crate::TypedData`] views.
249    ///
250    /// Each entry of the returned `Vec` corresponds to one requested column,
251    /// in the same order as `keys`. Missing keys yield `None`; present keys
252    /// yield `Some(&TypedData)` borrowed directly from the underlying column
253    /// store — no allocation, no per-element [`DataValue`] boxing.
254    ///
255    /// Use [`crate::TypedData::as_slice_i32`] (and friends) for zero-copy access to
256    /// the native primitive storage, or [`crate::TypedData::iter`] for a generic
257    /// [`DataValue`] iterator.
258    ///
259    /// # Errors
260    ///
261    /// Returns an error when `keys` resolves to an empty or entirely unknown
262    /// key set.
263    ///
264    /// # Examples
265    ///
266    /// ```
267    /// use trs_dataframe::{df, Key};
268    ///
269    /// let frame = df! {
270    ///     "score" => [1i32, 2i32, 3i32],
271    ///     "rank"  => [10i32, 20i32, 30i32]
272    /// };
273    /// let cols = frame.select_vec_view(Some(&["score".into()])).unwrap();
274    /// assert_eq!(cols.len(), 1);
275    /// assert_eq!(cols[0].as_ref().unwrap().len(), 3);
276    /// ```
277    pub fn select_vec_view(
278        &self,
279        keys: Option<&[Key]>,
280    ) -> Result<Vec<Option<&TypedDataArray>>, Error> {
281        self.dataframe.select_vec_view(keys)
282    }
283
284    /// Returns the requested columns as `Vec<Vec<D>>` in row-major order —
285    /// outer `Vec` is rows, inner `Vec` is one cell per selected key, with
286    /// each cell coerced to `D` via [`Extract`].
287    ///
288    /// Despite the name, the result is **not** transposed.
289    pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>> {
290        self.dataframe.select_transposed_typed::<D>(keys)
291    }
292
293    /// Returns a single column materialized as an owned [`Array1<DataValue>`],
294    /// or `None` if the key is absent.
295    ///
296    /// Typed columns allocate a [`DataValue`] per element on the fly. For
297    /// zero-copy typed access, use [`Self::get_column`].
298    #[deprecated(note = "allocates O(n); use get_column() for zero-copy typed access")]
299    pub fn select_column(&self, key: Key) -> Option<ndarray::Array1<DataValue>> {
300        #[allow(deprecated)]
301        self.dataframe.select_column(&key)
302    }
303
304    /// Stacks the selected columns into an [`Array2`] of shape
305    /// `(ncols, nrows)` — each row of the output is one column from the
306    /// dataframe.
307    ///
308    /// If `keys` is `None`, all columns are included.
309    pub fn select_transposed(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
310        self.dataframe.select_transposed(keys)
311    }
312
313    /// Stores a constant value that logically applies to every row without
314    /// being physically stored per-row.
315    ///
316    /// Constants are carried through joins but do not appear in
317    /// [`select`](Self::select) results.
318    pub fn insert_constant(&mut self, key: Key, value: DataValue) {
319        self.constants.insert(key, value);
320    }
321
322    /// Appends a single row to the dataframe.
323    ///
324    /// The row is supplied as any type implementing [`CandidateData`]
325    /// (e.g. `HashMap<Key, DataValue>`). New columns are added automatically
326    /// if the row contains keys not yet present in the frame.
327    pub fn push<C: CandidateData>(&mut self, item: C) -> Result<(), Error> {
328        self.dataframe.push(item)
329    }
330
331    /// Removes the specified columns from this dataframe and returns them as a
332    /// new [`DataFrame`].
333    pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error> {
334        self.dataframe.remove_column(keys).map(|x| x.into())
335    }
336
337    /// Appends all rows from `items` to this dataframe.
338    ///
339    /// If the two frames have different column sets, missing columns are filled
340    /// with [`DataValue::Null`].
341    pub fn extend(&mut self, items: Self) -> Result<(), Error> {
342        self.dataframe.extend(items.dataframe)
343    }
344
345    /// Returns the number of rows in the dataframe.
346    pub fn len(&self) -> usize {
347        self.dataframe.nrows()
348    }
349
350    /// Returns `true` if the dataframe contains no rows.
351    pub fn is_empty(&self) -> bool {
352        self.dataframe.is_empty()
353    }
354
355    /// Adds a new column to the dataframe.
356    ///
357    /// The column accepts anything that converts into a
358    /// [`crate::TypedData`] — e.g. `Vec<DataValue>`,
359    /// `Vec<T>` for any supported primitive, `Array1<DataValue>`, or a raw
360    /// `TypedData`.
361    ///
362    /// Returns an error if the column key already exists or if the length of
363    /// the supplied column does not match the current row count.
364    pub fn add_single_column<K, V>(&mut self, key: K, values: V) -> Result<(), Error>
365    where
366        K: Into<Key>,
367        V: Into<TypedDataArray>,
368    {
369        self.dataframe.add_single_column(key, values)
370    }
371
372    /// Returns a reference to the underlying typed column storage, or `None`
373    /// if `key` is absent.
374    ///
375    /// This is the zero-copy counterpart of
376    /// [`get_single_column`](Self::get_single_column): callers can use
377    /// [`TypedData::as_slice_i32`](crate::TypedData::as_slice_i32) (and its
378    /// siblings for other primitives) to borrow the native storage without
379    /// allocating a `DataValue` per element.
380    pub fn get_column(&self, key: &Key) -> Option<&TypedDataArray> {
381        self.dataframe.get_column(key).ok()
382    }
383
384    /// Returns a single column materialized as an owned [`Array1<DataValue>`],
385    /// or `None` if the key is absent.
386    ///
387    /// Typed columns allocate a [`DataValue`] per element on the fly. For
388    /// zero-copy typed access, use [`Self::get_column`].
389    #[deprecated(note = "allocates O(n); use get_column() for zero-copy typed access")]
390    pub fn get_single_column(&self, key: &Key) -> Option<Array1<DataValue>> {
391        #[allow(deprecated)]
392        self.dataframe.get_single_column(key)
393    }
394
395    /// Returns a column extracted into a typed [`Array1<T>`], where each [`DataValue`]
396    /// is converted via the [`Extract`] trait.
397    ///
398    /// This is a convenience wrapper around [`get_single_column`](Self::get_single_column)
399    /// that maps every element through `T::extract`, producing an owned array of the
400    /// target type. Returns `None` if the key does not exist in the dataframe.
401    ///
402    /// # Type coercion
403    ///
404    /// The [`Extract`] trait performs best-effort numeric coercion (e.g. `I32 -> f64`).
405    /// Values that cannot be meaningfully converted yield the type's default
406    /// (0 for numbers, `false` for bool, empty string for `String`).
407    ///
408    /// # Examples
409    ///
410    /// ```
411    /// use trs_dataframe::{df, Key};
412    ///
413    /// let df = df! {
414    ///     "score" => [1.5f64, 2.5f64, 3.5f64]
415    /// };
416    /// let key: Key = "score".into();
417    /// let col = df.get_single_column_typed::<f64>(&key).unwrap();
418    /// assert_eq!(col.len(), 3);
419    /// assert_eq!(col[0], 1.5);
420    /// ```
421    pub fn get_single_column_typed<T: Extract>(&self, key: &Key) -> Option<Array1<T>> {
422        self.dataframe.get_single_column_typed(key)
423    }
424
425    /// Returns a [`SortedDataFrame`] view sorted by the given column key.
426    ///
427    /// The sort is ascending with `Null` values pushed to the end.
428    /// Use [`SortedDataFrame::topn`] to efficiently extract the first/last N rows.
429    pub fn sorted(&self, key: &Key) -> Result<SortedDataFrame<'_>, Error> {
430        self.dataframe.sorted(key)
431    }
432
433    /// Returns a new dataframe containing only rows that satisfy the filter expression.
434    ///
435    /// Filter expressions are parsed from strings — see [`FilterRules`](crate::filter::FilterRules)
436    /// for the supported grammar (comparison, regex, set membership, logical combinators).
437    ///
438    /// Constants and metadata are cloned into the result.
439    pub fn filter(&self, filter: &crate::filter::FilterRules) -> Result<Self, Error> {
440        let filtered_df = self.dataframe.filter(filter)?;
441        Ok(Self {
442            constants: self.constants.clone(),
443            dataframe: filtered_df,
444            metadata: self.metadata.clone(),
445        })
446    }
447
448    /// Converts this dataframe into a Polars [`DataFrame`](polars::prelude::DataFrame).
449    ///
450    /// Each column is mapped to its Polars equivalent via [`into_polars_value`].
451    /// Requires the `polars-df` feature.
452    #[cfg(feature = "polars-df")]
453    #[allow(deprecated)]
454    pub fn as_polars(&self) -> Result<polars::prelude::DataFrame, Error> {
455        let mut columns = vec![];
456        for key in self.dataframe.keys() {
457            let values = self
458                .dataframe
459                .get_single_column(key)
460                .ok_or_else(|| Error::NotFound(key.clone()))?
461                .into_iter()
462                .map(|x| into_polars_value(key, x.clone()))
463                .collect::<Vec<_>>();
464            let s = polars::prelude::Column::new(key.name().into(), values);
465
466            columns.push(s);
467        }
468
469        Ok(polars::prelude::DataFrame::new(columns)?)
470    }
471
472    /// Deserializes a dataframe from MessagePack bytes.
473    ///
474    /// This is the inverse of [`store_into_messagepack`](Self::store_into_messagepack)
475    /// and is useful for compact binary serialization in IPC or storage scenarios.
476    pub fn load_from_messagepack(bytes: &[u8]) -> Result<Self, Error> {
477        rmp_serde::decode::from_slice(bytes).map_err(|e| Error::UnknownError(format!("{e:?}")))
478    }
479
480    /// Serializes this dataframe into MessagePack bytes.
481    ///
482    /// The resulting bytes can be deserialized back with
483    /// [`load_from_messagepack`](Self::load_from_messagepack).
484    pub fn store_into_messagepack(&self) -> Result<Vec<u8>, Error> {
485        rmp_serde::encode::to_vec(&self).map_err(|e| Error::UnknownError(format!("{e:?}")))
486    }
487}
488
/// Maps a crate [`DataType`](crate::DataType) onto the matching Polars
/// data type.
///
/// `Unknown` becomes Polars `Null`, `Vec` becomes a `List` whose element
/// type is left unknown, and `Map` becomes a `Struct` without fields.
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn polars_dtype(dtype: crate::DataType) -> polars::prelude::DataType {
    // Two enums with overlapping variant names: alias them instead of
    // glob-importing both.
    use crate::DataType as Source;
    use polars::prelude::DataType as Target;

    match dtype {
        Source::Bool => Target::Boolean,
        Source::U32 => Target::UInt32,
        Source::I32 => Target::Int32,
        Source::U8 => Target::UInt8,
        Source::U64 => Target::UInt64,
        Source::I64 => Target::Int64,
        Source::F32 => Target::Float32,
        Source::F64 => Target::Float64,
        Source::U128 => Target::UInt128,
        Source::I128 => Target::Int128,
        Source::String => Target::String,
        Source::Bytes => Target::Binary,
        Source::Unknown => Target::Null,
        Source::Vec => Target::List(Box::new(Target::Unknown(
            polars::prelude::UnknownKind::Any,
        ))),
        Source::Map => Target::Struct(vec![]),
    }
}
516
/// Converts a [`DataValue`] into an owned Polars
/// [`AnyValue`](polars::prelude::AnyValue), applying type coercion based on
/// the column's [`Key`] dtype.
///
/// The value is first coerced with `convert_dv_to_dtype` and then mapped
/// variant by variant:
/// - scalars map to the Polars variant of the same width (`U8` → `UInt8`,
///   matching [`polars_dtype`] and [`from_polars_value`]);
/// - `EnumNumber` is emitted as `Int32`;
/// - `Vec` becomes a `List` series named after `key`; its element dtype is
///   the first element dtype that is not `Unknown`;
/// - `Map` becomes a `StructOwned` whose fields are ordered by key so the
///   layout is deterministic.
///
/// # Panics
///
/// Panics if Polars cannot build a series from the elements of a `Vec` value.
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn into_polars_value(key: &Key, dv: DataValue) -> polars::prelude::AnyValue<'static> {
    use polars::prelude::AnyValue::*;
    use polars::prelude::Field;

    use crate::dataframe::column_store::convert_dv_to_dtype;
    match convert_dv_to_dtype(key, dv) {
        DataValue::String(smart_string) => StringOwned(smart_string.as_str().into()),
        DataValue::Bytes(items) => BinaryOwned(items),
        // Keep the native width so the value agrees with `polars_dtype(U8)`
        // and survives a round trip through `from_polars_value`.
        DataValue::U8(x) => UInt8(x),
        DataValue::Bool(x) => Boolean(x),
        DataValue::I32(x) => Int32(x),
        DataValue::U32(x) => UInt32(x),
        DataValue::I64(x) => Int64(x),
        DataValue::U64(x) => UInt64(x),
        DataValue::I128(x) => Int128(x),
        DataValue::F32(x) => Float32(x),
        DataValue::F64(x) => Float64(x),
        DataValue::Null => Null,
        DataValue::Vec(data_values) => {
            // Element dtype = first element whose dtype can be detected.
            let element_dtype = data_values
                .iter()
                .find_map(|element| match crate::detect_dtype(element) {
                    crate::DataType::Unknown => None,
                    known => Some(known),
                })
                .unwrap_or(crate::DataType::Unknown);
            let vec_key = Key::new(key.name(), element_dtype);
            let elements = data_values
                .into_iter()
                .map(|x| into_polars_value(&vec_key, x))
                .collect::<Vec<_>>();
            let series =
                polars::series::Series::from_any_values(key.name().into(), &elements, true)
                    // Build the panic message only on the failure path.
                    .unwrap_or_else(|err| panic!("Cannot create series for {key:?}: {err:?}"));
            List(series)
        }
        DataValue::EnumNumber(x) => Int32(x),
        DataValue::U128(x) => UInt128(x),
        DataValue::Map(x) => {
            // Consume the map and order the entries by key: deterministic
            // field order, no second lookup and no per-value clone.
            let mut entries = x.into_iter().collect::<Vec<_>>();
            entries.sort_by(|(left, _), (right, _)| left.cmp(right));

            let mut values = Vec::with_capacity(entries.len());
            let mut fields = Vec::with_capacity(entries.len());
            for (name, value) in entries {
                let dtype = crate::detect_dtype(&value);
                let field_key = Key::new(&name, dtype);
                values.push(into_polars_value(&field_key, value));
                fields.push(Field::new(field_key.name().into(), polars_dtype(dtype)));
            }
            StructOwned(Box::new((values, fields)))
        }
    }
}
581
/// Converts a Polars [`AnyValue`](polars::prelude::AnyValue) back into a [`DataValue`].
///
/// Narrow integers without a dedicated `DataValue` variant are widened
/// losslessly (`UInt16` → `U32`, `Int8`/`Int16` → `I32`). `List` values are
/// converted element by element, and `StructOwned` values become a
/// `DataValue::Map` keyed by field name.
///
/// Any variant that is not handled explicitly is logged with
/// `tracing::warn!` and converted to [`DataValue::Null`].
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn from_polars_value(dv: polars::prelude::AnyValue<'_>) -> DataValue {
    use polars::prelude::AnyValue::*;
    match dv {
        Null => DataValue::Null,
        Boolean(v) => v.into(),
        String(v) => DataValue::String(v.into()),
        UInt8(v) => DataValue::U8(v),
        UInt16(v) => DataValue::U32(u32::from(v)),
        UInt32(v) => v.into(),
        UInt64(v) => v.into(),
        // `into_polars_value` emits `UInt128` for `DataValue::U128`; without
        // this arm such values came back as `Null`.
        UInt128(v) => DataValue::U128(v),
        Int8(v) => i32::from(v).into(),
        Int16(v) => i32::from(v).into(),
        Int32(v) => v.into(),
        Int64(v) => v.into(),
        Float32(v) => v.into(),
        Float64(v) => v.into(),
        Int128(v) => v.into(),
        List(series) => DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>()),
        // Array(series, _) => {
        //     DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>())
        // }
        StringOwned(v) => DataValue::String(v.as_str().into()),
        Binary(v) => DataValue::Bytes(v.to_owned()),
        BinaryOwned(v) => DataValue::Bytes(v),
        StructOwned(m) => {
            // The payload is `(values, fields)`; pair each field name with
            // its value.
            let (values, fields) = *m;
            let mut hm: std::collections::HashMap<smartstring::alias::String, DataValue> =
                std::collections::HashMap::with_capacity(fields.len());
            for (field, value) in fields.into_iter().zip(values) {
                hm.insert(field.name.as_str().into(), from_polars_value(value));
            }
            DataValue::Map(hm)
        }
        e => {
            tracing::warn!("Unsupported polars value: {e:?}");
            DataValue::Null
        }
    }
}
624
625impl From<ColumnFrame> for DataFrame {
626    fn from(dataframe: ColumnFrame) -> Self {
627        Self::new(dataframe)
628    }
629}
630
631impl From<Vec<std::collections::HashMap<Key, DataValue>>> for DataFrame {
632    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
633        Self::new(ColumnFrame::from(dataframe))
634    }
635}
636
637impl From<Vec<HashMap<Key, DataValue>>> for DataFrame {
638    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
639        Self::new(ColumnFrame::from(dataframe))
640    }
641}
642
643impl From<std::collections::HashMap<String, Vec<DataValue>>> for DataFrame {
644    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
645        Self::new(ColumnFrame::from(dataframe))
646    }
647}
648
649impl From<MLChefMap> for DataFrame {
650    fn from(dataframe: MLChefMap) -> Self {
651        Self::new(ColumnFrame::from(dataframe))
652    }
653}
654impl From<Vec<(Key, Vec<DataValue>)>> for DataFrame {
655    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
656        Self::new(ColumnFrame::from(dataframe))
657    }
658}
659
660impl From<std::collections::HashMap<String, Array1<DataValue>>> for DataFrame {
661    fn from(dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
662        Self::new(ColumnFrame::from(dataframe))
663    }
664}
665
/// Builds a dataframe from a Polars dataframe via the [`ColumnFrame`]
/// conversion. Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for DataFrame {
    fn from(polars_frame: polars::prelude::DataFrame) -> Self {
        let frame = ColumnFrame::from(polars_frame);
        Self::new(frame)
    }
}
672#[cfg(test)]
673#[allow(deprecated)]
674mod test {
675    use crate::filter::FilterRules;
676
677    use super::*;
678    use halfbrown::hashmap;
679    #[cfg(feature = "polars-df")]
680    use polars::prelude::NamedFrom as _;
681    use rstest::*;
682    use tracing_test::traced_test;
683    #[fixture]
684    fn dummy_candidates() -> ColumnFrame {
685        ColumnFrame::from(vec![
686            hashmap! {
687                "key1".into() => 1.into(),
688                "key2".into() => "a".into(),
689            },
690            hashmap! {
691                "key1".into() => 2.into(),
692                "key2".into() => "b".into(),
693            },
694        ])
695    }
696
697    #[rstest]
698    fn test_serde() {
699        let df = crate::df! {
700            "a" => [1u64, 2u64, 3u64],
701            "b" => [4u64, 5u64, 6u64],
702            "c" => [7u64, 8u64, 9u64]
703        };
704
705        let serialized = serde_json::to_string(&df).expect("BUG: Unable to serialize dataframe");
706
707        let deserialized =
708            serde_json::from_str(&serialized).expect("BUG: Unable to deserialize dataframe");
709
710        assert_eq!(df, deserialized);
711    }
712
713    #[cfg(feature = "polars-df")]
714    #[rstest]
715    fn test_polars() {
716        let expected = crate::df! {
717            "a" => [1u64, 2u64, 3u64],
718            "b" => [4f64, 5f64, 6f64],
719            "c" => [7i64, 8i64, 9i64]
720        };
721
722        let polars_df = polars::df!(
723            "a" => [1u64, 2u64, 3u64],
724            "b" => [4f64, 5f64, 6f64],
725            "c" => [7i64, 8i64, 9i64]
726        )
727        .expect("BUG: should be ok");
728        let as_df: DataFrame = polars_df.into();
729        let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into()];
730        assert_eq!(
731            as_df.select(Some(keys.as_slice())),
732            expected.select(Some(keys.as_slice()))
733        );
734    }
735    #[cfg(feature = "polars-df")]
736    use crate::DataType;
737    #[cfg(feature = "polars-df")]
738    #[rstest]
739    #[case::str(Key::new("a", DataType::String), DataValue::String("test".into()), polars::prelude::AnyValue::String("test".into()))]
740    #[case::u32(
741        Key::new("a", DataType::U32),
742        DataValue::U32(u32::MAX),
743        polars::prelude::AnyValue::UInt32(u32::MAX)
744    )]
745    #[case::i32(
746        Key::new("a", DataType::I32),
747        DataValue::I32(i32::MIN),
748        polars::prelude::AnyValue::Int32(i32::MIN)
749    )]
750    #[case::i64(
751        Key::new("a", DataType::I64),
752        DataValue::I64(i64::MIN),
753        polars::prelude::AnyValue::Int64(i64::MIN)
754    )]
755    #[case::u64(
756        Key::new("a", DataType::U64),
757        DataValue::U64(u64::MIN),
758        polars::prelude::AnyValue::UInt64(u64::MIN)
759    )]
760    #[case::f32(
761        Key::new("a", DataType::F32),
762        DataValue::F32(f32::MIN),
763        polars::prelude::AnyValue::Float32(f32::MIN)
764    )]
765    #[case::f64(
766        Key::new("a", DataType::F64),
767        DataValue::F64(f64::MIN),
768        polars::prelude::AnyValue::Float64(f64::MIN)
769    )]
770    #[case::null(
771        Key::new("a", DataType::Unknown),
772        DataValue::Null,
773        polars::prelude::AnyValue::Null
774    )]
775    #[case::i128(
776        Key::new("a", DataType::I128),
777        DataValue::I128(i128::MIN),
778        polars::prelude::AnyValue::Int128(i128::MIN)
779    )]
780    #[case::u8(
781        Key::new("a", DataType::U8),
782        DataValue::U8(255),
783        polars::prelude::AnyValue::UInt8(255)
784    )]
785    #[case::bool(
786        Key::new("a", DataType::Bool),
787        DataValue::Bool(true),
788        polars::prelude::AnyValue::Boolean(true)
789    )]
790    #[case::bytes(Key::new("a", DataType::Bytes), DataValue::Bytes("aaaaa".as_bytes().to_vec()), polars::prelude::AnyValue::BinaryOwned("aaaaa".as_bytes().to_vec()))]
791    #[case::vec_uints(Key::new("a", DataType::Vec), DataValue::Vec(vec![DataValue::U32(0), DataValue::U32(1)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::UInt32(1)])))]
792    #[case::map(Key::new("a", DataType::Map), DataValue::Map(data_value::stdhashmap!("a" => 0u64, "b" => "s")), polars::prelude::AnyValue::StructOwned(Box::new((
793        vec![polars::prelude::AnyValue::UInt64(0u64), polars::prelude::AnyValue::String("s".into())],
794        vec![polars::prelude::Field::new("a".into(), polars::prelude::DataType::UInt64), polars::prelude::Field::new("b".into(), polars::prelude::DataType::String)]))))]
795    // polars converts all by first element type
796    // #[case::vec_diff_int(DataValue::Vec(vec![ DataValue::I32(1), DataValue::U32(0)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::Int32(1i32), polars::prelude::AnyValue::UInt32(0u32)])))]
797    //#[case::vec_int_str(DataValue::Vec(vec![DataValue::U32(0), DataValue::String("1".into())]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::StringOwned("1".into())])))]
798    fn into_polars_value_test(
799        #[case] key: Key,
800        #[case] input: DataValue,
801        #[case] output: polars::prelude::AnyValue<'static>,
802    ) {
803        assert_eq!(into_polars_value(&key, input.clone()), output);
804        assert_eq!(from_polars_value(output), input);
805    }
806
807    // #[cfg(feature = "polars-df")]
808    // #[rstest]
809    // fn as_polars() {
810    //     let state = include_bytes!("../part_00330.dfb");
811    //     let df: Result<DataFrame, _> = rmp_serde::decode::from_slice(state);
812    //     assert!(df.is_ok());
813    //     let df = df.unwrap();
814    //     println!("{df}");
815    //     let polars_df = df.as_polars();
816    //     assert!(polars_df.is_ok(), "{polars_df:?}");
817    // }
818    #[rstest]
819    #[case(
820        DataFrame::new(crate::column_frame! {
821            "a" => [1f64, 2f64, 3f64],
822            "b" => [4i64, 5i64, 6i64],
823            "c" => [7i64, 8i64, 9i64]
824        }),
825        DataFrame::new(crate::column_frame! {
826            "a" => [1f64, 2f64],
827            "b" => [4i64, 5i64],
828            "c" => [7i64, 8i64]
829        }),
830        FilterRules::try_from("a >= 1f64 && (b <= 5 || c <= 8) && b >= 4").expect("BUG: cannot create filter rules"),
831    )]
832    #[case(
833        DataFrame::new(crate::column_frame! {
834            "a" => [1f64, 2f64, 3f64],
835            "b" => [4i64, 5i64, 6i64],
836            "c" => [7i64, 8i64, 9i64]
837        }),
838        DataFrame::new(crate::column_frame! {
839            "a" => [2f64],
840            "b" => [5i64],
841            "c" => [8i64]
842        }),
843        FilterRules::try_from("a % 2f64 == 0f64").expect("BUG: cannot create filter rules"),
844    )]
845    #[traced_test]
846    fn filter_test(
847        #[case] df: DataFrame,
848        #[case] expected: DataFrame,
849        #[case] filter: FilterRules,
850    ) {
851        let filtered = df.filter(&filter).expect("BUG: cannot filter");
852        assert_eq!(filtered, expected);
853    }
854
855    #[rstest]
856    fn test_serde_complex() {
857        let simple = r#"
858{
859    "constants": {},
860    "dataframe": {
861        "index": {
862            "keys": [
863                {
864                    "key": 3162770485,
865                    "name": "a",
866                    "ctype": "U32"
867                },
868                {
869                    "key": 2279056742,
870                    "name": "b",
871                    "ctype": "F64"
872                },
873                {
874                    "key": 2994984227,
875                    "name": "c",
876                    "ctype": "U64"
877                },
878                {
879                    "key": 3319645144,
880                    "name": "d",
881                    "ctype": "F64"
882                },
883                {
884                    "key": 1291847470,
885                    "name": "e",
886                    "ctype": "U32"
887                },
888                {
889                    "key": 874241070,
890                    "name": "f",
891                    "ctype": "Bool"
892                }
893            ],
894            "indexes": {
895                "a": 0,
896                "b": 1,
897                "c": 2,
898                "d": 3,
899                "e": 4,
900                "f": 5
901            },
902            "alias": {}
903        },
904        "data_frame": {
905            "v": 1,
906            "dim": [
907                2,
908                6
909            ],
910            "data": [
911                253780,
912                0.009369421750307085,
913                1633222860381359,
914                8,
915                5,
916                true,
917                64512,
918                0.003391335718333721,
919                1633222860810557,
920                8,
921                5,
922                null
923            ]
924        }
925    },
926    "metadata": {}
927}
928        "#;
929
930        let simple_deserialized: DataFrame =
931            serde_json::from_str(simple).expect("BUG: Unable to deserialize dataframe");
932
933        println!("deserialized: {simple_deserialized:?}");
934        let array = format!("[{}, {}, {}]", simple, simple, simple);
935        let deserialized: Vec<DataFrame> =
936            serde_json::from_str(&array).expect("BUG: Unable to deserialize dataframe");
937
938        println!("deserialized: {deserialized:?}");
939        assert_eq!(deserialized.len(), 3);
940        assert_eq!(simple_deserialized, deserialized[0]);
941    }
942
943    #[rstest]
944    #[case(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
945    #[case(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
946    #[case(vec![hashmap! {
947        "key1".into() => 1.into(),
948        "key2".into() => "a".into(),
949    },
950    hashmap! {
951        "key1".into() => 2.into(),
952    },])]
953    #[case(vec![data_value::stdhashmap! {
954        "key1" => DataValue::from(1),
955        "key2" => DataValue::from("a"),
956    },data_value::stdhashmap! {
957        "key1" => DataValue::from(2),
958    },])]
959    #[case(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(),
960    vec![DataValue::from("a"), DataValue::Null])])]
961    fn test_select_column<T: Into<DataFrame>>(#[case] input: T) {
962        let df: DataFrame = input.into();
963        assert_eq!(
964            df,
965            DataFrame {
966                constants: HashMap::new(),
967                dataframe: ColumnFrame::from(vec![
968                    hashmap! {
969                        "key1".into() => 1.into(),
970                        "key2".into() => "a".into(),
971                    },
972                    hashmap! {
973                        "key1".into() => 2.into(),
974                    },
975                ]),
976                metadata: HashMap::new(),
977            }
978        );
979        let selected_transposed = df.select_column("key1".into());
980        assert!(selected_transposed.is_some());
981        let selected_transposed = selected_transposed.unwrap();
982        assert_eq!(selected_transposed.len(), 2);
983        assert_eq!(selected_transposed, ndarray::array![1.into(), 2.into()]);
984    }
985
986    #[rstest]
987    #[case::hhm(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
988    #[case::stdhm(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
989    #[case::hm({
990        let hm: std::collections::HashMap<String, Array1<DataValue>> = data_value::stdhashmap!("key1".to_string() => Array1::from_vec(vec![DataValue::from(1), DataValue::from(2)]), "key2".to_string() => Array1::from_vec(vec![DataValue::from("a"), DataValue::Null]));
991        hm
992    })]
993    #[case::vec_hhm(vec![hashmap! {
994        "key1".into() => 1.into(),
995        "key2".into() => "a".into(),
996    },
997    hashmap! {
998        "key1".into() => 2.into(),
999    },])]
1000    #[case::vec_hme(vec![data_value::stdhashmap! {
1001        "key1" => DataValue::from(1),
1002        "key2" => DataValue::from("a"),
1003    },data_value::stdhashmap! {
1004        "key1" => DataValue::from(2),
1005    },])]
1006    #[case::vec_vec(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(), vec![DataValue::from("a"), DataValue::Null])])]
1007    fn test_from_conversion<T: Into<DataFrame>>(#[case] input: T) {
1008        let df: DataFrame = input.into();
1009        let expected: DataFrame = DataFrame {
1010            constants: HashMap::new(),
1011            dataframe: ColumnFrame::from(vec![
1012                hashmap! {
1013                    "key1".into() => 1.into(),
1014                    "key2".into() => "a".into(),
1015                },
1016                hashmap! {
1017                    "key1".into() => 2.into(),
1018                },
1019            ]),
1020            metadata: HashMap::new(),
1021        };
1022        assert_eq!(
1023            df.select(Some(&["key1".into(), "key2".into()])),
1024            expected.select(Some(&["key1".into(), "key2".into()])),
1025            "{df} vs {expected}"
1026        );
1027        let selected_transposed = df.select_transposed_typed::<i32>(&["key1".into()]);
1028        assert_eq!(selected_transposed.len(), 2);
1029        println!("{:?}", selected_transposed);
1030        assert_eq!(selected_transposed, vec![vec![1], vec![2]]);
1031    }
1032    #[rstest]
1033    fn test_dataframe(dummy_candidates: ColumnFrame) {
1034        let mut dataframe: DataFrame = DataFrame::default();
1035        assert!(dataframe.is_empty());
1036        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
1037        assert_eq!(dataframe.len(), 2);
1038
1039        let candidate = hashmap! {
1040            "key1".into() => 3.into(),
1041            "key2".into() => "c".into(),
1042        };
1043
1044        assert!(dataframe.push(candidate).is_ok());
1045        assert_eq!(dataframe.len(), 3);
1046        assert!(!dataframe.is_empty());
1047
1048        dataframe.insert_constant("key3".into(), 4.into());
1049        assert_eq!(dataframe.constants.len(), 1);
1050        assert!(dataframe
1051            .apply_function(&["key1".into()], |keys, df| {
1052                let key = keys[0].clone();
1053                let s = df
1054                    .get_single_column(&key)
1055                    .expect("BUG: Cannot get column")
1056                    .to_owned();
1057                let s = s.mapv(|x| x + DataValue::from(1));
1058                df.add_single_column("key5", s)?;
1059                Ok(())
1060            })
1061            .is_ok());
1062        let original = dataframe.clone();
1063        dataframe.shrink();
1064        let remove_df = dataframe.remove_column(&["key1".into()]);
1065        assert!(remove_df.is_ok());
1066        let mut remove_df = remove_df.unwrap();
1067        assert_eq!(remove_df.len(), 3);
1068        let selected = dataframe.select(Some(&["key2".into()]));
1069        assert!(selected.is_ok());
1070        let selected = selected.unwrap();
1071        println!("{:?}", selected);
1072
1073        // fixme later
1074        let joined_result =
1075            remove_df.join(dataframe, &JoinRelation::new(crate::JoinBy::AddColumns));
1076        assert!(joined_result.is_ok(), "{:?}", joined_result);
1077        let keys = vec!["key1".into(), "key2".into(), "key5".into()];
1078        assert_eq!(
1079            original.select(Some(keys.as_slice())),
1080            remove_df.select(Some(keys.as_slice()))
1081        );
1082    }
1083
1084    #[rstest]
1085    fn test_size_methods() {
1086        let candidate = hashmap! {
1087            "key1".into() => 3.into(),
1088            "key2".into() => "c".into(),
1089            "key3".into() => false.into()
1090        };
1091
1092        let dataframe: DataFrame = vec![candidate].into();
1093
1094        assert_eq!(dataframe.n_columns(), 3);
1095        assert_eq!(dataframe.n_rows(), 1);
1096    }
1097
1098    #[rstest]
1099    fn test_metadata(dummy_candidates: ColumnFrame) {
1100        let mut dataframe: DataFrame = DataFrame::default();
1101        assert!(dataframe.is_empty());
1102        println!("{:?}", dataframe);
1103        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
1104        println!("{:?}", dataframe);
1105        assert_eq!(dataframe.len(), 2);
1106
1107        dataframe.add_metadata("test".into(), 1.into());
1108        assert_eq!(dataframe.get_metadata("test"), Some(&1.into()));
1109        let dataframe = DataFrame::new(ColumnFrame::from(vec![
1110            hashmap! {
1111                "key1".into() => 1.into(),
1112                "key2".into() => "a".into(),
1113            },
1114            hashmap! {
1115                "key1".into() => 2.into(),
1116                "key2".into() => "b".into(),
1117            },
1118        ]));
1119        assert_eq!(dataframe.get_metadata("test"), None);
1120        let tt = dataframe.select_transposed(None);
1121        assert!(tt.is_ok());
1122        let tt = tt.unwrap();
1123        assert_eq!(tt.shape(), [2, 2]);
1124        assert_eq!(
1125            tt,
1126            Array2::from_shape_vec((2, 2), vec![1.into(), 2.into(), "a".into(), "b".into()])
1127                .unwrap()
1128        );
1129    }
1130
1131    #[rstest]
1132    #[traced_test]
1133    fn add_single_column_test() {
1134        let mut dataframe = DataFrame::default();
1135        let values: Array1<DataValue> = Array1::from_vec(vec![1.into(), 2.into(), 3.into()]);
1136        let r = dataframe.add_single_column("key1", values);
1137        assert!(r.is_ok(), "{r:?}");
1138        let selected = dataframe.select(None);
1139        assert!(selected.is_ok());
1140        let selected = selected.unwrap();
1141        assert_eq!(selected.shape(), [3, 1]);
1142        assert_eq!(
1143            selected,
1144            Array2::from_shape_vec((3, 1), vec![1.into(), 2.into(), 3.into()]).unwrap()
1145        );
1146        let values: Array1<i32> = Array1::from_vec(vec![1, 2]);
1147        assert!(dataframe.add_single_column("key1", values).is_err());
1148        let values: Vec<i32> = vec![3i32, 4, 5];
1149        assert!(dataframe.add_single_column("key2", values).is_ok());
1150        let values: Array1<i32> = Array1::from_vec(vec![3i32]);
1151        assert!(dataframe.add_single_column("key3", values).is_err());
1152    }
1153
1154    #[rstest]
1155    #[traced_test]
1156    fn add_single_column_empty_test() {
1157        let mut dataframe = DataFrame::default();
1158        let values: Array1<DataValue> = Array1::from(vec![]);
1159        let r = dataframe.add_single_column("key1", values);
1160        assert!(r.is_ok(), "{r:?}");
1161        let selected = dataframe.select(None);
1162        assert!(selected.is_ok());
1163        let selected = selected.unwrap();
1164        assert_eq!(selected.shape(), [0, 1]);
1165        assert_eq!(selected, Array2::from_shape_vec((0, 1), vec![]).unwrap());
1166        let values: Array1<DataValue> = Array1::from(vec![1.into(), 2.into()]);
1167        assert!(dataframe.add_single_column("key1", values).is_err());
1168        let values: Array1<DataValue> = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1169        assert!(dataframe.add_single_column("key2", values).is_ok());
1170        let values: Array1<DataValue> = Array1::from(vec![3.into(), 4.into()]);
1171        assert!(dataframe.add_single_column("key3", values).is_err());
1172        let values: Array1<DataValue> = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1173        assert!(dataframe.add_single_column("key3", values).is_ok());
1174
1175        assert_eq!(
1176            dataframe
1177                .select_column("key1".into())
1178                .expect("BUG: has to exists"),
1179            ndarray::arr1(&[DataValue::Null, DataValue::Null, DataValue::Null]),
1180        );
1181        assert_eq!(
1182            dataframe
1183                .select_column("key2".into())
1184                .expect("BUG: has to exists"),
1185            ndarray::arr1(&[3.into(), 4.into(), 5.into()]),
1186        );
1187        assert_eq!(
1188            dataframe.select(None).expect("BUG: cannot get data"),
1189            ndarray::arr2(&[
1190                [DataValue::Null, 3.into(), 3.into()],
1191                [DataValue::Null, 4.into(), 4.into()],
1192                [DataValue::Null, 5.into(), 5.into()],
1193            ])
1194        );
1195    }
1196
1197    #[rstest]
1198    #[case(
1199        DataFrame::new(ColumnFrame::from(vec![
1200            hashmap! {
1201                "k".into() => 1.into(),
1202                "k2".into() => 2.into(),
1203                "k3".into() => 2.2.into(),
1204            },
1205            hashmap! {
1206                "k".into() => 11.into(),
1207                "k2".into() => 3.into(),
1208            },
1209            hashmap! {
1210                "k".into() => 4.into(),
1211                "k2".into() => 5.into(),
1212                "k3".into() => 2.3.into(),
1213            },
1214            hashmap! {
1215                "k".into() => 4.into(),
1216                "k2".into() => 5.into(),
1217                "k3".into() => 2.4.into(),
1218            },
1219        ])),
1220        vec!["k".into(), "k2".into()],
1221        Array2::from_shape_vec((4, 2), vec![1.into(), 2.into(), 11.into(), 3.into(), 4.into(), 5.into(), 4.into(), 5.into()]).unwrap()
1222    )]
1223    #[case(
1224        DataFrame::new(ColumnFrame::from(vec![
1225            hashmap! {
1226                "k".into() => 1.into(),
1227                "k2".into() => 2.into(),
1228                "k3".into() => 2.2.into(),
1229            },
1230            hashmap! {
1231                "k".into() => 11.into(),
1232                "k2".into() => 3.into(),
1233            },
1234            hashmap! {
1235                "k".into() => 4.into(),
1236                "k2".into() => 5.into(),
1237                "k3".into() => 2.3.into(),
1238            },
1239            hashmap! {
1240                "k".into() => 4.into(),
1241                "k2".into() => 5.into(),
1242                "k3".into() => 2.4.into(),
1243            },
1244        ])),
1245        vec!["k2".into(), "k3".into(), "nonexist1".into(), "nonexists2".into(), "k".into()],
1246        Array2::from_shape_vec((4, 5), vec![
1247            2.into(), 2.2.into(), DataValue::Null, DataValue::Null, 1.into(),
1248            3.into(), DataValue::Null, DataValue::Null, DataValue::Null, 11.into(),
1249            5.into(), 2.3.into(),  DataValue::Null, DataValue::Null, 4.into(),
1250            5.into(), 2.4.into(), DataValue::Null, DataValue::Null, 4.into()]).unwrap()
1251    )]
1252    #[traced_test]
1253    fn select_multiple(
1254        #[case] input: DataFrame,
1255        #[case] columns: Vec<Key>,
1256        #[case] expected: Array2<DataValue>,
1257    ) {
1258        let selected = input.select(Some(&columns));
1259        assert!(selected.is_ok());
1260        let selected = selected.unwrap();
1261
1262        assert_eq!(selected, expected);
1263    }
1264
1265    #[rstest]
1266    #[case(
1267        DataFrame::new(ColumnFrame::from(vec![
1268            hashmap! {
1269                "k".into() => 1.into(),
1270                "k2".into() => 2.into(),
1271                "k3".into() => 2.2.into(),
1272            },
1273            hashmap! {
1274                "k".into() => 11.into(),
1275                "k2".into() => 3.into(),
1276            },
1277            hashmap! {
1278                "k".into() => 4.into(),
1279                "k2".into() => 5.into(),
1280                "k3".into() => 2.3.into(),
1281            },
1282            hashmap! {
1283                "k".into() => 4.into(),
1284                "k2".into() => 5.into(),
1285                "k3".into() => 2.4.into(),
1286            },
1287        ])),
1288        "k".into(),
1289        Array2::from_shape_vec((4, 3), vec![
1290            1.into(), 2.into(), 2.2.into(),
1291            4.into(), 5.into(), 2.3.into(),
1292            4.into(), 5.into(), 2.4.into(),
1293            11.into(), 3.into(), DataValue::Null,
1294            ]
1295        ).unwrap(),
1296        vec!["k".into(), "k2".into(), "k3".into()],
1297    )]
1298    #[rstest]
1299    #[case(
1300        DataFrame::new(ColumnFrame::from(vec![
1301            hashmap! {
1302                "k".into() => 1.into(),
1303                "k2".into() => 2.into(),
1304                "k3".into() => 2.2.into(),
1305            },
1306            hashmap! {
1307                "k".into() => 11.into(),
1308                "k2".into() => 3.into(),
1309            },
1310            hashmap! {
1311                "k".into() => 4.into(),
1312                "k2".into() => 5.into(),
1313                "k3".into() => 2.3.into(),
1314            },
1315            hashmap! {
1316                "k".into() => 4.into(),
1317                "k2".into() => 5.into(),
1318                "k3".into() => 2.4.into(),
1319            },
1320        ])),
1321        "k3".into(),
1322        Array2::from_shape_vec((4, 3), vec![
1323            11.into(), 3.into(), DataValue::Null,
1324            1.into(), 2.into(), 2.2.into(),
1325            4.into(), 5.into(), 2.3.into(),
1326            4.into(), 5.into(), 2.4.into(),
1327            ]
1328        ).unwrap(),
1329        vec!["k".into(), "k2".into(), "k3".into()],
1330    )]
1331    #[case(
1332        DataFrame::new(ColumnFrame::from(vec![
1333            hashmap! {
1334                "k".into() => 2.into(),
1335                "k2".into() => 0.000001.into(),
1336            },
1337            hashmap! {
1338                "k".into() => 1.into(),
1339                "k2".into() =>0.0000001.into(),
1340            },
1341            hashmap! {
1342                "k".into() => 3.into(),
1343                "k2".into() => 0.00001.into(),
1344            },
1345            hashmap! {
1346                "k".into() => 4.into(),
1347                "k2".into() => 0.001.into(),
1348            },
1349        ])),
1350        "k2".into(),
1351        Array2::from_shape_vec((4, 2), vec![
1352            1.into(), 0.0000001.into(),
1353            2.into(), 0.000001.into(),
1354            3.into(), 0.00001.into(),
1355            4.into(), 0.001.into(),
1356            ]
1357        ).unwrap(),
1358        vec!["k".into(), "k2".into()],
1359    )]
1360    #[case(
1361        DataFrame::new(ColumnFrame::from(vec![
1362            hashmap! {
1363                "k".into() => 2.into(),
1364                "k2".into() => "b".into(),
1365            },
1366            hashmap! {
1367                "k".into() => 1.into(),
1368                "k2".into() =>"a".into(),
1369            },
1370            hashmap! {
1371                "k".into() => 3.into(),
1372                "k2".into() =>"c".into(),
1373            },
1374            hashmap! {
1375                "k".into() => 4.into(),
1376                "k2".into() =>"z".into(),
1377            },
1378        ])),
1379        "k2".into(),
1380        Array2::from_shape_vec((4, 2), vec![
1381            1.into(),"a".into(),
1382            2.into(), "b".into(),
1383            3.into(), "c".into(),
1384            4.into(), "z".into(),
1385            ]
1386        ).unwrap(),
1387        vec!["k".into(), "k2".into()],
1388    )]
1389    #[traced_test]
1390    fn sort_by(
1391        #[case] input: DataFrame,
1392        #[case] column: Key,
1393        #[case] expected: Array2<DataValue>,
1394        #[case] columns: Vec<Key>,
1395    ) {
1396        let result = input.sorted(&column);
1397        assert!(result.is_ok(), "{result:?}");
1398        let result = result.unwrap().get_sorted();
1399        let selected = result.select(Some(&columns));
1400
1401        assert_eq!(selected, expected);
1402    }
1403    #[rstest]
1404    #[case(
1405        DataFrame::new(ColumnFrame::from(vec![
1406            hashmap! {
1407                "k".into() => 2.into(),
1408                "k2".into() => 0.000001.into(),
1409            },
1410            hashmap! {
1411                "k".into() => 1.into(),
1412                "k2".into() =>0.0000001.into(),
1413            },
1414            hashmap! {
1415                "k".into() => 3.into(),
1416                "k2".into() => 0.00001.into(),
1417            },
1418            hashmap! {
1419                "k".into() => 4.into(),
1420                "k2".into() => 0.001.into(),
1421            },
1422        ])),
1423        "k2".into(),
1424        TopN::Last(1),
1425        Array2::from_shape_vec((1, 2), vec![
1426            4.into(), 0.001.into(),
1427            ]
1428        ).unwrap(),
1429        vec!["k".into(), "k2".into()],
1430    )]
1431    #[case(
1432        DataFrame::new(ColumnFrame::from(vec![
1433            hashmap! {
1434                "k".into() => 2.into(),
1435                "k2".into() => 0.000001.into(),
1436            },
1437            hashmap! {
1438                "k".into() => 1.into(),
1439                "k2".into() =>0.0000001.into(),
1440            },
1441            hashmap! {
1442                "k".into() => 3.into(),
1443                "k2".into() => 0.00001.into(),
1444            },
1445            hashmap! {
1446                "k".into() => 4.into(),
1447                "k2".into() => 0.001.into(),
1448            },
1449        ])),
1450        "k2".into(),
1451        TopN::Last(2),
1452        Array2::from_shape_vec((2, 2), vec![
1453            4.into(), 0.001.into(),
1454            3.into(), 0.00001.into(),
1455            ]
1456        ).unwrap(),
1457        vec!["k".into(), "k2".into()],
1458    )]
1459    #[case(
1460        DataFrame::new(ColumnFrame::from(vec![
1461            hashmap! {
1462                "k".into() => 2.into(),
1463                "k2".into() => "b".into(),
1464            },
1465            hashmap! {
1466                "k".into() => 1.into(),
1467                "k2".into() =>"a".into(),
1468            },
1469            hashmap! {
1470                "k".into() => 3.into(),
1471                "k2".into() =>"c".into(),
1472            },
1473            hashmap! {
1474                "k".into() => 4.into(),
1475                "k2".into() =>"z".into(),
1476            },
1477        ])),
1478        "k2".into(),
1479        TopN::First(1),
1480        Array2::from_shape_vec((1, 2), vec![
1481            1.into(),"a".into(),
1482            ]
1483        ).unwrap(),
1484        vec!["k".into(), "k2".into()],
1485    )]
1486    #[case(
1487        DataFrame::new(ColumnFrame::from(vec![
1488            hashmap! {
1489                "k".into() => 2.into(),
1490                "k2".into() => "b".into(),
1491            },
1492            hashmap! {
1493                "k".into() => 1.into(),
1494                "k2".into() =>"a".into(),
1495            },
1496            hashmap! {
1497                "k".into() => 3.into(),
1498                "k2".into() =>"c".into(),
1499            },
1500            hashmap! {
1501                "k".into() => 4.into(),
1502                "k2".into() =>"z".into(),
1503            },
1504        ])),
1505        "k2".into(),
1506        TopN::First(2),
1507        Array2::from_shape_vec((2, 2), vec![
1508            1.into(),"a".into(),
1509            2.into(),"b".into(),
1510            ]
1511        ).unwrap(),
1512        vec!["k".into(), "k2".into()],
1513    )]
1514    #[traced_test]
1515    fn top_n(
1516        #[case] input: DataFrame,
1517        #[case] column: Key,
1518        #[case] topn: TopN,
1519        #[case] expected: Array2<DataValue>,
1520        #[case] columns: Vec<Key>,
1521    ) {
1522        let result = input.sorted(&column);
1523        assert!(result.is_ok(), "{result:?}");
1524        let result = result.unwrap();
1525        let first = result.topn(topn).unwrap();
1526        let selected = first.select(Some(&columns));
1527        assert_eq!(selected, expected);
1528    }
1529
1530    #[rstest]
1531    fn test_messagepack_roundtrip_empty_dataframe() {
1532        let df = DataFrame::default();
1533
1534        let bytes = df
1535            .store_into_messagepack()
1536            .expect("failed to serialize empty df");
1537        let restored =
1538            DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize empty df");
1539        assert_eq!(df, restored);
1540        assert!(restored.is_empty());
1541    }
1542
1543    #[rstest]
1544    fn test_messagepack_roundtrip_strings_and_bools() {
1545        // Strings and bools are preserved exactly by messagepack
1546        let df = DataFrame::new(ColumnFrame::from(vec![
1547            hashmap! {
1548                "str".into() => DataValue::String("hello".into()),
1549                "bool".into() => DataValue::Bool(true),
1550            },
1551            hashmap! {
1552                "str".into() => DataValue::String("".into()),
1553                "bool".into() => DataValue::Bool(false),
1554            },
1555        ]));
1556
1557        let bytes = df.store_into_messagepack().expect("failed to serialize");
1558        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1559        assert_eq!(df, restored);
1560    }
1561
1562    #[rstest]
1563    fn test_messagepack_roundtrip_f64_values() {
1564        let df = DataFrame::new(ColumnFrame::from(vec![
1565            hashmap! {
1566                "a".into() => DataValue::F64(3.14),
1567            },
1568            hashmap! {
1569                "a".into() => DataValue::F64(-2.718),
1570            },
1571        ]));
1572
1573        let bytes = df.store_into_messagepack().expect("failed to serialize");
1574        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1575        assert_eq!(df, restored);
1576    }
1577
1578    #[rstest]
1579    fn test_messagepack_f64_special_values_survive_roundtrip() {
1580        // f64::INFINITY serializes/deserializes but PartialEq may differ due to
1581        // DataValue Eq semantics; verify at the value level
1582        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1583            "a".into() => DataValue::F64(f64::INFINITY),
1584        }]));
1585
1586        let bytes = df.store_into_messagepack().expect("failed to serialize");
1587        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1588        assert_eq!(restored.len(), 1);
1589        let col = restored.select_column("a".into()).expect("col exists");
1590        match &col[0] {
1591            DataValue::F64(v) => assert!(v.is_infinite() && v.is_sign_positive()),
1592            other => panic!("expected F64, got {other:?}"),
1593        }
1594    }
1595
1596    #[rstest]
1597    fn test_messagepack_roundtrip_with_nulls() {
1598        let df = DataFrame::new(ColumnFrame::from(vec![
1599            hashmap! {
1600                "a".into() => DataValue::String("x".into()),
1601                "b".into() => DataValue::String("y".into()),
1602            },
1603            hashmap! {
1604                "a".into() => DataValue::String("z".into()),
1605                // "b" missing => Null
1606            },
1607        ]));
1608
1609        let bytes = df.store_into_messagepack().expect("failed to serialize");
1610        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1611        assert_eq!(df, restored);
1612    }
1613
1614    #[rstest]
1615    fn test_messagepack_roundtrip_with_metadata() {
1616        let mut df = DataFrame::new(crate::column_frame! {
1617            "col" => ["a", "b"]
1618        });
1619        df.add_metadata("name".into(), DataValue::String("test_df".into()));
1620        df.add_metadata("flag".into(), DataValue::Bool(true));
1621
1622        let bytes = df.store_into_messagepack().expect("failed to serialize");
1623        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1624        assert_eq!(df, restored);
1625        assert_eq!(
1626            restored.get_metadata("name"),
1627            Some(&DataValue::String("test_df".into()))
1628        );
1629        assert_eq!(restored.get_metadata("flag"), Some(&DataValue::Bool(true)));
1630    }
1631
1632    #[rstest]
1633    fn test_messagepack_roundtrip_with_constants() {
1634        let mut df = DataFrame::new(crate::column_frame! {
1635            "x" => ["a", "b"]
1636        });
1637        df.insert_constant("const_key".into(), DataValue::String("const_val".into()));
1638        df.insert_constant("const_flag".into(), DataValue::Bool(false));
1639
1640        let bytes = df.store_into_messagepack().expect("failed to serialize");
1641        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1642        assert_eq!(df, restored);
1643        assert_eq!(
1644            restored.constants.get(&"const_key".into()),
1645            Some(&DataValue::String("const_val".into()))
1646        );
1647    }
1648
1649    #[rstest]
1650    fn test_messagepack_integer_type_coercion() {
1651        // MessagePack uses compact integer encoding: small I64 values may
1652        // deserialize as U8/U32 etc. This test documents this lossy behavior.
1653        let df = crate::df! {
1654            "a" => [1i64, 2i64, 3i64]
1655        };
1656
1657        let bytes = df.store_into_messagepack().expect("failed to serialize");
1658        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1659
1660        // The row count is preserved even if integer types differ
1661        assert_eq!(restored.len(), 3);
1662
1663        // Values that fit in u8 get coerced to U8 by messagepack
1664        let col = restored
1665            .select_column("a".into())
1666            .expect("column should exist");
1667        // Values are semantically equivalent but may be different DataValue variants
1668        assert_ne!(
1669            col[0],
1670            DataValue::I64(1),
1671            "messagepack coerces small ints to compact types"
1672        );
1673    }
1674
1675    #[rstest]
1676    fn test_messagepack_large_i64_preserved() {
1677        // Values that exceed u32 range stay as large integer types
1678        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1679            "big".into() => DataValue::I64(i64::MIN),
1680        }]));
1681
1682        let bytes = df.store_into_messagepack().expect("failed to serialize");
1683        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1684        assert_eq!(df, restored);
1685    }
1686
1687    #[rstest]
1688    fn test_messagepack_load_invalid_bytes() {
1689        let result = DataFrame::load_from_messagepack(&[0xFF, 0xFE, 0xFD, 0x00]);
1690        assert!(result.is_err());
1691    }
1692
1693    #[rstest]
1694    fn test_messagepack_load_empty_bytes() {
1695        let result = DataFrame::load_from_messagepack(&[]);
1696        assert!(result.is_err());
1697    }
1698
1699    #[rstest]
1700    fn test_messagepack_load_truncated_bytes() {
1701        let df = DataFrame::new(ColumnFrame::from(vec![
1702            hashmap! {
1703                "a".into() => DataValue::String("hello world".into()),
1704                "b".into() => DataValue::Bool(true),
1705            },
1706            hashmap! {
1707                "a".into() => DataValue::String("test".into()),
1708                "b".into() => DataValue::Bool(false),
1709            },
1710        ]));
1711        let bytes = df.store_into_messagepack().expect("failed to serialize");
1712        // Truncate to half
1713        let truncated = &bytes[..bytes.len() / 2];
1714        let result = DataFrame::load_from_messagepack(truncated);
1715        assert!(result.is_err());
1716    }
1717
1718    #[rstest]
1719    fn test_messagepack_roundtrip_with_nested_vec_data() {
1720        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1721            "vec_col".into() => DataValue::Vec(vec![
1722                DataValue::String("a".into()),
1723                DataValue::String("b".into()),
1724            ]),
1725            "bytes_col".into() => DataValue::Bytes(vec![0, 1, 255]),
1726        }]));
1727
1728        let bytes = df.store_into_messagepack().expect("failed to serialize");
1729        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1730        assert_eq!(df, restored);
1731    }
1732
1733    #[rstest]
1734    fn test_messagepack_roundtrip_preserves_row_count() {
1735        let df = DataFrame::new(ColumnFrame::from(vec![
1736            hashmap! { "a".into() => DataValue::String("x".into()) },
1737            hashmap! { "a".into() => DataValue::String("y".into()) },
1738            hashmap! { "a".into() => DataValue::String("z".into()) },
1739        ]));
1740
1741        let bytes = df.store_into_messagepack().expect("failed to serialize");
1742        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1743        assert_eq!(restored.len(), 3);
1744        assert_eq!(restored.n_rows(), 3);
1745        assert_eq!(restored.n_columns(), 1);
1746    }
1747
1748    #[rstest]
1749    fn test_messagepack_idempotent_double_roundtrip() {
1750        // Use types that survive messagepack coercion (strings, bools, bytes)
1751        let mut df = DataFrame::new(ColumnFrame::from(vec![
1752            hashmap! {
1753                "a".into() => DataValue::String("hello".into()),
1754                "b".into() => DataValue::Bool(true),
1755            },
1756            hashmap! {
1757                "a".into() => DataValue::String("world".into()),
1758                "b".into() => DataValue::Bool(false),
1759            },
1760        ]));
1761        df.add_metadata("meta".into(), DataValue::Bool(true));
1762        df.insert_constant("c".into(), DataValue::String("const".into()));
1763
1764        let bytes1 = df.store_into_messagepack().expect("first serialize");
1765        let restored1 = DataFrame::load_from_messagepack(&bytes1).expect("first deserialize");
1766        let bytes2 = restored1
1767            .store_into_messagepack()
1768            .expect("second serialize");
1769        let restored2 = DataFrame::load_from_messagepack(&bytes2).expect("second deserialize");
1770
1771        assert_eq!(df, restored2);
1772        assert_eq!(bytes1, bytes2);
1773    }
1774
1775    #[rstest]
1776    fn test_messagepack_single_byte_payload() {
1777        // A single valid msgpack byte (e.g. fixint) should fail as incomplete DataFrame
1778        let result = DataFrame::load_from_messagepack(&[0x01]);
1779        assert!(result.is_err());
1780    }
1781
1782    // === hash_datavalue public API edge case tests ===
1783
1784    #[rstest]
1785    fn test_hash_datavalue_public_api_accessible() {
1786        // Verify the re-exported function works from the crate root
1787        let val = DataValue::I32(42);
1788        let h = crate::hash_datavalue(&val);
1789        // Deterministic
1790        assert_eq!(h, crate::hash_datavalue(&DataValue::I32(42)));
1791    }
1792
1793    #[rstest]
1794    fn test_hash_datavalue_vec_length_matters() {
1795        // [1] and [1, Null] should produce different hashes
1796        let short = DataValue::Vec(vec![DataValue::I32(1)]);
1797        let long = DataValue::Vec(vec![DataValue::I32(1), DataValue::Null]);
1798        assert_ne!(crate::hash_datavalue(&short), crate::hash_datavalue(&long));
1799    }
1800
1801    #[rstest]
1802    fn test_hash_datavalue_map_different_keys_same_values() {
1803        let mut m1 = std::collections::HashMap::new();
1804        m1.insert("a".into(), DataValue::I32(1));
1805        let mut m2 = std::collections::HashMap::new();
1806        m2.insert("b".into(), DataValue::I32(1));
1807
1808        assert_ne!(
1809            crate::hash_datavalue(&DataValue::Map(m1)),
1810            crate::hash_datavalue(&DataValue::Map(m2))
1811        );
1812    }
1813
1814    #[rstest]
1815    fn test_hash_datavalue_empty_string_vs_empty_bytes() {
1816        let empty_str = DataValue::String("".into());
1817        let empty_bytes = DataValue::Bytes(vec![]);
1818        assert_ne!(
1819            crate::hash_datavalue(&empty_str),
1820            crate::hash_datavalue(&empty_bytes)
1821        );
1822    }
1823
1824    #[rstest]
1825    fn test_hash_datavalue_empty_vec_vs_empty_map() {
1826        let empty_vec = DataValue::Vec(vec![]);
1827        let empty_map = DataValue::Map(std::collections::HashMap::new());
1828        assert_ne!(
1829            crate::hash_datavalue(&empty_vec),
1830            crate::hash_datavalue(&empty_map)
1831        );
1832    }
1833
1834    #[rstest]
1835    fn test_hash_datavalue_i128_boundary_values() {
1836        let max = DataValue::I128(i128::MAX);
1837        let min = DataValue::I128(i128::MIN);
1838        let zero = DataValue::I128(0);
1839        let neg_one = DataValue::I128(-1);
1840
1841        // All distinct
1842        let hashes: std::collections::HashSet<u64> = [&max, &min, &zero, &neg_one]
1843            .iter()
1844            .map(|v| crate::hash_datavalue(v))
1845            .collect();
1846        assert_eq!(hashes.len(), 4);
1847    }
1848
1849    #[rstest]
1850    fn test_hash_datavalue_u128_boundary_values() {
1851        let max = DataValue::U128(u128::MAX);
1852        let zero = DataValue::U128(0);
1853        let one = DataValue::U128(1);
1854        // u128::MAX is all bits set; ensure it differs from i128(-1) which is also all bits
1855        let i128_neg1 = DataValue::I128(-1);
1856
1857        assert_ne!(
1858            crate::hash_datavalue(&max),
1859            crate::hash_datavalue(&i128_neg1)
1860        );
1861        let hashes: std::collections::HashSet<u64> = [&max, &zero, &one]
1862            .iter()
1863            .map(|v| crate::hash_datavalue(v))
1864            .collect();
1865        assert_eq!(hashes.len(), 3);
1866    }
1867
1868    #[rstest]
1869    fn test_hash_datavalue_f64_special_values() {
1870        // NaN bit patterns: NaN == NaN for hashing since we use to_bits()
1871        let nan1 = DataValue::F64(f64::NAN);
1872        let nan2 = DataValue::F64(f64::NAN);
1873        assert_eq!(crate::hash_datavalue(&nan1), crate::hash_datavalue(&nan2));
1874
1875        // subnormal
1876        let subnormal = DataValue::F64(f64::MIN_POSITIVE / 2.0);
1877        let normal = DataValue::F64(f64::MIN_POSITIVE);
1878        assert_ne!(
1879            crate::hash_datavalue(&subnormal),
1880            crate::hash_datavalue(&normal)
1881        );
1882    }
1883
1884    #[rstest]
1885    fn test_hash_datavalue_enum_number_vs_i32_same_value() {
1886        // EnumNumber(42) and I32(42) should hash differently (different discriminant)
1887        let enum_val = DataValue::EnumNumber(42);
1888        let i32_val = DataValue::I32(42);
1889        assert_ne!(
1890            crate::hash_datavalue(&enum_val),
1891            crate::hash_datavalue(&i32_val)
1892        );
1893    }
1894
1895    #[rstest]
1896    fn get_single_column_typed_f64_from_i32() {
1897        let df = crate::df! {
1898            "a" => [1i32, 2i32, 3i32]
1899        };
1900        let key: Key = "a".into();
1901        let col = df.get_single_column_typed::<f64>(&key).unwrap();
1902        assert_eq!(col, ndarray::arr1(&[1.0f64, 2.0, 3.0]));
1903    }
1904
1905    #[rstest]
1906    fn get_single_column_typed_string() {
1907        let df = crate::df! {
1908            "name" => ["alice", "bob"]
1909        };
1910        let key: Key = "name".into();
1911        let col = df.get_single_column_typed::<String>(&key).unwrap();
1912        assert_eq!(
1913            col,
1914            ndarray::arr1(&["alice".to_string(), "bob".to_string()])
1915        );
1916    }
1917
1918    #[rstest]
1919    fn get_single_column_typed_missing_key() {
1920        let df = crate::df! {
1921            "a" => [1u64, 2u64]
1922        };
1923        let missing: Key = "z".into();
1924        assert!(df.get_single_column_typed::<u64>(&missing).is_none());
1925    }
1926
1927    #[rstest]
1928    fn get_single_column_typed_matches_untyped() {
1929        let df = crate::df! {
1930            "v" => [10u64, 20u64, 30u64]
1931        };
1932        let key: Key = "v".into();
1933        let typed = df.get_single_column_typed::<u64>(&key).unwrap();
1934        let untyped = df.get_single_column(&key).unwrap();
1935        for (t, u) in typed.iter().zip(untyped.iter()) {
1936            assert_eq!(*t, u64::extract(u));
1937        }
1938    }
1939
1940    #[rstest]
1941    fn get_single_column_typed_bool_from_i32() {
1942        let df = crate::df! {
1943            "flag" => [1i32, 0i32, 1i32, 0i32]
1944        };
1945        let key: Key = "flag".into();
1946        let col = df.get_single_column_typed::<bool>(&key).unwrap();
1947        assert_eq!(col, ndarray::arr1(&[true, false, true, false]));
1948    }
1949
1950    #[rstest]
1951    fn get_single_column_typed_i64_from_u32() {
1952        let df = crate::df! {
1953            "x" => [10u32, 20u32, 30u32]
1954        };
1955        let key: Key = "x".into();
1956        let col = df.get_single_column_typed::<i64>(&key).unwrap();
1957        assert_eq!(col, ndarray::arr1(&[10i64, 20i64, 30i64]));
1958    }
1959
1960    #[rstest]
1961    fn get_single_column_typed_f64_truncation_to_i32() {
1962        let df = crate::df! {
1963            "v" => [1.9f64, 2.1f64, 3.7f64]
1964        };
1965        let key: Key = "v".into();
1966        let col = df.get_single_column_typed::<i32>(&key).unwrap();
1967        assert_eq!(col, ndarray::arr1(&[1i32, 2i32, 3i32]));
1968    }
1969
1970    #[rstest]
1971    fn get_single_column_typed_single_element() {
1972        let df = crate::df! {
1973            "solo" => [42u64]
1974        };
1975        let key: Key = "solo".into();
1976        let col = df.get_single_column_typed::<f64>(&key).unwrap();
1977        assert_eq!(col.len(), 1);
1978        assert_eq!(col[0], 42.0);
1979    }
1980
1981    #[rstest]
1982    fn select_typed_all_columns() {
1983        let df = crate::df! {
1984            "a" => [1i32, 2i32, 3i32],
1985            "b" => [4i32, 5i32, 6i32]
1986        };
1987        let result = df.select_typed::<f64>(None).unwrap();
1988        assert_eq!(result.nrows(), 3);
1989        assert_eq!(result.ncols(), 2);
1990        assert_eq!(result[[0, 0]], 1.0);
1991        assert_eq!(result[[0, 1]], 4.0);
1992        assert_eq!(result[[2, 0]], 3.0);
1993        assert_eq!(result[[2, 1]], 6.0);
1994    }
1995
1996    #[rstest]
1997    fn select_typed_specific_keys() {
1998        let df = crate::df! {
1999            "x" => [10u64, 20u64],
2000            "y" => [30u64, 40u64],
2001            "z" => [50u64, 60u64]
2002        };
2003        let keys: Vec<Key> = vec!["x".into(), "z".into()];
2004        let result = df.select_typed::<i64>(Some(&keys)).unwrap();
2005        assert_eq!(result.nrows(), 2);
2006        assert_eq!(result.ncols(), 2);
2007        assert_eq!(result[[0, 0]], 10i64);
2008        assert_eq!(result[[0, 1]], 50i64);
2009        assert_eq!(result[[1, 0]], 20i64);
2010        assert_eq!(result[[1, 1]], 60i64);
2011    }
2012
2013    #[rstest]
2014    fn select_typed_nonexistent_key_gives_empty() {
2015        let df = crate::df! {
2016            "a" => [1i32, 2i32]
2017        };
2018        let keys: Vec<Key> = vec!["missing".into()];
2019        let result = df.select_typed::<f64>(Some(&keys)).unwrap();
2020        assert_eq!(result.shape(), &[0, 0]);
2021    }
2022
2023    #[rstest]
2024    fn select_typed_matches_select_with_extract() {
2025        let df = crate::df! {
2026            "a" => [1u64, 2u64, 3u64],
2027            "b" => [4u64, 5u64, 6u64]
2028        };
2029        let typed = df.select_typed::<f64>(None).unwrap();
2030        let manual = df.select(None).unwrap().mapv(|v| f64::extract(&v));
2031        assert_eq!(typed, manual);
2032    }
2033
2034    #[rstest]
2035    fn select_typed_string_values() {
2036        let df = crate::df! {
2037            "name" => ["alice", "bob", "carol"]
2038        };
2039        let result = df.select_typed::<String>(None).unwrap();
2040        assert_eq!(result[[0, 0]], "alice");
2041        assert_eq!(result[[1, 0]], "bob");
2042        assert_eq!(result[[2, 0]], "carol");
2043    }
2044
2045    #[rstest]
2046    fn select_typed_cross_numeric_coercion() {
2047        // i32 values extracted as u64
2048        let df = crate::df! {
2049            "a" => [1i32, 2i32, 3i32]
2050        };
2051        let result = df.select_typed::<u64>(None).unwrap();
2052        assert_eq!(result[[0, 0]], 1u64);
2053        assert_eq!(result[[1, 0]], 2u64);
2054        assert_eq!(result[[2, 0]], 3u64);
2055    }
2056}