Skip to main content

trs_dataframe/
dataframe.rs

1use column_store::sorted_df::SortedDataFrame;
2use data_value::{DataValue, Extract};
3use halfbrown::HashMap;
4use ndarray::{Array1, Array2};
5use std::fmt;
6/// Column-oriented storage, typed arrays, and frame operations.
7pub mod column_store;
8/// Row-level hashing and indexing utilities.
9pub mod index;
10/// Join strategies and relation descriptors.
11pub mod join;
12/// Column key type with name, id, and data-type metadata.
13pub mod key;
14use crate::{error::Error, CandidateData};
15#[cfg(feature = "python")]
16pub mod python;
17
18#[cfg(feature = "python")]
19use pyo3::prelude::*;
20
21use crate::{
22    dataframe::{
23        column_store::typed_array::TypedDataArray, column_store::ColumnFrame,
24        column_store::MaybeView, join::JoinRelation, key::Key,
25    },
26    MLChefMap,
27};
28
/// Selects which end of a sorted dataframe to take rows from, and how many.
///
/// Passed to [`SortedDataFrame::topn`] to fetch a fixed number of rows from
/// either the head or the tail of a sorted result.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TopN {
    /// Keep the leading `n` rows of the sorted result (smallest values).
    First(usize),
    /// Keep the trailing `n` rows of the sorted result (largest values).
    Last(usize),
}
40
41/// User-facing dataframe: a [`ColumnFrame`] with attached constants and
42/// metadata.
43///
44/// # Storage
45/// The underlying [`ColumnFrame`] is column-oriented — each column lives in
46/// its own [`crate::TypedData`] variant. Materialize a 2-D view
47/// with [`DataFrame::select`] (row-major) or
48/// [`DataFrame::select_view`] (zero-copy where possible).
49///
50/// # Example
51/// ```
52/// use trs_dataframe::{DataFrame, column_frame};
53///
54/// let df = DataFrame::new(column_frame! {
55///     "a" => [1, 2, 3],
56///     "b" => [4, 5, 6]
57/// });
58///
59/// // Materialize all columns as a row-major 2-D array (rows × columns).
60/// let all_data = df.select(None);
61///
62/// // Materialize a specific subset of columns.
63/// let keys = vec!["a".into(), "b".into()];
64/// let selected = df.select(Some(&keys));
65/// ```
66#[derive(Debug, Clone, PartialEq, Default, serde::Serialize, serde::Deserialize)]
67#[cfg_attr(feature = "python", pyclass)]
68pub struct DataFrame {
69    /// Whole-frame constants — values that logically apply to every row but
70    /// are not stored per-row. Useful for shared metadata that joins should
71    /// preserve. They do not appear in [`select`](Self::select) results.
72    pub constants: HashMap<Key, DataValue>,
73    /// Column-oriented storage backing this dataframe.
74    pub dataframe: ColumnFrame,
75    /// Free-form user metadata. Does not participate in any data operation.
76    pub metadata: HashMap<String, DataValue>,
77}
78
79impl fmt::Display for DataFrame {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        self.dataframe.fmt(f)
82    }
83}
84
85impl DataFrame {
86    /// Creates a new [`DataFrame`] from anything that can be converted into a [`ColumnFrame`].
87    ///
88    /// # Examples
89    ///
90    /// ```
91    /// use trs_dataframe::{DataFrame, column_frame};
92    ///
93    /// let df = DataFrame::new(column_frame! {
94    ///     "a" => [1, 2, 3],
95    ///     "b" => [4, 5, 6]
96    /// });
97    /// assert_eq!(df.n_rows(), 3);
98    /// assert_eq!(df.n_columns(), 2);
99    /// ```
100    pub fn new<C: Into<ColumnFrame>>(dataframe: C) -> Self {
101        Self {
102            constants: HashMap::new(),
103            dataframe: dataframe.into(),
104            metadata: HashMap::new(),
105        }
106    }
107
108    /// Returns the number of columns which dataframe contains.
109    pub fn n_columns(&self) -> usize {
110        self.dataframe.ncolumns()
111    }
112
113    /// Returns the number of rows which dataframe contains.
114    pub fn n_rows(&self) -> usize {
115        self.dataframe.nrows()
116    }
117
118    /// Compacts the internal storage to reclaim memory after row deletions or
119    /// filter operations that may leave excess capacity allocated.
120    pub fn shrink(&mut self) {
121        self.dataframe.shrink();
122    }
123
124    /// Attaches a key-value metadata entry to this dataframe.
125    ///
126    /// Metadata does not participate in data operations (select, join, filter, etc.)
127    /// and is intended for user-defined annotations such as source info or timestamps.
128    pub fn add_metadata(&mut self, key: String, value: DataValue) {
129        self.metadata.insert(key, value);
130    }
131
132    /// Returns a reference to the metadata value for the given key, or `None` if absent.
133    pub fn get_metadata(&self, key: &str) -> Option<&DataValue> {
134        self.metadata.get(key)
135    }
136
137    /// Joins another dataframe into this one according to the given [`JoinRelation`].
138    ///
139    /// The join strategy is determined by the variant inside `join_type`:
140    /// - [`crate::dataframe::join::JoinBy::AddColumns`] — adds non-existing columns from `other`
141    /// - [`crate::dataframe::join::JoinBy::Replace`] — replaces the entire frame with `other`
142    /// - [`crate::dataframe::join::JoinBy::Extend`] — appends rows from `other`
143    /// - [`crate::dataframe::join::JoinBy::Broadcast`] — replicates a single-row `other` across all rows
144    /// - [`crate::dataframe::join::JoinBy::CartesianProduct`] — produces all row combinations
145    /// - [`crate::dataframe::join::JoinBy::JoinById`] — hash-based join on shared key columns
146    ///
147    /// Constants from `other` are merged into this dataframe's constants map.
148    pub fn join(&mut self, other: Self, join_type: &JoinRelation) -> Result<(), Error> {
149        for (key, value) in other.constants {
150            self.constants.insert(key, value);
151        }
152        self.dataframe.join(other.dataframe, join_type)
153    }
154
155    /// Applies a user-defined function to the underlying [`ColumnFrame`].
156    ///
157    /// The closure receives the provided `keys` and a mutable reference to the
158    /// internal [`ColumnFrame`], allowing arbitrary in-place transformations.
159    pub fn apply_function<F>(&mut self, keys: &[Key], mut func: F) -> Result<(), Error>
160    where
161        F: FnMut(&[Key], &mut ColumnFrame) -> Result<(), Error>,
162    {
163        self.dataframe.apply_function(keys, &mut func)
164    }
165
166    /// Selects columns and returns their data as a 2D array of [`DataValue`] in row-major order.
167    ///
168    /// If `keys` is `None`, all columns are returned. If a requested key does not
169    /// exist, its cells are filled with [`DataValue::Null`].
170    ///
171    /// # Examples
172    ///
173    /// ```
174    /// use trs_dataframe::{df, Key};
175    ///
176    /// let df = df! { "a" => [1, 2], "b" => [3, 4] };
177    /// let data = df.select(None).unwrap();
178    /// assert_eq!(data.nrows(), 2);
179    /// ```
180    pub fn select(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
181        Ok(self.dataframe.select(keys))
182    }
183
184    /// Returns selected columns as a typed 2D array, converting each [`DataValue`]
185    /// via the [`Extract`] trait.
186    ///
187    /// This is the typed counterpart of [`select`](Self::select). If `keys` is `None`,
188    /// all columns are returned. The data is in row-major order (rows × columns).
189    ///
190    /// # Type coercion
191    ///
192    /// The [`Extract`] trait performs best-effort numeric coercion (e.g. `I32 -> f64`).
193    /// Values that cannot be meaningfully converted yield the type's default
194    /// (0 for numbers, `false` for bool, empty string for `String`).
195    ///
196    /// # Examples
197    ///
198    /// ```
199    /// use trs_dataframe::{df, Key};
200    ///
201    /// let df = df! {
202    ///     "a" => [1i32, 2i32, 3i32],
203    ///     "b" => [4i32, 5i32, 6i32]
204    /// };
205    /// let keys = vec![Key::from("a"), Key::from("b")];
206    /// let arr = df.select_typed::<f64>(Some(&keys)).unwrap();
207    /// assert_eq!(arr[[0, 0]], 1.0);
208    /// assert_eq!(arr[[1, 1]], 5.0);
209    /// ```
210    pub fn select_typed<T: Extract + Clone>(
211        &self,
212        keys: Option<&[Key]>,
213    ) -> Result<Array2<T>, Error> {
214        Ok(self.dataframe.select_typed(keys))
215    }
216
217    /// Returns selected columns wrapped in a [`MaybeView`].
218    ///
219    /// View-oriented counterpart of [`select`](Self::select). The selected
220    /// columns are stacked into an owned [`Array2`] of shape
221    /// `(ncols, nrows)`. When `keys` is `None`, every column from the
222    /// underlying [`crate::KeyIndex`] is included.
223    ///
224    /// Call [`MaybeView::row_view`] on the result to obtain a uniform
225    /// `(nrows, ncols)` read-only view regardless of which variant was
226    /// produced.
227    ///
228    /// # Errors
229    ///
230    /// Returns an error when `keys` resolves to an empty or entirely unknown
231    /// key set.
232    ///
233    /// # Examples
234    ///
235    /// ```
236    /// use trs_dataframe::{df, Key};
237    ///
238    /// let frame = df! { "a" => [1i32, 2i32], "b" => [3i32, 4i32] };
239    /// let keys = vec![Key::from("a"), Key::from("b")];
240    /// let view = frame.select_view(Some(&keys)).unwrap();
241    /// // row_view() yields a (nrows, ncols) ArrayView2.
242    /// assert_eq!(view.row_view().nrows(), 2);
243    /// ```
244    pub fn select_view(&self, keys: Option<&[Key]>) -> Result<MaybeView<'_>, Error> {
245        self.dataframe.select_view(keys)
246    }
247
248    /// Returns selected columns as borrowed [`crate::TypedData`] views.
249    ///
250    /// Each entry of the returned `Vec` corresponds to one requested column,
251    /// in the same order as `keys`. Missing keys yield `None`; present keys
252    /// yield `Some(&TypedData)` borrowed directly from the underlying column
253    /// store — no allocation, no per-element [`DataValue`] boxing.
254    ///
255    /// Use [`crate::TypedData::as_slice_i32`] (and friends) for zero-copy access to
256    /// the native primitive storage, or [`crate::TypedData::iter`] for a generic
257    /// [`DataValue`] iterator.
258    ///
259    /// # Errors
260    ///
261    /// Returns an error when `keys` resolves to an empty or entirely unknown
262    /// key set.
263    ///
264    /// # Examples
265    ///
266    /// ```
267    /// use trs_dataframe::{df, Key};
268    ///
269    /// let frame = df! {
270    ///     "score" => [1i32, 2i32, 3i32],
271    ///     "rank"  => [10i32, 20i32, 30i32]
272    /// };
273    /// let cols = frame.select_vec_view(Some(&["score".into()])).unwrap();
274    /// assert_eq!(cols.len(), 1);
275    /// assert_eq!(cols[0].as_ref().unwrap().len(), 3);
276    /// ```
277    pub fn select_vec_view(
278        &self,
279        keys: Option<&[Key]>,
280    ) -> Result<Vec<Option<&TypedDataArray>>, Error> {
281        self.dataframe.select_vec_view(keys)
282    }
283
284    /// Returns the requested columns as `Vec<Vec<D>>` in row-major order —
285    /// outer `Vec` is rows, inner `Vec` is one cell per selected key, with
286    /// each cell coerced to `D` via [`Extract`].
287    ///
288    /// Despite the name, the result is **not** transposed.
289    pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>> {
290        self.dataframe.select_transposed_typed::<D>(keys)
291    }
292
293    /// Returns a single column materialized as an owned [`Array1<DataValue>`],
294    /// or `None` if the key is absent.
295    ///
296    /// Typed columns allocate a [`DataValue`] per element on the fly. For
297    /// zero-copy typed access, use [`Self::get_column`].
298    pub fn select_column(&self, key: Key) -> Option<ndarray::Array1<DataValue>> {
299        #[allow(deprecated)]
300        self.dataframe.select_column(&key)
301    }
302
303    /// Stacks the selected columns into an [`Array2`] of shape
304    /// `(ncols, nrows)` — each row of the output is one column from the
305    /// dataframe.
306    ///
307    /// If `keys` is `None`, all columns are included.
308    pub fn select_transposed(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
309        self.dataframe.select_transposed(keys)
310    }
311
312    /// Stores a constant value that logically applies to every row without
313    /// being physically stored per-row.
314    ///
315    /// Constants are carried through joins but do not appear in
316    /// [`select`](Self::select) results.
317    pub fn insert_constant(&mut self, key: Key, value: DataValue) {
318        self.constants.insert(key, value);
319    }
320
321    /// Appends a single row to the dataframe.
322    ///
323    /// The row is supplied as any type implementing [`CandidateData`]
324    /// (e.g. `HashMap<Key, DataValue>`). New columns are added automatically
325    /// if the row contains keys not yet present in the frame.
326    pub fn push<C: CandidateData>(&mut self, item: C) -> Result<(), Error> {
327        self.dataframe.push(item)
328    }
329
330    /// Removes the specified columns from this dataframe and returns them as a
331    /// new [`DataFrame`].
332    pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error> {
333        self.dataframe.remove_column(keys).map(|x| x.into())
334    }
335
336    /// Appends all rows from `items` to this dataframe.
337    ///
338    /// If the two frames have different column sets, missing columns are filled
339    /// with [`DataValue::Null`].
340    pub fn extend(&mut self, items: Self) -> Result<(), Error> {
341        self.dataframe.extend(items.dataframe)
342    }
343
344    /// Returns the number of rows in the dataframe.
345    pub fn len(&self) -> usize {
346        self.dataframe.nrows()
347    }
348
349    /// Returns `true` if the dataframe contains no rows.
350    pub fn is_empty(&self) -> bool {
351        self.dataframe.is_empty()
352    }
353
354    /// Adds a new column to the dataframe.
355    ///
356    /// The column accepts anything that converts into a
357    /// [`crate::TypedData`] — e.g. `Vec<DataValue>`,
358    /// `Vec<T>` for any supported primitive, `Array1<DataValue>`, or a raw
359    /// `TypedData`.
360    ///
361    /// Returns an error if the column key already exists or if the length of
362    /// the supplied column does not match the current row count.
363    pub fn add_single_column<K, V>(&mut self, key: K, values: V) -> Result<(), Error>
364    where
365        K: Into<Key>,
366        V: Into<TypedDataArray>,
367    {
368        self.dataframe.add_single_column(key, values)
369    }
370
371    /// Returns a reference to the underlying typed column storage, or `None`
372    /// if `key` is absent.
373    ///
374    /// This is the zero-copy counterpart of
375    /// [`get_single_column`](Self::get_single_column): callers can use
376    /// [`TypedData::as_slice_i32`](crate::TypedData::as_slice_i32) (and its
377    /// siblings for other primitives) to borrow the native storage without
378    /// allocating a `DataValue` per element.
379    pub fn get_column(&self, key: &Key) -> Option<&TypedDataArray> {
380        self.dataframe.get_column(key).ok()
381    }
382
383    /// Returns a single column materialized as an owned [`Array1<DataValue>`],
384    /// or `None` if the key is absent.
385    ///
386    /// Typed columns allocate a [`DataValue`] per element on the fly. For
387    /// zero-copy typed access, use [`Self::get_column`].
388    pub fn get_single_column(&self, key: &Key) -> Option<Array1<DataValue>> {
389        #[allow(deprecated)]
390        self.dataframe.get_single_column(key)
391    }
392
393    /// Returns a column extracted into a typed [`Array1<T>`], where each [`DataValue`]
394    /// is converted via the [`Extract`] trait.
395    ///
396    /// This is a convenience wrapper around [`get_single_column`](Self::get_single_column)
397    /// that maps every element through `T::extract`, producing an owned array of the
398    /// target type. Returns `None` if the key does not exist in the dataframe.
399    ///
400    /// # Type coercion
401    ///
402    /// The [`Extract`] trait performs best-effort numeric coercion (e.g. `I32 -> f64`).
403    /// Values that cannot be meaningfully converted yield the type's default
404    /// (0 for numbers, `false` for bool, empty string for `String`).
405    ///
406    /// # Examples
407    ///
408    /// ```
409    /// use trs_dataframe::{df, Key};
410    ///
411    /// let df = df! {
412    ///     "score" => [1.5f64, 2.5f64, 3.5f64]
413    /// };
414    /// let key: Key = "score".into();
415    /// let col = df.get_single_column_typed::<f64>(&key).unwrap();
416    /// assert_eq!(col.len(), 3);
417    /// assert_eq!(col[0], 1.5);
418    /// ```
419    pub fn get_single_column_typed<T: Extract>(&self, key: &Key) -> Option<Array1<T>> {
420        self.dataframe.get_single_column_typed(key)
421    }
422
423    /// Returns a [`SortedDataFrame`] view sorted by the given column key.
424    ///
425    /// The sort is ascending with `Null` values pushed to the end.
426    /// Use [`SortedDataFrame::topn`] to efficiently extract the first/last N rows.
427    pub fn sorted(&self, key: &Key) -> Result<SortedDataFrame<'_>, Error> {
428        self.dataframe.sorted(key)
429    }
430
431    /// Returns a new dataframe containing only rows that satisfy the filter expression.
432    ///
433    /// Filter expressions are parsed from strings — see [`FilterRules`](crate::filter::FilterRules)
434    /// for the supported grammar (comparison, regex, set membership, logical combinators).
435    ///
436    /// Constants and metadata are cloned into the result.
437    pub fn filter(&self, filter: &crate::filter::FilterRules) -> Result<Self, Error> {
438        let filtered_df = self.dataframe.filter(filter)?;
439        Ok(Self {
440            constants: self.constants.clone(),
441            dataframe: filtered_df,
442            metadata: self.metadata.clone(),
443        })
444    }
445
446    /// Converts this dataframe into a Polars [`DataFrame`](polars::prelude::DataFrame).
447    ///
448    /// Each column is mapped to its Polars equivalent via [`into_polars_value`].
449    /// Requires the `polars-df` feature.
450    #[cfg(feature = "polars-df")]
451    #[allow(deprecated)]
452    pub fn as_polars(&self) -> Result<polars::prelude::DataFrame, Error> {
453        let mut columns = vec![];
454        for key in self.dataframe.keys() {
455            let values = self
456                .dataframe
457                .get_single_column(key)
458                .ok_or_else(|| Error::NotFound(key.clone()))?
459                .into_iter()
460                .map(|x| into_polars_value(key, x.clone()))
461                .collect::<Vec<_>>();
462            let s = polars::prelude::Column::new(key.name().into(), values);
463
464            columns.push(s);
465        }
466
467        Ok(polars::prelude::DataFrame::new(columns)?)
468    }
469
470    /// Deserializes a dataframe from MessagePack bytes.
471    ///
472    /// This is the inverse of [`store_into_messagepack`](Self::store_into_messagepack)
473    /// and is useful for compact binary serialization in IPC or storage scenarios.
474    pub fn load_from_messagepack(bytes: &[u8]) -> Result<Self, Error> {
475        rmp_serde::decode::from_slice(bytes).map_err(|e| Error::UnknownError(format!("{e:?}")))
476    }
477
478    /// Serializes this dataframe into MessagePack bytes.
479    ///
480    /// The resulting bytes can be deserialized back with
481    /// [`load_from_messagepack`](Self::load_from_messagepack).
482    pub fn store_into_messagepack(&self) -> Result<Vec<u8>, Error> {
483        rmp_serde::encode::to_vec(&self).map_err(|e| Error::UnknownError(format!("{e:?}")))
484    }
485}
486
/// Maps a crate-level [`DataType`](crate::DataType) onto the matching Polars
/// data type.
///
/// Scalar types map one-to-one. `Unknown` becomes Polars `Null`, `Vec` becomes
/// a `List` with an unknown inner type, and `Map` becomes a `Struct` with no
/// declared fields.
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn polars_dtype(dtype: crate::DataType) -> polars::prelude::DataType {
    // Both enums share variant names (`String`, `Unknown`, ...), so they are
    // referred to through explicit aliases instead of glob imports.
    use crate::DataType as Source;
    use polars::prelude::{DataType as Target, UnknownKind};

    match dtype {
        Source::Bool => Target::Boolean,
        Source::U8 => Target::UInt8,
        Source::U32 => Target::UInt32,
        Source::U64 => Target::UInt64,
        Source::U128 => Target::UInt128,
        Source::I32 => Target::Int32,
        Source::I64 => Target::Int64,
        Source::I128 => Target::Int128,
        Source::F32 => Target::Float32,
        Source::F64 => Target::Float64,
        Source::String => Target::String,
        Source::Bytes => Target::Binary,
        Source::Unknown => Target::Null,
        Source::Vec => Target::List(Box::new(Target::Unknown(UnknownKind::Any))),
        Source::Map => Target::Struct(Vec::new()),
    }
}
514
/// Converts a [`DataValue`] into a Polars [`AnyValue`](polars::prelude::AnyValue),
/// applying type coercion based on the column's [`Key`] dtype.
///
/// The value is first passed through `convert_dv_to_dtype` for `key`, then
/// mapped variant by variant:
/// - scalars map to the corresponding owned Polars value;
/// - `Vec` becomes a `List` series named after `key`, whose element dtype is
///   the first non-`Unknown` dtype detected among the elements;
/// - `Map` becomes a `StructOwned` whose fields are ordered by key name and
///   typed from each value's detected dtype.
///
/// Requires the `polars-df` feature.
///
/// # Panics
///
/// Panics if Polars cannot build a series from the converted elements of a
/// `Vec` value (the series is built in strict mode).
#[cfg(feature = "polars-df")]
pub fn into_polars_value(key: &Key, dv: DataValue) -> polars::prelude::AnyValue<'static> {
    use polars::prelude::AnyValue::*;
    use polars::prelude::Field;

    use crate::dataframe::column_store::convert_dv_to_dtype;
    let dv = convert_dv_to_dtype(key, dv);
    match dv {
        DataValue::String(smart_string) => StringOwned(smart_string.as_str().into()),
        DataValue::Bytes(items) => BinaryOwned(items),
        // NOTE(review): `U8` is widened to `UInt32` here although
        // `polars_dtype` maps `U8` to `UInt8` and `from_polars_value` maps
        // `UInt8` back to `U8` — confirm whether the widening is intentional.
        DataValue::U8(x) => UInt32(u32::from(x)),
        DataValue::Bool(x) => Boolean(x),
        DataValue::I32(x) => Int32(x),
        DataValue::U32(x) => UInt32(x),
        DataValue::I64(x) => Int64(x),
        DataValue::U64(x) => UInt64(x),
        DataValue::I128(x) => Int128(x),
        DataValue::U128(x) => UInt128(x),
        DataValue::F32(x) => Float32(x),
        DataValue::F64(x) => Float64(x),
        DataValue::EnumNumber(x) => Int32(x),
        DataValue::Null => Null,
        DataValue::Vec(data_values) => {
            // The element dtype is taken from the first element whose dtype
            // can be detected; it stays `Unknown` if none can.
            let dt = data_values
                .iter()
                .map(|d| crate::detect_dtype(d))
                .find(|d| !matches!(d, crate::DataType::Unknown))
                .unwrap_or(crate::DataType::Unknown);
            let vec_key = Key::new(key.name(), dt);
            let items = data_values
                .into_iter()
                .map(|x| into_polars_value(&vec_key, x))
                .collect::<Vec<_>>();
            // Build the panic message lazily: it is only needed on failure.
            let series = polars::series::Series::from_any_values(key.name().into(), &items, true)
                .unwrap_or_else(|e| panic!("Cannot create series for {key:?}: {e:?}"));
            List(series)
        }
        DataValue::Map(x) => {
            // Consume the map and order its entries by key so the struct
            // layout is deterministic; values are moved, not cloned.
            let mut entries = x.into_iter().collect::<Vec<_>>();
            entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));

            let mut values = Vec::with_capacity(entries.len());
            let mut fields = Vec::with_capacity(entries.len());
            for (name, value) in entries {
                let dtype = crate::detect_dtype(&value);
                let k = Key::new(&name, dtype);
                values.push(into_polars_value(&k, value));
                fields.push(Field::new(k.name().into(), polars_dtype(dtype)));
            }
            StructOwned(Box::new((values, fields)))
        }
    }
}
579
/// Converts a Polars [`AnyValue`](polars::prelude::AnyValue) back into a [`DataValue`].
///
/// Narrow integers without a dedicated [`DataValue`] variant are widened
/// losslessly (`UInt16 -> U32`, `Int8`/`Int16 -> I32`). `List` values are
/// converted element by element and `StructOwned` values become a
/// [`DataValue::Map`] keyed by field name. Any variant without an arm below is
/// logged with `tracing::warn!` and converted to [`DataValue::Null`].
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn from_polars_value(dv: polars::prelude::AnyValue<'_>) -> DataValue {
    use polars::prelude::AnyValue::*;
    match dv {
        Null => DataValue::Null,
        Boolean(v) => v.into(),
        String(v) => DataValue::String(v.into()),
        UInt8(v) => DataValue::U8(v),
        UInt16(v) => DataValue::U32(u32::from(v)),
        UInt32(v) => v.into(),
        UInt64(v) => v.into(),
        // Counterpart of the `U128 -> UInt128` mapping in `into_polars_value`;
        // without this arm such values would fall through to `Null`.
        UInt128(v) => DataValue::U128(v),
        Int8(v) => i32::from(v).into(),
        Int16(v) => i32::from(v).into(),
        Int32(v) => v.into(),
        Int64(v) => v.into(),
        Float32(v) => v.into(),
        Float64(v) => v.into(),
        Int128(v) => v.into(),
        List(series) => DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>()),
        // Array(series, _) => {
        //     DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>())
        // }
        StringOwned(v) => DataValue::String(v.as_str().into()),
        Binary(v) => DataValue::Bytes(v.to_owned()),
        BinaryOwned(v) => DataValue::Bytes(v),
        StructOwned(m) => {
            // The boxed tuple holds (values, fields); pair each field name
            // with its value.
            let (values, fields) = *m;
            let hm: std::collections::HashMap<smartstring::alias::String, DataValue> = fields
                .into_iter()
                .zip(values)
                .map(|(field, value)| (field.name.as_str().into(), from_polars_value(value)))
                .collect();
            DataValue::Map(hm)
        }
        e => {
            tracing::warn!("Unsupported polars value: {e:?}");
            DataValue::Null
        }
    }
}
622
623impl From<ColumnFrame> for DataFrame {
624    fn from(dataframe: ColumnFrame) -> Self {
625        Self::new(dataframe)
626    }
627}
628
629impl From<Vec<std::collections::HashMap<Key, DataValue>>> for DataFrame {
630    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
631        Self::new(ColumnFrame::from(dataframe))
632    }
633}
634
635impl From<Vec<HashMap<Key, DataValue>>> for DataFrame {
636    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
637        Self::new(ColumnFrame::from(dataframe))
638    }
639}
640
641impl From<std::collections::HashMap<String, Vec<DataValue>>> for DataFrame {
642    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
643        Self::new(ColumnFrame::from(dataframe))
644    }
645}
646
647impl From<MLChefMap> for DataFrame {
648    fn from(dataframe: MLChefMap) -> Self {
649        Self::new(ColumnFrame::from(dataframe))
650    }
651}
652impl From<Vec<(Key, Vec<DataValue>)>> for DataFrame {
653    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
654        Self::new(ColumnFrame::from(dataframe))
655    }
656}
657
658impl From<std::collections::HashMap<String, Array1<DataValue>>> for DataFrame {
659    fn from(dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
660        Self::new(ColumnFrame::from(dataframe))
661    }
662}
663
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for DataFrame {
    /// Builds a dataframe from a Polars dataframe via its [`ColumnFrame`]
    /// conversion. Requires the `polars-df` feature.
    fn from(polars_frame: polars::prelude::DataFrame) -> Self {
        let frame = ColumnFrame::from(polars_frame);
        Self::new(frame)
    }
}
670#[cfg(test)]
671#[allow(deprecated)]
672mod test {
673    use crate::filter::FilterRules;
674
675    use super::*;
676    use halfbrown::hashmap;
677    #[cfg(feature = "polars-df")]
678    use polars::prelude::NamedFrom as _;
679    use rstest::*;
680    use tracing_test::traced_test;
681    #[fixture]
682    fn dummy_candidates() -> ColumnFrame {
683        ColumnFrame::from(vec![
684            hashmap! {
685                "key1".into() => 1.into(),
686                "key2".into() => "a".into(),
687            },
688            hashmap! {
689                "key1".into() => 2.into(),
690                "key2".into() => "b".into(),
691            },
692        ])
693    }
694
695    #[rstest]
696    fn test_serde() {
697        let df = crate::df! {
698            "a" => [1u64, 2u64, 3u64],
699            "b" => [4u64, 5u64, 6u64],
700            "c" => [7u64, 8u64, 9u64]
701        };
702
703        let serialized = serde_json::to_string(&df).expect("BUG: Unable to serialize dataframe");
704
705        let deserialized =
706            serde_json::from_str(&serialized).expect("BUG: Unable to deserialize dataframe");
707
708        assert_eq!(df, deserialized);
709    }
710
711    #[cfg(feature = "polars-df")]
712    #[rstest]
713    fn test_polars() {
714        let expected = crate::df! {
715            "a" => [1u64, 2u64, 3u64],
716            "b" => [4f64, 5f64, 6f64],
717            "c" => [7i64, 8i64, 9i64]
718        };
719
720        let polars_df = polars::df!(
721            "a" => [1u64, 2u64, 3u64],
722            "b" => [4f64, 5f64, 6f64],
723            "c" => [7i64, 8i64, 9i64]
724        )
725        .expect("BUG: should be ok");
726        let as_df: DataFrame = polars_df.into();
727        let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into()];
728        assert_eq!(
729            as_df.select(Some(keys.as_slice())),
730            expected.select(Some(keys.as_slice()))
731        );
732    }
733    #[cfg(feature = "polars-df")]
734    use crate::DataType;
735    #[cfg(feature = "polars-df")]
736    #[rstest]
737    #[case::str(Key::new("a", DataType::String), DataValue::String("test".into()), polars::prelude::AnyValue::String("test".into()))]
738    #[case::u32(
739        Key::new("a", DataType::U32),
740        DataValue::U32(u32::MAX),
741        polars::prelude::AnyValue::UInt32(u32::MAX)
742    )]
743    #[case::i32(
744        Key::new("a", DataType::I32),
745        DataValue::I32(i32::MIN),
746        polars::prelude::AnyValue::Int32(i32::MIN)
747    )]
748    #[case::i64(
749        Key::new("a", DataType::I64),
750        DataValue::I64(i64::MIN),
751        polars::prelude::AnyValue::Int64(i64::MIN)
752    )]
753    #[case::u64(
754        Key::new("a", DataType::U64),
755        DataValue::U64(u64::MIN),
756        polars::prelude::AnyValue::UInt64(u64::MIN)
757    )]
758    #[case::f32(
759        Key::new("a", DataType::F32),
760        DataValue::F32(f32::MIN),
761        polars::prelude::AnyValue::Float32(f32::MIN)
762    )]
763    #[case::f64(
764        Key::new("a", DataType::F64),
765        DataValue::F64(f64::MIN),
766        polars::prelude::AnyValue::Float64(f64::MIN)
767    )]
768    #[case::null(
769        Key::new("a", DataType::Unknown),
770        DataValue::Null,
771        polars::prelude::AnyValue::Null
772    )]
773    #[case::i128(
774        Key::new("a", DataType::I128),
775        DataValue::I128(i128::MIN),
776        polars::prelude::AnyValue::Int128(i128::MIN)
777    )]
778    #[case::u8(
779        Key::new("a", DataType::U8),
780        DataValue::U8(255),
781        polars::prelude::AnyValue::UInt8(255)
782    )]
783    #[case::bool(
784        Key::new("a", DataType::Bool),
785        DataValue::Bool(true),
786        polars::prelude::AnyValue::Boolean(true)
787    )]
788    #[case::bytes(Key::new("a", DataType::Bytes), DataValue::Bytes("aaaaa".as_bytes().to_vec()), polars::prelude::AnyValue::BinaryOwned("aaaaa".as_bytes().to_vec()))]
789    #[case::vec_uints(Key::new("a", DataType::Vec), DataValue::Vec(vec![DataValue::U32(0), DataValue::U32(1)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::UInt32(1)])))]
790    #[case::map(Key::new("a", DataType::Map), DataValue::Map(data_value::stdhashmap!("a" => 0u64, "b" => "s")), polars::prelude::AnyValue::StructOwned(Box::new((
791        vec![polars::prelude::AnyValue::UInt64(0u64), polars::prelude::AnyValue::String("s".into())],
792        vec![polars::prelude::Field::new("a".into(), polars::prelude::DataType::UInt64), polars::prelude::Field::new("b".into(), polars::prelude::DataType::String)]))))]
793    // polars converts all by first element type
794    // #[case::vec_diff_int(DataValue::Vec(vec![ DataValue::I32(1), DataValue::U32(0)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::Int32(1i32), polars::prelude::AnyValue::UInt32(0u32)])))]
795    //#[case::vec_int_str(DataValue::Vec(vec![DataValue::U32(0), DataValue::String("1".into())]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::StringOwned("1".into())])))]
796    fn into_polars_value_test(
797        #[case] key: Key,
798        #[case] input: DataValue,
799        #[case] output: polars::prelude::AnyValue<'static>,
800    ) {
801        assert_eq!(into_polars_value(&key, input.clone()), output);
802        assert_eq!(from_polars_value(output), input);
803    }
804
805    // #[cfg(feature = "polars-df")]
806    // #[rstest]
807    // fn as_polars() {
808    //     let state = include_bytes!("../part_00330.dfb");
809    //     let df: Result<DataFrame, _> = rmp_serde::decode::from_slice(state);
810    //     assert!(df.is_ok());
811    //     let df = df.unwrap();
812    //     println!("{df}");
813    //     let polars_df = df.as_polars();
814    //     assert!(polars_df.is_ok(), "{polars_df:?}");
815    // }
816    #[rstest]
817    #[case(
818        DataFrame::new(crate::column_frame! {
819            "a" => [1f64, 2f64, 3f64],
820            "b" => [4i64, 5i64, 6i64],
821            "c" => [7i64, 8i64, 9i64]
822        }),
823        DataFrame::new(crate::column_frame! {
824            "a" => [1f64, 2f64],
825            "b" => [4i64, 5i64],
826            "c" => [7i64, 8i64]
827        }),
828        FilterRules::try_from("a >= 1f64 && (b <= 5 || c <= 8) && b >= 4").expect("BUG: cannot create filter rules"),
829    )]
830    #[case(
831        DataFrame::new(crate::column_frame! {
832            "a" => [1f64, 2f64, 3f64],
833            "b" => [4i64, 5i64, 6i64],
834            "c" => [7i64, 8i64, 9i64]
835        }),
836        DataFrame::new(crate::column_frame! {
837            "a" => [2f64],
838            "b" => [5i64],
839            "c" => [8i64]
840        }),
841        FilterRules::try_from("a % 2f64 == 0f64").expect("BUG: cannot create filter rules"),
842    )]
843    #[traced_test]
844    fn filter_test(
845        #[case] df: DataFrame,
846        #[case] expected: DataFrame,
847        #[case] filter: FilterRules,
848    ) {
849        let filtered = df.filter(&filter).expect("BUG: cannot filter");
850        assert_eq!(filtered, expected);
851    }
852
853    #[rstest]
854    fn test_serde_complex() {
855        let simple = r#"
856{
857    "constants": {},
858    "dataframe": {
859        "index": {
860            "keys": [
861                {
862                    "key": 3162770485,
863                    "name": "a",
864                    "ctype": "U32"
865                },
866                {
867                    "key": 2279056742,
868                    "name": "b",
869                    "ctype": "F64"
870                },
871                {
872                    "key": 2994984227,
873                    "name": "c",
874                    "ctype": "U64"
875                },
876                {
877                    "key": 3319645144,
878                    "name": "d",
879                    "ctype": "F64"
880                },
881                {
882                    "key": 1291847470,
883                    "name": "e",
884                    "ctype": "U32"
885                },
886                {
887                    "key": 874241070,
888                    "name": "f",
889                    "ctype": "Bool"
890                }
891            ],
892            "indexes": {
893                "a": 0,
894                "b": 1,
895                "c": 2,
896                "d": 3,
897                "e": 4,
898                "f": 5
899            },
900            "alias": {}
901        },
902        "data_frame": {
903            "v": 1,
904            "dim": [
905                2,
906                6
907            ],
908            "data": [
909                253780,
910                0.009369421750307085,
911                1633222860381359,
912                8,
913                5,
914                true,
915                64512,
916                0.003391335718333721,
917                1633222860810557,
918                8,
919                5,
920                null
921            ]
922        }
923    },
924    "metadata": {}
925}
926        "#;
927
928        let simple_deserialized: DataFrame =
929            serde_json::from_str(simple).expect("BUG: Unable to deserialize dataframe");
930
931        println!("deserialized: {simple_deserialized:?}");
932        let array = format!("[{}, {}, {}]", simple, simple, simple);
933        let deserialized: Vec<DataFrame> =
934            serde_json::from_str(&array).expect("BUG: Unable to deserialize dataframe");
935
936        println!("deserialized: {deserialized:?}");
937        assert_eq!(deserialized.len(), 3);
938        assert_eq!(simple_deserialized, deserialized[0]);
939    }
940
941    #[rstest]
942    #[case(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
943    #[case(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
944    #[case(vec![hashmap! {
945        "key1".into() => 1.into(),
946        "key2".into() => "a".into(),
947    },
948    hashmap! {
949        "key1".into() => 2.into(),
950    },])]
951    #[case(vec![data_value::stdhashmap! {
952        "key1" => DataValue::from(1),
953        "key2" => DataValue::from("a"),
954    },data_value::stdhashmap! {
955        "key1" => DataValue::from(2),
956    },])]
957    #[case(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(),
958    vec![DataValue::from("a"), DataValue::Null])])]
959    fn test_select_column<T: Into<DataFrame>>(#[case] input: T) {
960        let df: DataFrame = input.into();
961        assert_eq!(
962            df,
963            DataFrame {
964                constants: HashMap::new(),
965                dataframe: ColumnFrame::from(vec![
966                    hashmap! {
967                        "key1".into() => 1.into(),
968                        "key2".into() => "a".into(),
969                    },
970                    hashmap! {
971                        "key1".into() => 2.into(),
972                    },
973                ]),
974                metadata: HashMap::new(),
975            }
976        );
977        let selected_transposed = df.select_column("key1".into());
978        assert!(selected_transposed.is_some());
979        let selected_transposed = selected_transposed.unwrap();
980        assert_eq!(selected_transposed.len(), 2);
981        assert_eq!(selected_transposed, ndarray::array![1.into(), 2.into()]);
982    }
983
984    #[rstest]
985    #[case::hhm(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
986    #[case::stdhm(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
987    #[case::hm({
988        let hm: std::collections::HashMap<String, Array1<DataValue>> = data_value::stdhashmap!("key1".to_string() => Array1::from_vec(vec![DataValue::from(1), DataValue::from(2)]), "key2".to_string() => Array1::from_vec(vec![DataValue::from("a"), DataValue::Null]));
989        hm
990    })]
991    #[case::vec_hhm(vec![hashmap! {
992        "key1".into() => 1.into(),
993        "key2".into() => "a".into(),
994    },
995    hashmap! {
996        "key1".into() => 2.into(),
997    },])]
998    #[case::vec_hme(vec![data_value::stdhashmap! {
999        "key1" => DataValue::from(1),
1000        "key2" => DataValue::from("a"),
1001    },data_value::stdhashmap! {
1002        "key1" => DataValue::from(2),
1003    },])]
1004    #[case::vec_vec(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(), vec![DataValue::from("a"), DataValue::Null])])]
1005    fn test_from_conversion<T: Into<DataFrame>>(#[case] input: T) {
1006        let df: DataFrame = input.into();
1007        let expected: DataFrame = DataFrame {
1008            constants: HashMap::new(),
1009            dataframe: ColumnFrame::from(vec![
1010                hashmap! {
1011                    "key1".into() => 1.into(),
1012                    "key2".into() => "a".into(),
1013                },
1014                hashmap! {
1015                    "key1".into() => 2.into(),
1016                },
1017            ]),
1018            metadata: HashMap::new(),
1019        };
1020        assert_eq!(
1021            df.select(Some(&["key1".into(), "key2".into()])),
1022            expected.select(Some(&["key1".into(), "key2".into()])),
1023            "{df} vs {expected}"
1024        );
1025        let selected_transposed = df.select_transposed_typed::<i32>(&["key1".into()]);
1026        assert_eq!(selected_transposed.len(), 2);
1027        println!("{:?}", selected_transposed);
1028        assert_eq!(selected_transposed, vec![vec![1], vec![2]]);
1029    }
1030    #[rstest]
1031    fn test_dataframe(dummy_candidates: ColumnFrame) {
1032        let mut dataframe: DataFrame = DataFrame::default();
1033        assert!(dataframe.is_empty());
1034        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
1035        assert_eq!(dataframe.len(), 2);
1036
1037        let candidate = hashmap! {
1038            "key1".into() => 3.into(),
1039            "key2".into() => "c".into(),
1040        };
1041
1042        assert!(dataframe.push(candidate).is_ok());
1043        assert_eq!(dataframe.len(), 3);
1044        assert!(!dataframe.is_empty());
1045
1046        dataframe.insert_constant("key3".into(), 4.into());
1047        assert_eq!(dataframe.constants.len(), 1);
1048        assert!(dataframe
1049            .apply_function(&["key1".into()], |keys, df| {
1050                let key = keys[0].clone();
1051                let s = df
1052                    .get_single_column(&key)
1053                    .expect("BUG: Cannot get column")
1054                    .to_owned();
1055                let s = s.mapv(|x| x + DataValue::from(1));
1056                df.add_single_column("key5", s)?;
1057                Ok(())
1058            })
1059            .is_ok());
1060        let original = dataframe.clone();
1061        dataframe.shrink();
1062        let remove_df = dataframe.remove_column(&["key1".into()]);
1063        assert!(remove_df.is_ok());
1064        let mut remove_df = remove_df.unwrap();
1065        assert_eq!(remove_df.len(), 3);
1066        let selected = dataframe.select(Some(&["key2".into()]));
1067        assert!(selected.is_ok());
1068        let selected = selected.unwrap();
1069        println!("{:?}", selected);
1070
1071        // fixme later
1072        let joined_result =
1073            remove_df.join(dataframe, &JoinRelation::new(crate::JoinBy::AddColumns));
1074        assert!(joined_result.is_ok(), "{:?}", joined_result);
1075        let keys = vec!["key1".into(), "key2".into(), "key5".into()];
1076        assert_eq!(
1077            original.select(Some(keys.as_slice())),
1078            remove_df.select(Some(keys.as_slice()))
1079        );
1080    }
1081
1082    #[rstest]
1083    fn test_size_methods() {
1084        let candidate = hashmap! {
1085            "key1".into() => 3.into(),
1086            "key2".into() => "c".into(),
1087            "key3".into() => false.into()
1088        };
1089
1090        let dataframe: DataFrame = vec![candidate].into();
1091
1092        assert_eq!(dataframe.n_columns(), 3);
1093        assert_eq!(dataframe.n_rows(), 1);
1094    }
1095
1096    #[rstest]
1097    fn test_metadata(dummy_candidates: ColumnFrame) {
1098        let mut dataframe: DataFrame = DataFrame::default();
1099        assert!(dataframe.is_empty());
1100        println!("{:?}", dataframe);
1101        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
1102        println!("{:?}", dataframe);
1103        assert_eq!(dataframe.len(), 2);
1104
1105        dataframe.add_metadata("test".into(), 1.into());
1106        assert_eq!(dataframe.get_metadata("test"), Some(&1.into()));
1107        let dataframe = DataFrame::new(ColumnFrame::from(vec![
1108            hashmap! {
1109                "key1".into() => 1.into(),
1110                "key2".into() => "a".into(),
1111            },
1112            hashmap! {
1113                "key1".into() => 2.into(),
1114                "key2".into() => "b".into(),
1115            },
1116        ]));
1117        assert_eq!(dataframe.get_metadata("test"), None);
1118        let tt = dataframe.select_transposed(None);
1119        assert!(tt.is_ok());
1120        let tt = tt.unwrap();
1121        assert_eq!(tt.shape(), [2, 2]);
1122        assert_eq!(
1123            tt,
1124            Array2::from_shape_vec((2, 2), vec![1.into(), 2.into(), "a".into(), "b".into()])
1125                .unwrap()
1126        );
1127    }
1128
1129    #[rstest]
1130    #[traced_test]
1131    fn add_single_column_test() {
1132        let mut dataframe = DataFrame::default();
1133        let values: Array1<DataValue> = Array1::from_vec(vec![1.into(), 2.into(), 3.into()]);
1134        let r = dataframe.add_single_column("key1", values);
1135        assert!(r.is_ok(), "{r:?}");
1136        let selected = dataframe.select(None);
1137        assert!(selected.is_ok());
1138        let selected = selected.unwrap();
1139        assert_eq!(selected.shape(), [3, 1]);
1140        assert_eq!(
1141            selected,
1142            Array2::from_shape_vec((3, 1), vec![1.into(), 2.into(), 3.into()]).unwrap()
1143        );
1144        let values: Array1<i32> = Array1::from_vec(vec![1, 2]);
1145        assert!(dataframe.add_single_column("key1", values).is_err());
1146        let values: Vec<i32> = vec![3i32, 4, 5];
1147        assert!(dataframe.add_single_column("key2", values).is_ok());
1148        let values: Array1<i32> = Array1::from_vec(vec![3i32]);
1149        assert!(dataframe.add_single_column("key3", values).is_err());
1150    }
1151
1152    #[rstest]
1153    #[traced_test]
1154    fn add_single_column_empty_test() {
1155        let mut dataframe = DataFrame::default();
1156        let values: Array1<DataValue> = Array1::from(vec![]);
1157        let r = dataframe.add_single_column("key1", values);
1158        assert!(r.is_ok(), "{r:?}");
1159        let selected = dataframe.select(None);
1160        assert!(selected.is_ok());
1161        let selected = selected.unwrap();
1162        assert_eq!(selected.shape(), [0, 1]);
1163        assert_eq!(selected, Array2::from_shape_vec((0, 1), vec![]).unwrap());
1164        let values: Array1<DataValue> = Array1::from(vec![1.into(), 2.into()]);
1165        assert!(dataframe.add_single_column("key1", values).is_err());
1166        let values: Array1<DataValue> = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1167        assert!(dataframe.add_single_column("key2", values).is_ok());
1168        let values: Array1<DataValue> = Array1::from(vec![3.into(), 4.into()]);
1169        assert!(dataframe.add_single_column("key3", values).is_err());
1170        let values: Array1<DataValue> = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1171        assert!(dataframe.add_single_column("key3", values).is_ok());
1172
1173        assert_eq!(
1174            dataframe
1175                .select_column("key1".into())
1176                .expect("BUG: has to exists"),
1177            ndarray::arr1(&[DataValue::Null, DataValue::Null, DataValue::Null]),
1178        );
1179        assert_eq!(
1180            dataframe
1181                .select_column("key2".into())
1182                .expect("BUG: has to exists"),
1183            ndarray::arr1(&[3.into(), 4.into(), 5.into()]),
1184        );
1185        assert_eq!(
1186            dataframe.select(None).expect("BUG: cannot get data"),
1187            ndarray::arr2(&[
1188                [DataValue::Null, 3.into(), 3.into()],
1189                [DataValue::Null, 4.into(), 4.into()],
1190                [DataValue::Null, 5.into(), 5.into()],
1191            ])
1192        );
1193    }
1194
1195    #[rstest]
1196    #[case(
1197        DataFrame::new(ColumnFrame::from(vec![
1198            hashmap! {
1199                "k".into() => 1.into(),
1200                "k2".into() => 2.into(),
1201                "k3".into() => 2.2.into(),
1202            },
1203            hashmap! {
1204                "k".into() => 11.into(),
1205                "k2".into() => 3.into(),
1206            },
1207            hashmap! {
1208                "k".into() => 4.into(),
1209                "k2".into() => 5.into(),
1210                "k3".into() => 2.3.into(),
1211            },
1212            hashmap! {
1213                "k".into() => 4.into(),
1214                "k2".into() => 5.into(),
1215                "k3".into() => 2.4.into(),
1216            },
1217        ])),
1218        vec!["k".into(), "k2".into()],
1219        Array2::from_shape_vec((4, 2), vec![1.into(), 2.into(), 11.into(), 3.into(), 4.into(), 5.into(), 4.into(), 5.into()]).unwrap()
1220    )]
1221    #[case(
1222        DataFrame::new(ColumnFrame::from(vec![
1223            hashmap! {
1224                "k".into() => 1.into(),
1225                "k2".into() => 2.into(),
1226                "k3".into() => 2.2.into(),
1227            },
1228            hashmap! {
1229                "k".into() => 11.into(),
1230                "k2".into() => 3.into(),
1231            },
1232            hashmap! {
1233                "k".into() => 4.into(),
1234                "k2".into() => 5.into(),
1235                "k3".into() => 2.3.into(),
1236            },
1237            hashmap! {
1238                "k".into() => 4.into(),
1239                "k2".into() => 5.into(),
1240                "k3".into() => 2.4.into(),
1241            },
1242        ])),
1243        vec!["k2".into(), "k3".into(), "nonexist1".into(), "nonexists2".into(), "k".into()],
1244        Array2::from_shape_vec((4, 5), vec![
1245            2.into(), 2.2.into(), DataValue::Null, DataValue::Null, 1.into(),
1246            3.into(), DataValue::Null, DataValue::Null, DataValue::Null, 11.into(),
1247            5.into(), 2.3.into(),  DataValue::Null, DataValue::Null, 4.into(),
1248            5.into(), 2.4.into(), DataValue::Null, DataValue::Null, 4.into()]).unwrap()
1249    )]
1250    #[traced_test]
1251    fn select_multiple(
1252        #[case] input: DataFrame,
1253        #[case] columns: Vec<Key>,
1254        #[case] expected: Array2<DataValue>,
1255    ) {
1256        let selected = input.select(Some(&columns));
1257        assert!(selected.is_ok());
1258        let selected = selected.unwrap();
1259
1260        assert_eq!(selected, expected);
1261    }
1262
1263    #[rstest]
1264    #[case(
1265        DataFrame::new(ColumnFrame::from(vec![
1266            hashmap! {
1267                "k".into() => 1.into(),
1268                "k2".into() => 2.into(),
1269                "k3".into() => 2.2.into(),
1270            },
1271            hashmap! {
1272                "k".into() => 11.into(),
1273                "k2".into() => 3.into(),
1274            },
1275            hashmap! {
1276                "k".into() => 4.into(),
1277                "k2".into() => 5.into(),
1278                "k3".into() => 2.3.into(),
1279            },
1280            hashmap! {
1281                "k".into() => 4.into(),
1282                "k2".into() => 5.into(),
1283                "k3".into() => 2.4.into(),
1284            },
1285        ])),
1286        "k".into(),
1287        Array2::from_shape_vec((4, 3), vec![
1288            1.into(), 2.into(), 2.2.into(),
1289            4.into(), 5.into(), 2.3.into(),
1290            4.into(), 5.into(), 2.4.into(),
1291            11.into(), 3.into(), DataValue::Null,
1292            ]
1293        ).unwrap(),
1294        vec!["k".into(), "k2".into(), "k3".into()],
1295    )]
1296    #[rstest]
1297    #[case(
1298        DataFrame::new(ColumnFrame::from(vec![
1299            hashmap! {
1300                "k".into() => 1.into(),
1301                "k2".into() => 2.into(),
1302                "k3".into() => 2.2.into(),
1303            },
1304            hashmap! {
1305                "k".into() => 11.into(),
1306                "k2".into() => 3.into(),
1307            },
1308            hashmap! {
1309                "k".into() => 4.into(),
1310                "k2".into() => 5.into(),
1311                "k3".into() => 2.3.into(),
1312            },
1313            hashmap! {
1314                "k".into() => 4.into(),
1315                "k2".into() => 5.into(),
1316                "k3".into() => 2.4.into(),
1317            },
1318        ])),
1319        "k3".into(),
1320        Array2::from_shape_vec((4, 3), vec![
1321            11.into(), 3.into(), DataValue::Null,
1322            1.into(), 2.into(), 2.2.into(),
1323            4.into(), 5.into(), 2.3.into(),
1324            4.into(), 5.into(), 2.4.into(),
1325            ]
1326        ).unwrap(),
1327        vec!["k".into(), "k2".into(), "k3".into()],
1328    )]
1329    #[case(
1330        DataFrame::new(ColumnFrame::from(vec![
1331            hashmap! {
1332                "k".into() => 2.into(),
1333                "k2".into() => 0.000001.into(),
1334            },
1335            hashmap! {
1336                "k".into() => 1.into(),
1337                "k2".into() =>0.0000001.into(),
1338            },
1339            hashmap! {
1340                "k".into() => 3.into(),
1341                "k2".into() => 0.00001.into(),
1342            },
1343            hashmap! {
1344                "k".into() => 4.into(),
1345                "k2".into() => 0.001.into(),
1346            },
1347        ])),
1348        "k2".into(),
1349        Array2::from_shape_vec((4, 2), vec![
1350            1.into(), 0.0000001.into(),
1351            2.into(), 0.000001.into(),
1352            3.into(), 0.00001.into(),
1353            4.into(), 0.001.into(),
1354            ]
1355        ).unwrap(),
1356        vec!["k".into(), "k2".into()],
1357    )]
1358    #[case(
1359        DataFrame::new(ColumnFrame::from(vec![
1360            hashmap! {
1361                "k".into() => 2.into(),
1362                "k2".into() => "b".into(),
1363            },
1364            hashmap! {
1365                "k".into() => 1.into(),
1366                "k2".into() =>"a".into(),
1367            },
1368            hashmap! {
1369                "k".into() => 3.into(),
1370                "k2".into() =>"c".into(),
1371            },
1372            hashmap! {
1373                "k".into() => 4.into(),
1374                "k2".into() =>"z".into(),
1375            },
1376        ])),
1377        "k2".into(),
1378        Array2::from_shape_vec((4, 2), vec![
1379            1.into(),"a".into(),
1380            2.into(), "b".into(),
1381            3.into(), "c".into(),
1382            4.into(), "z".into(),
1383            ]
1384        ).unwrap(),
1385        vec!["k".into(), "k2".into()],
1386    )]
1387    #[traced_test]
1388    fn sort_by(
1389        #[case] input: DataFrame,
1390        #[case] column: Key,
1391        #[case] expected: Array2<DataValue>,
1392        #[case] columns: Vec<Key>,
1393    ) {
1394        let result = input.sorted(&column);
1395        assert!(result.is_ok(), "{result:?}");
1396        let result = result.unwrap().get_sorted();
1397        let selected = result.select(Some(&columns));
1398
1399        assert_eq!(selected, expected);
1400    }
1401    #[rstest]
1402    #[case(
1403        DataFrame::new(ColumnFrame::from(vec![
1404            hashmap! {
1405                "k".into() => 2.into(),
1406                "k2".into() => 0.000001.into(),
1407            },
1408            hashmap! {
1409                "k".into() => 1.into(),
1410                "k2".into() =>0.0000001.into(),
1411            },
1412            hashmap! {
1413                "k".into() => 3.into(),
1414                "k2".into() => 0.00001.into(),
1415            },
1416            hashmap! {
1417                "k".into() => 4.into(),
1418                "k2".into() => 0.001.into(),
1419            },
1420        ])),
1421        "k2".into(),
1422        TopN::Last(1),
1423        Array2::from_shape_vec((1, 2), vec![
1424            4.into(), 0.001.into(),
1425            ]
1426        ).unwrap(),
1427        vec!["k".into(), "k2".into()],
1428    )]
1429    #[case(
1430        DataFrame::new(ColumnFrame::from(vec![
1431            hashmap! {
1432                "k".into() => 2.into(),
1433                "k2".into() => 0.000001.into(),
1434            },
1435            hashmap! {
1436                "k".into() => 1.into(),
1437                "k2".into() =>0.0000001.into(),
1438            },
1439            hashmap! {
1440                "k".into() => 3.into(),
1441                "k2".into() => 0.00001.into(),
1442            },
1443            hashmap! {
1444                "k".into() => 4.into(),
1445                "k2".into() => 0.001.into(),
1446            },
1447        ])),
1448        "k2".into(),
1449        TopN::Last(2),
1450        Array2::from_shape_vec((2, 2), vec![
1451            4.into(), 0.001.into(),
1452            3.into(), 0.00001.into(),
1453            ]
1454        ).unwrap(),
1455        vec!["k".into(), "k2".into()],
1456    )]
1457    #[case(
1458        DataFrame::new(ColumnFrame::from(vec![
1459            hashmap! {
1460                "k".into() => 2.into(),
1461                "k2".into() => "b".into(),
1462            },
1463            hashmap! {
1464                "k".into() => 1.into(),
1465                "k2".into() =>"a".into(),
1466            },
1467            hashmap! {
1468                "k".into() => 3.into(),
1469                "k2".into() =>"c".into(),
1470            },
1471            hashmap! {
1472                "k".into() => 4.into(),
1473                "k2".into() =>"z".into(),
1474            },
1475        ])),
1476        "k2".into(),
1477        TopN::First(1),
1478        Array2::from_shape_vec((1, 2), vec![
1479            1.into(),"a".into(),
1480            ]
1481        ).unwrap(),
1482        vec!["k".into(), "k2".into()],
1483    )]
1484    #[case(
1485        DataFrame::new(ColumnFrame::from(vec![
1486            hashmap! {
1487                "k".into() => 2.into(),
1488                "k2".into() => "b".into(),
1489            },
1490            hashmap! {
1491                "k".into() => 1.into(),
1492                "k2".into() =>"a".into(),
1493            },
1494            hashmap! {
1495                "k".into() => 3.into(),
1496                "k2".into() =>"c".into(),
1497            },
1498            hashmap! {
1499                "k".into() => 4.into(),
1500                "k2".into() =>"z".into(),
1501            },
1502        ])),
1503        "k2".into(),
1504        TopN::First(2),
1505        Array2::from_shape_vec((2, 2), vec![
1506            1.into(),"a".into(),
1507            2.into(),"b".into(),
1508            ]
1509        ).unwrap(),
1510        vec!["k".into(), "k2".into()],
1511    )]
1512    #[traced_test]
1513    fn top_n(
1514        #[case] input: DataFrame,
1515        #[case] column: Key,
1516        #[case] topn: TopN,
1517        #[case] expected: Array2<DataValue>,
1518        #[case] columns: Vec<Key>,
1519    ) {
1520        let result = input.sorted(&column);
1521        assert!(result.is_ok(), "{result:?}");
1522        let result = result.unwrap();
1523        let first = result.topn(topn).unwrap();
1524        let selected = first.select(Some(&columns));
1525        assert_eq!(selected, expected);
1526    }
1527
1528    #[rstest]
1529    fn test_messagepack_roundtrip_empty_dataframe() {
1530        let df = DataFrame::default();
1531
1532        let bytes = df
1533            .store_into_messagepack()
1534            .expect("failed to serialize empty df");
1535        let restored =
1536            DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize empty df");
1537        assert_eq!(df, restored);
1538        assert!(restored.is_empty());
1539    }
1540
1541    #[rstest]
1542    fn test_messagepack_roundtrip_strings_and_bools() {
1543        // Strings and bools are preserved exactly by messagepack
1544        let df = DataFrame::new(ColumnFrame::from(vec![
1545            hashmap! {
1546                "str".into() => DataValue::String("hello".into()),
1547                "bool".into() => DataValue::Bool(true),
1548            },
1549            hashmap! {
1550                "str".into() => DataValue::String("".into()),
1551                "bool".into() => DataValue::Bool(false),
1552            },
1553        ]));
1554
1555        let bytes = df.store_into_messagepack().expect("failed to serialize");
1556        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1557        assert_eq!(df, restored);
1558    }
1559
1560    #[rstest]
1561    fn test_messagepack_roundtrip_f64_values() {
1562        let df = DataFrame::new(ColumnFrame::from(vec![
1563            hashmap! {
1564                "a".into() => DataValue::F64(3.14),
1565            },
1566            hashmap! {
1567                "a".into() => DataValue::F64(-2.718),
1568            },
1569        ]));
1570
1571        let bytes = df.store_into_messagepack().expect("failed to serialize");
1572        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1573        assert_eq!(df, restored);
1574    }
1575
1576    #[rstest]
1577    fn test_messagepack_f64_special_values_survive_roundtrip() {
1578        // f64::INFINITY serializes/deserializes but PartialEq may differ due to
1579        // DataValue Eq semantics; verify at the value level
1580        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1581            "a".into() => DataValue::F64(f64::INFINITY),
1582        }]));
1583
1584        let bytes = df.store_into_messagepack().expect("failed to serialize");
1585        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1586        assert_eq!(restored.len(), 1);
1587        let col = restored.select_column("a".into()).expect("col exists");
1588        match &col[0] {
1589            DataValue::F64(v) => assert!(v.is_infinite() && v.is_sign_positive()),
1590            other => panic!("expected F64, got {other:?}"),
1591        }
1592    }
1593
1594    #[rstest]
1595    fn test_messagepack_roundtrip_with_nulls() {
1596        let df = DataFrame::new(ColumnFrame::from(vec![
1597            hashmap! {
1598                "a".into() => DataValue::String("x".into()),
1599                "b".into() => DataValue::String("y".into()),
1600            },
1601            hashmap! {
1602                "a".into() => DataValue::String("z".into()),
1603                // "b" missing => Null
1604            },
1605        ]));
1606
1607        let bytes = df.store_into_messagepack().expect("failed to serialize");
1608        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1609        assert_eq!(df, restored);
1610    }
1611
1612    #[rstest]
1613    fn test_messagepack_roundtrip_with_metadata() {
1614        let mut df = DataFrame::new(crate::column_frame! {
1615            "col" => ["a", "b"]
1616        });
1617        df.add_metadata("name".into(), DataValue::String("test_df".into()));
1618        df.add_metadata("flag".into(), DataValue::Bool(true));
1619
1620        let bytes = df.store_into_messagepack().expect("failed to serialize");
1621        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1622        assert_eq!(df, restored);
1623        assert_eq!(
1624            restored.get_metadata("name"),
1625            Some(&DataValue::String("test_df".into()))
1626        );
1627        assert_eq!(restored.get_metadata("flag"), Some(&DataValue::Bool(true)));
1628    }
1629
1630    #[rstest]
1631    fn test_messagepack_roundtrip_with_constants() {
1632        let mut df = DataFrame::new(crate::column_frame! {
1633            "x" => ["a", "b"]
1634        });
1635        df.insert_constant("const_key".into(), DataValue::String("const_val".into()));
1636        df.insert_constant("const_flag".into(), DataValue::Bool(false));
1637
1638        let bytes = df.store_into_messagepack().expect("failed to serialize");
1639        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1640        assert_eq!(df, restored);
1641        assert_eq!(
1642            restored.constants.get(&"const_key".into()),
1643            Some(&DataValue::String("const_val".into()))
1644        );
1645    }
1646
1647    #[rstest]
1648    fn test_messagepack_integer_type_coercion() {
1649        // MessagePack uses compact integer encoding: small I64 values may
1650        // deserialize as U8/U32 etc. This test documents this lossy behavior.
1651        let df = crate::df! {
1652            "a" => [1i64, 2i64, 3i64]
1653        };
1654
1655        let bytes = df.store_into_messagepack().expect("failed to serialize");
1656        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1657
1658        // The row count is preserved even if integer types differ
1659        assert_eq!(restored.len(), 3);
1660
1661        // Values that fit in u8 get coerced to U8 by messagepack
1662        let col = restored
1663            .select_column("a".into())
1664            .expect("column should exist");
1665        // Values are semantically equivalent but may be different DataValue variants
1666        assert_ne!(
1667            col[0],
1668            DataValue::I64(1),
1669            "messagepack coerces small ints to compact types"
1670        );
1671    }
1672
1673    #[rstest]
1674    fn test_messagepack_large_i64_preserved() {
1675        // Values that exceed u32 range stay as large integer types
1676        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1677            "big".into() => DataValue::I64(i64::MIN),
1678        }]));
1679
1680        let bytes = df.store_into_messagepack().expect("failed to serialize");
1681        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1682        assert_eq!(df, restored);
1683    }
1684
1685    #[rstest]
1686    fn test_messagepack_load_invalid_bytes() {
1687        let result = DataFrame::load_from_messagepack(&[0xFF, 0xFE, 0xFD, 0x00]);
1688        assert!(result.is_err());
1689    }
1690
1691    #[rstest]
1692    fn test_messagepack_load_empty_bytes() {
1693        let result = DataFrame::load_from_messagepack(&[]);
1694        assert!(result.is_err());
1695    }
1696
1697    #[rstest]
1698    fn test_messagepack_load_truncated_bytes() {
1699        let df = DataFrame::new(ColumnFrame::from(vec![
1700            hashmap! {
1701                "a".into() => DataValue::String("hello world".into()),
1702                "b".into() => DataValue::Bool(true),
1703            },
1704            hashmap! {
1705                "a".into() => DataValue::String("test".into()),
1706                "b".into() => DataValue::Bool(false),
1707            },
1708        ]));
1709        let bytes = df.store_into_messagepack().expect("failed to serialize");
1710        // Truncate to half
1711        let truncated = &bytes[..bytes.len() / 2];
1712        let result = DataFrame::load_from_messagepack(truncated);
1713        assert!(result.is_err());
1714    }
1715
1716    #[rstest]
1717    fn test_messagepack_roundtrip_with_nested_vec_data() {
1718        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1719            "vec_col".into() => DataValue::Vec(vec![
1720                DataValue::String("a".into()),
1721                DataValue::String("b".into()),
1722            ]),
1723            "bytes_col".into() => DataValue::Bytes(vec![0, 1, 255]),
1724        }]));
1725
1726        let bytes = df.store_into_messagepack().expect("failed to serialize");
1727        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1728        assert_eq!(df, restored);
1729    }
1730
1731    #[rstest]
1732    fn test_messagepack_roundtrip_preserves_row_count() {
1733        let df = DataFrame::new(ColumnFrame::from(vec![
1734            hashmap! { "a".into() => DataValue::String("x".into()) },
1735            hashmap! { "a".into() => DataValue::String("y".into()) },
1736            hashmap! { "a".into() => DataValue::String("z".into()) },
1737        ]));
1738
1739        let bytes = df.store_into_messagepack().expect("failed to serialize");
1740        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1741        assert_eq!(restored.len(), 3);
1742        assert_eq!(restored.n_rows(), 3);
1743        assert_eq!(restored.n_columns(), 1);
1744    }
1745
1746    #[rstest]
1747    fn test_messagepack_idempotent_double_roundtrip() {
1748        // Use types that survive messagepack coercion (strings, bools, bytes)
1749        let mut df = DataFrame::new(ColumnFrame::from(vec![
1750            hashmap! {
1751                "a".into() => DataValue::String("hello".into()),
1752                "b".into() => DataValue::Bool(true),
1753            },
1754            hashmap! {
1755                "a".into() => DataValue::String("world".into()),
1756                "b".into() => DataValue::Bool(false),
1757            },
1758        ]));
1759        df.add_metadata("meta".into(), DataValue::Bool(true));
1760        df.insert_constant("c".into(), DataValue::String("const".into()));
1761
1762        let bytes1 = df.store_into_messagepack().expect("first serialize");
1763        let restored1 = DataFrame::load_from_messagepack(&bytes1).expect("first deserialize");
1764        let bytes2 = restored1
1765            .store_into_messagepack()
1766            .expect("second serialize");
1767        let restored2 = DataFrame::load_from_messagepack(&bytes2).expect("second deserialize");
1768
1769        assert_eq!(df, restored2);
1770        assert_eq!(bytes1, bytes2);
1771    }
1772
1773    #[rstest]
1774    fn test_messagepack_single_byte_payload() {
1775        // A single valid msgpack byte (e.g. fixint) should fail as incomplete DataFrame
1776        let result = DataFrame::load_from_messagepack(&[0x01]);
1777        assert!(result.is_err());
1778    }
1779
1780    // === hash_datavalue public API edge case tests ===
1781
1782    #[rstest]
1783    fn test_hash_datavalue_public_api_accessible() {
1784        // Verify the re-exported function works from the crate root
1785        let val = DataValue::I32(42);
1786        let h = crate::hash_datavalue(&val);
1787        // Deterministic
1788        assert_eq!(h, crate::hash_datavalue(&DataValue::I32(42)));
1789    }
1790
1791    #[rstest]
1792    fn test_hash_datavalue_vec_length_matters() {
1793        // [1] and [1, Null] should produce different hashes
1794        let short = DataValue::Vec(vec![DataValue::I32(1)]);
1795        let long = DataValue::Vec(vec![DataValue::I32(1), DataValue::Null]);
1796        assert_ne!(crate::hash_datavalue(&short), crate::hash_datavalue(&long));
1797    }
1798
1799    #[rstest]
1800    fn test_hash_datavalue_map_different_keys_same_values() {
1801        let mut m1 = std::collections::HashMap::new();
1802        m1.insert("a".into(), DataValue::I32(1));
1803        let mut m2 = std::collections::HashMap::new();
1804        m2.insert("b".into(), DataValue::I32(1));
1805
1806        assert_ne!(
1807            crate::hash_datavalue(&DataValue::Map(m1)),
1808            crate::hash_datavalue(&DataValue::Map(m2))
1809        );
1810    }
1811
1812    #[rstest]
1813    fn test_hash_datavalue_empty_string_vs_empty_bytes() {
1814        let empty_str = DataValue::String("".into());
1815        let empty_bytes = DataValue::Bytes(vec![]);
1816        assert_ne!(
1817            crate::hash_datavalue(&empty_str),
1818            crate::hash_datavalue(&empty_bytes)
1819        );
1820    }
1821
1822    #[rstest]
1823    fn test_hash_datavalue_empty_vec_vs_empty_map() {
1824        let empty_vec = DataValue::Vec(vec![]);
1825        let empty_map = DataValue::Map(std::collections::HashMap::new());
1826        assert_ne!(
1827            crate::hash_datavalue(&empty_vec),
1828            crate::hash_datavalue(&empty_map)
1829        );
1830    }
1831
1832    #[rstest]
1833    fn test_hash_datavalue_i128_boundary_values() {
1834        let max = DataValue::I128(i128::MAX);
1835        let min = DataValue::I128(i128::MIN);
1836        let zero = DataValue::I128(0);
1837        let neg_one = DataValue::I128(-1);
1838
1839        // All distinct
1840        let hashes: std::collections::HashSet<u64> = [&max, &min, &zero, &neg_one]
1841            .iter()
1842            .map(|v| crate::hash_datavalue(v))
1843            .collect();
1844        assert_eq!(hashes.len(), 4);
1845    }
1846
1847    #[rstest]
1848    fn test_hash_datavalue_u128_boundary_values() {
1849        let max = DataValue::U128(u128::MAX);
1850        let zero = DataValue::U128(0);
1851        let one = DataValue::U128(1);
1852        // u128::MAX is all bits set; ensure it differs from i128(-1) which is also all bits
1853        let i128_neg1 = DataValue::I128(-1);
1854
1855        assert_ne!(
1856            crate::hash_datavalue(&max),
1857            crate::hash_datavalue(&i128_neg1)
1858        );
1859        let hashes: std::collections::HashSet<u64> = [&max, &zero, &one]
1860            .iter()
1861            .map(|v| crate::hash_datavalue(v))
1862            .collect();
1863        assert_eq!(hashes.len(), 3);
1864    }
1865
1866    #[rstest]
1867    fn test_hash_datavalue_f64_special_values() {
1868        // NaN bit patterns: NaN == NaN for hashing since we use to_bits()
1869        let nan1 = DataValue::F64(f64::NAN);
1870        let nan2 = DataValue::F64(f64::NAN);
1871        assert_eq!(crate::hash_datavalue(&nan1), crate::hash_datavalue(&nan2));
1872
1873        // subnormal
1874        let subnormal = DataValue::F64(f64::MIN_POSITIVE / 2.0);
1875        let normal = DataValue::F64(f64::MIN_POSITIVE);
1876        assert_ne!(
1877            crate::hash_datavalue(&subnormal),
1878            crate::hash_datavalue(&normal)
1879        );
1880    }
1881
1882    #[rstest]
1883    fn test_hash_datavalue_enum_number_vs_i32_same_value() {
1884        // EnumNumber(42) and I32(42) should hash differently (different discriminant)
1885        let enum_val = DataValue::EnumNumber(42);
1886        let i32_val = DataValue::I32(42);
1887        assert_ne!(
1888            crate::hash_datavalue(&enum_val),
1889            crate::hash_datavalue(&i32_val)
1890        );
1891    }
1892
1893    #[rstest]
1894    fn get_single_column_typed_f64_from_i32() {
1895        let df = crate::df! {
1896            "a" => [1i32, 2i32, 3i32]
1897        };
1898        let key: Key = "a".into();
1899        let col = df.get_single_column_typed::<f64>(&key).unwrap();
1900        assert_eq!(col, ndarray::arr1(&[1.0f64, 2.0, 3.0]));
1901    }
1902
1903    #[rstest]
1904    fn get_single_column_typed_string() {
1905        let df = crate::df! {
1906            "name" => ["alice", "bob"]
1907        };
1908        let key: Key = "name".into();
1909        let col = df.get_single_column_typed::<String>(&key).unwrap();
1910        assert_eq!(
1911            col,
1912            ndarray::arr1(&["alice".to_string(), "bob".to_string()])
1913        );
1914    }
1915
1916    #[rstest]
1917    fn get_single_column_typed_missing_key() {
1918        let df = crate::df! {
1919            "a" => [1u64, 2u64]
1920        };
1921        let missing: Key = "z".into();
1922        assert!(df.get_single_column_typed::<u64>(&missing).is_none());
1923    }
1924
1925    #[rstest]
1926    fn get_single_column_typed_matches_untyped() {
1927        let df = crate::df! {
1928            "v" => [10u64, 20u64, 30u64]
1929        };
1930        let key: Key = "v".into();
1931        let typed = df.get_single_column_typed::<u64>(&key).unwrap();
1932        let untyped = df.get_single_column(&key).unwrap();
1933        for (t, u) in typed.iter().zip(untyped.iter()) {
1934            assert_eq!(*t, u64::extract(u));
1935        }
1936    }
1937
1938    #[rstest]
1939    fn get_single_column_typed_bool_from_i32() {
1940        let df = crate::df! {
1941            "flag" => [1i32, 0i32, 1i32, 0i32]
1942        };
1943        let key: Key = "flag".into();
1944        let col = df.get_single_column_typed::<bool>(&key).unwrap();
1945        assert_eq!(col, ndarray::arr1(&[true, false, true, false]));
1946    }
1947
1948    #[rstest]
1949    fn get_single_column_typed_i64_from_u32() {
1950        let df = crate::df! {
1951            "x" => [10u32, 20u32, 30u32]
1952        };
1953        let key: Key = "x".into();
1954        let col = df.get_single_column_typed::<i64>(&key).unwrap();
1955        assert_eq!(col, ndarray::arr1(&[10i64, 20i64, 30i64]));
1956    }
1957
1958    #[rstest]
1959    fn get_single_column_typed_f64_truncation_to_i32() {
1960        let df = crate::df! {
1961            "v" => [1.9f64, 2.1f64, 3.7f64]
1962        };
1963        let key: Key = "v".into();
1964        let col = df.get_single_column_typed::<i32>(&key).unwrap();
1965        assert_eq!(col, ndarray::arr1(&[1i32, 2i32, 3i32]));
1966    }
1967
1968    #[rstest]
1969    fn get_single_column_typed_single_element() {
1970        let df = crate::df! {
1971            "solo" => [42u64]
1972        };
1973        let key: Key = "solo".into();
1974        let col = df.get_single_column_typed::<f64>(&key).unwrap();
1975        assert_eq!(col.len(), 1);
1976        assert_eq!(col[0], 42.0);
1977    }
1978
1979    #[rstest]
1980    fn select_typed_all_columns() {
1981        let df = crate::df! {
1982            "a" => [1i32, 2i32, 3i32],
1983            "b" => [4i32, 5i32, 6i32]
1984        };
1985        let result = df.select_typed::<f64>(None).unwrap();
1986        assert_eq!(result.nrows(), 3);
1987        assert_eq!(result.ncols(), 2);
1988        assert_eq!(result[[0, 0]], 1.0);
1989        assert_eq!(result[[0, 1]], 4.0);
1990        assert_eq!(result[[2, 0]], 3.0);
1991        assert_eq!(result[[2, 1]], 6.0);
1992    }
1993
1994    #[rstest]
1995    fn select_typed_specific_keys() {
1996        let df = crate::df! {
1997            "x" => [10u64, 20u64],
1998            "y" => [30u64, 40u64],
1999            "z" => [50u64, 60u64]
2000        };
2001        let keys: Vec<Key> = vec!["x".into(), "z".into()];
2002        let result = df.select_typed::<i64>(Some(&keys)).unwrap();
2003        assert_eq!(result.nrows(), 2);
2004        assert_eq!(result.ncols(), 2);
2005        assert_eq!(result[[0, 0]], 10i64);
2006        assert_eq!(result[[0, 1]], 50i64);
2007        assert_eq!(result[[1, 0]], 20i64);
2008        assert_eq!(result[[1, 1]], 60i64);
2009    }
2010
2011    #[rstest]
2012    fn select_typed_nonexistent_key_gives_empty() {
2013        let df = crate::df! {
2014            "a" => [1i32, 2i32]
2015        };
2016        let keys: Vec<Key> = vec!["missing".into()];
2017        let result = df.select_typed::<f64>(Some(&keys)).unwrap();
2018        assert_eq!(result.shape(), &[0, 0]);
2019    }
2020
2021    #[rstest]
2022    fn select_typed_matches_select_with_extract() {
2023        let df = crate::df! {
2024            "a" => [1u64, 2u64, 3u64],
2025            "b" => [4u64, 5u64, 6u64]
2026        };
2027        let typed = df.select_typed::<f64>(None).unwrap();
2028        let manual = df.select(None).unwrap().mapv(|v| f64::extract(&v));
2029        assert_eq!(typed, manual);
2030    }
2031
2032    #[rstest]
2033    fn select_typed_string_values() {
2034        let df = crate::df! {
2035            "name" => ["alice", "bob", "carol"]
2036        };
2037        let result = df.select_typed::<String>(None).unwrap();
2038        assert_eq!(result[[0, 0]], "alice");
2039        assert_eq!(result[[1, 0]], "bob");
2040        assert_eq!(result[[2, 0]], "carol");
2041    }
2042
2043    #[rstest]
2044    fn select_typed_cross_numeric_coercion() {
2045        // i32 values extracted as u64
2046        let df = crate::df! {
2047            "a" => [1i32, 2i32, 3i32]
2048        };
2049        let result = df.select_typed::<u64>(None).unwrap();
2050        assert_eq!(result[[0, 0]], 1u64);
2051        assert_eq!(result[[1, 0]], 2u64);
2052        assert_eq!(result[[2, 0]], 3u64);
2053    }
2054}