Skip to main content

trs_dataframe/
dataframe.rs

1use column_store::sorted_df::SortedDataFrame;
2use data_value::{DataValue, Extract};
3use halfbrown::HashMap;
4use ndarray::{Array1, Array2, ArrayView1};
5use std::fmt;
6pub mod column_store;
7pub mod index;
8pub mod join;
9pub mod key;
10use crate::{error::Error, CandidateData};
11#[cfg(feature = "python")]
12pub mod python;
13
14#[cfg(feature = "python")]
15use pyo3::prelude::*;
16
17use crate::{
18    dataframe::{column_store::ColumnFrame, join::JoinRelation, key::Key},
19    MLChefMap,
20};
21
/// Selects which end of a sorted dataframe to read from, and how many rows.
///
/// Passed to [`SortedDataFrame::topn`] to pull a fixed number of rows from
/// either the start or the end of a sorted result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TopN {
    /// The leading `n` rows of the sorted result (smallest values).
    First(usize),
    /// The trailing `n` rows of the sorted result (largest values).
    Last(usize),
}
33
34/// DataFrame holds information about [`ColumnFrame`].
35/// This is used to store the data and the metadata for the candidates.
36///
37/// # Columns Storage
38/// The underlying data is stored in row-major order using ndarray's Array2.
39/// Use `select()` for row-oriented access and `select_transposed()` for column-oriented access.
40///
41/// # Example
42/// ```
43/// use trs_dataframe::{DataFrame, column_frame};
44///
45/// let df = DataFrame::new(column_frame! {
46///     "a" => [1, 2, 3],
47///     "b" => [4, 5, 6]
48/// });
49///
50/// // Get all data as 2D array (rows x columns)
51/// let all_data = df.select(None);
52///
53/// // Get specific columns
54/// let keys = vec!["a".into(), "b".into()];
55/// let selected = df.select(Some(&keys));
56/// ```
57#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
58#[cfg_attr(feature = "python", pyclass)]
59pub struct DataFrame {
60    /// Constants for the dataframe - mikro optimization for the data
61    /// Values which is constant for the whole dataframe are stored here
62    /// These values are applied to all rows without storing them per-row
63    pub constants: HashMap<Key, DataValue>,
64    /// Internal columnar storage for row data
65    pub dataframe: ColumnFrame,
66    /// Metadata for the dataframe. Here you can store the information about the dataframe
67    /// This is user-defined key-value metadata that doesn't affect data operations
68    pub metadata: HashMap<String, DataValue>,
69}
70
71impl fmt::Display for DataFrame {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        self.dataframe.fmt(f)
74    }
75}
76
77impl DataFrame {
78    /// Creates a new [`DataFrame`] from anything that can be converted into a [`ColumnFrame`].
79    ///
80    /// # Examples
81    ///
82    /// ```
83    /// use trs_dataframe::{DataFrame, column_frame};
84    ///
85    /// let df = DataFrame::new(column_frame! {
86    ///     "a" => [1, 2, 3],
87    ///     "b" => [4, 5, 6]
88    /// });
89    /// assert_eq!(df.n_rows(), 3);
90    /// assert_eq!(df.n_columns(), 2);
91    /// ```
92    pub fn new<C: Into<ColumnFrame>>(dataframe: C) -> Self {
93        Self {
94            constants: HashMap::new(),
95            dataframe: dataframe.into(),
96            metadata: HashMap::new(),
97        }
98    }
99
100    /// Returns the number of columns which dataframe contains.
101    pub fn n_columns(&self) -> usize {
102        self.dataframe.data_frame.ncols()
103    }
104
105    /// Returns the number of rows which dataframe contains.
106    pub fn n_rows(&self) -> usize {
107        self.dataframe.data_frame.nrows()
108    }
109
110    /// Compacts the internal storage to reclaim memory after row deletions or
111    /// filter operations that may leave excess capacity allocated.
112    pub fn shrink(&mut self) {
113        self.dataframe.shrink();
114    }
115
116    /// Attaches a key-value metadata entry to this dataframe.
117    ///
118    /// Metadata does not participate in data operations (select, join, filter, etc.)
119    /// and is intended for user-defined annotations such as source info or timestamps.
120    pub fn add_metadata(&mut self, key: String, value: DataValue) {
121        self.metadata.insert(key, value);
122    }
123
124    /// Returns a reference to the metadata value for the given key, or `None` if absent.
125    pub fn get_metadata(&self, key: &str) -> Option<&DataValue> {
126        self.metadata.get(key)
127    }
128
129    /// Joins another dataframe into this one according to the given [`JoinRelation`].
130    ///
131    /// The join strategy is determined by the variant inside `join_type`:
132    /// - [`crate::dataframe::join::JoinBy::AddColumns`] — adds non-existing columns from `other`
133    /// - [`crate::dataframe::join::JoinBy::Replace`] — replaces the entire frame with `other`
134    /// - [`crate::dataframe::join::JoinBy::Extend`] — appends rows from `other`
135    /// - [`crate::dataframe::join::JoinBy::Broadcast`] — replicates a single-row `other` across all rows
136    /// - [`crate::dataframe::join::JoinBy::CartesianProduct`] — produces all row combinations
137    /// - [`crate::dataframe::join::JoinBy::JoinById`] — hash-based join on shared key columns
138    ///
139    /// Constants from `other` are merged into this dataframe's constants map.
140    pub fn join(&mut self, other: Self, join_type: &JoinRelation) -> Result<(), Error> {
141        for (key, value) in other.constants {
142            self.constants.insert(key, value);
143        }
144        self.dataframe.join(other.dataframe, join_type)
145    }
146
147    /// Applies a user-defined function to the underlying [`ColumnFrame`].
148    ///
149    /// The closure receives the provided `keys` and a mutable reference to the
150    /// internal [`ColumnFrame`], allowing arbitrary in-place transformations.
151    pub fn apply_function<F>(&mut self, keys: &[Key], mut func: F) -> Result<(), Error>
152    where
153        F: FnMut(&[Key], &mut ColumnFrame) -> Result<(), Error>,
154    {
155        self.dataframe.apply_function(keys, &mut func)
156    }
157
158    /// Selects columns and returns their data as a 2D array of [`DataValue`] in row-major order.
159    ///
160    /// If `keys` is `None`, all columns are returned. If a requested key does not
161    /// exist, its cells are filled with [`DataValue::Null`].
162    ///
163    /// # Examples
164    ///
165    /// ```
166    /// use trs_dataframe::{df, Key};
167    ///
168    /// let df = df! { "a" => [1, 2], "b" => [3, 4] };
169    /// let data = df.select(None).unwrap();
170    /// assert_eq!(data.nrows(), 2);
171    /// ```
172    pub fn select(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
173        Ok(self.dataframe.select(keys))
174    }
175
176    /// Returns selected columns as a typed 2D array, converting each [`DataValue`]
177    /// via the [`Extract`] trait.
178    ///
179    /// This is the typed counterpart of [`select`](Self::select). If `keys` is `None`,
180    /// all columns are returned. The data is in row-major order (rows × columns).
181    ///
182    /// # Type coercion
183    ///
184    /// The [`Extract`] trait performs best-effort numeric coercion (e.g. `I32 -> f64`).
185    /// Values that cannot be meaningfully converted yield the type's default
186    /// (0 for numbers, `false` for bool, empty string for `String`).
187    ///
188    /// # Examples
189    ///
190    /// ```
191    /// use trs_dataframe::{df, Key};
192    ///
193    /// let df = df! {
194    ///     "a" => [1i32, 2i32, 3i32],
195    ///     "b" => [4i32, 5i32, 6i32]
196    /// };
197    /// let keys = vec![Key::from("a"), Key::from("b")];
198    /// let arr = df.select_typed::<f64>(Some(&keys)).unwrap();
199    /// assert_eq!(arr[[0, 0]], 1.0);
200    /// assert_eq!(arr[[1, 1]], 5.0);
201    /// ```
202    pub fn select_typed<T: Extract>(&self, keys: Option<&[Key]>) -> Result<Array2<T>, Error> {
203        Ok(self.dataframe.select_typed(keys))
204    }
205
206    // pub fn select_view(&self, keys: Option<&[Key]>) -> Result<ArrayView2<'_, DataValue>, Error> {
207    //     Ok(self.dataframe.select_view(keys))
208    // }
209
210    /// Returns selected columns as typed row vectors (`Vec<Vec<D>>`), where each
211    /// inner vector is one row and each [`DataValue`] is converted via [`Extract`].
212    ///
213    /// Despite the name, the returned data is in row-major order (rows × selected columns).
214    pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>> {
215        self.dataframe.select_transposed_typed::<D>(keys)
216    }
217
218    /// Returns a read-only view of a single column, or `None` if the key is absent.
219    ///
220    /// This is a zero-copy operation — the returned [`ArrayView1`] borrows directly
221    /// from the underlying storage.
222    pub fn select_column(&self, key: Key) -> Option<ndarray::ArrayView1<'_, DataValue>> {
223        self.dataframe.select_column(&key)
224    }
225
226    /// Selects columns and returns them in column-major layout (transposed).
227    ///
228    /// If `keys` is `None`, all columns are included. Each column becomes a row
229    /// in the returned [`Array2`].
230    pub fn select_transposed(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
231        self.dataframe.select_transposed(keys)
232    }
233
234    /// Stores a constant value that logically applies to every row without
235    /// being physically stored per-row.
236    ///
237    /// Constants are carried through joins but do not appear in
238    /// [`select`](Self::select) results.
239    pub fn insert_constant(&mut self, key: Key, value: DataValue) {
240        self.constants.insert(key, value);
241    }
242
243    /// Appends a single row to the dataframe.
244    ///
245    /// The row is supplied as any type implementing [`CandidateData`]
246    /// (e.g. `HashMap<Key, DataValue>`). New columns are added automatically
247    /// if the row contains keys not yet present in the frame.
248    pub fn push<C: CandidateData>(&mut self, item: C) -> Result<(), Error> {
249        self.dataframe.push(item)
250    }
251
252    /// Removes the specified columns from this dataframe and returns them as a
253    /// new [`DataFrame`].
254    pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error> {
255        self.dataframe.remove_column(keys).map(|x| x.into())
256    }
257
258    /// Appends all rows from `items` to this dataframe.
259    ///
260    /// If the two frames have different column sets, missing columns are filled
261    /// with [`DataValue::Null`].
262    pub fn extend(&mut self, items: Self) -> Result<(), Error> {
263        self.dataframe.extend(items.dataframe)
264    }
265
266    /// Returns the number of rows in the dataframe.
267    pub fn len(&self) -> usize {
268        self.dataframe.len()
269    }
270
271    /// Returns `true` if the dataframe contains no rows.
272    pub fn is_empty(&self) -> bool {
273        self.dataframe.is_empty()
274    }
275
276    /// Adds a new column to the dataframe.
277    ///
278    /// Returns an error if the column key already exists or if the length of
279    /// `values` does not match the current row count.
280    pub fn add_single_column<K: Into<Key>>(
281        &mut self,
282        key: K,
283        values: Array1<DataValue>,
284    ) -> Result<(), Error> {
285        self.dataframe.add_single_column(key, values)
286    }
287
288    /// Returns a read-only view of a single column, or `None` if the key is absent.
289    ///
290    /// This is a zero-copy operation — the returned [`ArrayView1`] borrows directly
291    /// from the underlying storage.
292    pub fn get_single_column(&self, key: &Key) -> Option<ArrayView1<'_, DataValue>> {
293        self.dataframe.get_single_column(key)
294    }
295
296    /// Returns a column extracted into a typed [`Array1<T>`], where each [`DataValue`]
297    /// is converted via the [`Extract`] trait.
298    ///
299    /// This is a convenience wrapper around [`get_single_column`](Self::get_single_column)
300    /// that maps every element through `T::extract`, producing an owned array of the
301    /// target type. Returns `None` if the key does not exist in the dataframe.
302    ///
303    /// # Type coercion
304    ///
305    /// The [`Extract`] trait performs best-effort numeric coercion (e.g. `I32 -> f64`).
306    /// Values that cannot be meaningfully converted yield the type's default
307    /// (0 for numbers, `false` for bool, empty string for `String`).
308    ///
309    /// # Examples
310    ///
311    /// ```
312    /// use trs_dataframe::{df, Key};
313    ///
314    /// let df = df! {
315    ///     "score" => [1.5f64, 2.5f64, 3.5f64]
316    /// };
317    /// let key: Key = "score".into();
318    /// let col = df.get_single_column_typed::<f64>(&key).unwrap();
319    /// assert_eq!(col.len(), 3);
320    /// assert_eq!(col[0], 1.5);
321    /// ```
322    pub fn get_single_column_typed<T: Extract>(&self, key: &Key) -> Option<Array1<T>> {
323        self.dataframe.get_single_column_typed(key)
324    }
325
326    /// Returns a [`SortedDataFrame`] view sorted by the given column key.
327    ///
328    /// The sort is ascending with `Null` values pushed to the end.
329    /// Use [`SortedDataFrame::topn`] to efficiently extract the first/last N rows.
330    pub fn sorted(&self, key: &Key) -> Result<SortedDataFrame<'_>, Error> {
331        self.dataframe.sorted(key)
332    }
333
334    /// Returns a new dataframe containing only rows that satisfy the filter expression.
335    ///
336    /// Filter expressions are parsed from strings — see [`FilterRules`](crate::filter::FilterRules)
337    /// for the supported grammar (comparison, regex, set membership, logical combinators).
338    ///
339    /// Constants and metadata are cloned into the result.
340    pub fn filter(&self, filter: &crate::filter::FilterRules) -> Result<Self, Error> {
341        let filtered_df = self.dataframe.filter(filter)?;
342        Ok(Self {
343            constants: self.constants.clone(),
344            dataframe: filtered_df,
345            metadata: self.metadata.clone(),
346        })
347    }
348
349    /// Converts this dataframe into a Polars [`DataFrame`](polars::prelude::DataFrame).
350    ///
351    /// Each column is mapped to its Polars equivalent via [`into_polars_value`].
352    /// Requires the `polars-df` feature.
353    #[cfg(feature = "polars-df")]
354    pub fn as_polars(&self) -> Result<polars::prelude::DataFrame, Error> {
355        let mut columns = vec![];
356        for key in self.dataframe.keys() {
357            let values = self
358                .dataframe
359                .get_single_column(key)
360                .ok_or_else(|| Error::NotFound(key.clone()))?
361                .into_iter()
362                .map(|x| into_polars_value(key, x.clone()))
363                .collect::<Vec<_>>();
364            let s = polars::prelude::Column::new(key.name().into(), values);
365
366            columns.push(s);
367        }
368
369        Ok(polars::prelude::DataFrame::new(columns)?)
370    }
371
372    /// Deserializes a dataframe from MessagePack bytes.
373    ///
374    /// This is the inverse of [`store_into_messagepack`](Self::store_into_messagepack)
375    /// and is useful for compact binary serialization in IPC or storage scenarios.
376    pub fn load_from_messagepack(bytes: &[u8]) -> Result<Self, Error> {
377        rmp_serde::decode::from_slice(bytes).map_err(|e| Error::UnknownError(format!("{e:?}")))
378    }
379
380    /// Serializes this dataframe into MessagePack bytes.
381    ///
382    /// The resulting bytes can be deserialized back with
383    /// [`load_from_messagepack`](Self::load_from_messagepack).
384    pub fn store_into_messagepack(&self) -> Result<Vec<u8>, Error> {
385        rmp_serde::encode::to_vec(&self).map_err(|e| Error::UnknownError(format!("{e:?}")))
386    }
387}
388
/// Maps a crate-level [`DataType`](crate::DataType) onto the matching Polars data type.
///
/// `Vec` becomes a `List` with an unknown inner type, `Map` becomes a `Struct`
/// without fields, and `Unknown` becomes `Null`.
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn polars_dtype(dtype: crate::DataType) -> polars::prelude::DataType {
    use crate::DataType as Source;
    use polars::prelude::DataType as Target;
    match dtype {
        Source::Bool => Target::Boolean,
        Source::U32 => Target::UInt32,
        Source::I32 => Target::Int32,
        Source::U8 => Target::UInt8,
        Source::U64 => Target::UInt64,
        Source::I64 => Target::Int64,
        Source::F32 => Target::Float32,
        Source::F64 => Target::Float64,
        Source::U128 => Target::UInt128,
        Source::I128 => Target::Int128,
        Source::String => Target::String,
        Source::Bytes => Target::Binary,
        Source::Unknown => Target::Null,
        Source::Vec => {
            // The element type is not known at this level, so let Polars treat it as "any".
            let inner = Target::Unknown(polars::prelude::UnknownKind::Any);
            Target::List(Box::new(inner))
        }
        Source::Map => Target::Struct(Vec::new()),
    }
}
416
/// Converts a [`DataValue`] into a Polars [`AnyValue`](polars::prelude::AnyValue),
/// applying type coercion based on the column's [`Key`] dtype.
///
/// The value is first passed through `convert_dv_to_dtype` for `key`, then mapped
/// variant by variant. Scalar variants map to the Polars variant of the same width
/// (`U8` becomes `UInt8`, matching [`polars_dtype`] and [`from_polars_value`]);
/// `EnumNumber` is emitted as `Int32`.
///
/// * `Vec` values become a `List` series named after `key`. The element dtype is
///   the first non-`Unknown` dtype detected among the elements.
/// * `Map` values become a `StructOwned` whose fields are ordered by key so the
///   resulting layout does not depend on hash-map iteration order.
///
/// Requires the `polars-df` feature.
///
/// # Panics
///
/// Panics if Polars cannot build a series from the converted elements of a
/// `Vec` value.
#[cfg(feature = "polars-df")]
pub fn into_polars_value(key: &Key, dv: DataValue) -> polars::prelude::AnyValue<'static> {
    use polars::prelude::AnyValue::*;
    use polars::prelude::Field;

    use crate::dataframe::column_store::convert_dv_to_dtype;
    match convert_dv_to_dtype(key, dv) {
        DataValue::String(smart_string) => StringOwned(smart_string.as_str().into()),
        DataValue::Bytes(items) => BinaryOwned(items),
        // Keep the 8-bit width: `polars_dtype` declares U8 columns as UInt8 and
        // `from_polars_value` maps UInt8 back to U8.
        DataValue::U8(x) => UInt8(x),
        DataValue::Bool(x) => Boolean(x),
        DataValue::I32(x) => Int32(x),
        DataValue::U32(x) => UInt32(x),
        DataValue::I64(x) => Int64(x),
        DataValue::U64(x) => UInt64(x),
        DataValue::I128(x) => Int128(x),
        DataValue::U128(x) => UInt128(x),
        DataValue::F32(x) => Float32(x),
        DataValue::F64(x) => Float64(x),
        DataValue::EnumNumber(x) => Int32(x),
        DataValue::Null => Null,
        DataValue::Vec(data_values) => {
            // Use the first element with a detectable dtype as the element type.
            let element_dtype = data_values
                .iter()
                .map(|d| crate::detect_dtype(d))
                .find(|dt| !matches!(dt, crate::DataType::Unknown))
                .unwrap_or(crate::DataType::Unknown);
            let vec_key = Key::new(key.name(), element_dtype);
            let elements = data_values
                .into_iter()
                .map(|x| into_polars_value(&vec_key, x))
                .collect::<Vec<_>>();
            let series =
                polars::series::Series::from_any_values(key.name().into(), &elements, true)
                    // Build the panic message only on failure.
                    .unwrap_or_else(|e| panic!("Cannot create series for {key:?}: {e:?}"));
            List(series)
        }
        DataValue::Map(x) => {
            // Iterate entries directly (no per-key lookup) in key order.
            let mut entries = x.iter().collect::<Vec<_>>();
            entries.sort_by(|(a, _), (b, _)| a.cmp(b));
            let mut values = Vec::with_capacity(entries.len());
            let mut fields = Vec::with_capacity(entries.len());
            for (name, value) in entries {
                let dtype = crate::detect_dtype(value);
                let field_key = Key::new(name, dtype);
                values.push(into_polars_value(&field_key, value.clone()));
                fields.push(Field::new(field_key.name().into(), polars_dtype(dtype)));
            }
            StructOwned(Box::new((values, fields)))
        }
    }
}
481
/// Converts a Polars [`AnyValue`](polars::prelude::AnyValue) back into a [`DataValue`].
///
/// Integer variants narrower than 32 bits are widened losslessly (`UInt16` to
/// `U32`, `Int8`/`Int16` to `I32`); `UInt8` stays `U8`. `List` values are converted
/// element by element and `StructOwned` values become a `Map` keyed by field name.
/// Any variant not handled here is logged with `tracing::warn!` and converted to
/// [`DataValue::Null`].
///
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
pub fn from_polars_value(dv: polars::prelude::AnyValue<'_>) -> DataValue {
    use polars::prelude::AnyValue::*;
    match dv {
        Null => DataValue::Null,
        Boolean(v) => v.into(),
        String(v) => DataValue::String(v.into()),
        UInt8(v) => DataValue::U8(v),
        UInt16(v) => DataValue::U32(u32::from(v)),
        UInt32(v) => v.into(),
        UInt64(v) => v.into(),
        // Counterpart of the `U128 => UInt128` mapping in `into_polars_value`;
        // without it 128-bit unsigned values fell through to the `Null` fallback.
        UInt128(v) => DataValue::U128(v),
        Int8(v) => i32::from(v).into(),
        Int16(v) => i32::from(v).into(),
        Int32(v) => v.into(),
        Int64(v) => v.into(),
        Int128(v) => v.into(),
        Float32(v) => v.into(),
        Float64(v) => v.into(),
        List(series) => DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>()),
        // Array(series, _) => {
        //     DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>())
        // }
        StringOwned(v) => DataValue::String(v.as_str().into()),
        Binary(v) => DataValue::Bytes(v.to_owned()),
        BinaryOwned(v) => DataValue::Bytes(v),
        StructOwned(m) => {
            // The boxed tuple is (values, fields); pair each field with its value.
            let (values, fields) = *m;
            let mut hm: std::collections::HashMap<smartstring::alias::String, DataValue> =
                std::collections::HashMap::with_capacity(fields.len());
            for (field, value) in fields.into_iter().zip(values) {
                hm.insert(field.name.as_str().into(), from_polars_value(value));
            }
            DataValue::Map(hm)
        }
        e => {
            tracing::warn!("Unsupported polars value: {e:?}");
            DataValue::Null
        }
    }
}
524
525impl From<ColumnFrame> for DataFrame {
526    fn from(dataframe: ColumnFrame) -> Self {
527        Self::new(dataframe)
528    }
529}
530
531impl From<Vec<std::collections::HashMap<Key, DataValue>>> for DataFrame {
532    fn from(dataframe: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
533        Self::new(ColumnFrame::from(dataframe))
534    }
535}
536
537impl From<Vec<HashMap<Key, DataValue>>> for DataFrame {
538    fn from(dataframe: Vec<HashMap<Key, DataValue>>) -> Self {
539        Self::new(ColumnFrame::from(dataframe))
540    }
541}
542
543impl From<std::collections::HashMap<String, Vec<DataValue>>> for DataFrame {
544    fn from(dataframe: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
545        Self::new(ColumnFrame::from(dataframe))
546    }
547}
548
549impl From<MLChefMap> for DataFrame {
550    fn from(dataframe: MLChefMap) -> Self {
551        Self::new(ColumnFrame::from(dataframe))
552    }
553}
554impl From<Vec<(Key, Vec<DataValue>)>> for DataFrame {
555    fn from(dataframe: Vec<(Key, Vec<DataValue>)>) -> Self {
556        Self::new(ColumnFrame::from(dataframe))
557    }
558}
559
560impl From<std::collections::HashMap<String, Array1<DataValue>>> for DataFrame {
561    fn from(dataframe: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
562        Self::new(ColumnFrame::from(dataframe))
563    }
564}
565
/// Builds a [`DataFrame`] from a Polars dataframe,
/// going through the matching [`ColumnFrame`] conversion.
/// Requires the `polars-df` feature.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for DataFrame {
    fn from(dataframe: polars::prelude::DataFrame) -> Self {
        let columns = ColumnFrame::from(dataframe);
        Self::new(columns)
    }
}
572#[cfg(test)]
573mod test {
574    use crate::filter::FilterRules;
575
576    use super::*;
577    use halfbrown::hashmap;
578    #[cfg(feature = "polars-df")]
579    use polars::prelude::NamedFrom as _;
580    use rstest::*;
581    use tracing_test::traced_test;
582    #[fixture]
583    fn dummy_candidates() -> ColumnFrame {
584        ColumnFrame::from(vec![
585            hashmap! {
586                "key1".into() => 1.into(),
587                "key2".into() => "a".into(),
588            },
589            hashmap! {
590                "key1".into() => 2.into(),
591                "key2".into() => "b".into(),
592            },
593        ])
594    }
595
596    #[rstest]
597    fn test_serde() {
598        let df = crate::df! {
599            "a" => [1u64, 2u64, 3u64],
600            "b" => [4u64, 5u64, 6u64],
601            "c" => [7u64, 8u64, 9u64]
602        };
603
604        let serialized = serde_json::to_string(&df).expect("BUG: Unable to serialize dataframe");
605
606        let deserialized =
607            serde_json::from_str(&serialized).expect("BUG: Unable to deserialize dataframe");
608
609        assert_eq!(df, deserialized);
610    }
611
612    #[cfg(feature = "polars-df")]
613    #[rstest]
614    fn test_polars() {
615        let expected = crate::df! {
616            "a" => [1u64, 2u64, 3u64],
617            "b" => [4f64, 5f64, 6f64],
618            "c" => [7i64, 8i64, 9i64]
619        };
620
621        let polars_df = polars::df!(
622            "a" => [1u64, 2u64, 3u64],
623            "b" => [4f64, 5f64, 6f64],
624            "c" => [7i64, 8i64, 9i64]
625        )
626        .expect("BUG: should be ok");
627        let as_df: DataFrame = polars_df.into();
628        let keys: Vec<Key> = vec!["a".into(), "b".into(), "c".into()];
629        assert_eq!(
630            as_df.select(Some(keys.as_slice())),
631            expected.select(Some(keys.as_slice()))
632        );
633    }
634    #[cfg(feature = "polars-df")]
635    use crate::DataType;
636    #[cfg(feature = "polars-df")]
637    #[rstest]
638    #[case::str(Key::new("a", DataType::String), DataValue::String("test".into()), polars::prelude::AnyValue::String("test".into()))]
639    #[case::u32(
640        Key::new("a", DataType::U32),
641        DataValue::U32(u32::MAX),
642        polars::prelude::AnyValue::UInt32(u32::MAX)
643    )]
644    #[case::i32(
645        Key::new("a", DataType::I32),
646        DataValue::I32(i32::MIN),
647        polars::prelude::AnyValue::Int32(i32::MIN)
648    )]
649    #[case::i64(
650        Key::new("a", DataType::I64),
651        DataValue::I64(i64::MIN),
652        polars::prelude::AnyValue::Int64(i64::MIN)
653    )]
654    #[case::u64(
655        Key::new("a", DataType::U64),
656        DataValue::U64(u64::MIN),
657        polars::prelude::AnyValue::UInt64(u64::MIN)
658    )]
659    #[case::f32(
660        Key::new("a", DataType::F32),
661        DataValue::F32(f32::MIN),
662        polars::prelude::AnyValue::Float32(f32::MIN)
663    )]
664    #[case::f64(
665        Key::new("a", DataType::F64),
666        DataValue::F64(f64::MIN),
667        polars::prelude::AnyValue::Float64(f64::MIN)
668    )]
669    #[case::null(
670        Key::new("a", DataType::Unknown),
671        DataValue::Null,
672        polars::prelude::AnyValue::Null
673    )]
674    #[case::i128(
675        Key::new("a", DataType::I128),
676        DataValue::I128(i128::MIN),
677        polars::prelude::AnyValue::Int128(i128::MIN)
678    )]
679    #[case::u8(
680        Key::new("a", DataType::U8),
681        DataValue::U8(255),
682        polars::prelude::AnyValue::UInt8(255)
683    )]
684    #[case::bool(
685        Key::new("a", DataType::Bool),
686        DataValue::Bool(true),
687        polars::prelude::AnyValue::Boolean(true)
688    )]
689    #[case::bytes(Key::new("a", DataType::Bytes), DataValue::Bytes("aaaaa".as_bytes().to_vec()), polars::prelude::AnyValue::BinaryOwned("aaaaa".as_bytes().to_vec()))]
690    #[case::vec_uints(Key::new("a", DataType::Vec), DataValue::Vec(vec![DataValue::U32(0), DataValue::U32(1)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::UInt32(1)])))]
691    #[case::map(Key::new("a", DataType::Map), DataValue::Map(data_value::stdhashmap!("a" => 0u64, "b" => "s")), polars::prelude::AnyValue::StructOwned(Box::new((
692        vec![polars::prelude::AnyValue::UInt64(0u64), polars::prelude::AnyValue::String("s".into())],
693        vec![polars::prelude::Field::new("a".into(), polars::prelude::DataType::UInt64), polars::prelude::Field::new("b".into(), polars::prelude::DataType::String)]))))]
694    // polars converts all by first element type
695    // #[case::vec_diff_int(DataValue::Vec(vec![ DataValue::I32(1), DataValue::U32(0)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::Int32(1i32), polars::prelude::AnyValue::UInt32(0u32)])))]
696    //#[case::vec_int_str(DataValue::Vec(vec![DataValue::U32(0), DataValue::String("1".into())]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::StringOwned("1".into())])))]
697    fn into_polars_value_test(
698        #[case] key: Key,
699        #[case] input: DataValue,
700        #[case] output: polars::prelude::AnyValue<'static>,
701    ) {
702        assert_eq!(into_polars_value(&key, input.clone()), output);
703        assert_eq!(from_polars_value(output), input);
704    }
705
706    // #[cfg(feature = "polars-df")]
707    // #[rstest]
708    // fn as_polars() {
709    //     let state = include_bytes!("../part_00330.dfb");
710    //     let df: Result<DataFrame, _> = rmp_serde::decode::from_slice(state);
711    //     assert!(df.is_ok());
712    //     let df = df.unwrap();
713    //     println!("{df}");
714    //     let polars_df = df.as_polars();
715    //     assert!(polars_df.is_ok(), "{polars_df:?}");
716    // }
717    #[rstest]
718    #[case(
719        DataFrame::new(crate::column_frame! {
720            "a" => [1f64, 2f64, 3f64],
721            "b" => [4i64, 5i64, 6i64],
722            "c" => [7i64, 8i64, 9i64]
723        }),
724        DataFrame::new(crate::column_frame! {
725            "a" => [1f64, 2f64],
726            "b" => [4i64, 5i64],
727            "c" => [7i64, 8i64]
728        }),
729        FilterRules::try_from("a >= 1f64 && (b <= 5 || c <= 8) && b >= 4").expect("BUG: cannot create filter rules"),
730    )]
731    #[case(
732        DataFrame::new(crate::column_frame! {
733            "a" => [1f64, 2f64, 3f64],
734            "b" => [4i64, 5i64, 6i64],
735            "c" => [7i64, 8i64, 9i64]
736        }),
737        DataFrame::new(crate::column_frame! {
738            "a" => [2f64],
739            "b" => [5i64],
740            "c" => [8i64]
741        }),
742        FilterRules::try_from("a % 2f64 == 0f64").expect("BUG: cannot create filter rules"),
743    )]
744    #[traced_test]
745    fn filter_test(
746        #[case] df: DataFrame,
747        #[case] expected: DataFrame,
748        #[case] filter: FilterRules,
749    ) {
750        let filtered = df.filter(&filter).expect("BUG: cannot filter");
751        assert_eq!(filtered, expected);
752    }
753
754    #[rstest]
755    fn test_serde_complex() {
756        let simple = r#"
757{
758    "constants": {},
759    "dataframe": {
760        "index": {
761            "keys": [
762                {
763                    "key": 3162770485,
764                    "name": "a",
765                    "ctype": "U32"
766                },
767                {
768                    "key": 2279056742,
769                    "name": "b",
770                    "ctype": "F64"
771                },
772                {
773                    "key": 2994984227,
774                    "name": "c",
775                    "ctype": "U64"
776                },
777                {
778                    "key": 3319645144,
779                    "name": "d",
780                    "ctype": "F64"
781                },
782                {
783                    "key": 1291847470,
784                    "name": "e",
785                    "ctype": "U32"
786                },
787                {
788                    "key": 874241070,
789                    "name": "f",
790                    "ctype": "Bool"
791                }
792            ],
793            "indexes": {
794                "a": 0,
795                "b": 1,
796                "c": 2,
797                "d": 3,
798                "e": 4,
799                "f": 5
800            },
801            "alias": {}
802        },
803        "data_frame": {
804            "v": 1,
805            "dim": [
806                2,
807                6
808            ],
809            "data": [
810                253780,
811                0.009369421750307085,
812                1633222860381359,
813                8,
814                5,
815                true,
816                64512,
817                0.003391335718333721,
818                1633222860810557,
819                8,
820                5,
821                null
822            ]
823        }
824    },
825    "metadata": {}
826}
827        "#;
828
829        let simple_deserialized: DataFrame =
830            serde_json::from_str(simple).expect("BUG: Unable to deserialize dataframe");
831
832        println!("deserialized: {simple_deserialized:?}");
833        let array = format!("[{}, {}, {}]", simple, simple, simple);
834        let deserialized: Vec<DataFrame> =
835            serde_json::from_str(&array).expect("BUG: Unable to deserialize dataframe");
836
837        println!("deserialized: {deserialized:?}");
838        assert_eq!(deserialized.len(), 3);
839        assert_eq!(simple_deserialized, deserialized[0]);
840    }
841
842    #[rstest]
843    #[case(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
844    #[case(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
845    #[case(vec![hashmap! {
846        "key1".into() => 1.into(),
847        "key2".into() => "a".into(),
848    },
849    hashmap! {
850        "key1".into() => 2.into(),
851    },])]
852    #[case(vec![data_value::stdhashmap! {
853        "key1" => DataValue::from(1),
854        "key2" => DataValue::from("a"),
855    },data_value::stdhashmap! {
856        "key1" => DataValue::from(2),
857    },])]
858    #[case(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(),
859    vec![DataValue::from("a"), DataValue::Null])])]
860    fn test_select_column<T: Into<DataFrame>>(#[case] input: T) {
861        let df: DataFrame = input.into();
862        assert_eq!(
863            df,
864            DataFrame {
865                constants: HashMap::new(),
866                dataframe: ColumnFrame::from(vec![
867                    hashmap! {
868                        "key1".into() => 1.into(),
869                        "key2".into() => "a".into(),
870                    },
871                    hashmap! {
872                        "key1".into() => 2.into(),
873                    },
874                ]),
875                metadata: HashMap::new(),
876            }
877        );
878        let selected_transposed = df.select_column("key1".into());
879        assert!(selected_transposed.is_some());
880        let selected_transposed = selected_transposed.unwrap();
881        assert_eq!(selected_transposed.len(), 2);
882        assert_eq!(selected_transposed, ndarray::array![1.into(), 2.into()]);
883    }
884
885    #[rstest]
886    #[case::hhm(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
887    #[case::stdhm(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
888    #[case::hm({
889        let hm: std::collections::HashMap<String, Array1<DataValue>> = data_value::stdhashmap!("key1".to_string() => Array1::from_vec(vec![DataValue::from(1), DataValue::from(2)]), "key2".to_string() => Array1::from_vec(vec![DataValue::from("a"), DataValue::Null]));
890        hm
891    })]
892    #[case::vec_hhm(vec![hashmap! {
893        "key1".into() => 1.into(),
894        "key2".into() => "a".into(),
895    },
896    hashmap! {
897        "key1".into() => 2.into(),
898    },])]
899    #[case::vec_hme(vec![data_value::stdhashmap! {
900        "key1" => DataValue::from(1),
901        "key2" => DataValue::from("a"),
902    },data_value::stdhashmap! {
903        "key1" => DataValue::from(2),
904    },])]
905    #[case::vec_vec(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(), vec![DataValue::from("a"), DataValue::Null])])]
906    fn test_from_conversion<T: Into<DataFrame>>(#[case] input: T) {
907        let df: DataFrame = input.into();
908        let expected: DataFrame = DataFrame {
909            constants: HashMap::new(),
910            dataframe: ColumnFrame::from(vec![
911                hashmap! {
912                    "key1".into() => 1.into(),
913                    "key2".into() => "a".into(),
914                },
915                hashmap! {
916                    "key1".into() => 2.into(),
917                },
918            ]),
919            metadata: HashMap::new(),
920        };
921        assert_eq!(
922            df.select(Some(&["key1".into(), "key2".into()])),
923            expected.select(Some(&["key1".into(), "key2".into()])),
924            "{df} vs {expected}"
925        );
926        let selected_transposed = df.select_transposed_typed::<i32>(&["key1".into()]);
927        assert_eq!(selected_transposed.len(), 2);
928        println!("{:?}", selected_transposed);
929        assert_eq!(selected_transposed, vec![vec![1], vec![2]]);
930    }
931    #[rstest]
932    fn test_dataframe(dummy_candidates: ColumnFrame) {
933        let mut dataframe: DataFrame = DataFrame::default();
934        assert!(dataframe.is_empty());
935        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
936        assert_eq!(dataframe.len(), 2);
937
938        let candidate = hashmap! {
939            "key1".into() => 3.into(),
940            "key2".into() => "c".into(),
941        };
942
943        assert!(dataframe.push(candidate).is_ok());
944        assert_eq!(dataframe.len(), 3);
945        assert!(!dataframe.is_empty());
946
947        dataframe.insert_constant("key3".into(), 4.into());
948        assert_eq!(dataframe.constants.len(), 1);
949        assert!(dataframe
950            .apply_function(&["key1".into()], |keys, df| {
951                let key = keys[0].clone();
952                let s = df
953                    .get_single_column(&key)
954                    .expect("BUG: Cannot get column")
955                    .to_owned();
956                let s = s.mapv(|x| x + DataValue::from(1));
957                df.add_single_column("key5", s)?;
958                Ok(())
959            })
960            .is_ok());
961        let original = dataframe.clone();
962        dataframe.shrink();
963        let remove_df = dataframe.remove_column(&["key1".into()]);
964        assert!(remove_df.is_ok());
965        let mut remove_df = remove_df.unwrap();
966        assert_eq!(remove_df.len(), 3);
967        let selected = dataframe.select(Some(&["key2".into()]));
968        assert!(selected.is_ok());
969        let selected = selected.unwrap();
970        println!("{:?}", selected);
971
972        // fixme later
973        let joined_result =
974            remove_df.join(dataframe, &JoinRelation::new(crate::JoinBy::AddColumns));
975        assert!(joined_result.is_ok(), "{:?}", joined_result);
976        let keys = vec!["key1".into(), "key2".into(), "key5".into()];
977        assert_eq!(
978            original.select(Some(keys.as_slice())),
979            remove_df.select(Some(keys.as_slice()))
980        );
981    }
982
983    #[rstest]
984    fn test_size_methods() {
985        let candidate = hashmap! {
986            "key1".into() => 3.into(),
987            "key2".into() => "c".into(),
988            "key3".into() => false.into()
989        };
990
991        let dataframe: DataFrame = vec![candidate].into();
992
993        assert_eq!(dataframe.n_columns(), 3);
994        assert_eq!(dataframe.n_rows(), 1);
995    }
996
997    #[rstest]
998    fn test_metadata(dummy_candidates: ColumnFrame) {
999        let mut dataframe: DataFrame = DataFrame::default();
1000        assert!(dataframe.is_empty());
1001        println!("{:?}", dataframe);
1002        assert!(dataframe.extend(dummy_candidates.into()).is_ok());
1003        println!("{:?}", dataframe);
1004        assert_eq!(dataframe.len(), 2);
1005
1006        dataframe.add_metadata("test".into(), 1.into());
1007        assert_eq!(dataframe.get_metadata("test"), Some(&1.into()));
1008        let dataframe = DataFrame::new(ColumnFrame::from(vec![
1009            hashmap! {
1010                "key1".into() => 1.into(),
1011                "key2".into() => "a".into(),
1012            },
1013            hashmap! {
1014                "key1".into() => 2.into(),
1015                "key2".into() => "b".into(),
1016            },
1017        ]));
1018        assert_eq!(dataframe.get_metadata("test"), None);
1019        let tt = dataframe.select_transposed(None);
1020        assert!(tt.is_ok());
1021        let tt = tt.unwrap();
1022        assert_eq!(tt.shape(), [2, 2]);
1023        assert_eq!(
1024            tt,
1025            Array2::from_shape_vec((2, 2), vec![1.into(), 2.into(), "a".into(), "b".into()])
1026                .unwrap()
1027        );
1028    }
1029
1030    #[rstest]
1031    #[traced_test]
1032    fn add_single_column_test() {
1033        let mut dataframe = DataFrame::default();
1034        let values = Array1::from(vec![1.into(), 2.into(), 3.into()]);
1035        let r = dataframe.add_single_column("key1", values);
1036        assert!(r.is_ok(), "{r:?}");
1037        let selected = dataframe.select(None);
1038        assert!(selected.is_ok());
1039        let selected = selected.unwrap();
1040        assert_eq!(selected.shape(), [3, 1]);
1041        assert_eq!(
1042            selected,
1043            Array2::from_shape_vec((3, 1), vec![1.into(), 2.into(), 3.into()]).unwrap()
1044        );
1045        let values = Array1::from(vec![1.into(), 2.into()]);
1046        assert!(dataframe.add_single_column("key1", values).is_err());
1047        let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1048        assert!(dataframe.add_single_column("key2", values).is_ok());
1049        let values = Array1::from(vec![3.into()]);
1050        assert!(dataframe.add_single_column("key3", values).is_err());
1051    }
1052
1053    #[rstest]
1054    #[traced_test]
1055    fn add_single_column_empty_test() {
1056        let mut dataframe = DataFrame::default();
1057        let values = Array1::from(vec![]);
1058        let r = dataframe.add_single_column("key1", values);
1059        assert!(r.is_ok(), "{r:?}");
1060        let selected = dataframe.select(None);
1061        assert!(selected.is_ok());
1062        let selected = selected.unwrap();
1063        assert_eq!(selected.shape(), [0, 1]);
1064        assert_eq!(selected, Array2::from_shape_vec((0, 1), vec![]).unwrap());
1065        let values = Array1::from(vec![1.into(), 2.into()]);
1066        assert!(dataframe.add_single_column("key1", values).is_err());
1067        let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1068        assert!(dataframe.add_single_column("key2", values).is_ok());
1069        let values = Array1::from(vec![3.into(), 4.into()]);
1070        assert!(dataframe.add_single_column("key3", values).is_err());
1071        let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
1072        assert!(dataframe.add_single_column("key3", values).is_ok());
1073
1074        assert_eq!(
1075            dataframe
1076                .select_column("key1".into())
1077                .expect("BUG: has to exists"),
1078            ndarray::arr1(&[DataValue::Null, DataValue::Null, DataValue::Null]),
1079        );
1080        assert_eq!(
1081            dataframe
1082                .select_column("key2".into())
1083                .expect("BUG: has to exists"),
1084            ndarray::arr1(&[3.into(), 4.into(), 5.into()]),
1085        );
1086        assert_eq!(
1087            dataframe.select(None).expect("BUG: cannot get data"),
1088            ndarray::arr2(&[
1089                [DataValue::Null, 3.into(), 3.into()],
1090                [DataValue::Null, 4.into(), 4.into()],
1091                [DataValue::Null, 5.into(), 5.into()],
1092            ])
1093        );
1094    }
1095
1096    #[rstest]
1097    #[case(
1098        DataFrame::new(ColumnFrame::from(vec![
1099            hashmap! {
1100                "k".into() => 1.into(),
1101                "k2".into() => 2.into(),
1102                "k3".into() => 2.2.into(),
1103            },
1104            hashmap! {
1105                "k".into() => 11.into(),
1106                "k2".into() => 3.into(),
1107            },
1108            hashmap! {
1109                "k".into() => 4.into(),
1110                "k2".into() => 5.into(),
1111                "k3".into() => 2.3.into(),
1112            },
1113            hashmap! {
1114                "k".into() => 4.into(),
1115                "k2".into() => 5.into(),
1116                "k3".into() => 2.4.into(),
1117            },
1118        ])),
1119        vec!["k".into(), "k2".into()],
1120        Array2::from_shape_vec((4, 2), vec![1.into(), 2.into(), 11.into(), 3.into(), 4.into(), 5.into(), 4.into(), 5.into()]).unwrap()
1121    )]
1122    #[case(
1123        DataFrame::new(ColumnFrame::from(vec![
1124            hashmap! {
1125                "k".into() => 1.into(),
1126                "k2".into() => 2.into(),
1127                "k3".into() => 2.2.into(),
1128            },
1129            hashmap! {
1130                "k".into() => 11.into(),
1131                "k2".into() => 3.into(),
1132            },
1133            hashmap! {
1134                "k".into() => 4.into(),
1135                "k2".into() => 5.into(),
1136                "k3".into() => 2.3.into(),
1137            },
1138            hashmap! {
1139                "k".into() => 4.into(),
1140                "k2".into() => 5.into(),
1141                "k3".into() => 2.4.into(),
1142            },
1143        ])),
1144        vec!["k2".into(), "k3".into(), "nonexist1".into(), "nonexists2".into(), "k".into()],
1145        Array2::from_shape_vec((4, 5), vec![
1146            2.into(), 2.2.into(), DataValue::Null, DataValue::Null, 1.into(),
1147            3.into(), DataValue::Null, DataValue::Null, DataValue::Null, 11.into(),
1148            5.into(), 2.3.into(),  DataValue::Null, DataValue::Null, 4.into(),
1149            5.into(), 2.4.into(), DataValue::Null, DataValue::Null, 4.into()]).unwrap()
1150    )]
1151    #[traced_test]
1152    fn select_multiple(
1153        #[case] input: DataFrame,
1154        #[case] columns: Vec<Key>,
1155        #[case] expected: Array2<DataValue>,
1156    ) {
1157        let selected = input.select(Some(&columns));
1158        assert!(selected.is_ok());
1159        let selected = selected.unwrap();
1160
1161        assert_eq!(selected, expected);
1162    }
1163
1164    #[rstest]
1165    #[case(
1166        DataFrame::new(ColumnFrame::from(vec![
1167            hashmap! {
1168                "k".into() => 1.into(),
1169                "k2".into() => 2.into(),
1170                "k3".into() => 2.2.into(),
1171            },
1172            hashmap! {
1173                "k".into() => 11.into(),
1174                "k2".into() => 3.into(),
1175            },
1176            hashmap! {
1177                "k".into() => 4.into(),
1178                "k2".into() => 5.into(),
1179                "k3".into() => 2.3.into(),
1180            },
1181            hashmap! {
1182                "k".into() => 4.into(),
1183                "k2".into() => 5.into(),
1184                "k3".into() => 2.4.into(),
1185            },
1186        ])),
1187        "k".into(),
1188        Array2::from_shape_vec((4, 3), vec![
1189            1.into(), 2.into(), 2.2.into(),
1190            4.into(), 5.into(), 2.3.into(),
1191            4.into(), 5.into(), 2.4.into(),
1192            11.into(), 3.into(), DataValue::Null,
1193            ]
1194        ).unwrap(),
1195        vec!["k".into(), "k2".into(), "k3".into()],
1196    )]
1197    #[rstest]
1198    #[case(
1199        DataFrame::new(ColumnFrame::from(vec![
1200            hashmap! {
1201                "k".into() => 1.into(),
1202                "k2".into() => 2.into(),
1203                "k3".into() => 2.2.into(),
1204            },
1205            hashmap! {
1206                "k".into() => 11.into(),
1207                "k2".into() => 3.into(),
1208            },
1209            hashmap! {
1210                "k".into() => 4.into(),
1211                "k2".into() => 5.into(),
1212                "k3".into() => 2.3.into(),
1213            },
1214            hashmap! {
1215                "k".into() => 4.into(),
1216                "k2".into() => 5.into(),
1217                "k3".into() => 2.4.into(),
1218            },
1219        ])),
1220        "k3".into(),
1221        Array2::from_shape_vec((4, 3), vec![
1222            11.into(), 3.into(), DataValue::Null,
1223            1.into(), 2.into(), 2.2.into(),
1224            4.into(), 5.into(), 2.3.into(),
1225            4.into(), 5.into(), 2.4.into(),
1226            ]
1227        ).unwrap(),
1228        vec!["k".into(), "k2".into(), "k3".into()],
1229    )]
1230    #[case(
1231        DataFrame::new(ColumnFrame::from(vec![
1232            hashmap! {
1233                "k".into() => 2.into(),
1234                "k2".into() => 0.000001.into(),
1235            },
1236            hashmap! {
1237                "k".into() => 1.into(),
1238                "k2".into() =>0.0000001.into(),
1239            },
1240            hashmap! {
1241                "k".into() => 3.into(),
1242                "k2".into() => 0.00001.into(),
1243            },
1244            hashmap! {
1245                "k".into() => 4.into(),
1246                "k2".into() => 0.001.into(),
1247            },
1248        ])),
1249        "k2".into(),
1250        Array2::from_shape_vec((4, 2), vec![
1251            1.into(), 0.0000001.into(),
1252            2.into(), 0.000001.into(),
1253            3.into(), 0.00001.into(),
1254            4.into(), 0.001.into(),
1255            ]
1256        ).unwrap(),
1257        vec!["k".into(), "k2".into()],
1258    )]
1259    #[case(
1260        DataFrame::new(ColumnFrame::from(vec![
1261            hashmap! {
1262                "k".into() => 2.into(),
1263                "k2".into() => "b".into(),
1264            },
1265            hashmap! {
1266                "k".into() => 1.into(),
1267                "k2".into() =>"a".into(),
1268            },
1269            hashmap! {
1270                "k".into() => 3.into(),
1271                "k2".into() =>"c".into(),
1272            },
1273            hashmap! {
1274                "k".into() => 4.into(),
1275                "k2".into() =>"z".into(),
1276            },
1277        ])),
1278        "k2".into(),
1279        Array2::from_shape_vec((4, 2), vec![
1280            1.into(),"a".into(),
1281            2.into(), "b".into(),
1282            3.into(), "c".into(),
1283            4.into(), "z".into(),
1284            ]
1285        ).unwrap(),
1286        vec!["k".into(), "k2".into()],
1287    )]
1288    #[traced_test]
1289    fn sort_by(
1290        #[case] input: DataFrame,
1291        #[case] column: Key,
1292        #[case] expected: Array2<DataValue>,
1293        #[case] columns: Vec<Key>,
1294    ) {
1295        let result = input.sorted(&column);
1296        assert!(result.is_ok(), "{result:?}");
1297        let result = result.unwrap().get_sorted();
1298        let selected = result.select(Some(&columns));
1299
1300        assert_eq!(selected, expected);
1301    }
1302    #[rstest]
1303    #[case(
1304        DataFrame::new(ColumnFrame::from(vec![
1305            hashmap! {
1306                "k".into() => 2.into(),
1307                "k2".into() => 0.000001.into(),
1308            },
1309            hashmap! {
1310                "k".into() => 1.into(),
1311                "k2".into() =>0.0000001.into(),
1312            },
1313            hashmap! {
1314                "k".into() => 3.into(),
1315                "k2".into() => 0.00001.into(),
1316            },
1317            hashmap! {
1318                "k".into() => 4.into(),
1319                "k2".into() => 0.001.into(),
1320            },
1321        ])),
1322        "k2".into(),
1323        TopN::Last(1),
1324        Array2::from_shape_vec((1, 2), vec![
1325            4.into(), 0.001.into(),
1326            ]
1327        ).unwrap(),
1328        vec!["k".into(), "k2".into()],
1329    )]
1330    #[case(
1331        DataFrame::new(ColumnFrame::from(vec![
1332            hashmap! {
1333                "k".into() => 2.into(),
1334                "k2".into() => 0.000001.into(),
1335            },
1336            hashmap! {
1337                "k".into() => 1.into(),
1338                "k2".into() =>0.0000001.into(),
1339            },
1340            hashmap! {
1341                "k".into() => 3.into(),
1342                "k2".into() => 0.00001.into(),
1343            },
1344            hashmap! {
1345                "k".into() => 4.into(),
1346                "k2".into() => 0.001.into(),
1347            },
1348        ])),
1349        "k2".into(),
1350        TopN::Last(2),
1351        Array2::from_shape_vec((2, 2), vec![
1352            4.into(), 0.001.into(),
1353            3.into(), 0.00001.into(),
1354            ]
1355        ).unwrap(),
1356        vec!["k".into(), "k2".into()],
1357    )]
1358    #[case(
1359        DataFrame::new(ColumnFrame::from(vec![
1360            hashmap! {
1361                "k".into() => 2.into(),
1362                "k2".into() => "b".into(),
1363            },
1364            hashmap! {
1365                "k".into() => 1.into(),
1366                "k2".into() =>"a".into(),
1367            },
1368            hashmap! {
1369                "k".into() => 3.into(),
1370                "k2".into() =>"c".into(),
1371            },
1372            hashmap! {
1373                "k".into() => 4.into(),
1374                "k2".into() =>"z".into(),
1375            },
1376        ])),
1377        "k2".into(),
1378        TopN::First(1),
1379        Array2::from_shape_vec((1, 2), vec![
1380            1.into(),"a".into(),
1381            ]
1382        ).unwrap(),
1383        vec!["k".into(), "k2".into()],
1384    )]
1385    #[case(
1386        DataFrame::new(ColumnFrame::from(vec![
1387            hashmap! {
1388                "k".into() => 2.into(),
1389                "k2".into() => "b".into(),
1390            },
1391            hashmap! {
1392                "k".into() => 1.into(),
1393                "k2".into() =>"a".into(),
1394            },
1395            hashmap! {
1396                "k".into() => 3.into(),
1397                "k2".into() =>"c".into(),
1398            },
1399            hashmap! {
1400                "k".into() => 4.into(),
1401                "k2".into() =>"z".into(),
1402            },
1403        ])),
1404        "k2".into(),
1405        TopN::First(2),
1406        Array2::from_shape_vec((2, 2), vec![
1407            1.into(),"a".into(),
1408            2.into(),"b".into(),
1409            ]
1410        ).unwrap(),
1411        vec!["k".into(), "k2".into()],
1412    )]
1413    #[traced_test]
1414    fn top_n(
1415        #[case] input: DataFrame,
1416        #[case] column: Key,
1417        #[case] topn: TopN,
1418        #[case] expected: Array2<DataValue>,
1419        #[case] columns: Vec<Key>,
1420    ) {
1421        let result = input.sorted(&column);
1422        assert!(result.is_ok(), "{result:?}");
1423        let result = result.unwrap();
1424        let first = result.topn(topn).unwrap();
1425        let selected = first.select(Some(&columns));
1426        assert_eq!(selected, expected);
1427    }
1428
1429    #[rstest]
1430    fn test_messagepack_roundtrip_empty_dataframe() {
1431        let df = DataFrame::default();
1432
1433        let bytes = df
1434            .store_into_messagepack()
1435            .expect("failed to serialize empty df");
1436        let restored =
1437            DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize empty df");
1438        assert_eq!(df, restored);
1439        assert!(restored.is_empty());
1440    }
1441
1442    #[rstest]
1443    fn test_messagepack_roundtrip_strings_and_bools() {
1444        // Strings and bools are preserved exactly by messagepack
1445        let df = DataFrame::new(ColumnFrame::from(vec![
1446            hashmap! {
1447                "str".into() => DataValue::String("hello".into()),
1448                "bool".into() => DataValue::Bool(true),
1449            },
1450            hashmap! {
1451                "str".into() => DataValue::String("".into()),
1452                "bool".into() => DataValue::Bool(false),
1453            },
1454        ]));
1455
1456        let bytes = df.store_into_messagepack().expect("failed to serialize");
1457        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1458        assert_eq!(df, restored);
1459    }
1460
1461    #[rstest]
1462    fn test_messagepack_roundtrip_f64_values() {
1463        let df = DataFrame::new(ColumnFrame::from(vec![
1464            hashmap! {
1465                "a".into() => DataValue::F64(3.14),
1466            },
1467            hashmap! {
1468                "a".into() => DataValue::F64(-2.718),
1469            },
1470        ]));
1471
1472        let bytes = df.store_into_messagepack().expect("failed to serialize");
1473        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1474        assert_eq!(df, restored);
1475    }
1476
1477    #[rstest]
1478    fn test_messagepack_f64_special_values_survive_roundtrip() {
1479        // f64::INFINITY serializes/deserializes but PartialEq may differ due to
1480        // DataValue Eq semantics; verify at the value level
1481        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1482            "a".into() => DataValue::F64(f64::INFINITY),
1483        }]));
1484
1485        let bytes = df.store_into_messagepack().expect("failed to serialize");
1486        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1487        assert_eq!(restored.len(), 1);
1488        let col = restored.select_column("a".into()).expect("col exists");
1489        match &col[0] {
1490            DataValue::F64(v) => assert!(v.is_infinite() && v.is_sign_positive()),
1491            other => panic!("expected F64, got {other:?}"),
1492        }
1493    }
1494
1495    #[rstest]
1496    fn test_messagepack_roundtrip_with_nulls() {
1497        let df = DataFrame::new(ColumnFrame::from(vec![
1498            hashmap! {
1499                "a".into() => DataValue::String("x".into()),
1500                "b".into() => DataValue::String("y".into()),
1501            },
1502            hashmap! {
1503                "a".into() => DataValue::String("z".into()),
1504                // "b" missing => Null
1505            },
1506        ]));
1507
1508        let bytes = df.store_into_messagepack().expect("failed to serialize");
1509        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1510        assert_eq!(df, restored);
1511    }
1512
1513    #[rstest]
1514    fn test_messagepack_roundtrip_with_metadata() {
1515        let mut df = DataFrame::new(crate::column_frame! {
1516            "col" => ["a", "b"]
1517        });
1518        df.add_metadata("name".into(), DataValue::String("test_df".into()));
1519        df.add_metadata("flag".into(), DataValue::Bool(true));
1520
1521        let bytes = df.store_into_messagepack().expect("failed to serialize");
1522        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1523        assert_eq!(df, restored);
1524        assert_eq!(
1525            restored.get_metadata("name"),
1526            Some(&DataValue::String("test_df".into()))
1527        );
1528        assert_eq!(restored.get_metadata("flag"), Some(&DataValue::Bool(true)));
1529    }
1530
1531    #[rstest]
1532    fn test_messagepack_roundtrip_with_constants() {
1533        let mut df = DataFrame::new(crate::column_frame! {
1534            "x" => ["a", "b"]
1535        });
1536        df.insert_constant("const_key".into(), DataValue::String("const_val".into()));
1537        df.insert_constant("const_flag".into(), DataValue::Bool(false));
1538
1539        let bytes = df.store_into_messagepack().expect("failed to serialize");
1540        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1541        assert_eq!(df, restored);
1542        assert_eq!(
1543            restored.constants.get(&"const_key".into()),
1544            Some(&DataValue::String("const_val".into()))
1545        );
1546    }
1547
1548    #[rstest]
1549    fn test_messagepack_integer_type_coercion() {
1550        // MessagePack uses compact integer encoding: small I64 values may
1551        // deserialize as U8/U32 etc. This test documents this lossy behavior.
1552        let df = crate::df! {
1553            "a" => [1i64, 2i64, 3i64]
1554        };
1555
1556        let bytes = df.store_into_messagepack().expect("failed to serialize");
1557        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1558
1559        // The row count is preserved even if integer types differ
1560        assert_eq!(restored.len(), 3);
1561
1562        // Values that fit in u8 get coerced to U8 by messagepack
1563        let col = restored
1564            .select_column("a".into())
1565            .expect("column should exist");
1566        // Values are semantically equivalent but may be different DataValue variants
1567        assert_ne!(
1568            col[0],
1569            DataValue::I64(1),
1570            "messagepack coerces small ints to compact types"
1571        );
1572    }
1573
1574    #[rstest]
1575    fn test_messagepack_large_i64_preserved() {
1576        // Values that exceed u32 range stay as large integer types
1577        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1578            "big".into() => DataValue::I64(i64::MIN),
1579        }]));
1580
1581        let bytes = df.store_into_messagepack().expect("failed to serialize");
1582        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1583        assert_eq!(df, restored);
1584    }
1585
1586    #[rstest]
1587    fn test_messagepack_load_invalid_bytes() {
1588        let result = DataFrame::load_from_messagepack(&[0xFF, 0xFE, 0xFD, 0x00]);
1589        assert!(result.is_err());
1590    }
1591
1592    #[rstest]
1593    fn test_messagepack_load_empty_bytes() {
1594        let result = DataFrame::load_from_messagepack(&[]);
1595        assert!(result.is_err());
1596    }
1597
1598    #[rstest]
1599    fn test_messagepack_load_truncated_bytes() {
1600        let df = DataFrame::new(ColumnFrame::from(vec![
1601            hashmap! {
1602                "a".into() => DataValue::String("hello world".into()),
1603                "b".into() => DataValue::Bool(true),
1604            },
1605            hashmap! {
1606                "a".into() => DataValue::String("test".into()),
1607                "b".into() => DataValue::Bool(false),
1608            },
1609        ]));
1610        let bytes = df.store_into_messagepack().expect("failed to serialize");
1611        // Truncate to half
1612        let truncated = &bytes[..bytes.len() / 2];
1613        let result = DataFrame::load_from_messagepack(truncated);
1614        assert!(result.is_err());
1615    }
1616
1617    #[rstest]
1618    fn test_messagepack_roundtrip_with_nested_vec_data() {
1619        let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
1620            "vec_col".into() => DataValue::Vec(vec![
1621                DataValue::String("a".into()),
1622                DataValue::String("b".into()),
1623            ]),
1624            "bytes_col".into() => DataValue::Bytes(vec![0, 1, 255]),
1625        }]));
1626
1627        let bytes = df.store_into_messagepack().expect("failed to serialize");
1628        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1629        assert_eq!(df, restored);
1630    }
1631
1632    #[rstest]
1633    fn test_messagepack_roundtrip_preserves_row_count() {
1634        let df = DataFrame::new(ColumnFrame::from(vec![
1635            hashmap! { "a".into() => DataValue::String("x".into()) },
1636            hashmap! { "a".into() => DataValue::String("y".into()) },
1637            hashmap! { "a".into() => DataValue::String("z".into()) },
1638        ]));
1639
1640        let bytes = df.store_into_messagepack().expect("failed to serialize");
1641        let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
1642        assert_eq!(restored.len(), 3);
1643        assert_eq!(restored.n_rows(), 3);
1644        assert_eq!(restored.n_columns(), 1);
1645    }
1646
1647    #[rstest]
1648    fn test_messagepack_idempotent_double_roundtrip() {
1649        // Use types that survive messagepack coercion (strings, bools, bytes)
1650        let mut df = DataFrame::new(ColumnFrame::from(vec![
1651            hashmap! {
1652                "a".into() => DataValue::String("hello".into()),
1653                "b".into() => DataValue::Bool(true),
1654            },
1655            hashmap! {
1656                "a".into() => DataValue::String("world".into()),
1657                "b".into() => DataValue::Bool(false),
1658            },
1659        ]));
1660        df.add_metadata("meta".into(), DataValue::Bool(true));
1661        df.insert_constant("c".into(), DataValue::String("const".into()));
1662
1663        let bytes1 = df.store_into_messagepack().expect("first serialize");
1664        let restored1 = DataFrame::load_from_messagepack(&bytes1).expect("first deserialize");
1665        let bytes2 = restored1
1666            .store_into_messagepack()
1667            .expect("second serialize");
1668        let restored2 = DataFrame::load_from_messagepack(&bytes2).expect("second deserialize");
1669
1670        assert_eq!(df, restored2);
1671        assert_eq!(bytes1, bytes2);
1672    }
1673
1674    #[rstest]
1675    fn test_messagepack_single_byte_payload() {
1676        // A single valid msgpack byte (e.g. fixint) should fail as incomplete DataFrame
1677        let result = DataFrame::load_from_messagepack(&[0x01]);
1678        assert!(result.is_err());
1679    }
1680
1681    // === hash_datavalue public API edge case tests ===
1682
1683    #[rstest]
1684    fn test_hash_datavalue_public_api_accessible() {
1685        // Verify the re-exported function works from the crate root
1686        let val = DataValue::I32(42);
1687        let h = crate::hash_datavalue(&val);
1688        // Deterministic
1689        assert_eq!(h, crate::hash_datavalue(&DataValue::I32(42)));
1690    }
1691
1692    #[rstest]
1693    fn test_hash_datavalue_vec_length_matters() {
1694        // [1] and [1, Null] should produce different hashes
1695        let short = DataValue::Vec(vec![DataValue::I32(1)]);
1696        let long = DataValue::Vec(vec![DataValue::I32(1), DataValue::Null]);
1697        assert_ne!(crate::hash_datavalue(&short), crate::hash_datavalue(&long));
1698    }
1699
1700    #[rstest]
1701    fn test_hash_datavalue_map_different_keys_same_values() {
1702        let mut m1 = std::collections::HashMap::new();
1703        m1.insert("a".into(), DataValue::I32(1));
1704        let mut m2 = std::collections::HashMap::new();
1705        m2.insert("b".into(), DataValue::I32(1));
1706
1707        assert_ne!(
1708            crate::hash_datavalue(&DataValue::Map(m1)),
1709            crate::hash_datavalue(&DataValue::Map(m2))
1710        );
1711    }
1712
1713    #[rstest]
1714    fn test_hash_datavalue_empty_string_vs_empty_bytes() {
1715        let empty_str = DataValue::String("".into());
1716        let empty_bytes = DataValue::Bytes(vec![]);
1717        assert_ne!(
1718            crate::hash_datavalue(&empty_str),
1719            crate::hash_datavalue(&empty_bytes)
1720        );
1721    }
1722
1723    #[rstest]
1724    fn test_hash_datavalue_empty_vec_vs_empty_map() {
1725        let empty_vec = DataValue::Vec(vec![]);
1726        let empty_map = DataValue::Map(std::collections::HashMap::new());
1727        assert_ne!(
1728            crate::hash_datavalue(&empty_vec),
1729            crate::hash_datavalue(&empty_map)
1730        );
1731    }
1732
1733    #[rstest]
1734    fn test_hash_datavalue_i128_boundary_values() {
1735        let max = DataValue::I128(i128::MAX);
1736        let min = DataValue::I128(i128::MIN);
1737        let zero = DataValue::I128(0);
1738        let neg_one = DataValue::I128(-1);
1739
1740        // All distinct
1741        let hashes: std::collections::HashSet<u64> = [&max, &min, &zero, &neg_one]
1742            .iter()
1743            .map(|v| crate::hash_datavalue(v))
1744            .collect();
1745        assert_eq!(hashes.len(), 4);
1746    }
1747
1748    #[rstest]
1749    fn test_hash_datavalue_u128_boundary_values() {
1750        let max = DataValue::U128(u128::MAX);
1751        let zero = DataValue::U128(0);
1752        let one = DataValue::U128(1);
1753        // u128::MAX is all bits set; ensure it differs from i128(-1) which is also all bits
1754        let i128_neg1 = DataValue::I128(-1);
1755
1756        assert_ne!(
1757            crate::hash_datavalue(&max),
1758            crate::hash_datavalue(&i128_neg1)
1759        );
1760        let hashes: std::collections::HashSet<u64> = [&max, &zero, &one]
1761            .iter()
1762            .map(|v| crate::hash_datavalue(v))
1763            .collect();
1764        assert_eq!(hashes.len(), 3);
1765    }
1766
1767    #[rstest]
1768    fn test_hash_datavalue_f64_special_values() {
1769        // NaN bit patterns: NaN == NaN for hashing since we use to_bits()
1770        let nan1 = DataValue::F64(f64::NAN);
1771        let nan2 = DataValue::F64(f64::NAN);
1772        assert_eq!(crate::hash_datavalue(&nan1), crate::hash_datavalue(&nan2));
1773
1774        // subnormal
1775        let subnormal = DataValue::F64(f64::MIN_POSITIVE / 2.0);
1776        let normal = DataValue::F64(f64::MIN_POSITIVE);
1777        assert_ne!(
1778            crate::hash_datavalue(&subnormal),
1779            crate::hash_datavalue(&normal)
1780        );
1781    }
1782
1783    #[rstest]
1784    fn test_hash_datavalue_enum_number_vs_i32_same_value() {
1785        // EnumNumber(42) and I32(42) should hash differently (different discriminant)
1786        let enum_val = DataValue::EnumNumber(42);
1787        let i32_val = DataValue::I32(42);
1788        assert_ne!(
1789            crate::hash_datavalue(&enum_val),
1790            crate::hash_datavalue(&i32_val)
1791        );
1792    }
1793
1794    #[rstest]
1795    fn get_single_column_typed_f64_from_i32() {
1796        let df = crate::df! {
1797            "a" => [1i32, 2i32, 3i32]
1798        };
1799        let key: Key = "a".into();
1800        let col = df.get_single_column_typed::<f64>(&key).unwrap();
1801        assert_eq!(col, ndarray::arr1(&[1.0f64, 2.0, 3.0]));
1802    }
1803
1804    #[rstest]
1805    fn get_single_column_typed_string() {
1806        let df = crate::df! {
1807            "name" => ["alice", "bob"]
1808        };
1809        let key: Key = "name".into();
1810        let col = df.get_single_column_typed::<String>(&key).unwrap();
1811        assert_eq!(
1812            col,
1813            ndarray::arr1(&["alice".to_string(), "bob".to_string()])
1814        );
1815    }
1816
1817    #[rstest]
1818    fn get_single_column_typed_missing_key() {
1819        let df = crate::df! {
1820            "a" => [1u64, 2u64]
1821        };
1822        let missing: Key = "z".into();
1823        assert!(df.get_single_column_typed::<u64>(&missing).is_none());
1824    }
1825
1826    #[rstest]
1827    fn get_single_column_typed_matches_untyped() {
1828        let df = crate::df! {
1829            "v" => [10u64, 20u64, 30u64]
1830        };
1831        let key: Key = "v".into();
1832        let typed = df.get_single_column_typed::<u64>(&key).unwrap();
1833        let untyped = df.get_single_column(&key).unwrap();
1834        for (t, u) in typed.iter().zip(untyped.iter()) {
1835            assert_eq!(*t, u64::extract(u));
1836        }
1837    }
1838
1839    #[rstest]
1840    fn get_single_column_typed_bool_from_i32() {
1841        let df = crate::df! {
1842            "flag" => [1i32, 0i32, 1i32, 0i32]
1843        };
1844        let key: Key = "flag".into();
1845        let col = df.get_single_column_typed::<bool>(&key).unwrap();
1846        assert_eq!(col, ndarray::arr1(&[true, false, true, false]));
1847    }
1848
1849    #[rstest]
1850    fn get_single_column_typed_i64_from_u32() {
1851        let df = crate::df! {
1852            "x" => [10u32, 20u32, 30u32]
1853        };
1854        let key: Key = "x".into();
1855        let col = df.get_single_column_typed::<i64>(&key).unwrap();
1856        assert_eq!(col, ndarray::arr1(&[10i64, 20i64, 30i64]));
1857    }
1858
1859    #[rstest]
1860    fn get_single_column_typed_f64_truncation_to_i32() {
1861        let df = crate::df! {
1862            "v" => [1.9f64, 2.1f64, 3.7f64]
1863        };
1864        let key: Key = "v".into();
1865        let col = df.get_single_column_typed::<i32>(&key).unwrap();
1866        assert_eq!(col, ndarray::arr1(&[1i32, 2i32, 3i32]));
1867    }
1868
1869    #[rstest]
1870    fn get_single_column_typed_single_element() {
1871        let df = crate::df! {
1872            "solo" => [42u64]
1873        };
1874        let key: Key = "solo".into();
1875        let col = df.get_single_column_typed::<f64>(&key).unwrap();
1876        assert_eq!(col.len(), 1);
1877        assert_eq!(col[0], 42.0);
1878    }
1879
1880    #[rstest]
1881    fn select_typed_all_columns() {
1882        let df = crate::df! {
1883            "a" => [1i32, 2i32, 3i32],
1884            "b" => [4i32, 5i32, 6i32]
1885        };
1886        let result = df.select_typed::<f64>(None).unwrap();
1887        assert_eq!(result.nrows(), 3);
1888        assert_eq!(result.ncols(), 2);
1889        assert_eq!(result[[0, 0]], 1.0);
1890        assert_eq!(result[[0, 1]], 4.0);
1891        assert_eq!(result[[2, 0]], 3.0);
1892        assert_eq!(result[[2, 1]], 6.0);
1893    }
1894
1895    #[rstest]
1896    fn select_typed_specific_keys() {
1897        let df = crate::df! {
1898            "x" => [10u64, 20u64],
1899            "y" => [30u64, 40u64],
1900            "z" => [50u64, 60u64]
1901        };
1902        let keys: Vec<Key> = vec!["x".into(), "z".into()];
1903        let result = df.select_typed::<i64>(Some(&keys)).unwrap();
1904        assert_eq!(result.nrows(), 2);
1905        assert_eq!(result.ncols(), 2);
1906        assert_eq!(result[[0, 0]], 10i64);
1907        assert_eq!(result[[0, 1]], 50i64);
1908        assert_eq!(result[[1, 0]], 20i64);
1909        assert_eq!(result[[1, 1]], 60i64);
1910    }
1911
1912    #[rstest]
1913    fn select_typed_nonexistent_key_gives_empty() {
1914        let df = crate::df! {
1915            "a" => [1i32, 2i32]
1916        };
1917        let keys: Vec<Key> = vec!["missing".into()];
1918        let result = df.select_typed::<f64>(Some(&keys)).unwrap();
1919        assert_eq!(result.shape(), &[0, 0]);
1920    }
1921
1922    #[rstest]
1923    fn select_typed_matches_select_with_extract() {
1924        let df = crate::df! {
1925            "a" => [1u64, 2u64, 3u64],
1926            "b" => [4u64, 5u64, 6u64]
1927        };
1928        let typed = df.select_typed::<f64>(None).unwrap();
1929        let manual = df.select(None).unwrap().mapv(|v| f64::extract(&v));
1930        assert_eq!(typed, manual);
1931    }
1932
1933    #[rstest]
1934    fn select_typed_string_values() {
1935        let df = crate::df! {
1936            "name" => ["alice", "bob", "carol"]
1937        };
1938        let result = df.select_typed::<String>(None).unwrap();
1939        assert_eq!(result[[0, 0]], "alice");
1940        assert_eq!(result[[1, 0]], "bob");
1941        assert_eq!(result[[2, 0]], "carol");
1942    }
1943
1944    #[rstest]
1945    fn select_typed_cross_numeric_coercion() {
1946        // i32 values extracted as u64
1947        let df = crate::df! {
1948            "a" => [1i32, 2i32, 3i32]
1949        };
1950        let result = df.select_typed::<u64>(None).unwrap();
1951        assert_eq!(result[[0, 0]], 1u64);
1952        assert_eq!(result[[1, 0]], 2u64);
1953        assert_eq!(result[[2, 0]], 3u64);
1954    }
1955}