Skip to main content

trs_dataframe/dataframe/column_store/
sorted_df.rs

1use data_value::DataValue;
2use serde::Serialize;
3
4use crate::{dataframe::TopN, error::Error};
5
6use super::{typed_array::TypedDataArray, ColumnFrame};
7
8/// A sorted view over a [`ColumnFrame`], holding row indices in sort order.
9#[derive(Debug, Clone, Serialize, PartialEq)]
10pub struct SortedDataFrame<'a> {
11    /// Row indices in sorted order.
12    pub row_indicies: Vec<usize>,
13    /// The underlying frame being viewed.
14    pub df: &'a ColumnFrame,
15}
16
17impl<'a> SortedDataFrame<'a> {
18    /// Wraps `df` with pre-computed sorted `row_indicies`.
19    pub fn new(df: &'a ColumnFrame, row_indicies: Vec<usize>) -> Self {
20        Self { row_indicies, df }
21    }
22
23    fn gather(&self, indices: &[usize]) -> Vec<TypedDataArray> {
24        self.df
25            .data_frame
26            .iter()
27            .map(|col| {
28                let values: Vec<DataValue> =
29                    indices.iter().map(|&idx| col.get_or_null(idx)).collect();
30                TypedDataArray::new(col.data_type(), values)
31            })
32            .collect()
33    }
34
35    /// Materializes the sorted view into a new [`ColumnFrame`].
36    pub fn get_sorted(&self) -> ColumnFrame {
37        let new_data = self.gather(&self.row_indicies);
38        ColumnFrame::new(self.df.index.clone(), new_data)
39    }
40
41    /// Returns a [`ColumnFrame`] containing only the first or last `n` rows
42    /// according to the sort order.
43    pub fn topn(&self, topn: TopN) -> Result<ColumnFrame, Error> {
44        let nrows = self.df.nrows();
45        if nrows == 0 {
46            return Ok(ColumnFrame::new(
47                self.df.index.clone(),
48                Vec::<TypedDataArray>::new(),
49            ));
50        }
51
52        let indices: Vec<usize> = match topn {
53            TopN::First(n) => self.row_indicies.iter().take(n).copied().collect(),
54            TopN::Last(n) => self.row_indicies.iter().rev().take(n).copied().collect(),
55        };
56
57        let new_data = self.gather(&indices);
58        Ok(ColumnFrame::new(self.df.index.clone(), new_data))
59    }
60}