trs_dataframe/dataframe/column_store/
sorted_df.rs

1use ndarray::{s, Array2};
2use serde::Serialize;
3
4use crate::{dataframe::TopN, error::Error};
5
6use super::ColumnFrame;
7
8#[derive(Debug, Clone, Serialize, PartialEq)]
9pub struct SortedDataFrame<'a> {
10    pub row_indicies: Vec<usize>,
11    pub df: &'a ColumnFrame,
12}
13
14impl<'a> SortedDataFrame<'a> {
15    pub fn new(df: &'a ColumnFrame, row_indicies: Vec<usize>) -> Self {
16        Self { row_indicies, df }
17    }
18
19    pub fn get_sorted(&self) -> ColumnFrame {
20        let mut df = self.df.data_frame.clone(); //Array2::default((df.nrows(), df.ncols()));
21
22        self.row_indicies
23            .iter()
24            .enumerate()
25            .for_each(|(cur_idx, row_idx)| {
26                df.slice_mut(s![cur_idx, ..])
27                    .assign(&self.df.data_frame.slice(s![*row_idx, ..]));
28            });
29
30        ColumnFrame::new(self.df.index.clone(), df)
31    }
32
33    pub fn topn(&self, topn: TopN) -> Result<ColumnFrame, Error> {
34        let nrows = self.df.len();
35        let ncols = self.df.index.len();
36        if nrows == 0 {
37            return Ok(ColumnFrame::new(self.df.index.clone(), Default::default()));
38        }
39        // todo this is not efficient but for now it is ok
40        let arr = match topn {
41            TopN::First(topn) => {
42                let mut arr = Array2::default((topn, ncols));
43                let mut idx = 0;
44                for row_idx in self.row_indicies.iter().take(topn) {
45                    arr.row_mut(idx)
46                        .assign(&self.df.data_frame.slice(s![*row_idx, ..]));
47                    idx += 1;
48                    if idx == topn {
49                        break;
50                    }
51                }
52                arr
53            }
54            TopN::Last(topn) => {
55                let mut arr = Array2::default((topn, ncols));
56                let mut idx = 0;
57                for row_idx in self.row_indicies.iter().rev().take(topn) {
58                    arr.row_mut(idx)
59                        .assign(&self.df.data_frame.slice(s![*row_idx, ..]));
60                    idx += 1;
61                    if idx == topn {
62                        break;
63                    }
64                }
65                arr
66            }
67        };
68
69        Ok(ColumnFrame::new(self.df.index.clone(), arr))
70    }
71}