// trs-dataframe 0.11.0
//
// Dataframe library for Teiresias
// Documentation
use data_value::DataValue;
use serde::Serialize;

use crate::{dataframe::TopN, error::Error};

use super::{typed_array::TypedDataArray, ColumnFrame};

/// A sorted view over a [`ColumnFrame`], holding row indices in sort order.
///
/// The view only borrows the frame (for the lifetime `'a`) and keeps the
/// ordering as a separate list of row indices, so the frame itself is not
/// reordered by constructing this type.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct SortedDataFrame<'a> {
    /// Row indices in sorted order.
    ///
    /// Each entry is used as a row position into the columns of `df`. The
    /// spelling `row_indicies` is part of the public field name.
    pub row_indicies: Vec<usize>,
    /// The underlying frame being viewed.
    pub df: &'a ColumnFrame,
}

impl<'a> SortedDataFrame<'a> {
    /// Wraps `df` with pre-computed sorted `row_indicies`.
    pub fn new(df: &'a ColumnFrame, row_indicies: Vec<usize>) -> Self {
        Self { row_indicies, df }
    }

    fn gather(&self, indices: &[usize]) -> Vec<TypedDataArray> {
        self.df
            .data_frame
            .iter()
            .map(|col| {
                let values: Vec<DataValue> =
                    indices.iter().map(|&idx| col.get_or_null(idx)).collect();
                TypedDataArray::new(col.data_type(), values)
            })
            .collect()
    }

    /// Materializes the sorted view into a new [`ColumnFrame`].
    pub fn get_sorted(&self) -> ColumnFrame {
        let new_data = self.gather(&self.row_indicies);
        ColumnFrame::new(self.df.index.clone(), new_data)
    }

    /// Returns a [`ColumnFrame`] containing only the first or last `n` rows
    /// according to the sort order.
    pub fn topn(&self, topn: TopN) -> Result<ColumnFrame, Error> {
        let nrows = self.df.nrows();
        if nrows == 0 {
            return Ok(ColumnFrame::new(
                self.df.index.clone(),
                Vec::<TypedDataArray>::new(),
            ));
        }

        let indices: Vec<usize> = match topn {
            TopN::First(n) => self.row_indicies.iter().take(n).copied().collect(),
            TopN::Last(n) => self.row_indicies.iter().rev().take(n).copied().collect(),
        };

        let new_data = self.gather(&indices);
        Ok(ColumnFrame::new(self.df.index.clone(), new_data))
    }
}