Module peroxide::structure::dataframe

Pandas-like dataframe & series.

Series

1. Declare Series

  • To declare a Series, you need a Vec<T> where T is one of the following types.
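| Primitive type | DType |
|----------------|-------|
| usize          | USIZE |
| u8             | U8    |
| u16            | U16   |
| u32            | U32   |
| u64            | U64   |
| isize          | ISIZE |
| i8             | I8    |
| i16            | I16   |
| i32            | I32   |
| i64            | I64   |
| f32            | F32   |
| f64            | F64   |
| bool           | Bool  |
| char           | Char  |
| String         | Str   |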
  • Once you have a Vec<T>, create the Series with Series::new(Vec<T>).

2. Methods for Series

  • TypedVector<T> trait for Series

    pub trait TypedVector<T> {
        fn new(v: Vec<T>) -> Self;
        fn to_vec(&self) -> Vec<T>;
        fn as_slice(&self) -> &[T];
        fn as_slice_mut(&mut self) -> &mut [T];
        fn at_raw(&self, i: usize) -> T;
        fn push(&mut self, elem: T);
    }
  • Series methods

    impl Series {
        pub fn at(&self, i: usize) -> Scalar;
        pub fn len(&self) -> usize;
        pub fn to_type(&self, dtype: DType) -> Series;
        pub fn as_type(&mut self, dtype: DType);
    }
    • at is a simple getter for Series. It returns a Scalar.
    • as_type is a method for in-place (mutable) type casting.
      • All types can be changed to Str.
      • All integer & float types can be converted to one another.
      • Bool, Char can be changed to Str or U8 only.
      • U8 can be changed to all types.

3. Example

extern crate peroxide;
use peroxide::fuga::*;

fn main() {
    let a = Series::new(vec![1, 2, 3, 4]);
    let b = Series::new(vec!['a', 'b', 'c', 'd']);
    let mut c = Series::new(vec![true, false, false, true]);

    a.print();       // print for Series
    b.dtype.print(); // print for dtype of Series (=Char)
    c.as_type(U8);   // Bool => U8 
     
    assert_eq!(c.dtype, U8);
}

DataFrame

1. Declare DataFrame

  • To declare a DataFrame, use the constructor.
    • DataFrame::new(Vec<Series>)
extern crate peroxide;
use peroxide::fuga::*;

fn main() {
    // 1-1. Empty DataFrame
    let mut df = DataFrame::new(vec![]);

    // 1-2. Push Series
    df.push("a", Series::new(vec![1, 2, 3, 4]));
    df.push("b", Series::new(vec![0.1, 0.2, 0.3, 0.4]));
    df.push("c", Series::new(vec!['a', 'b', 'c', 'd']));

    // 1-3. Print
    df.print();

    // 2-1. Construct Series first
    let a = Series::new(vec![1, 2, 3, 4]);
    let b = Series::new(vec![0.1, 0.2, 0.3, 0.4]);
    let c = Series::new(vec!['a', 'b', 'c', 'd']);

    // 2-2. Declare DataFrame with existing Series
    let mut dg = DataFrame::new(vec![a, b, c]);

    // 2-3. Print or Set header
    dg.print();                         // But header: 0 1 2
    dg.set_header(vec!["a", "b", "c"]); // Change header
}

2. Methods for DataFrame

  • DataFrame methods

    impl DataFrame {
        pub fn new(v: Vec<Series>) -> Self;
        pub fn header(&self) -> &Vec<String>;
        pub fn header_mut(&mut self) -> &mut Vec<String>;
        pub fn set_header(&mut self, new_header: Vec<&str>);
        pub fn push(&mut self, name: &str, series: Series);
        pub fn drop(&mut self, col_header: &str);
        pub fn row(&self, i: usize) -> DataFrame;
        pub fn spread(&self) -> String;
        pub fn as_types(&mut self, dtypes: Vec<DType>);
    }
    • push(&mut self, name: &str, series: Series): push a header & Series pair
    • drop(&mut self, col_header: &str): drop a specific column by its header
    • row(&self, i: usize) -> DataFrame: extract the $i$-th row as a new DataFrame
  • WithCSV trait

    pub trait WithCSV: Sized {
        fn write_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
        fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>>;
    }
    // Example for CSV
    #[macro_use]
    extern crate peroxide;
    use peroxide::fuga::*;
    
    fn main() -> Result<(), Box<dyn Error>> {
        // Write CSV
        let mut df = DataFrame::new(vec![]);
        df.push("a", Series::new(vec!['x', 'y', 'z']));
        df.push("b", Series::new(vec![0, 1, 2]));
        df.push("c", Series::new(c!(0.1, 0.2, 0.3)));
        df.write_csv("example_data/doc_csv.csv")?;
    
        // Read CSV
        let mut dg = DataFrame::read_csv("example_data/doc_csv.csv", ',')?;
        dg.as_types(vec![Char, I32, F64]);
    
        assert_eq!(df, dg);
    
        Ok(())
    }
  • WithNetCDF trait

    pub trait WithNetCDF: Sized {
        fn write_nc(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
        fn read_nc(file_path: &str) -> Result<Self, Box<dyn Error>>;
        fn read_nc_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>>;
    }
    • The nc feature is required.
    • The libnetcdf library is required as a dependency.
    • Char and Bool are saved as U8 type. Thus, when reading an nc file that contains Char or Bool data, explicit type casting is required.
    #[macro_use]
    extern crate peroxide;
    use peroxide::fuga::*;
    
    fn main() -> Result<(), Box<dyn Error>> {
        // Write netcdf
        let mut df = DataFrame::new(vec![]);
        df.push("a", Series::new(vec!['x', 'y', 'z']));
        df.push("b", Series::new(vec![0, 1, 2]));
        df.push("c", Series::new(c!(0.1, 0.2, 0.3)));
        df.write_nc("example_data/doc_nc.nc")?;
    
        // Read netcdf
        let mut dg = DataFrame::read_nc("example_data/doc_nc.nc")?;
        dg[0].as_type(Char); // Char, Bool are only read/written as U8 type
    
        assert_eq!(df, dg);
    
        Ok(())
    }

Structs

DataFrame

Generic DataFrame structure

Scalar

Generic Scalar

Series

Generic Series

Enums

DType

Data Type enum

DTypeArray

Vector with DType

DTypeValue

Scalar with DType

Traits

TypedScalar
TypedVector
WithCSV

To handle CSV file format