plotlars_core/io/
parquet.rs1use std::fs::File;
2use std::path::{Path, PathBuf};
3
4use polars::frame::DataFrame;
5use polars::prelude::ParquetReader as PlParquetReader;
6use polars::prelude::*;
7
8use super::PlotlarsError;
9
/// Builder that reads a Parquet file from disk into a Polars `DataFrame`.
///
/// Configure the read with the chainable setters and then call `finish` to
/// perform it. Constructing or configuring the builder only stores values;
/// the file is not opened until `finish` runs.
#[derive(Clone, Debug)]
pub struct ParquetReader {
    /// Location of the Parquet file to read.
    path: PathBuf,
    /// Names of the columns to read; `None` means no column selection is applied.
    columns: Option<Vec<String>>,
    /// Maximum number of rows to read, counted from the first row; `None`
    /// means no row limit is applied.
    n_rows: Option<usize>,
}
29
30impl ParquetReader {
31 pub fn new(path: impl AsRef<Path>) -> Self {
33 Self {
34 path: path.as_ref().to_path_buf(),
35 columns: None,
36 n_rows: None,
37 }
38 }
39
40 pub fn columns(mut self, columns: Vec<&str>) -> Self {
42 self.columns = Some(columns.into_iter().map(|s| s.to_string()).collect());
43 self
44 }
45
46 pub fn n_rows(mut self, n_rows: usize) -> Self {
48 self.n_rows = Some(n_rows);
49 self
50 }
51
52 pub fn finish(self) -> Result<DataFrame, PlotlarsError> {
58 let path_str = self.path.display().to_string();
59
60 let file = File::open(&self.path).map_err(|e| PlotlarsError::ParquetParse {
61 path: path_str.clone(),
62 source: Box::new(e),
63 })?;
64
65 let mut reader = PlParquetReader::new(file);
66
67 if let Some(n) = self.n_rows {
68 reader = reader.with_slice(Some((0, n)));
69 }
70
71 if let Some(cols) = self.columns {
72 reader = reader.with_columns(Some(cols));
73 }
74
75 reader.finish().map_err(|e| PlotlarsError::ParquetParse {
76 path: path_str,
77 source: Box::new(e),
78 })
79 }
80}
81
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes a fixture with columns `a` (three integers) and `b` (three
    /// strings) and returns its path.
    ///
    /// The file is created under `../../target` relative to the crate's
    /// manifest directory; `name` keeps the fixtures of different tests apart.
    /// Panics if the frame cannot be built or the file cannot be written.
    fn create_test_parquet(name: &str) -> PathBuf {
        let relative = format!("../../target/plotlars_test_{name}.parquet");
        let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative);

        let mut frame = df!(
            "a" => [1, 2, 3],
            "b" => ["x", "y", "z"]
        )
        .unwrap();

        let sink = std::fs::File::create(&path).unwrap();
        ParquetWriter::new(sink).finish(&mut frame).unwrap();
        path
    }

    /// With no options set, the whole fixture is read.
    #[test]
    fn read_parquet_default() {
        let fixture = create_test_parquet("default");
        let frame = ParquetReader::new(&fixture).finish().unwrap();
        assert_eq!(frame.height(), 3);
        assert_eq!(frame.width(), 2);
    }

    /// Selecting one column yields a single-column frame.
    #[test]
    fn read_parquet_select_columns() {
        let fixture = create_test_parquet("select_columns");
        let reader = ParquetReader::new(&fixture).columns(vec!["a"]);
        let frame = reader.finish().unwrap();
        assert_eq!(frame.width(), 1);
    }

    /// A row limit of two yields a two-row frame.
    #[test]
    fn read_parquet_n_rows() {
        let fixture = create_test_parquet("n_rows");
        let reader = ParquetReader::new(&fixture).n_rows(2);
        let frame = reader.finish().unwrap();
        assert_eq!(frame.height(), 2);
    }

    /// Reading a path that does not exist reports an error instead of panicking.
    #[test]
    fn read_parquet_file_not_found() {
        let outcome = ParquetReader::new("nonexistent.parquet").finish();
        assert!(outcome.is_err());
    }
}