1use crate::{standardize_collection, CollectionData, LblError};
2use polars::prelude::*;
3use std::collections::HashMap;
4use std::fs::File;
5use std::path::{Path, PathBuf};
6
7pub fn load_file(path: PathBuf, metadata: Option<&CollectionData>) -> Result<DataFrame, LblError> {
9 let metadata_owned;
10 let metadata_ref = match metadata {
11 Some(metadata) => metadata,
12 None => {
13 metadata_owned = parse_path_metadata(&path);
14 &metadata_owned
15 }
16 };
17 standardize_collection(load_raw_data(path.clone())?, metadata_ref)
18}
19
20pub fn parse_path_metadata(path: &Path) -> CollectionData {
22 let mut data = CollectionData::default();
23 if let Some(collection_dir) = path.parent() {
24 if let Some(collection_name) = collection_dir.file_name() {
25 data.collection = Some(collection_name.to_string_lossy().into_owned());
26 }
27
28 if let Some(network_dir) = collection_dir.parent() {
29 if let Some(network_name) = network_dir.file_name() {
30 data.network = Some(network_name.to_string_lossy().into_owned());
31 }
32 }
33 }
34
35 data
36}
37
/// Conversion of a (possibly nested) collection of paths into one flat list.
pub trait FlattenPathBufVec {
    /// Returns every path held by `self` as a single owned `Vec<PathBuf>`.
    fn flatten_into_pathbuf(&self) -> Vec<PathBuf>;
}
43
44impl FlattenPathBufVec for Vec<PathBuf> {
45 fn flatten_into_pathbuf(&self) -> Vec<PathBuf> {
46 self.clone()
47 }
48}
49
50impl FlattenPathBufVec for HashMap<String, Vec<PathBuf>> {
51 fn flatten_into_pathbuf(&self) -> Vec<PathBuf> {
52 self.iter()
53 .flat_map(|(_key, value)| value.clone())
54 .collect()
55 }
56}
57
58impl FlattenPathBufVec for HashMap<String, HashMap<String, Vec<PathBuf>>> {
59 fn flatten_into_pathbuf(&self) -> Vec<PathBuf> {
60 self.iter()
61 .flat_map(|(_outer_key, inner_map)| {
62 inner_map
63 .iter()
64 .flat_map(|(_inner_key, value)| value.clone())
65 })
66 .collect()
67 }
68}
69
70pub fn load_files<T: FlattenPathBufVec>(
72 paths: &T,
73 metadata: Option<&CollectionData>,
74) -> Result<DataFrame, LblError> {
75 let paths = paths.flatten_into_pathbuf();
76 let dfs: Result<Vec<DataFrame>, LblError> =
77 paths.into_iter().map(|p| load_file(p, metadata)).collect();
78 let mut dfs = dfs?;
79
80 let mut final_df = dfs.remove(0);
81 for df in dfs {
82 final_df = final_df.vstack(&df)?;
83 }
84 Ok(final_df)
85}
86
87pub fn load_raw_data(path: PathBuf) -> Result<DataFrame, LblError> {
89 let extension = path
90 .extension()
91 .and_then(std::ffi::OsStr::to_str)
92 .unwrap_or("");
93 match extension {
94 "csv" => {
95 let file = File::open(path)?;
96 let df = CsvReader::new(file).finish()?;
97 Ok(df)
98 }
99 "parquet" => {
100 let file = File::open(path)?;
101 let df = ParquetReader::new(file).finish()?;
102 Ok(df)
103 }
104 _ => Err(LblError::LblError(format!(
105 "Unsupported file type: {}",
106 extension
107 ))),
108 }
109}