use polars::prelude::{DataFrame, CsvReader, JsonReader, CsvWriter, JsonWriter, SerReader, SerWriter};
use polars::io::csv::read::CsvReadOptions;
use std::path::{Path, PathBuf};
use anyhow::{Result, anyhow};
use tokio::fs;
/// Data file formats recognised by this module.
///
/// The enum is a plain tag with no payload, so it is `Copy` and can be
/// passed around and compared freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileFormat {
    /// Comma-separated values.
    Csv,
    /// JSON.
    Json,
    /// Anything that is neither of the formats above.
    Unsupported,
}
/// Determines the [`FileFormat`] of `path` from its file extension.
///
/// The extension is compared ASCII case-insensitively, so `data.csv`,
/// `data.CSV` and `data.Csv` are all recognised as CSV.
///
/// Returns [`FileFormat::Unsupported`] when the path has no extension, when
/// the extension is not valid UTF-8, or when it is neither `csv` nor `json`.
pub fn detect_file_format(path: &Path) -> FileFormat {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) if ext.eq_ignore_ascii_case("csv") => FileFormat::Csv,
        Some(ext) if ext.eq_ignore_ascii_case("json") => FileFormat::Json,
        _ => FileFormat::Unsupported,
    }
}
pub async fn load_data(path: PathBuf, format: FileFormat) -> Result<DataFrame> {
match format {
FileFormat::Csv => {
let file = fs::File::open(path).await?;
CsvReader::new(file.into_std().await)
.with_options(
CsvReadOptions {
has_header: true,
ignore_errors: true, ..Default::default()
}
)
.finish()
.map_err(|e| anyhow!("Failed to load CSV: {}", e))
}
FileFormat::Json => {
let file = fs::File::open(path).await?;
JsonReader::new(file.into_std().await)
.finish()
.map_err(|e| anyhow!("Failed to load JSON: {}", e))
}
FileFormat::Unsupported => Err(anyhow!("Unsupported file format")),
}
}
/// Checks that `df` contains a column named exactly `id`.
///
/// # Errors
///
/// Returns `"Missing required column: 'id'"` when no column has that name.
pub fn validate_dataframe(df: &DataFrame) -> Result<()> {
    let has_id_column = df.get_column_names().iter().any(|name| *name == "id");
    if has_id_column {
        Ok(())
    } else {
        Err(anyhow!("Missing required column: 'id'"))
    }
}
/// Reports whether the non-null values of the `id` column increase by exactly
/// one from each value to the next.
///
/// Null entries are skipped, so they neither break nor extend the sequence.
/// A column with zero or one non-null value yields `Ok(true)`.
///
/// # Errors
///
/// Returns an error when `df` has no `id` column or when that column cannot
/// be viewed as 64-bit integers.
pub fn validate_ids(df: &DataFrame) -> Result<bool> {
    let id_series = df.column("id")?;
    let ids = id_series.i64()?;

    let mut previous: Option<i64> = None;
    for current in ids.into_iter().flatten() {
        if let Some(prev) = previous {
            // `checked_add` keeps `i64::MAX` from overflowing: nothing can
            // follow it, so any successor is reported as non-consecutive
            // instead of panicking (debug) or wrapping to `i64::MIN` (release).
            if prev.checked_add(1) != Some(current) {
                return Ok(false);
            }
        }
        previous = Some(current);
    }
    Ok(true)
}
pub async fn save_data(df: &mut DataFrame, path: &Path) -> Result<()> {
let format = detect_file_format(path);
match format {
FileFormat::Csv => {
let file = fs::File::create(path).await?;
CsvWriter::new(file.into_std().await)
.finish(df)
.map_err(|e| anyhow!("Failed to save CSV: {}", e))?;
}
FileFormat::Json => {
let file = fs::File::create(path).await?;
JsonWriter::new(file.into_std().await)
.finish(df)
.map_err(|e| anyhow!("Failed to save JSON: {}", e))?;
}
FileFormat::Unsupported => return Err(anyhow!("Unsupported file format for saving")),
}
Ok(())
}