use anyhow::{Context, Result};
use clap::Parser;
use polars::prelude::*;
use std::path::{Path, PathBuf};
use tracing::{info, Level};
mod analysis;
// Command-line arguments for the analysis tool, parsed with clap's derive API.
// This header is a plain `//` comment on purpose so that it does not feed into
// the about text clap generates for the command; the `///` comments on the
// fields below are picked up by clap and shown as per-flag help in `--help`.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
    /// Path to the input dataset; the format is chosen from the extension (csv, json or parquet)
    #[clap(short, long, value_parser)]
    input: PathBuf,

    /// File to write the analysis result to, in addition to printing it on stdout
    #[clap(short, long, value_parser)]
    output: Option<PathBuf>,

    /// Analysis to run: summary, correlation, cluster or timeseries
    #[clap(short, long, default_value = "summary")]
    analysis: String,

    /// Comma-separated list of column names passed to the analysis
    #[clap(short, long)]
    columns: Option<String>,

    /// Also render a PNG next to the output file (visualization.png when no output is given)
    #[clap(short, long)]
    visualize: bool,
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_max_level(Level::INFO)
.init();
let args = Args::parse();
info!("Starting data analysis on file: {:?}", args.input);
let df = load_dataset(&args.input)?;
info!("Dataset loaded with shape: {} rows × {} columns", df.height(), df.width());
let columns = args.columns.as_ref()
.map(|cols| cols.split(',').map(|s| s.trim().to_string()).collect())
.unwrap_or_else(Vec::new);
let result = match args.analysis.as_str() {
"summary" => analysis::summarize(&df, &columns)?,
"correlation" => analysis::calculate_correlations(&df, &columns)?,
"cluster" => analysis::cluster_data(&df, &columns)?,
"timeseries" => analysis::analyze_timeseries(&df, &columns)?,
_ => return Err(anyhow::anyhow!("Unknown analysis type: {}", args.analysis)),
};
println!("{}", result);
if let Some(output_path) = args.output {
info!("Saving analysis results to {:?}", output_path);
std::fs::write(&output_path, result)?;
}
if args.visualize {
info!("Generating visualizations");
let viz_path = args.output
.map(|p| p.with_extension("png"))
.unwrap_or_else(|| PathBuf::from("visualization.png"));
analysis::visualize(&df, &columns, &viz_path)?;
info!("Visualization saved to {:?}", viz_path);
}
info!("Analysis completed successfully");
Ok(())
}
fn load_dataset(path: &PathBuf) -> Result<DataFrame> {
let extension = path.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("");
let df = match extension.to_lowercase().as_str() {
"csv" => {
let file = std::fs::File::open(path)?;
CsvReader::new(file)
.infer_schema(Some(100))
.has_header(true)
.finish()?
},
"json" => {
let file = std::fs::File::open(path)?;
JsonReader::new(file)
.finish()?
},
"parquet" => {
let file = std::fs::File::open(path)?;
ParquetReader::new(file)
.finish()?
},
_ => return Err(anyhow::anyhow!("Unsupported file format: {}", extension)),
};
Ok(df)
}