use lk_inside::data_core::loader;
use lk_inside::data_core::analyzer::DataFrameAnalyzer;
use std::path::PathBuf;
use anyhow::Result;
use polars::prelude::{col, lit, *};
#[tokio::test]
async fn test_csv_data_loading_and_validation() -> Result<()> {
    // Load the large CSV fixture and make sure it survives validation
    // with the expected 20x5 shape.
    let path = PathBuf::from("examples/large_sample.csv");
    let detected_format = loader::detect_file_format(&path);
    let frame = loader::load_data(path.clone(), detected_format).await?;

    loader::validate_dataframe(&frame)?;

    assert_eq!(frame.height(), 20);
    assert_eq!(frame.width(), 5);
    Ok(())
}
#[tokio::test]
async fn test_json_data_loading_and_validation() -> Result<()> {
    // Same round-trip as the CSV test, but for the JSON fixture:
    // detect format, load, validate, and check the 3x3 shape.
    let path = PathBuf::from("examples/sample_data.json");
    let detected_format = loader::detect_file_format(&path);
    let frame = loader::load_data(path.clone(), detected_format).await?;

    loader::validate_dataframe(&frame)?;

    assert_eq!(frame.height(), 3);
    assert_eq!(frame.width(), 3);
    Ok(())
}
#[tokio::test]
async fn test_data_ranking() -> Result<()> {
    // Load and validate the large CSV fixture, then rank it by the
    // "value" column and confirm the column comes back fully sorted
    // in descending order with shape preserved.
    let path = PathBuf::from("examples/large_sample.csv");
    let detected_format = loader::detect_file_format(&path);
    let frame = loader::load_data(path.clone(), detected_format).await?;
    loader::validate_dataframe(&frame)?;

    let ranked_df = DataFrameAnalyzer::new(frame).rank_by_column("value")?;
    assert_eq!(ranked_df.height(), 20);
    assert_eq!(ranked_df.width(), 5);

    // Pull the ranked column out as plain f64s (nulls dropped) so the
    // whole ordering can be asserted in one shot.
    let values: Vec<f64> = ranked_df
        .column("value")?
        .f64()?
        .into_iter()
        .flatten()
        .collect();
    let expected = vec![
        310.10, 300.10, 260.00, 250.00, 210.75, 200.75, 185.60, 175.60, 160.20, 150.20,
        130.90, 120.90, 110.50, 100.50, 100.25, 90.30, 90.25, 80.30, 60.00, 50.00,
    ];
    assert_eq!(values, expected);
    Ok(())
}
#[tokio::test]
async fn test_descriptive_statistics() -> Result<()> {
    // Compute descriptive statistics over the small CSV fixture and
    // spot-check the "mean" row for the id and price columns.
    let path = PathBuf::from("examples/sample_data.csv");
    let detected_format = loader::detect_file_format(&path);
    let frame = loader::load_data(path.clone(), detected_format).await?;
    loader::validate_dataframe(&frame)?;

    let stats_df = DataFrameAnalyzer::new(frame).get_descriptive_statistics()?;

    // Seven summary rows; column count depends on the fixture schema.
    assert_eq!(stats_df.height(), 7);
    assert!(
        stats_df.width() >= 3,
        "Expected at least 3 columns (statistic, id, price), got {}",
        stats_df.width()
    );

    // Isolate just the mean row via a lazy filter on the measure label.
    let mean_row = stats_df
        .lazy()
        .filter(col("Measure").eq(lit("mean")))
        .collect()?;

    assert_eq!(mean_row.column("id")?.f64()?.get(0), Some(2.0));
    assert_eq!(mean_row.column("price")?.f64()?.get(0), Some(15.0));
    Ok(())
}
#[test]
fn test_data_grouping_and_aggregation() -> Result<()> {
    // Build an in-memory frame (no fixture file needed) with three
    // categories and sum "value" per "category".
    let df = DataFrame::new(vec![
        polars::prelude::Series::new("category".into(), &["A", "B", "A", "C", "B"]).into(),
        polars::prelude::Series::new("value".into(), &[10, 20, 15, 5, 25]).into(),
    ])?;

    let analyzer = DataFrameAnalyzer::new(df);
    let grouped_df = analyzer.group_and_aggregate("category", &[("value", "sum")])?;

    // One row per distinct category.
    assert_eq!(grouped_df.height(), 3);

    // Propagate polars errors with `?` (this fn returns anyhow::Result,
    // matching the other tests in this file) instead of unwrap().
    // Sort both result columns before comparing, since group-by output
    // order is not asserted to be deterministic here.
    assert_eq!(
        grouped_df.column("category")?.str()?.sort(false).into_series(),
        polars::prelude::Series::new("category".into(), &["A", "B", "C"])
    );
    // Sums: A = 10 + 15 = 25, B = 20 + 25 = 45, C = 5; sorted ascending.
    assert_eq!(
        grouped_df.column("value")?.i32()?.sort(false).into_series(),
        polars::prelude::Series::new("value".into(), &[5, 25, 45])
    );
    Ok(())
}
#[tokio::test]
async fn test_data_filtering() -> Result<()> {
    use lk_inside::analysis::filtering;

    // Load the small CSV fixture and apply a textual filter expression;
    // exactly two rows should satisfy price > 10.0.
    let path = PathBuf::from("examples/sample_data.csv");
    let detected_format = loader::detect_file_format(&path);
    let frame = loader::load_data(path.clone(), detected_format).await?;

    let filtered_df = filtering::apply_filter(&frame, "price > 10.0")?;
    assert_eq!(filtered_df.height(), 2);

    // Surviving rows keep their original ids and prices.
    let ids = filtered_df.column("id")?.i64()?;
    assert_eq!(ids.get(0), Some(2));
    assert_eq!(ids.get(1), Some(3));

    let prices = filtered_df.column("price")?.f64()?;
    assert_eq!(prices.get(0), Some(20.0));
    assert_eq!(prices.get(1), Some(15.0));
    Ok(())
}