use anyhow::Result;
use polars::prelude::*;
use std::fs::File;
use crate::sampling;
pub fn run(
file: &str,
outfile: &str,
percent: f64,
max_records: Option<usize>,
seed: Option<u64>,
) -> Result<()> {
let df = CsvReader::from_path(file)?
.infer_schema(max_records)
.has_header(true)
.finish()?;
let mut sampled_df = sampling::sample_df(&df, percent, seed)?;
let mut out = File::create(outfile).expect("Could not create output file");
let _ = CsvWriter::new(&mut out)
.has_header(true)
.with_separator(b',')
.finish(&mut sampled_df);
println!("sample_df dimensions: {:?}", sampled_df.shape());
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `run` on a ten-row CSV with `percent = 50.0` and a fixed seed,
    /// then checks that the written file contains exactly six lines
    /// (the header row plus, presumably, five sampled data rows).
    #[test]
    fn test_csv_round_trip() {
        // The temporary directory is removed when `workspace` is dropped.
        let workspace = tempfile::tempdir().unwrap();
        let source_csv = workspace.path().join("input.csv");
        let sampled_csv = workspace.path().join("output.csv");

        let fixture = "id,name\n1,alice\n2,bob\n3,charlie\n4,dave\n5,eve\n6,frank\n7,grace\n8,heidi\n9,ivan\n10,judy\n";
        std::fs::write(&source_csv, fixture).unwrap();

        let outcome = run(
            source_csv.to_str().unwrap(),
            sampled_csv.to_str().unwrap(),
            50.0,
            Some(100),
            Some(42),
        );
        outcome.unwrap();

        let written = std::fs::read_to_string(&sampled_csv).unwrap();
        assert_eq!(written.lines().count(), 6);
    }
}