use scirs2_core::ndarray::Array2;
use scirs2_io::csv::{read_csv_chunked, CsvReaderConfig, CsvWriterConfig};
use std::error::Error;
use std::time::{Duration, Instant};
/// Demonstrates chunked CSV reading on a generated dataset.
///
/// The example runs in three stages:
/// 1. Build a 10000 x 5 array of strings and write it to a CSV file with a
///    header row and every field quoted.
/// 2. Read the file back with several chunk sizes, counting rows and chunks
///    and timing each pass.
/// 3. Read the file once more in chunks of 1000 rows and report the average
///    string length of each column.
///
/// # Errors
///
/// Returns any error propagated by `write_csv` or `read_csv_chunked`.
fn main() -> Result<(), Box<dyn Error>> {
    // Single definition of the dataset location, shared by the writer and
    // every reader below so they cannot drift apart.
    const DATASET_PATH: &str = "scirs2-io/examples/large_dataset.csv";

    println!("=== Chunked CSV Processing Example ===\n");

    let rows: usize = 10000;
    let cols: usize = 5;
    println!(
        "Creating a large dataset ({} rows x {} columns)...",
        rows, cols
    );

    let start = Instant::now();
    // Build every cell directly from its (row, column) position instead of
    // pre-filling the array with empty strings and overwriting them.
    let large_data = Array2::from_shape_fn((rows, cols), |(i, j)| format!("Value_{}_{}", i, j));
    let headers = (0..cols)
        .map(|i| format!("Column{}", i))
        .collect::<Vec<_>>();

    println!("Writing large dataset to CSV file...");
    let config = CsvWriterConfig {
        always_quote: true,
        ..Default::default()
    };
    scirs2_io::csv::write_csv(DATASET_PATH, &large_data, Some(&headers), Some(config))?;
    println!("Dataset created in {:.2?}", start.elapsed());

    let chunk_sizes = [100, 500, 1000, 2000];
    for &chunk_size in &chunk_sizes {
        println!("\nProcessing with chunk size: {}", chunk_size);
        let start_time = Instant::now();
        let mut total_rows = 0;
        // Kept as `u32` so it can be handed to `Duration::checked_div`
        // without a lossy cast.
        let mut total_chunks: u32 = 0;
        read_csv_chunked(
            DATASET_PATH,
            Some(CsvReaderConfig::default()),
            chunk_size,
            |_, chunk| {
                total_chunks += 1;
                total_rows += chunk.shape()[0];
                // Fixed 5 ms pause per chunk (presumably a stand-in for real
                // per-chunk work so the timings differ between chunk sizes).
                std::thread::sleep(Duration::from_millis(5));
                // NOTE(review): `true` presumably tells the reader to keep
                // going; confirm against the `read_csv_chunked` docs.
                true
            },
        )?;
        let elapsed = start_time.elapsed();

        println!("Processed {} rows in {} chunks", total_rows, total_chunks);
        println!("Time taken: {:.2?}", elapsed);
        // `Duration / u32` panics on a zero divisor; `checked_div` yields
        // `None` instead when the callback was never invoked.
        match elapsed.checked_div(total_chunks) {
            Some(per_chunk) => println!("Average time per chunk: {:.2?}", per_chunk),
            None => println!("Average time per chunk: n/a (no chunks were read)"),
        }
    }

    println!("\nPerforming statistical analysis on chunks...");
    let mut total_rows = 0;
    // Running sum of string lengths (in bytes, per `String::len`) per column.
    let mut column_lengths = vec![0; cols];
    read_csv_chunked(DATASET_PATH, None, 1000, |_headers, chunk| {
        println!("Processing chunk with {} rows", chunk.shape()[0]);
        total_rows += chunk.shape()[0];
        for j in 0..chunk.shape()[1] {
            for i in 0..chunk.shape()[0] {
                column_lengths[j] += chunk[[i, j]].len();
            }
        }
        true
    })?;

    println!("\nAverage string length by column:");
    if total_rows == 0 {
        // Dividing by zero rows would print NaN for every column.
        println!("No rows were read; averages are undefined.");
    } else {
        for (i, &total_length) in column_lengths.iter().enumerate() {
            let avg_length = total_length as f64 / total_rows as f64;
            println!("Column {}: {:.2} characters", i, avg_length);
        }
    }

    println!("\nChunked CSV example completed successfully!");
    Ok(())
}