excelstream 0.20.2

High-performance streaming Excel & CSV library with S3/GCS cloud support and Parquet conversion - Ultra-low memory usage
Documentation
//! Memory benchmark for different writer configurations
//!
//! Uses /proc/self/status to measure actual RSS memory during execution

use excelstream::writer::ExcelWriter;
use std::fs;
use std::time::Instant;

fn get_memory_kb() -> Option<usize> {
    let status = fs::read_to_string("/proc/self/status").ok()?;
    for line in status.lines() {
        if line.starts_with("VmRSS:") {
            let parts: Vec<&str> = line.split_whitespace().collect();
            return parts.get(1)?.parse().ok();
        }
    }
    None
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== ExcelStream Memory Benchmark ===\n");
    println!("Testing with custom ZIP implementation (v0.9.0)");
    println!("Configuration: 30 columns with mixed data types\n");

    // Test configurations
    let configs = vec![("1M rows", 1_000_000)];

    for (name, num_rows) in configs {
        println!("📊 Test: {}", name);
        println!(
            "   Rows: {:?} × 30 columns = {} cells",
            num_rows,
            num_rows * 30
        );

        let mem_start = get_memory_kb().unwrap_or(0);
        let start = Instant::now();

        let filename = format!("memory_test_{}.xlsx", num_rows);
        let mut writer = ExcelWriter::new(&filename)?;

        // Write header
        let headers = [
            "ID",
            "Name",
            "Email",
            "Age",
            "Salary",
            "Active",
            "Score",
            "Department",
            "Join_Date",
            "Phone",
            "Address",
            "City",
            "Country",
            "Postal_Code",
            "Website",
            "Tax_ID",
            "Credit_Limit",
            "Balance",
            "Last_Login",
            "Status",
            "Notes",
            "Created_At",
            "Updated_At",
            "Version",
            "Priority",
            "Category",
            "Tags",
            "Description",
            "Metadata",
            "Reference",
        ];
        writer.write_header(headers)?;

        // Track peak memory during write
        let mut peak_mem = mem_start;

        for i in 1..=num_rows {
            let row = generate_row(i);
            let row_refs: Vec<&str> = row.iter().map(|s| s.as_str()).collect();
            writer.write_row(&row_refs)?;

            // Sample memory every 10K rows
            if i % 10_000 == 0 {
                if let Some(mem) = get_memory_kb() {
                    peak_mem = peak_mem.max(mem);
                }
            }
        }

        writer.save()?;

        let mem_end = get_memory_kb().unwrap_or(0);
        let elapsed = start.elapsed();

        // Get file size
        let file_size = fs::metadata(&filename)?.len();

        println!("   ✅ Completed in {:.2}s", elapsed.as_secs_f64());
        println!(
            "   Speed: {:.0} rows/sec",
            num_rows as f64 / elapsed.as_secs_f64()
        );
        println!(
            "   Throughput: {:.0} cells/sec",
            (num_rows * 30) as f64 / elapsed.as_secs_f64()
        );
        println!("   Memory start: {:.2} MB", mem_start as f64 / 1024.0);
        println!("   Memory peak:  {:.2} MB", peak_mem as f64 / 1024.0);
        println!("   Memory end:   {:.2} MB", mem_end as f64 / 1024.0);
        println!(
            "   Memory delta: {:.2} MB",
            (peak_mem - mem_start) as f64 / 1024.0
        );
        println!(
            "   File size:    {:.2} MB",
            file_size as f64 / (1024.0 * 1024.0)
        );
        println!();

        // Keep the last file for read testing
        if num_rows != 1_000_000 {
            let _ = fs::remove_file(&filename);
        } else {
            println!("   💾 File kept for read testing: {}", filename);
        }
    }

    println!("=== Summary ===");
    println!("✅ Custom ZIP implementation (v0.9.0)");
    println!("✅ ZeroTempWorkbook architecture");
    println!("✅ 30 columns: String, Int, Float, Date, Email, URL, etc.");
    println!("✅ Constant memory usage regardless of file size");
    println!("✅ No temp files, pure streaming");
    println!("✅ 2-3 MB RAM for any dataset size");

    Ok(())
}

// Generate a row with 30 columns of mixed data types
fn generate_row(row_num: usize) -> Vec<String> {
    vec![
        format!("{}", row_num),                                // ID
        format!("User_{}", row_num),                           // Name
        format!("user{}@example.com", row_num),                // Email
        format!("{}", 20 + (row_num % 50)),                    // Age
        format!("{:.2}", 30000.0 + (row_num as f64 * 123.45)), // Salary
        if row_num.is_multiple_of(2) {
            "true"
        } else {
            "false"
        }
        .to_string(), // Active
        format!("{:.1}", 50.0 + (row_num % 50) as f64),        // Score
        match row_num % 5 {
            // Department
            0 => "Engineering",
            1 => "Sales",
            2 => "Marketing",
            3 => "HR",
            _ => "Operations",
        }
        .to_string(),
        format!("2024-{:02}-{:02}", 1 + (row_num % 12), 1 + (row_num % 28)), // Join_Date
        format!("+1-555-{:04}-{:04}", row_num % 1000, row_num % 10000),      // Phone
        format!("{} Main Street", row_num),                                  // Address
        match row_num % 10 {
            // City
            0 => "New York",
            1 => "Los Angeles",
            2 => "Chicago",
            3 => "Houston",
            4 => "Phoenix",
            5 => "Philadelphia",
            6 => "San Antonio",
            7 => "San Diego",
            8 => "Dallas",
            _ => "San Jose",
        }
        .to_string(),
        "USA".to_string(),                                    // Country
        format!("{:05}", 10000 + (row_num % 90000)),          // Postal_Code
        format!("https://example{}.com", row_num),            // Website
        format!("TAX-{:08}", row_num),                        // Tax_ID
        format!("{:.2}", 5000.0 + (row_num as f64 * 50.0)),   // Credit_Limit
        format!("{:.2}", (row_num as f64 * 12.34) % 10000.0), // Balance
        format!(
            "2024-12-{:02} {:02}:{:02}:{:02}",
            1 + (row_num % 28), // Last_Login
            row_num % 24,
            row_num % 60,
            row_num % 60
        ),
        match row_num % 4 {
            // Status
            0 => "Active",
            1 => "Pending",
            2 => "Suspended",
            _ => "Inactive",
        }
        .to_string(),
        format!("Note for record #{}", row_num), // Notes
        format!(
            "2024-01-01T{:02}:{:02}:{:02}Z",
            row_num % 24, // Created_At
            row_num % 60,
            row_num % 60
        ),
        format!(
            "2024-12-01T{:02}:{:02}:{:02}Z",
            row_num % 24, // Updated_At
            row_num % 60,
            row_num % 60
        ),
        format!("v{}.{}.{}", row_num % 10, row_num % 100, row_num % 1000), // Version
        match row_num % 3 {
            // Priority
            0 => "High",
            1 => "Medium",
            _ => "Low",
        }
        .to_string(),
        match row_num % 6 {
            // Category
            0 => "Category A",
            1 => "Category B",
            2 => "Category C",
            3 => "Category D",
            4 => "Category E",
            _ => "Category F",
        }
        .to_string(),
        format!("tag1,tag{},tag{}", row_num % 10, row_num % 20), // Tags
        format!(
            "Description for record {} with some longer text to test performance",
            row_num
        ), // Description
        format!("{{\"key\":\"{}\",\"value\":{}}}", row_num, row_num * 100), // Metadata
        format!("REF-{:08}", row_num),                           // Reference
    ]
}