formualizer-workbook 0.6.0

Ergonomic workbook API over the Formualizer engine (sheets, loaders, staging, undo/redo)
Documentation
use formualizer_common::LiteralValue;
#[cfg(any(feature = "umya", feature = "calamine"))]
use formualizer_workbook::SpreadsheetReader;
use formualizer_workbook::{
    CellData, LoadStrategy, SpreadsheetWriter, Workbook, WorkbookConfig, WorkbookLoadLimits,
};

fn assert_sparse_arrow_storage(
    wb: &Workbook,
    sheet: &str,
    expected_rows: u32,
    expected_cols: usize,
) {
    let asheet = wb
        .engine()
        .sheet_store()
        .sheet(sheet)
        .expect("arrow sheet exists");
    assert_eq!(asheet.nrows, expected_rows);
    assert_eq!(asheet.columns.len(), expected_cols);
    let dense_chunks: usize = asheet.columns.iter().map(|col| col.chunks.len()).sum();
    let max_dense_chunks = asheet
        .chunk_starts
        .len()
        .saturating_mul(asheet.columns.len());
    assert!(
        dense_chunks < max_dense_chunks,
        "sparse/adaptive ingest should not materialize every dense chunk"
    );
    assert!(
        asheet
            .columns
            .iter()
            .any(|col| !col.sparse_chunks.is_empty()),
        "expected at least one populated sparse chunk"
    );
}

#[cfg(feature = "json")]
use formualizer_workbook::JsonAdapter;
#[cfg(feature = "umya")]
use formualizer_workbook::UmyaAdapter;

#[cfg(all(feature = "calamine", feature = "umya"))]
use formualizer_workbook::CalamineAdapter;

#[cfg(feature = "umya")]
fn sparse_xlsx_bytes() -> Vec<u8> {
    let mut book = umya_spreadsheet::new_file();
    let sheet = book
        .get_sheet_by_name_mut("Sheet1")
        .expect("default sheet exists");
    sheet.get_cell_mut((1, 1_000)).set_value_number(1.0);

    let mut buf = Vec::new();
    umya_spreadsheet::writer::xlsx::write_writer(&book, &mut buf).expect("write xlsx bytes");
    buf
}

#[cfg(all(feature = "calamine", feature = "umya"))]
fn out_of_order_sparse_xlsx_bytes() -> Vec<u8> {
    let mut book = umya_spreadsheet::new_file();
    let sheet = book
        .get_sheet_by_name_mut("Sheet1")
        .expect("default sheet exists");
    sheet.get_cell_mut((1, 200)).set_value_number(200.0);
    sheet.get_cell_mut((1, 1)).set_value_number(1.0);

    let mut buf = Vec::new();
    umya_spreadsheet::writer::xlsx::write_writer(&book, &mut buf).expect("write xlsx bytes");

    let mut archive = zip::ZipArchive::new(std::io::Cursor::new(buf)).expect("read xlsx zip");
    let mut out = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));
    let options =
        zip::write::SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
    for i in 0..archive.len() {
        let mut file = archive.by_index(i).expect("zip entry");
        let name = file.name().to_string();
        out.start_file(name.as_str(), options)
            .expect("start zip entry");
        if name == "xl/worksheets/sheet1.xml" {
            let xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
  <dimension ref="A1:A200"/>
  <sheetData>
    <row r="200"><c r="A200"><v>200</v></c></row>
    <row r="1"><c r="A1"><v>1</v></c></row>
  </sheetData>
</worksheet>"#;
            std::io::Write::write_all(&mut out, xml.as_bytes()).expect("write replacement sheet");
        } else {
            std::io::copy(&mut file, &mut out).expect("copy zip entry");
        }
    }
    out.finish().unwrap().into_inner()
}

#[cfg(all(feature = "calamine", feature = "umya"))]
fn shared_formula_xlsx_bytes(rows: u32) -> Vec<u8> {
    use std::io::{Cursor, Write};
    use zip::write::SimpleFileOptions;

    let mut sheet_data = String::new();
    for row in 1..=rows {
        let formula = if row == 1 {
            format!(r#"<f t="shared" si="0" ref="B1:B{rows}">A1*2</f>"#)
        } else {
            r#"<f t="shared" si="0"/>"#.to_string()
        };
        sheet_data.push_str(&format!(
            r#"<row r="{row}"><c r="A{row}"><v>{row}</v></c><c r="B{row}">{formula}<v>9999</v></c></row>"#
        ));
    }
    let sheet_xml = format!(
        r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
  <dimension ref="A1:B{rows}"/>
  <sheetData>{sheet_data}</sheetData>
</worksheet>"#
    );

    let mut zip = zip::ZipWriter::new(Cursor::new(Vec::new()));
    let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
    zip.start_file("[Content_Types].xml", options).unwrap();
    zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
  <Default Extension="xml" ContentType="application/xml"/>
  <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
  <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
  <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
</Types>"#).unwrap();
    zip.start_file("_rels/.rels", options).unwrap();
    zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
</Relationships>"#).unwrap();
    zip.start_file("xl/workbook.xml", options).unwrap();
    zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
  <sheets><sheet name="Sheet1" sheetId="1" r:id="rId1"/></sheets>
</workbook>"#).unwrap();
    zip.start_file("xl/_rels/workbook.xml.rels", options)
        .unwrap();
    zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
</Relationships>"#).unwrap();
    zip.start_file("xl/styles.xml", options).unwrap();
    zip.write_all(br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><cellXfs count="1"><xf numFmtId="0"/></cellXfs></styleSheet>"#).unwrap();
    zip.start_file("xl/worksheets/sheet1.xml", options).unwrap();
    zip.write_all(sheet_xml.as_bytes()).unwrap();
    zip.finish().unwrap().into_inner()
}

#[cfg(feature = "umya")]
fn sparse_whole_column_summary_xlsx_bytes() -> Vec<u8> {
    let mut book = umya_spreadsheet::new_file();
    let sheet = book
        .get_sheet_by_name_mut("Sheet1")
        .expect("default sheet exists");
    let mut row = 1u32;
    while row <= 999_001 {
        sheet.get_cell_mut((1, row)).set_value_number(row as f64);
        row += 1_000;
    }
    sheet.get_cell_mut((3, 1)).set_formula("=SUM(A:A)");

    let mut buf = Vec::new();
    umya_spreadsheet::writer::xlsx::write_writer(&book, &mut buf).expect("write xlsx bytes");
    buf
}

fn sparse_limits() -> WorkbookLoadLimits {
    WorkbookLoadLimits {
        max_sheet_rows: 10_000,
        max_sheet_cols: 100,
        max_sheet_logical_cells: u64::MAX,
        sparse_sheet_cell_threshold: 100,
        max_sparse_cell_ratio: 10,
    }
}

#[cfg(feature = "json")]
#[test]
fn json_loader_rejects_dense_sheet_over_logical_budget() {
    let mut adapter = JsonAdapter::new();
    for row in 1..=11 {
        for col in 1..=10 {
            adapter
                .write_cell("S", row, col, CellData::from_value(1.0))
                .expect("write dense cell");
        }
    }

    let mut cfg = WorkbookConfig::ephemeral();
    cfg.ingest_limits = WorkbookLoadLimits {
        max_sheet_rows: 10_000,
        max_sheet_cols: 10_000,
        max_sheet_logical_cells: 100,
        sparse_sheet_cell_threshold: u64::MAX,
        max_sparse_cell_ratio: u64::MAX,
    };

    let err = match Workbook::from_reader(adapter, LoadStrategy::EagerAll, cfg) {
        Ok(_) => panic!("dense sheet should hit logical budget"),
        Err(err) => err,
    };
    let msg = err.to_string();
    assert!(
        msg.contains("logical-cell budget"),
        "unexpected error: {msg}"
    );
}

#[cfg(feature = "json")]
#[test]
fn json_loader_uses_sparse_initial_ingest_over_sparse_guardrail_shape() {
    let mut adapter = JsonAdapter::new();
    adapter
        .write_cell("S", 1_000, 1, CellData::from_value(1.0))
        .expect("write sparse cell");

    let cfg = WorkbookConfig::ephemeral().with_ingest_limits(sparse_limits());
    let wb = Workbook::from_reader(adapter, LoadStrategy::EagerAll, cfg)
        .expect("sparse sheet should load through sparse initial ingest");
    assert_eq!(wb.get_value("S", 1_000, 1), Some(LiteralValue::Number(1.0)));
    assert_sparse_arrow_storage(&wb, "S", 1_000, 1);
}

#[cfg(feature = "json")]
#[test]
fn json_sparse_initial_ingest_does_not_mask_formula_with_cached_value() {
    let mut adapter = JsonAdapter::new();
    adapter
        .write_cell("S", 1_000, 1, CellData::from_value(2.0))
        .expect("write sparse precedent");
    adapter
        .write_cell(
            "S",
            1_000,
            2,
            CellData {
                value: Some(LiteralValue::Number(999.0)),
                formula: Some("=A1000*2".to_string()),
                style: None,
            },
        )
        .expect("write formula with stale cached value");

    let cfg = WorkbookConfig::ephemeral().with_ingest_limits(sparse_limits());
    let mut wb = Workbook::from_reader(adapter, LoadStrategy::EagerAll, cfg)
        .expect("sparse sheet should load through sparse initial ingest");
    wb.evaluate_all().expect("evaluate formulas");
    assert_eq!(wb.get_value("S", 1_000, 2), Some(LiteralValue::Number(4.0)));
}

#[cfg(feature = "umya")]
#[test]
fn umya_loader_uses_sparse_initial_ingest_over_sparse_guardrail_shape() {
    let adapter = UmyaAdapter::open_bytes(sparse_xlsx_bytes()).expect("open sparse xlsx bytes");
    let cfg = WorkbookConfig::ephemeral().with_ingest_limits(sparse_limits());

    let wb = Workbook::from_reader(adapter, LoadStrategy::EagerAll, cfg)
        .expect("sparse xlsx should load through sparse initial ingest");
    assert_eq!(
        wb.get_value("Sheet1", 1_000, 1),
        Some(LiteralValue::Number(1.0))
    );
    assert_sparse_arrow_storage(&wb, "Sheet1", 1_000, 1);
}

#[cfg(all(feature = "calamine", feature = "umya"))]
#[test]
fn calamine_loader_uses_sparse_initial_ingest_over_sparse_guardrail_shape() {
    use std::io::Write;

    let bytes = sparse_xlsx_bytes();
    let mut tmp = tempfile::NamedTempFile::new().expect("create temp xlsx");
    tmp.write_all(&bytes).expect("persist xlsx");

    let adapter = CalamineAdapter::open_path(tmp.path()).expect("open sparse xlsx path");
    let cfg = WorkbookConfig::ephemeral().with_ingest_limits(sparse_limits());

    let wb = Workbook::from_reader(adapter, LoadStrategy::EagerAll, cfg)
        .expect("sparse xlsx should load through sparse initial ingest");
    assert_eq!(
        wb.get_value("Sheet1", 1_000, 1),
        Some(LiteralValue::Number(1.0))
    );
    assert_sparse_arrow_storage(&wb, "Sheet1", 1_000, 1);
}

#[cfg(all(feature = "calamine", feature = "umya"))]
#[test]
fn calamine_loader_preserves_out_of_order_sparse_cells() {
    use std::io::Write;

    let bytes = out_of_order_sparse_xlsx_bytes();
    let mut tmp = tempfile::NamedTempFile::new().expect("create temp xlsx");
    tmp.write_all(&bytes).expect("persist xlsx");

    let adapter = CalamineAdapter::open_path(tmp.path()).expect("open out-of-order xlsx path");
    let wb = Workbook::from_reader(adapter, LoadStrategy::EagerAll, WorkbookConfig::ephemeral())
        .expect("out-of-order sparse sheet should load");
    assert_eq!(
        wb.get_value("Sheet1", 1, 1),
        Some(LiteralValue::Number(1.0))
    );
    assert_eq!(
        wb.get_value("Sheet1", 200, 1),
        Some(LiteralValue::Number(200.0))
    );
}

#[cfg(all(feature = "calamine", feature = "umya"))]
#[test]
fn calamine_loader_loads_shared_formula_semantics() {
    use std::io::Write;

    let rows = 128;
    let bytes = shared_formula_xlsx_bytes(rows);
    let mut tmp = tempfile::NamedTempFile::new().expect("create temp xlsx");
    tmp.write_all(&bytes).expect("persist xlsx");

    let adapter = CalamineAdapter::open_path(tmp.path()).expect("open shared formula xlsx path");
    let (mut wb, load_stats) = Workbook::from_reader_with_adapter_stats(
        adapter,
        LoadStrategy::EagerAll,
        WorkbookConfig::ephemeral().with_span_evaluation(true),
    )
    .expect("shared formula workbook should load");

    assert_eq!(
        load_stats
            .expect("adapter stats should be present")
            .formula_cells_observed,
        Some(rows as u64)
    );

    wb.evaluate_all().expect("evaluate shared formulas");
    assert_eq!(
        wb.get_value("Sheet1", 1, 2),
        Some(LiteralValue::Number(2.0))
    );
    assert_eq!(
        wb.get_value("Sheet1", rows, 2),
        Some(LiteralValue::Number((rows * 2) as f64))
    );
}

#[cfg(all(feature = "calamine", feature = "umya"))]
#[test]
fn calamine_sparse_initial_ingest_evaluates_whole_column_summary() {
    use std::io::Write;

    let bytes = sparse_whole_column_summary_xlsx_bytes();
    let mut tmp = tempfile::NamedTempFile::new().expect("create temp xlsx");
    tmp.write_all(&bytes).expect("persist xlsx");

    let adapter = CalamineAdapter::open_path(tmp.path()).expect("open sparse xlsx path");
    let mut wb =
        Workbook::from_reader(adapter, LoadStrategy::EagerAll, WorkbookConfig::ephemeral())
            .expect("tall sparse whole-column workbook should load");
    assert_sparse_arrow_storage(&wb, "Sheet1", 999_001, 3);

    wb.evaluate_all().expect("evaluate sparse whole-column SUM");
    assert_eq!(
        wb.get_value("Sheet1", 1, 3),
        Some(LiteralValue::Number(499_501_000.0))
    );
}