use crate::load_limits::enforce_sheet_load_limits;
use crate::traits::{
AccessGranularity, BackendCaps, CellData, NamedRange, NamedRangeScope, SheetData,
SpreadsheetReader, SpreadsheetWriter,
};
use chrono::Timelike;
use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue, RangeAddress};
use formualizer_parse::parser::ReferenceType;
use parking_lot::RwLock;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::HashSet;
use std::io::{Cursor, Read, Seek};
use std::path::Path;
use umya_spreadsheet::{
CellRawValue, CellValue, Spreadsheet,
reader::xlsx,
structs::{DefinedName as UmyaDefinedName, Worksheet},
};
use crate::traits::{DefinedName as WorkbookDefinedName, DefinedNameDefinition, DefinedNameScope};
type FormulaBatch = (String, Vec<(u32, u32, formualizer_parse::ASTNode)>);
#[derive(Clone, Debug, PartialEq)]
pub struct FormulaCacheUpdate {
pub sheet: String,
pub row: u32,
pub col: u32,
pub value: LiteralValue,
}
#[derive(Clone, Copy, Debug)]
pub struct FormulaCacheUpdateRef<'a> {
pub sheet: &'a str,
pub row: u32,
pub col: u32,
pub value: &'a LiteralValue,
}
pub struct UmyaAdapter {
workbook: RwLock<Spreadsheet>,
lazy: bool,
original_path: Option<std::path::PathBuf>,
table_header_rows: HashMap<String, bool>,
table_header_rows_available: bool,
}
impl UmyaAdapter {
const EXCEL_MAX_ROWS: u32 = 1_048_576;
const EXCEL_MAX_COLS: u32 = 16_384;
fn extract_attr(tag: &str, key: &str) -> Option<String> {
let needle_dq = format!("{key}=\"");
if let Some(pos) = tag.find(&needle_dq) {
let start = pos + needle_dq.len();
let rest = &tag[start..];
if let Some(end) = rest.find('"') {
return Some(rest[..end].to_string());
}
}
let needle_sq = format!("{key}='");
if let Some(pos) = tag.find(&needle_sq) {
let start = pos + needle_sq.len();
let rest = &tag[start..];
if let Some(end) = rest.find('\'') {
return Some(rest[..end].to_string());
}
}
None
}
fn parse_table_tag(xml: &str) -> Option<(String, bool)> {
let start = xml.find("<table")?;
let after = &xml[start..];
let end = after.find('>')?;
let tag = &after[..end];
let name =
Self::extract_attr(tag, "name").or_else(|| Self::extract_attr(tag, "displayName"))?;
let header_row = match Self::extract_attr(tag, "headerRowCount") {
None => true,
Some(v) => v.parse::<u32>().ok().map(|n| n != 0).unwrap_or(true),
};
Some((name, header_row))
}
#[inline]
fn clamp_excel_row(row_1based: u32) -> u32 {
row_1based.clamp(1, Self::EXCEL_MAX_ROWS)
}
#[inline]
fn clamp_excel_col(col_1based: u32) -> u32 {
col_1based.clamp(1, Self::EXCEL_MAX_COLS)
}
#[inline]
fn clamp_excel_cell(row_1based: u32, col_1based: u32) -> (u32, u32) {
(
Self::clamp_excel_row(row_1based),
Self::clamp_excel_col(col_1based),
)
}
fn normalize_open_ended_bounds(
start_row: Option<u32>,
start_col: Option<u32>,
end_row: Option<u32>,
end_col: Option<u32>,
) -> Option<(u32, u32, u32, u32)> {
let mut sr = start_row;
let mut sc = start_col;
let mut er = end_row;
let mut ec = end_col;
if sr.is_none() && er.is_none() {
sr = Some(1);
er = Some(Self::EXCEL_MAX_ROWS);
}
if sc.is_none() && ec.is_none() {
sc = Some(1);
ec = Some(Self::EXCEL_MAX_COLS);
}
if sr.is_some() && er.is_none() {
er = Some(Self::EXCEL_MAX_ROWS);
}
if er.is_some() && sr.is_none() {
sr = Some(1);
}
if sc.is_some() && ec.is_none() {
ec = Some(Self::EXCEL_MAX_COLS);
}
if ec.is_some() && sc.is_none() {
sc = Some(1);
}
let mut sr = sr?;
let mut sc = sc?;
let mut er = er?;
let mut ec = ec?;
sr = Self::clamp_excel_row(sr);
sc = Self::clamp_excel_col(sc);
er = Self::clamp_excel_row(er);
ec = Self::clamp_excel_col(ec);
if er < sr || ec < sc {
return None;
}
Some((sr, sc, er, ec))
}
fn read_table_header_rows_from_reader<R: Read + Seek>(
reader: R,
) -> Result<HashMap<String, bool>, std::io::Error> {
let mut archive = zip::ZipArchive::new(reader)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
let mut out: HashMap<String, bool> = HashMap::new();
for i in 0..archive.len() {
let mut entry = archive
.by_index(i)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
let name = entry.name().to_string();
if !name.starts_with("xl/tables/") || !name.ends_with(".xml") {
continue;
}
let mut xml = String::new();
entry.read_to_string(&mut xml)?;
if let Some((tname, header_row)) = Self::parse_table_tag(&xml) {
out.insert(tname, header_row);
}
}
Ok(out)
}
fn convert_cell_value(cv: &CellValue) -> Option<LiteralValue> {
let raw = cv.get_raw_value();
if raw.is_empty() {
return None;
}
if raw.is_error() {
let txt = cv.get_value();
let kind = match txt.as_ref() {
"#DIV/0!" => ExcelErrorKind::Div,
"#N/A" => ExcelErrorKind::Na,
"#NAME?" => ExcelErrorKind::Name,
"#NULL!" => ExcelErrorKind::Null,
"#NUM!" => ExcelErrorKind::Num,
"#REF!" => ExcelErrorKind::Ref,
"#VALUE!" => ExcelErrorKind::Value,
_ => ExcelErrorKind::Value,
};
return Some(LiteralValue::Error(ExcelError::new(kind)));
}
match raw {
CellRawValue::Numeric(n) => Some(LiteralValue::Number(*n)),
CellRawValue::Bool(b) => Some(LiteralValue::Boolean(*b)),
CellRawValue::String(s) => Some(LiteralValue::Text(s.to_string())),
CellRawValue::RichText(rt) => Some(LiteralValue::Text(rt.get_text().to_string())),
CellRawValue::Lazy(s) => {
let txt = s.as_ref();
if let Ok(n) = txt.parse::<f64>() {
Some(LiteralValue::Number(n))
} else if txt.eq_ignore_ascii_case("TRUE") {
Some(LiteralValue::Boolean(true))
} else if txt.eq_ignore_ascii_case("FALSE") {
Some(LiteralValue::Boolean(false))
} else {
Some(LiteralValue::Text(txt.to_string()))
}
}
CellRawValue::Error(_) => unreachable!(),
CellRawValue::Empty => None,
}
}
fn table_header_row_for(&self, table_name: &str) -> bool {
if let Some(v) = self.table_header_rows.get(table_name) {
return *v;
}
let available = self.table_header_rows_available;
if !available {
}
#[cfg(feature = "tracing")]
{
if available {
tracing::warn!(
table = table_name,
"umya: table headerRowCount not found; assuming header_row=true"
);
} else {
tracing::warn!(
table = table_name,
"umya: table headerRowCount unavailable; assuming header_row=true"
);
}
}
true
}
fn read_table_header_rows_from_xlsx(
path: &Path,
) -> Result<HashMap<String, bool>, std::io::Error> {
use std::fs::File;
let file = File::open(path)?;
Self::read_table_header_rows_from_reader(file)
}
pub fn formula_cells(&self) -> Vec<(String, u32, u32)> {
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
let mut out: Vec<(String, u32, u32)> = Vec::new();
for i in 0..count {
wb.read_sheet(i);
let Some(ws) = wb.get_sheet(&i) else {
continue;
};
let sheet_name = ws.get_name().to_string();
for cell in ws.get_cell_collection() {
if !cell.is_formula() {
continue;
}
let coord = cell.get_coordinate();
out.push((
sheet_name.clone(),
*coord.get_row_num(),
*coord.get_col_num(),
));
}
}
out
}
fn write_formula_cache_to_sheet(
ws: &mut Worksheet,
row: u32,
col: u32,
value: &LiteralValue,
date_system: formualizer_eval::engine::DateSystem,
) {
let cell = ws.get_cell_mut((col, row));
if !cell.is_formula() {
return;
}
let formula_obj = cell.get_formula_obj().cloned();
let mut cv = cell.get_cell_value().clone();
match value {
LiteralValue::Empty => {
cv.set_blank();
}
LiteralValue::Int(i) => {
cv.set_value_number(*i as f64);
}
LiteralValue::Number(n) => {
cv.set_value_number(*n);
}
LiteralValue::Boolean(b) => {
cv.set_value_bool(*b);
}
LiteralValue::Text(s) => {
cv.set_value_string(s.clone());
}
LiteralValue::Error(e) => {
cv.set_error(e.kind.to_string());
}
LiteralValue::Date(d) => {
let dt = d.and_hms_opt(0, 0, 0).unwrap();
let serial =
formualizer_eval::builtins::datetime::datetime_to_serial_for(date_system, &dt);
cv.set_value_number(serial);
}
LiteralValue::DateTime(dt) => {
let serial =
formualizer_eval::builtins::datetime::datetime_to_serial_for(date_system, dt);
cv.set_value_number(serial);
}
LiteralValue::Time(t) => {
let serial = t.num_seconds_from_midnight() as f64 / 86_400.0;
cv.set_value_number(serial);
}
LiteralValue::Duration(d) => {
let serial = d.num_seconds() as f64 / 86_400.0;
cv.set_value_number(serial);
}
LiteralValue::Pending | LiteralValue::Array(_) => {
cv.set_error(ExcelErrorKind::Value.to_string());
}
}
if let Some(formula) = formula_obj {
cv.set_formula_obj(formula);
}
cell.set_cell_value(cv);
}
pub fn set_formula_cached_values_batch<'a, I>(
&mut self,
updates: I,
date_system: formualizer_eval::engine::DateSystem,
) -> Result<(), umya_spreadsheet::XlsxError>
where
I: IntoIterator<Item = FormulaCacheUpdateRef<'a>>,
{
let mut grouped: BTreeMap<&str, Vec<(u32, u32, &LiteralValue)>> = BTreeMap::new();
for update in updates {
grouped
.entry(update.sheet)
.or_default()
.push((update.row, update.col, update.value));
}
if grouped.is_empty() {
return Ok(());
}
let mut wb = self.workbook.write();
for (sheet, cells) in grouped {
wb.read_sheet_by_name(sheet);
let ws = wb
.get_sheet_by_name_mut(sheet)
.ok_or_else(|| umya_spreadsheet::XlsxError::CellError("sheet not found".into()))?;
for (row, col, value) in cells {
Self::write_formula_cache_to_sheet(ws, row, col, value, date_system);
}
}
Ok(())
}
pub fn write_formula_caches_batch(
&mut self,
updates: &[FormulaCacheUpdate],
date_system: formualizer_eval::engine::DateSystem,
) -> Result<(), umya_spreadsheet::XlsxError> {
self.set_formula_cached_values_batch(
updates.iter().map(|u| FormulaCacheUpdateRef {
sheet: &u.sheet,
row: u.row,
col: u.col,
value: &u.value,
}),
date_system,
)
}
pub fn set_formula_cached_value(
&mut self,
sheet: &str,
row: u32,
col: u32,
value: &LiteralValue,
date_system: formualizer_eval::engine::DateSystem,
) -> Result<(), umya_spreadsheet::XlsxError> {
self.set_formula_cached_values_batch(
std::iter::once(FormulaCacheUpdateRef {
sheet,
row,
col,
value,
}),
date_system,
)
}
}
impl SpreadsheetReader for UmyaAdapter {
type Error = umya_spreadsheet::XlsxError;
fn access_granularity(&self) -> AccessGranularity {
AccessGranularity::Sheet
}
fn capabilities(&self) -> BackendCaps {
BackendCaps {
read: true,
write: true,
formulas: true,
lazy_loading: self.lazy,
random_access: false,
styles: true,
bytes_input: true,
..Default::default()
}
}
fn sheet_names(&self) -> Result<Vec<String>, Self::Error> {
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
let mut names = Vec::with_capacity(count);
for i in 0..count {
wb.read_sheet(i); if let Some(s) = wb.get_sheet(&i) {
names.push(s.get_name().to_string());
}
}
Ok(names)
}
fn defined_names(&mut self) -> Result<Vec<WorkbookDefinedName>, Self::Error> {
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
for i in 0..count {
wb.read_sheet(i);
}
let mut sheet_names: Vec<String> = Vec::with_capacity(count);
for i in 0..count {
if let Some(s) = wb.get_sheet(&i) {
sheet_names.push(s.get_name().to_string());
}
}
let mut out: Vec<WorkbookDefinedName> = Vec::new();
let mut seen: HashSet<(DefinedNameScope, Option<String>, String)> = HashSet::new();
for i in 0..count {
let Some(ws) = wb.get_sheet(&i) else {
continue;
};
let declared_on = ws.get_name();
for dn in ws.get_defined_names() {
if let Some(converted) =
Self::convert_defined_name_stable(dn, Some(declared_on), &sheet_names)
{
let key = (
converted.scope.clone(),
converted.scope_sheet.clone(),
converted.name.clone(),
);
if seen.insert(key) {
out.push(converted);
}
}
}
}
for dn in wb.get_defined_names() {
if let Some(converted) = Self::convert_defined_name_stable(dn, None, &sheet_names) {
let key = (
converted.scope.clone(),
converted.scope_sheet.clone(),
converted.name.clone(),
);
if seen.insert(key) {
out.push(converted);
}
}
}
Ok(out)
}
fn open_path<P: AsRef<Path>>(path: P) -> Result<Self, Self::Error>
where
Self: Sized,
{
let sheet = xlsx::read(path.as_ref())?;
let (table_header_rows, table_header_rows_available) =
match Self::read_table_header_rows_from_xlsx(path.as_ref()) {
Ok(m) => (m, true),
Err(_) => (HashMap::new(), false),
};
Ok(Self {
workbook: RwLock::new(sheet),
lazy: false,
original_path: Some(path.as_ref().to_path_buf()),
table_header_rows,
table_header_rows_available,
})
}
fn open_reader(mut reader: Box<dyn Read + Send + Sync>) -> Result<Self, Self::Error>
where
Self: Sized,
{
let mut data = Vec::new();
reader
.read_to_end(&mut data)
.map_err(umya_spreadsheet::XlsxError::Io)?;
Self::open_bytes(data)
}
fn open_bytes(data: Vec<u8>) -> Result<Self, Self::Error>
where
Self: Sized,
{
let (table_header_rows, table_header_rows_available) =
match Self::read_table_header_rows_from_reader(Cursor::new(data.as_slice())) {
Ok(m) => (m, true),
Err(_) => (HashMap::new(), false),
};
let sheet = xlsx::read_reader(Cursor::new(data), true)?;
Ok(Self {
workbook: RwLock::new(sheet),
lazy: false,
original_path: None,
table_header_rows,
table_header_rows_available,
})
}
fn read_range(
&mut self,
sheet: &str,
start: (u32, u32),
end: (u32, u32),
) -> Result<BTreeMap<(u32, u32), CellData>, Self::Error> {
let data = self.read_sheet(sheet)?;
Ok(data
.cells
.into_iter()
.filter(|((r, c), _)| *r >= start.0 && *r <= end.0 && *c >= start.1 && *c <= end.1)
.collect())
}
fn read_sheet(&mut self, sheet: &str) -> Result<SheetData, Self::Error> {
let mut wb = self.workbook.write();
wb.read_sheet_by_name(sheet);
let ws = wb
.get_sheet_by_name(sheet)
.ok_or_else(|| umya_spreadsheet::XlsxError::CellError("sheet not found".into()))?;
let mut cells_map: BTreeMap<(u32, u32), CellData> = BTreeMap::new();
for cell in ws.get_cell_collection() {
let coord = cell.get_coordinate();
let col = *coord.get_col_num();
let row = *coord.get_row_num();
let cv = cell.get_cell_value();
let formula = if cv.is_formula() {
let f = cv.get_formula();
if f.is_empty() {
None
} else {
Some(if f.starts_with('=') {
f.to_string()
} else {
format!("={}", f)
})
}
} else {
None
};
let value = Self::convert_cell_value(cv);
if value.is_none() && formula.is_none() {
continue;
}
cells_map.insert(
(row, col),
CellData {
value,
formula,
style: None,
},
);
}
let mut row_hidden_manual: Vec<u32> = ws
.get_row_dimensions_to_hashmap()
.iter()
.filter_map(|(row, row_dim)| {
if *row_dim.get_hidden() {
Some(*row)
} else {
None
}
})
.collect();
row_hidden_manual.sort_unstable();
let dims = cells_map.keys().fold((0u32, 0u32), |mut acc, (r, c)| {
if *r > acc.0 {
acc.0 = *r;
}
if *c > acc.1 {
acc.1 = *c;
}
acc
});
Ok(SheetData {
cells: cells_map,
dimensions: Some(dims),
tables: self.collect_tables(ws),
named_ranges: Self::collect_named_ranges(sheet, &wb, ws),
date_system_1904: false,
merged_cells: vec![],
hidden: false,
row_hidden_manual,
row_hidden_filter: vec![],
})
}
fn sheet_bounds(&self, sheet: &str) -> Option<(u32, u32)> {
let wb = self.workbook.read();
let ws = wb.get_sheet_by_name(sheet)?;
let mut max_r = 0;
let mut max_c = 0;
for cell in ws.get_cell_collection() {
let coord = cell.get_coordinate();
let r = *coord.get_row_num();
let c = *coord.get_col_num();
if r > max_r {
max_r = r;
}
if c > max_c {
max_c = c;
}
}
Some((max_r, max_c))
}
fn is_loaded(&self, sheet: &str, _row: Option<u32>, _col: Option<u32>) -> bool {
let wb = self.workbook.read();
wb.get_sheet_by_name(sheet).is_some()
}
}
impl SpreadsheetWriter for UmyaAdapter {
type Error = umya_spreadsheet::XlsxError;
fn write_cell(
&mut self,
sheet: &str,
row: u32,
col: u32,
data: CellData,
) -> Result<(), Self::Error> {
let mut wb = self.workbook.write();
if wb.get_sheet_by_name(sheet).is_none() {
let _ = wb.new_sheet(sheet);
wb.read_sheet_collection();
}
let ws = wb.get_sheet_by_name_mut(sheet).ok_or_else(|| {
umya_spreadsheet::XlsxError::CellError("sheet create/load failure".into())
})?;
let cell = ws.get_cell_mut((col, row));
if let Some(v) = data.value {
match v {
LiteralValue::Number(n) => {
cell.set_value_number(n);
}
LiteralValue::Int(i) => {
cell.set_value_number(i as f64);
}
LiteralValue::Boolean(b) => {
cell.set_value_bool(b);
}
LiteralValue::Text(s) => {
cell.set_value(s);
}
LiteralValue::Error(e) => {
cell.set_value(e.kind.to_string());
}
LiteralValue::Empty => {
cell.set_blank();
}
LiteralValue::Array(_arr) => {
cell.set_value("#ARRAY");
}
LiteralValue::Date(d) => {
cell.set_value(d.to_string());
}
LiteralValue::DateTime(dt) => {
cell.set_value(dt.to_string());
}
LiteralValue::Time(t) => {
cell.set_value(t.format("%H:%M:%S").to_string());
}
LiteralValue::Duration(dur) => {
cell.set_value(format!("PT{}S", dur.num_seconds()));
}
LiteralValue::Pending => {
cell.set_value("#PENDING");
}
}
} else {
cell.set_blank();
}
if let Some(f) = data.formula {
if let Some(stripped) = f.strip_prefix('=') {
cell.set_formula(stripped); } else {
cell.set_formula(f);
}
}
Ok(())
}
fn write_range(
&mut self,
sheet: &str,
cells: BTreeMap<(u32, u32), CellData>,
) -> Result<(), Self::Error> {
for ((r, c), cd) in cells.into_iter() {
self.write_cell(sheet, r, c, cd)?;
}
Ok(())
}
fn clear_range(
&mut self,
sheet: &str,
start: (u32, u32),
end: (u32, u32),
) -> Result<(), Self::Error> {
let mut wb = self.workbook.write();
wb.read_sheet_by_name(sheet);
let ws = match wb.get_sheet_by_name_mut(sheet) {
Some(s) => s,
None => return Ok(()), };
for r in start.0..=end.0 {
for c in start.1..=end.1 {
ws.get_cell_mut((c, r)).set_blank();
}
}
Ok(())
}
fn create_sheet(&mut self, name: &str) -> Result<(), Self::Error> {
let mut wb = self.workbook.write();
if wb.get_sheet_by_name(name).is_none() {
let _ = wb.new_sheet(name);
}
Ok(())
}
fn delete_sheet(&mut self, name: &str) -> Result<(), Self::Error> {
let mut wb = self.workbook.write();
let _ = wb.remove_sheet_by_name(name); Ok(())
}
fn rename_sheet(&mut self, old: &str, new: &str) -> Result<(), Self::Error> {
let mut wb = self.workbook.write();
if let Some(s) = wb.get_sheet_by_name_mut(old) {
s.set_name(new);
}
Ok(())
}
fn flush(&mut self) -> Result<(), Self::Error> {
Ok(())
}
fn save_to<'a>(
&mut self,
dest: crate::traits::SaveDestination<'a>,
) -> Result<Option<Vec<u8>>, Self::Error> {
use crate::traits::SaveDestination;
match dest {
SaveDestination::InPlace => {
let path = self.original_path.as_ref().ok_or_else(|| {
umya_spreadsheet::XlsxError::Io(std::io::Error::new(
std::io::ErrorKind::Unsupported,
"InPlace save unavailable: no original path",
))
})?;
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
for i in 0..count {
wb.read_sheet(i);
}
umya_spreadsheet::writer::xlsx::write(&wb, path)?;
Ok(None)
}
SaveDestination::Path(p) => {
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
for i in 0..count {
wb.read_sheet(i);
}
umya_spreadsheet::writer::xlsx::write(&wb, p)?;
Ok(None)
}
SaveDestination::Writer(w) => {
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
for i in 0..count {
wb.read_sheet(i);
}
umya_spreadsheet::writer::xlsx::write_writer(&wb, w)?;
Ok(None)
}
SaveDestination::Bytes => {
let mut wb = self.workbook.write();
let count = wb.get_sheet_count();
for i in 0..count {
wb.read_sheet(i);
}
let mut buf: Vec<u8> = Vec::new();
umya_spreadsheet::writer::xlsx::write_writer(&wb, &mut buf)?;
Ok(Some(buf))
}
}
}
}
impl UmyaAdapter {
fn convert_defined_name_stable(
defined: &UmyaDefinedName,
declared_on_sheet: Option<&str>,
sheet_names: &[String],
) -> Option<WorkbookDefinedName> {
let raw = defined.get_address();
let mut trimmed = raw.trim();
if let Some(rest) = trimmed.strip_prefix('=') {
trimmed = rest.trim();
}
if trimmed.is_empty() || trimmed.contains(',') {
return None;
}
let reference = ReferenceType::from_string(trimmed).ok()?;
let scope_sheet = if defined.has_local_sheet_id() {
let idx = *defined.get_local_sheet_id() as usize;
sheet_names.get(idx).cloned()
} else {
None
};
let scope = if scope_sheet.is_some() {
DefinedNameScope::Sheet
} else {
DefinedNameScope::Workbook
};
let base_sheet = scope_sheet.as_deref().or(declared_on_sheet);
let (sheet_name, start_row, start_col, end_row, end_col) = match reference {
ReferenceType::Cell {
sheet, row, col, ..
} => {
let sheet = sheet.or_else(|| base_sheet.map(|s| s.to_string()))?;
let (row, col) = Self::clamp_excel_cell(row, col);
(sheet, row, col, row, col)
}
ReferenceType::Range {
sheet,
start_row,
start_col,
end_row,
end_col,
..
} => {
let (sr, sc, er, ec) =
Self::normalize_open_ended_bounds(start_row, start_col, end_row, end_col)?;
let sheet = sheet.or_else(|| base_sheet.map(|s| s.to_string()))?;
(sheet, sr, sc, er, ec)
}
_ => return None,
};
let address = RangeAddress::new(sheet_name, start_row, start_col, end_row, end_col).ok()?;
Some(WorkbookDefinedName {
name: defined.get_name().to_string(),
scope,
scope_sheet,
definition: DefinedNameDefinition::Range { address },
})
}
fn collect_named_ranges(
sheet_name: &str,
workbook: &Spreadsheet,
worksheet: &Worksheet,
) -> Vec<NamedRange> {
let mut ranges = Vec::new();
let mut seen: HashSet<(NamedRangeScope, String)> = HashSet::new();
for defined in worksheet.get_defined_names() {
if let Some(named) = Self::convert_defined_name(defined, sheet_name) {
let key = (named.scope.clone(), named.name.clone());
if seen.insert(key) {
ranges.push(named);
}
}
}
for defined in workbook.get_defined_names() {
if let Some(named) = Self::convert_defined_name(defined, sheet_name) {
let key = (named.scope.clone(), named.name.clone());
if seen.insert(key) {
ranges.push(named);
}
}
}
ranges
}
fn collect_tables(&self, worksheet: &Worksheet) -> Vec<crate::traits::TableDefinition> {
worksheet
.get_tables()
.iter()
.map(|t| {
let (beg, end) = t.get_area();
let headers = t
.get_columns()
.iter()
.map(|c| c.get_name().to_string())
.collect();
let name = t.get_name().to_string();
let header_row = self.table_header_row_for(&name);
crate::traits::TableDefinition {
name,
range: (
*beg.get_row_num(),
*beg.get_col_num(),
*end.get_row_num(),
*end.get_col_num(),
),
header_row,
headers,
totals_row: *t.get_totals_row_shown(),
}
})
.collect()
}
fn convert_defined_name(defined: &UmyaDefinedName, current_sheet: &str) -> Option<NamedRange> {
let raw = defined.get_address();
let trimmed = raw.trim();
if trimmed.is_empty() || trimmed.contains(',') {
return None;
}
let reference = ReferenceType::from_string(trimmed).ok()?;
let (sheet_name, start_row, start_col, end_row, end_col) = match reference {
ReferenceType::Cell {
sheet, row, col, ..
} => {
let sheet = sheet.unwrap_or_else(|| current_sheet.to_string());
let (row, col) = Self::clamp_excel_cell(row, col);
(sheet, row, col, row, col)
}
ReferenceType::Range {
sheet,
start_row,
start_col,
end_row,
end_col,
..
} => {
let (sr, sc, er, ec) =
Self::normalize_open_ended_bounds(start_row, start_col, end_row, end_col)?;
let sheet = sheet.unwrap_or_else(|| current_sheet.to_string());
(sheet, sr, sc, er, ec)
}
_ => return None,
};
if sheet_name != current_sheet {
return None;
}
let scope = if defined.has_local_sheet_id() {
NamedRangeScope::Sheet
} else {
NamedRangeScope::Workbook
};
let address = RangeAddress::new(sheet_name, start_row, start_col, end_row, end_col).ok()?;
Some(NamedRange {
name: defined.get_name().to_string(),
scope,
address,
})
}
}
impl<R> formualizer_eval::engine::ingest::EngineLoadStream<R> for UmyaAdapter
where
R: formualizer_eval::traits::EvaluationContext,
{
type Error = crate::error::IoError;
fn stream_into_engine(
&mut self,
engine: &mut formualizer_eval::engine::Engine<R>,
) -> Result<(), Self::Error> {
use crate::error::IoError;
use formualizer_eval::arrow_store::IngestBuilder;
use formualizer_eval::engine::named_range::{NameScope, NamedDefinition};
use formualizer_eval::reference::{CellRef, Coord};
let names = self
.sheet_names()
.map_err(|e| IoError::from_backend("umya", e))?;
for n in &names {
engine.add_sheet(n).map_err(IoError::Engine)?;
}
let prev_index_mode = engine.config.sheet_index_mode;
engine.set_sheet_index_mode(formualizer_eval::engine::SheetIndexMode::Lazy);
let prev_range_limit = engine.config.range_expansion_limit;
engine.config.range_expansion_limit = 0;
engine.set_first_load_assume_new(true);
engine.reset_ensure_touched();
let chunk_rows: usize = 32 * 1024;
let mut eager_formula_batches: Vec<FormulaBatch> = Vec::new();
for n in &names {
let sheet_data = self
.read_sheet(n)
.map_err(|e| IoError::from_backend("umya", e))?;
let dims = sheet_data.dimensions.unwrap_or_else(|| {
sheet_data
.cells
.keys()
.fold((0u32, 0u32), |mut acc, (r, c)| {
if *r > acc.0 {
acc.0 = *r;
}
if *c > acc.1 {
acc.1 = *c;
}
acc
})
});
let rows = dims.0 as usize;
let cols = dims.1 as usize;
enforce_sheet_load_limits(
"umya",
n,
dims.0,
dims.1,
sheet_data.cells.len(),
engine.workbook_load_limits(),
)?;
let mut aib = IngestBuilder::new(n, cols, chunk_rows, engine.config.date_system);
for r in 1..=rows {
let mut row_vals = vec![LiteralValue::Empty; cols];
for c in 1..=cols {
if let Some(cd) = sheet_data.cells.get(&(r as u32, c as u32))
&& let Some(v) = &cd.value
{
row_vals[c - 1] = v.clone();
}
}
aib.append_row(&row_vals).map_err(IoError::Engine)?;
}
let asheet = aib.finish();
let store = engine.sheet_store_mut();
if let Some(pos) = store.sheets.iter().position(|s| s.name.as_ref() == n) {
store.sheets[pos] = asheet;
} else {
store.sheets.push(asheet);
}
if let Some(sheet_id) = engine.sheet_id(n) {
for table in &sheet_data.tables {
let (sr, sc, er, ec) = table.range;
let sr0 = sr.saturating_sub(1);
let sc0 = sc.saturating_sub(1);
let er0 = er.saturating_sub(1);
let ec0 = ec.saturating_sub(1);
let start_ref = CellRef::new(sheet_id, Coord::new(sr0, sc0, true, true));
let end_ref = CellRef::new(sheet_id, Coord::new(er0, ec0, true, true));
let range_ref = formualizer_eval::reference::RangeRef::new(start_ref, end_ref);
engine.define_table(
&table.name,
range_ref,
table.header_row,
table.headers.clone(),
table.totals_row,
)?;
}
}
if engine.config.defer_graph_building {
for ((row, col), cd) in &sheet_data.cells {
if let Some(f) = &cd.formula {
if f.is_empty() {
continue;
}
engine.stage_formula_text(n, *row, *col, f.clone());
}
}
} else {
let mut formulas: Vec<(u32, u32, formualizer_parse::ASTNode)> = Vec::new();
for ((row, col), cd) in &sheet_data.cells {
if let Some(f) = &cd.formula {
if f.is_empty() {
continue;
}
let with_eq = if f.starts_with('=') {
f.clone()
} else {
format!("={f}")
};
match formualizer_parse::parser::parse(&with_eq) {
Ok(parsed) => formulas.push((*row, *col, parsed)),
Err(e) => {
if let Some(recovered) = engine
.handle_formula_parse_error(
n,
*row,
*col,
&with_eq,
e.to_string(),
)
.map_err(IoError::Engine)?
{
formulas.push((*row, *col, recovered));
}
}
}
}
}
if !formulas.is_empty() {
eager_formula_batches.push((n.clone(), formulas));
}
}
for row in &sheet_data.row_hidden_manual {
engine
.set_row_hidden(
n,
*row,
true,
formualizer_eval::engine::RowVisibilitySource::Manual,
)
.map_err(|e| IoError::from_backend("umya", e))?;
}
for row in &sheet_data.row_hidden_filter {
engine
.set_row_hidden(
n,
*row,
true,
formualizer_eval::engine::RowVisibilitySource::Filter,
)
.map_err(|e| IoError::from_backend("umya", e))?;
}
}
if !engine.config.defer_graph_building && !eager_formula_batches.is_empty() {
let mut builder = engine.begin_bulk_ingest();
for (sheet_name, formulas) in eager_formula_batches {
let sid = builder.add_sheet(&sheet_name);
builder.add_formulas(sid, formulas.into_iter());
}
builder.finish().map_err(IoError::Engine)?;
}
{
use rustc_hash::FxHashSet;
let defined = self
.defined_names()
.map_err(|e| IoError::from_backend("umya", e))?;
let mut seen: FxHashSet<(DefinedNameScope, Option<String>, String)> =
FxHashSet::default();
for dn in defined {
let key = (dn.scope.clone(), dn.scope_sheet.clone(), dn.name.clone());
if !seen.insert(key) {
continue;
}
let scope = match dn.scope {
DefinedNameScope::Workbook => NameScope::Workbook,
DefinedNameScope::Sheet => {
let sheet_name =
dn.scope_sheet.as_deref().ok_or_else(|| IoError::Backend {
backend: "umya".to_string(),
message: format!(
"sheet-scoped defined name `{}` missing scope_sheet",
dn.name
),
})?;
let sid = engine
.sheet_id(sheet_name)
.ok_or_else(|| IoError::Backend {
backend: "umya".to_string(),
message: format!("scope sheet not found: {sheet_name}"),
})?;
NameScope::Sheet(sid)
}
};
let definition = match dn.definition {
DefinedNameDefinition::Range { address } => {
let sheet_id = engine
.sheet_id(&address.sheet)
.or_else(|| engine.add_sheet(&address.sheet).ok())
.ok_or_else(|| IoError::Backend {
backend: "umya".to_string(),
message: format!("sheet not found: {}", address.sheet),
})?;
let sr0 = address.start_row.saturating_sub(1);
let sc0 = address.start_col.saturating_sub(1);
let er0 = address.end_row.saturating_sub(1);
let ec0 = address.end_col.saturating_sub(1);
let start_ref = CellRef::new(sheet_id, Coord::new(sr0, sc0, true, true));
if sr0 == er0 && sc0 == ec0 {
NamedDefinition::Cell(start_ref)
} else {
let end_ref = CellRef::new(sheet_id, Coord::new(er0, ec0, true, true));
let range_ref =
formualizer_eval::reference::RangeRef::new(start_ref, end_ref);
NamedDefinition::Range(range_ref)
}
}
DefinedNameDefinition::Literal { value } => NamedDefinition::Literal(value),
};
engine.define_name(&dn.name, definition, scope)?;
}
}
for n in &names {
engine.finalize_sheet_index(n);
}
engine.set_first_load_assume_new(false);
engine.reset_ensure_touched();
engine.set_sheet_index_mode(prev_index_mode);
engine.config.range_expansion_limit = prev_range_limit;
Ok(())
}
}