use std::collections::HashMap;
use std::io::Cursor;
use std::path::Path;
use anyhow::{anyhow, bail, Context, Result};
use crate::calamine::{Data as CData, Reader, Xlsx};
use crate::plan::SourceTable;
use crate::styles::{self, NumFmtKind, TemplateStyles};
use crate::value::Value;
#[derive(Debug, Clone)]
pub struct SourceData {
pub name: String,
pub headers: Vec<String>,
pub rows: Vec<HashMap<String, Value>>,
}
pub trait SourceReader {
fn read(&mut self, sheet: &str, table: &SourceTable) -> Result<SourceData>;
}
pub struct CalamineSourceReader {
workbook: Xlsx<Cursor<Vec<u8>>>,
bytes: Vec<u8>,
styles: Option<TemplateStyles>,
}
impl CalamineSourceReader {
pub fn open(path: &Path) -> Result<Self> {
let bytes = std::fs::read(path)
.with_context(|| format!("read data workbook at {}", path.display()))?;
Self::open_bytes(bytes)
}
pub fn open_bytes(bytes: Vec<u8>) -> Result<Self> {
let cursor = Cursor::new(bytes.clone());
let workbook: Xlsx<_> =
Xlsx::new(cursor).context("open data workbook from bytes")?;
Ok(CalamineSourceReader {
workbook,
bytes,
styles: None,
})
}
fn ensure_styles(&mut self) -> &TemplateStyles {
if self.styles.is_none() {
self.styles =
Some(styles::parse_template_styles_bytes(&self.bytes).unwrap_or_default());
}
self.styles.as_ref().unwrap()
}
pub fn first_sheet(&self) -> Option<String> {
self.workbook.sheet_names().into_iter().next()
}
pub fn resolve_sheet_name(&self, pattern: &str) -> Option<String> {
if let Some(prefix) = pattern.strip_suffix('*') {
self.workbook
.sheet_names()
.into_iter()
.find(|n| n.starts_with(prefix))
} else {
let pattern = pattern.to_string();
self.workbook
.sheet_names()
.into_iter()
.find(|n| n == &pattern)
}
}
}
pub fn is_blank_value(v: &Value) -> bool {
match v {
Value::Empty => true,
Value::String(s) => s.chars().all(|c| c.is_whitespace()),
_ => false,
}
}
impl SourceReader for CalamineSourceReader {
fn read(&mut self, sheet: &str, table: &SourceTable) -> Result<SourceData> {
self.ensure_styles();
let styles = self.styles.clone().unwrap_or_default();
let header_merges = self
.workbook
.worksheet_merge_cells(sheet)
.and_then(|r| r.ok())
.unwrap_or_default();
let range = self
.workbook
.worksheet_range(sheet)
.with_context(|| format!("read source sheet {sheet:?}"))?;
if range.get_size() == (0, 0) {
return Ok(SourceData {
name: sheet.to_string(),
headers: vec![],
rows: vec![],
});
}
let (last_row_abs, last_col_abs) = range
.end()
.map(|(r, c)| (r as usize, c as usize))
.unwrap_or((0, 0));
let (header_row, data_first, data_last_excl, col_first, col_last_excl) = match table {
SourceTable::HeaderRow(n) => {
let header = n.saturating_sub(1);
let row_end_excl = last_row_abs + 1;
let col_end_excl = last_col_abs + 1;
(header, header + 1, row_end_excl, 0usize, col_end_excl)
}
SourceTable::Range {
first_row,
last_row,
first_col,
last_col,
} => {
let header = first_row.saturating_sub(1);
let data_first = header + 1;
let data_last_excl = match last_row {
Some(lr) => (*lr).min(last_row_abs + 1),
None => last_row_abs + 1,
};
let col_first0 = first_col.saturating_sub(1);
let col_last_excl0 = match last_col {
Some(lc) => (*lc).min(last_col_abs + 1),
None => last_col_abs + 1,
};
(header, data_first, data_last_excl, col_first0, col_last_excl0)
}
};
if header_row > last_row_abs {
return Err(anyhow!(
"source sheet {sheet:?} header row {} is past the last used row {}",
header_row + 1,
last_row_abs + 1
));
}
let slave_to_master: HashMap<(usize, usize), (u32, u32)> = header_merges
.iter()
.flat_map(|d| {
let master = (d.start.0, d.start.1);
let mut out: Vec<((usize, usize), (u32, u32))> = Vec::new();
for r in d.start.0..=d.end.0 {
for c in d.start.1..=d.end.1 {
if (r, c) != master {
out.push(((r as usize, c as usize), master));
}
}
}
out
})
.collect();
let master_end_col: HashMap<(u32, u32), usize> = header_merges
.iter()
.map(|d| ((d.start.0, d.start.1), d.end.1 as usize))
.collect();
let cell_at = |r: usize, c: usize| -> Option<&CData> {
if let Some(&(mr, mc)) = slave_to_master.get(&(r, c)) {
range.get_value((mr, mc))
} else {
range.get_value((r as u32, c as u32))
}
};
let mut headers: Vec<String> = Vec::new();
let mut header_cols: Vec<usize> = Vec::new();
let mut c = col_first;
let mut seen_masters: std::collections::HashSet<(u32, u32)> =
std::collections::HashSet::new();
while c < col_last_excl {
let master_pos = slave_to_master
.get(&(header_row, c))
.copied()
.unwrap_or((header_row as u32, c as u32));
if !seen_masters.insert(master_pos) {
c += 1;
continue;
}
let cell = range.get_value(master_pos);
match cell {
Some(CData::String(s)) if !s.is_empty() => {
headers.push(s.clone());
header_cols.push(c);
if let Some(&end_col) = master_end_col.get(&master_pos) {
c = end_col + 1;
} else {
c += 1;
}
}
None | Some(CData::Empty) => break,
Some(other) => {
bail!(
"source header at column {c} is not a string: {other:?} (xl3 expects text headers)"
);
}
}
}
if headers.is_empty() {
return Err(anyhow!("source sheet {sheet:?} has no header row"));
}
let mut data_rows = Vec::with_capacity(data_last_excl.saturating_sub(data_first));
for r in data_first..data_last_excl {
let mut record = HashMap::with_capacity(headers.len());
let mut row_blank = true;
for (i, header) in headers.iter().enumerate() {
let c = header_cols[i];
let raw = cell_at(r, c)
.map(Value::from_calamine)
.unwrap_or(Value::Empty);
let v = match &raw {
Value::Number(n) => {
let kind = styles
.format_code(sheet, r as u32, c as u32)
.as_deref()
.map(styles::classify_num_fmt)
.unwrap_or(NumFmtKind::General);
if kind == NumFmtKind::Date {
Value::DateNumber(*n)
} else {
raw
}
}
_ => raw,
};
if !is_blank_value(&v) {
row_blank = false;
}
record.insert(header.clone(), v);
}
if row_blank {
continue;
}
data_rows.push(record);
}
Ok(SourceData {
name: sheet.to_string(),
headers,
rows: data_rows,
})
}
}