use std::fs::File;
use std::io::BufReader;
use calamine_styles::{Data, Reader, Sheets};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use crate::cell::{Cell, CellValue};
use crate::error::{Error, Result};
use crate::workbook::WorkbookStyles;
pub(crate) type SheetsReader = Sheets<BufReader<File>>;
pub struct Sheet {
pub name: String,
rows: Vec<Vec<Cell>>,
}
impl Sheet {
pub(crate) fn load(
wb: &mut SheetsReader,
name: &str,
mut styles: Option<&mut WorkbookStyles>,
) -> Result<Self> {
let value_range = wb
.worksheet_range(name)
.map_err(|e| Error::Xlsx(format!("read range for {name:?}: {e}")))?;
let style_range = wb.worksheet_style(name).ok();
if let Some(s) = styles.as_mut() {
let _ = s.sheet_style_ids_mut(name);
}
let (h, w) = value_range.get_size();
let (start_row, start_col) = value_range.start().unwrap_or((0, 0));
let mut rows: Vec<Vec<Cell>> = Vec::with_capacity(h);
for r in 0..h {
let mut row: Vec<Cell> = Vec::with_capacity(w);
for c in 0..w {
let value = value_range
.get((r, c))
.map(data_to_cell_value)
.unwrap_or(CellValue::Empty);
let absolute_position = absolute_position(start_row, start_col, r, c);
let number_format = style_range
.as_ref()
.and_then(|sr| {
let (row, col) = absolute_position?;
style_at_absolute_position(sr, row, col)
})
.and_then(extract_number_format)
.or_else(|| {
styles
.as_ref()
.and_then(|s| walker_number_format(s, name, start_row, start_col, r, c))
});
row.push(Cell {
value,
number_format,
});
}
rows.push(row);
}
Ok(Self {
name: name.to_string(),
rows,
})
}
pub fn dimensions(&self) -> (usize, usize) {
let h = self.rows.len();
let w = self.rows.first().map(|r| r.len()).unwrap_or(0);
(h, w)
}
#[cfg(test)]
pub(crate) fn from_rows_for_test(name: &str, rows: Vec<Vec<Cell>>) -> Self {
Self {
name: name.to_string(),
rows,
}
}
pub fn from_rows(name: String, rows: Vec<Vec<Cell>>) -> Self {
Self { name, rows }
}
pub fn rows(&self) -> &[Vec<Cell>] {
&self.rows
}
pub fn row(&self, idx: usize) -> Option<&[Cell]> {
self.rows.get(idx).map(|r| r.as_slice())
}
pub fn headers(&self) -> Vec<String> {
self.rows
.first()
.map(|row| {
row.iter()
.map(|c| match &c.value {
CellValue::String(s) => s.clone(),
CellValue::Empty => String::new(),
other => format_value_plain(other),
})
.collect()
})
.unwrap_or_default()
}
}
fn absolute_position(
start_row: u32,
start_col: u32,
row_offset: usize,
col_offset: usize,
) -> Option<(u32, u32)> {
let row = start_row.checked_add(u32::try_from(row_offset).ok()?)?;
let col = start_col.checked_add(u32::try_from(col_offset).ok()?)?;
Some((row, col))
}
fn style_at_absolute_position(
range: &calamine_styles::StyleRange,
row: u32,
col: u32,
) -> Option<&calamine_styles::Style> {
let (start_row, start_col) = range.start()?;
if row < start_row || col < start_col {
return None;
}
let rel_row = usize::try_from(row - start_row).ok()?;
let rel_col = usize::try_from(col - start_col).ok()?;
range.get((rel_row, rel_col))
}
fn format_value_plain(v: &CellValue) -> String {
match v {
CellValue::Empty => String::new(),
CellValue::String(s) => s.clone(),
CellValue::Int(n) => n.to_string(),
CellValue::Float(n) => n.to_string(),
CellValue::Bool(b) => if *b { "TRUE" } else { "FALSE" }.to_string(),
CellValue::Date(d) => d.format("%Y-%m-%d").to_string(),
CellValue::DateTime(dt) => dt.format("%Y-%m-%d %H:%M:%S").to_string(),
CellValue::Time(t) => t.format("%H:%M:%S").to_string(),
CellValue::Error(e) => e.clone(),
}
}
fn data_to_cell_value(d: &Data) -> CellValue {
match d {
Data::Empty => CellValue::Empty,
Data::String(s) => CellValue::String(s.clone()),
Data::Int(i) => CellValue::Int(*i),
Data::Float(f) => {
if f.fract() == 0.0 && f.abs() < (i64::MAX as f64) {
CellValue::Int(*f as i64)
} else {
CellValue::Float(*f)
}
}
Data::Bool(b) => CellValue::Bool(*b),
Data::DateTime(dt) => excel_serial_to_datetime(dt.as_f64()),
Data::DateTimeIso(s) => parse_iso_datetime_or_string(s),
Data::DurationIso(s) => CellValue::String(s.clone()),
Data::Error(e) => CellValue::Error(format!("{e:?}")),
Data::RichText(rt) => CellValue::String(rt.plain_text().to_string()),
}
}
fn excel_serial_to_datetime(serial: f64) -> CellValue {
if serial < 1.0 && serial >= 0.0 {
let mut secs = (serial * 86_400.0).round() as u32;
if secs >= 86_400 {
secs -= 86_400;
}
let h = secs / 3600;
let m = (secs % 3600) / 60;
let s = secs % 60;
return NaiveTime::from_hms_opt(h, m, s)
.map(CellValue::Time)
.unwrap_or_else(|| CellValue::Float(serial));
}
let mut days = serial.trunc() as i64;
let frac = serial - (days as f64);
if serial > 0.0 && serial < 60.0 {
days += 1;
}
if frac.abs() < f64::EPSILON {
return CellValue::Date(days_to_date_from_excel_base(days));
}
let mut secs_signed = (frac * 86_400.0).round() as i64;
if secs_signed < 0 {
let borrow_days = (-secs_signed + 86_399) / 86_400; secs_signed += borrow_days * 86_400;
days -= borrow_days;
} else if secs_signed >= 86_400 {
let carry_days = secs_signed / 86_400;
secs_signed -= carry_days * 86_400;
days += carry_days;
}
let secs = secs_signed as u32; let date = days_to_date_from_excel_base(days);
let h = secs / 3600;
let m = (secs % 3600) / 60;
let s = secs % 60;
let time = NaiveTime::from_hms_opt(h, m, s)
.unwrap_or_else(|| NaiveTime::from_hms_opt(0, 0, 0).unwrap());
CellValue::DateTime(NaiveDateTime::new(date, time))
}
fn days_to_date_from_excel_base(days: i64) -> NaiveDate {
let base = NaiveDate::from_ymd_opt(1899, 12, 30).expect("static date");
if days >= 0 {
base.checked_add_days(chrono::Days::new(days as u64))
} else {
base.checked_sub_days(chrono::Days::new((-days) as u64))
}
.unwrap_or(base)
}
fn parse_iso_datetime_or_string(s: &str) -> CellValue {
if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
return CellValue::DateTime(dt);
}
if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
return CellValue::DateTime(dt);
}
if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
return CellValue::Date(d);
}
CellValue::String(s.to_string())
}
fn extract_number_format(style: &calamine_styles::Style) -> Option<String> {
let nf = style.get_number_format()?;
let code = nf.format_code.trim();
if code.is_empty() || code.eq_ignore_ascii_case("general") {
None
} else {
Some(code.to_string())
}
}
fn walker_number_format(
styles: &WorkbookStyles,
sheet_name: &str,
start_row: u32,
start_col: u32,
r: usize,
c: usize,
) -> Option<String> {
let row = start_row.checked_add(u32::try_from(r).ok()?)?;
let col = start_col.checked_add(u32::try_from(c).ok()?)?;
let style_id = styles
.sheet_style_ids(sheet_name)?
.get(&(row, col))
.copied()?;
styles
.number_format_for_style_id(style_id)
.map(|s| s.to_string())
}
#[cfg(test)]
mod tests {
use chrono::Datelike;
use super::*;
fn date(value: CellValue) -> NaiveDate {
match value {
CellValue::Date(d) => d,
other => panic!("expected Date, got {other:?}"),
}
}
#[test]
fn absolute_position_overflow_returns_none() {
assert_eq!(absolute_position(10, 20, 2, 3), Some((12, 23)));
assert_eq!(absolute_position(u32::MAX, 20, 1, 0), None);
assert_eq!(absolute_position(10, u32::MAX, 0, 1), None);
assert_eq!(absolute_position(10, 20, usize::MAX, 0), None);
}
#[test]
fn excel_serial_matches_openpyxl_for_pre_leap_serials() {
assert_eq!(
date(excel_serial_to_datetime(1.0)),
NaiveDate::from_ymd_opt(1900, 1, 1).unwrap()
);
assert_eq!(
date(excel_serial_to_datetime(59.0)),
NaiveDate::from_ymd_opt(1900, 2, 28).unwrap()
);
assert_eq!(
date(excel_serial_to_datetime(61.0)),
NaiveDate::from_ymd_opt(1900, 3, 1).unwrap()
);
assert_eq!(
date(excel_serial_to_datetime(44197.0)),
NaiveDate::from_ymd_opt(2021, 1, 1).unwrap()
);
}
#[test]
fn excel_serial_negative_does_not_wrap() {
let value = excel_serial_to_datetime(-100.0);
let d = date(value);
assert!(d.year() < 1900, "got {d}");
}
#[test]
fn excel_serial_sub_day_near_midnight_carries_to_zero_time() {
let value = excel_serial_to_datetime(0.99999999);
match value {
CellValue::Time(t) => {
assert_eq!(t, NaiveTime::from_hms_opt(0, 0, 0).unwrap());
}
other => panic!("expected Time(00:00:00), got {other:?}"),
}
}
#[test]
fn excel_serial_zero_returns_midnight_time() {
let value = excel_serial_to_datetime(0.0);
match value {
CellValue::Time(t) => {
assert_eq!(t, NaiveTime::from_hms_opt(0, 0, 0).unwrap());
}
other => panic!("expected Time(00:00:00), got {other:?}"),
}
}
#[test]
fn excel_serial_negative_fractional_borrows_into_prior_day() {
let value = excel_serial_to_datetime(-0.5);
match value {
CellValue::DateTime(dt) => {
assert_eq!(
dt.date(),
NaiveDate::from_ymd_opt(1899, 12, 29).unwrap(),
"expected borrow into prior day, got {dt}",
);
assert_eq!(dt.time(), NaiveTime::from_hms_opt(12, 0, 0).unwrap());
}
other => panic!("expected DateTime, got {other:?}"),
}
}
#[test]
fn excel_serial_carries_near_midnight_fraction_to_next_day() {
let value = excel_serial_to_datetime(44197.0 + 0.99999999);
match value {
CellValue::DateTime(dt) => {
assert_eq!(dt.date(), NaiveDate::from_ymd_opt(2021, 1, 2).unwrap(),);
assert_eq!(dt.time(), NaiveTime::from_hms_opt(0, 0, 0).unwrap());
}
other => panic!("expected DateTime, got {other:?}"),
}
}
}