use std::io::{Read, Seek};
use crate::sheet::{Workbook, Worksheet as SheetTrait, WorksheetIterator};
use crate::ole::xls::error::{XlsError, XlsResult};
use crate::ole::xls::records::{RecordIter, BofRecord, BoundSheetRecord, SharedStringTable, XlsEncoding, BiffVersion, CellRecord, DimensionsRecord};
use crate::ole::xls::worksheet::XlsWorksheet;
use crate::ole::xls::cell::XlsCell;
use crate::ole::file::OleFile;
/// Workbook parsed from an XLS file stored in an OLE container.
///
/// All parsing happens eagerly in [`XlsWorkbook::new`]; the accessors only
/// read the state collected here.
#[derive(Debug)]
pub struct XlsWorkbook<R: Read + Seek> {
/// OLE container opened from the reader passed to `new`.
ole_file: OleFile<R>,
/// Worksheets that were parsed successfully, in bound-sheet order.
worksheets: Vec<XlsWorksheet>,
/// Names taken from every bound-sheet record, including sheets whose
/// parsing failed and which are therefore missing from `worksheets`.
worksheet_names: Vec<String>,
/// Shared strings collected from the workbook globals; `None` until the
/// globals have been parsed.
shared_strings: Option<Vec<String>>,
/// BIFF version reported by the workbook's BOF record; starts as `Biff8`.
biff_version: BiffVersion,
/// Whether the workbook uses the 1904 date system; starts as `false`.
is_1904_date_system: bool,
}
impl<R: Read + Seek> XlsWorkbook<R> {
pub fn new(reader: R) -> XlsResult<Self> {
let ole_file = OleFile::open(reader)?;
let mut workbook = XlsWorkbook {
ole_file,
worksheets: Vec::new(),
worksheet_names: Vec::new(),
shared_strings: None,
biff_version: BiffVersion::Biff8,
is_1904_date_system: false,
};
workbook.parse_workbook()?;
Ok(workbook)
}
/// Reads the workbook stream and populates the shared strings, sheet names
/// and worksheets of `self`.
///
/// The stream is looked up as `Workbook` first and as `Book` second. Text is
/// decoded with codepage 1252 unless the globals select another codepage.
///
/// Worksheet parsing is best-effort: a sheet that fails to parse is skipped
/// (with a warning on stderr) and does not fail the whole workbook. Its name
/// still appears in `worksheet_names`, so after a failure that list can be
/// longer than `worksheets`.
///
/// # Errors
///
/// Returns an error if neither stream can be opened, the record iterator
/// cannot be created, or the workbook globals fail to parse.
fn parse_workbook(&mut self) -> XlsResult<()> {
    let workbook_data = self.ole_file.open_stream(&["Workbook"])
        .or_else(|_| self.ole_file.open_stream(&["Book"]))?;
    let mut record_iter = RecordIter::new(std::io::Cursor::new(&workbook_data))?;

    let mut encoding = XlsEncoding::from_codepage(1252)?;
    let mut bound_sheets = Vec::new();
    let mut strings = Vec::new();
    self.parse_workbook_globals(&mut record_iter, &mut encoding, &mut bound_sheets, &mut strings)?;
    self.shared_strings = Some(strings);
    self.worksheet_names = bound_sheets.iter().map(|s| s.name.clone()).collect();

    for bound_sheet in &bound_sheets {
        match self.parse_worksheet_from_position(bound_sheet, &encoding, &mut record_iter) {
            Ok(worksheet) => self.worksheets.push(worksheet),
            // Report on stderr so a library consumer's stdout stays clean.
            Err(e) => eprintln!("warning: skipping worksheet '{}': {}", bound_sheet.name, e),
        }
    }
    Ok(())
}
/// Parses the workbook-globals substream at the start of `record_iter`.
///
/// Records are read up to and including the first EOF record (0x000A); the
/// worksheet substreams that follow are left untouched, so a malformed
/// record inside one sheet cannot fail the globals pass.
///
/// * `encoding` is replaced when a codepage record (0x0042) is found.
/// * `bound_sheets` receives one entry per bound-sheet record (0x0085).
/// * `strings` receives the shared strings from each SST record (0x00FC)
///   together with the 0x003C records that directly follow it.
///
/// `self.biff_version` and `self.is_1904_date_system` are updated from the
/// BOF record (0x0809); a 0x0022 record sets the date-system flag again.
///
/// # Errors
///
/// Returns an error if a record cannot be read from the iterator or if any
/// of the handled records fails to parse.
fn parse_workbook_globals<Reader: Read + Seek>(
    &mut self,
    record_iter: &mut RecordIter<Reader>,
    encoding: &mut XlsEncoding,
    bound_sheets: &mut Vec<BoundSheetRecord>,
    strings: &mut Vec<String>,
) -> XlsResult<()> {
    // Buffer only the globals substream: the loop below never looks past
    // the first EOF record, so reading further would only waste memory and
    // expose this pass to errors in unrelated worksheet records.
    let mut records = Vec::new();
    for record_result in record_iter.by_ref() {
        let record = record_result?;
        let is_eof = record.header.record_type == 0x000A;
        records.push(record);
        if is_eof {
            break;
        }
    }

    let mut i = 0;
    while i < records.len() {
        let record = &records[i];
        match record.header.record_type {
            // BOF: BIFF version and date system.
            0x0809 => {
                let bof = BofRecord::parse(&record.data)?;
                self.biff_version = bof.version;
                self.is_1904_date_system = bof.is_1904_date_system;
            }
            // Codepage used to decode strings; too-short records are ignored.
            0x0042 => {
                if record.data.len() >= 2 {
                    let codepage = crate::ole::binary::read_u16_le_at(&record.data, 0)?;
                    *encoding = XlsEncoding::from_codepage(codepage)?;
                }
            }
            // Date-system flag: 1 selects the 1904 date system.
            0x0022 => {
                if record.data.len() >= 2 {
                    let flag = crate::ole::binary::read_u16_le_at(&record.data, 0)?;
                    self.is_1904_date_system = flag == 1;
                }
            }
            // Bound sheet: name and stream position of one worksheet.
            0x0085 => {
                let sheet = BoundSheetRecord::parse(&record.data, encoding)?;
                bound_sheets.push(sheet);
            }
            // Shared string table plus the 0x003C records that extend it.
            0x00FC => {
                let mut sst_records = vec![record.clone()];
                let mut sst_idx = i + 1;
                while sst_idx < records.len() && records[sst_idx].header.record_type == 0x003C {
                    sst_records.push(records[sst_idx].clone());
                    sst_idx += 1;
                }
                let sst = SharedStringTable::parse_from_records(&sst_records, encoding)?;
                strings.extend(sst.strings);
                // Land on the last consumed record; the increment below
                // then moves past the whole run.
                i = sst_idx - 1;
            }
            // EOF: end of the globals substream.
            0x000A => break,
            _ => {}
        }
        i += 1;
    }
    Ok(())
}
/// Seeks `record_iter` to the sheet's recorded position and parses the
/// worksheet that starts there.
///
/// The first record at that position must be a BOF record (0x0809). The
/// worksheet receives its own copy of the workbook's shared strings (an
/// empty list when none have been parsed).
///
/// # Errors
///
/// * `XlsError::Eof` if no record is available after seeking.
/// * `XlsError::UnexpectedRecordType` if the first record is not a BOF.
/// * Any error from seeking, reading, or parsing the sheet's records.
fn parse_worksheet_from_position<Reader: Read + Seek>(
    &self,
    bound_sheet: &BoundSheetRecord,
    encoding: &XlsEncoding,
    record_iter: &mut RecordIter<Reader>,
) -> XlsResult<XlsWorksheet> {
    record_iter.seek(bound_sheet.position as u64)?;

    let first_record = match record_iter.next() {
        Some(record_result) => record_result?,
        None => return Err(XlsError::Eof("Expected BOF record for worksheet")),
    };
    let found = first_record.header.record_type;
    if found != 0x0809 {
        return Err(XlsError::UnexpectedRecordType {
            expected: 0x0809,
            found,
        });
    }

    let shared_strings = self.shared_strings.clone().unwrap_or_default();
    Self::parse_worksheet_records(record_iter, encoding, &bound_sheet.name, shared_strings)
}
/// Builds a worksheet called `name` from the records that follow the
/// sheet's BOF record, stopping at the first EOF record (0x000A) or when
/// the iterator runs out.
///
/// A dimensions record (0x0200) that fails to parse is ignored; a cell
/// record that fails to parse aborts the sheet.
///
/// # Errors
///
/// Returns an error if a record cannot be read or a cell record cannot be
/// parsed.
fn parse_worksheet_records<Reader: Read + Seek>(
    record_iter: &mut RecordIter<Reader>,
    encoding: &XlsEncoding,
    name: &str,
    shared_strings: Vec<String>
) -> XlsResult<XlsWorksheet> {
    let mut worksheet = XlsWorksheet::with_shared_strings(name.to_owned(), shared_strings);

    for next_record in record_iter.by_ref() {
        let record = next_record?;
        let record_type = record.header.record_type;
        match record_type {
            // EOF ends the sheet.
            // NOTE(review): a nested BOF is skipped below, so the first EOF
            // ends the sheet even if it closes a nested substream — confirm
            // this is intended.
            0x000A => break,
            // A further BOF inside the sheet carries nothing we use.
            0x0809 => {}
            // Dimensions are optional: a bad record leaves them unset.
            0x0200 => {
                if let Ok(dims) = DimensionsRecord::parse(&record.data) {
                    worksheet.set_dimensions(
                        dims.first_row,
                        dims.last_row,
                        dims.first_col,
                        dims.last_col,
                    );
                }
            }
            // Cell-bearing record types handled by `CellRecord::parse`.
            0x0201 | 0x0203 | 0x0204 | 0x0205 | 0x027E | 0x00FD | 0x00BD | 0x0006 => {
                let parsed = CellRecord::parse(record_type, &record.data, encoding)?;
                if let Some(cell) = XlsCell::from_record(&parsed, worksheet.shared_strings()) {
                    worksheet.add_cell(cell);
                }
            }
            _ => {}
        }
    }

    Ok(worksheet)
}
}
/// `Workbook` implementation served from the worksheets parsed at
/// construction time. Every worksheet handed out is a clone of the stored one.
impl<R: Read + Seek> Workbook for XlsWorkbook<R> {
    /// Returns the first parsed worksheet, or `WorksheetNotFound` when the
    /// workbook has none.
    fn active_worksheet(&self) -> Result<Box<dyn SheetTrait + '_>, Box<dyn std::error::Error>> {
        match self.worksheets.first() {
            Some(first) => Ok(Box::new(first.clone())),
            None => Err(Box::new(XlsError::WorksheetNotFound("No worksheets found".to_string()))),
        }
    }

    /// Returns the names recorded for the workbook's sheets.
    fn worksheet_names(&self) -> Vec<String> {
        self.worksheet_names.clone()
    }

    /// Returns the first parsed worksheet whose name equals `name`, or
    /// `WorksheetNotFound` carrying the requested name.
    fn worksheet_by_name(&self, name: &str) -> Result<Box<dyn SheetTrait + '_>, Box<dyn std::error::Error>> {
        match self.worksheets.iter().find(|sheet| sheet.name() == name) {
            Some(sheet) => Ok(Box::new(sheet.clone())),
            None => Err(Box::new(XlsError::WorksheetNotFound(name.to_string()))),
        }
    }

    /// Returns the parsed worksheet at `index`, or `WorksheetNotFound` when
    /// the index is out of range.
    fn worksheet_by_index(&self, index: usize) -> Result<Box<dyn SheetTrait + '_>, Box<dyn std::error::Error>> {
        match self.worksheets.get(index) {
            Some(sheet) => Ok(Box::new(sheet.clone())),
            None => Err(Box::new(XlsError::WorksheetNotFound(format!("Index {} out of bounds", index)))),
        }
    }

    /// Returns an iterator over the parsed worksheets in stored order.
    fn worksheets(&self) -> Box<dyn WorksheetIterator<'_> + '_> {
        let borrowed = self.worksheets.iter().collect();
        Box::new(XlsWorksheetIterator {
            worksheets: borrowed,
            index: 0,
        })
    }

    /// Number of worksheets that were parsed successfully.
    fn worksheet_count(&self) -> usize {
        self.worksheets.len()
    }

    /// Index of the active sheet; always the first sheet.
    fn active_sheet_index(&self) -> usize {
        0
    }
}
/// Iterator state for walking a workbook's worksheets in order.
struct XlsWorksheetIterator<'a> {
/// Borrowed worksheets still owned by the workbook.
worksheets: Vec<&'a XlsWorksheet>,
/// Position in `worksheets` of the next worksheet to yield.
index: usize,
}
impl<'a> WorksheetIterator<'a> for XlsWorksheetIterator<'a> {
    /// Yields a clone of the next worksheet, or `None` once all worksheets
    /// have been returned. This implementation never yields an `Err`.
    fn next(&mut self) -> Option<Result<Box<dyn SheetTrait + 'a>, Box<dyn std::error::Error>>> {
        // `?` ends iteration as soon as the index runs past the last sheet.
        let current = self.worksheets.get(self.index).copied()?;
        self.index += 1;
        Some(Ok(Box::new(current.clone())))
    }
}