use std::borrow::Cow;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::{Read, Seek};
use std::str::FromStr;
use quick_xml::events::attributes::{Attribute, Attributes};
use quick_xml::events::{BytesStart, Event};
use quick_xml::Reader as XmlReader;
use zip::read::{ZipArchive, ZipFile};
use zip::result::ZipError;
use vba::VbaProject;
use {Cell, CellErrorType, DataType, Metadata, Range, Reader};
/// XML event reader over a buffered zip entry; this is the reader type that
/// `xml_reader` builds for each part of the workbook archive.
type XlsReader<'a> = XmlReader<BufReader<ZipFile<'a>>>;
/// Errors that can occur while opening or reading an xlsx workbook.
#[derive(Debug)]
pub enum XlsxError {
/// Underlying I/O error.
Io(::std::io::Error),
/// Error reported by the zip archive layer.
Zip(::zip::result::ZipError),
/// Error reported while building the VBA project.
Vba(::vba::VbaError),
/// Error reported by the XML reader.
Xml(::quick_xml::Error),
/// String parse error.
Parse(::std::string::ParseError),
/// A value could not be parsed as a float.
ParseFloat(::std::num::ParseFloatError),
/// A value could not be parsed as an integer.
ParseInt(::std::num::ParseIntError),
/// The XML ended before the closing tag named by the payload was seen.
XmlEof(&'static str),
/// A node was not the expected one; the payload names what was expected.
UnexpectedNode(&'static str),
/// A required entry (payload is its path) is missing from the archive.
FileNotFound(String),
/// A byte of a cell reference is neither an ASCII letter nor an ASCII digit.
Alphanumeric(u8),
/// A digit was found in the column (letter) part of a cell reference.
NumericColumn(u8),
/// A dimension reference split on ':' into an unsupported number of parts.
DimensionCount(usize),
/// The cell's `t` attribute holds a value this reader does not know.
CellTAttribute(String),
/// The cell has no `r` (reference) attribute.
CellRAttribute,
/// Any other failure, described by a static message.
Unexpected(&'static str),
/// The text is not one of the recognised cell error literals (e.g. `#DIV/0!`).
CellError(String),
}
// NOTE(review): `from_err!` is defined outside this file. It presumably
// generates `impl From<$source> for XlsxError` wrapping the value in the named
// variant; the `?` operators and `.into()` calls below rely on such
// conversions. TODO confirm against the macro definition.
from_err!(::std::io::Error, XlsxError, Io);
from_err!(::zip::result::ZipError, XlsxError, Zip);
from_err!(::vba::VbaError, XlsxError, Vba);
from_err!(::quick_xml::Error, XlsxError, Xml);
from_err!(::std::string::ParseError, XlsxError, Parse);
from_err!(::std::num::ParseFloatError, XlsxError, ParseFloat);
from_err!(::std::num::ParseIntError, XlsxError, ParseInt);
impl std::fmt::Display for XlsxError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
XlsxError::Io(e) => write!(f, "I/O error: {}", e),
XlsxError::Zip(e) => write!(f, "Zip error: {}", e),
XlsxError::Xml(e) => write!(f, "Xml error: {}", e),
XlsxError::Vba(e) => write!(f, "Vba error: {}", e),
XlsxError::Parse(e) => write!(f, "Parse string error: {}", e),
XlsxError::ParseInt(e) => write!(f, "Parse integer error: {}", e),
XlsxError::ParseFloat(e) => write!(f, "Parse float error: {}", e),
XlsxError::XmlEof(e) => write!(f, "Unexpected end of xml, expecting '</{}>'", e),
XlsxError::UnexpectedNode(e) => write!(f, "Expecting '{}' node", e),
XlsxError::FileNotFound(e) => write!(f, "File not found '{}'", e),
XlsxError::Alphanumeric(e) => {
write!(f, "Expecting alphanumeric character, got {:X}", e)
}
XlsxError::NumericColumn(e) => write!(
f,
"Numeric character is not allowed for column name, got {}",
e
),
XlsxError::DimensionCount(e) => {
write!(f, "Range dimension must be lower than 2. Got {}", e)
}
XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {:?}", e),
XlsxError::CellRAttribute => write!(f, "Cell missing 'r' attribute"),
XlsxError::Unexpected(e) => write!(f, "{}", e),
XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{}'", e),
}
}
}
impl std::error::Error for XlsxError {
    /// Returns the wrapped lower-level error for the variants that carry one,
    /// and `None` for the variants that only describe workbook content.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match *self {
            XlsxError::Io(ref inner) => inner,
            XlsxError::Zip(ref inner) => inner,
            XlsxError::Xml(ref inner) => inner,
            XlsxError::Vba(ref inner) => inner,
            XlsxError::Parse(ref inner) => inner,
            XlsxError::ParseInt(ref inner) => inner,
            XlsxError::ParseFloat(ref inner) => inner,
            // None of the remaining variants wrap another error.
            XlsxError::XmlEof(_)
            | XlsxError::UnexpectedNode(_)
            | XlsxError::FileNotFound(_)
            | XlsxError::Alphanumeric(_)
            | XlsxError::NumericColumn(_)
            | XlsxError::DimensionCount(_)
            | XlsxError::CellTAttribute(_)
            | XlsxError::CellRAttribute
            | XlsxError::Unexpected(_)
            | XlsxError::CellError(_) => return None,
        };
        Some(cause)
    }
}
impl FromStr for CellErrorType {
    type Err = XlsxError;

    /// Maps a cell error literal (for example `#DIV/0!`) to its variant.
    ///
    /// Any other text is rejected with `XlsxError::CellError` carrying the
    /// offending string.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let kind = match s {
            "#DIV/0!" => CellErrorType::Div0,
            "#N/A" => CellErrorType::NA,
            "#NAME?" => CellErrorType::Name,
            "#NULL!" => CellErrorType::Null,
            "#NUM!" => CellErrorType::Num,
            "#REF!" => CellErrorType::Ref,
            "#VALUE!" => CellErrorType::Value,
            unknown => return Err(XlsxError::CellError(unknown.into())),
        };
        Ok(kind)
    }
}
/// An xlsx workbook backed by a zip archive read from `RS`.
pub struct Xlsx<RS>
where
RS: Read + Seek,
{
// The underlying zip archive holding the workbook parts.
zip: ZipArchive<RS>,
// Shared strings, filled by `read_shared_strings`; cells of type `s` index into it.
strings: Vec<String>,
// `(sheet name, path inside the archive)` pairs, filled by `read_workbook`.
sheets: Vec<(String, String)>,
// Defined names and sheet names, filled by `read_workbook`.
metadata: Metadata,
}
impl<RS: Read + Seek> Xlsx<RS> {
/// Loads `xl/sharedStrings.xml` into `self.strings`.
///
/// A workbook without that part is accepted and leaves the table empty.
/// Every `<si>` item occupies exactly one slot: cells refer to shared strings
/// by position, so an item for which `read_string` finds no text is stored
/// as an empty string instead of being dropped (dropping it would shift the
/// index of every later item).
///
/// # Errors
///
/// Returns `XlsxError::XmlEof("sst")` if the part ends before `</sst>`, and
/// propagates zip and XML errors.
fn read_shared_strings(&mut self) -> Result<(), XlsxError> {
    let mut xml = match xml_reader(&mut self.zip, "xl/sharedStrings.xml") {
        None => return Ok(()),
        Some(x) => x?,
    };
    let mut buf = Vec::new();
    loop {
        buf.clear();
        match xml.read_event(&mut buf) {
            Ok(Event::Start(ref e)) if e.local_name() == b"si" => {
                // Keep positions aligned with the indices used by the cells.
                let text = read_string(&mut xml, e.name())?.unwrap_or_default();
                self.strings.push(text);
            }
            Ok(Event::End(ref e)) if e.local_name() == b"sst" => break,
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("sst")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }
    Ok(())
}
/// Reads `xl/workbook.xml`, collecting the sheet list and the defined names.
///
/// For every `<sheet>` the `name` attribute and the archive path of the sheet
/// are recorded in `self.sheets`; the path is looked up in `relationships`
/// through the `r:id` attribute and normalised so that it starts with `xl/`.
/// Every `<definedName>` with a `name` attribute is recorded together with
/// its text. On success `self.metadata` is updated with the defined names and
/// the sheet names. A workbook without that part is accepted and leaves
/// everything untouched.
///
/// # Errors
///
/// Returns `XlsxError::Unexpected` when a sheet's `r:id` is not present in
/// `relationships` (this used to panic), `XlsxError::XmlEof("workbook")` when
/// the part ends before `</workbook>`, and propagates zip and XML errors.
fn read_workbook(&mut self, relationships: &HashMap<Vec<u8>, String>) -> Result<(), XlsxError> {
    let mut xml = match xml_reader(&mut self.zip, "xl/workbook.xml") {
        None => return Ok(()),
        Some(x) => x?,
    };
    let mut defined_names = Vec::new();
    let mut buf = Vec::new();
    let mut val_buf = Vec::new();
    loop {
        buf.clear();
        match xml.read_event(&mut buf) {
            Ok(Event::Start(ref e)) if e.local_name() == b"sheet" => {
                let mut name = String::new();
                let mut path = String::new();
                for a in e.attributes() {
                    let a = a?;
                    match a {
                        Attribute { key: b"name", .. } => {
                            name = a.unescape_and_decode_value(&xml)?;
                        }
                        Attribute {
                            key: b"r:id",
                            value: v,
                        } => {
                            // The id comes from the file: a dangling id must be
                            // an error, not a panic.
                            let r = relationships.get(&*v).ok_or(XlsxError::Unexpected(
                                "sheet 'r:id' has no matching workbook relationship",
                            ))?;
                            path = if r.starts_with("/xl/") {
                                r[1..].to_string()
                            } else if r.starts_with("xl/") {
                                r.to_string()
                            } else {
                                format!("xl/{}", r)
                            };
                        }
                        _ => (),
                    }
                }
                self.sheets.push((name, path));
            }
            Ok(Event::Start(ref e)) if e.local_name() == b"definedName" => {
                // Attributes that fail to parse are skipped here on purpose
                // (best effort), as in the original implementation.
                if let Some(a) = e
                    .attributes()
                    .filter_map(|a| a.ok())
                    .find(|a| a.key == b"name")
                {
                    let name = a.unescape_and_decode_value(&xml)?;
                    val_buf.clear();
                    let value = xml.read_text(b"definedName", &mut val_buf)?;
                    defined_names.push((name, value));
                }
            }
            Ok(Event::End(ref e)) if e.local_name() == b"workbook" => break,
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("workbook")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }
    self.metadata.names = defined_names;
    self.metadata.sheets = self.sheets.iter().map(|&(ref s, _)| s.clone()).collect();
    Ok(())
}
/// Reads `xl/_rels/workbook.xml.rels` and returns a map from each
/// relationship's `Id` (raw bytes) to its decoded `Target`.
///
/// # Errors
///
/// Returns `XlsxError::FileNotFound` when the part is missing from the
/// archive, `XlsxError::XmlEof("Relationships")` when it ends before
/// `</Relationships>`, and propagates zip and XML errors.
fn read_relationships(&mut self) -> Result<HashMap<Vec<u8>, String>, XlsxError> {
    let part = "xl/_rels/workbook.xml.rels";
    let mut xml = match xml_reader(&mut self.zip, part) {
        Some(reader) => reader?,
        None => return Err(XlsxError::FileNotFound(part.to_string())),
    };

    let mut by_id = HashMap::new();
    let mut event_buf = Vec::new();
    loop {
        event_buf.clear();
        match xml.read_event(&mut event_buf).map_err(XlsxError::Xml)? {
            Event::Start(ref rel) if rel.local_name() == b"Relationship" => {
                let mut id = Vec::new();
                let mut target = String::new();
                for attribute in rel.attributes() {
                    let attribute = attribute?;
                    match attribute.key {
                        b"Id" => id.extend_from_slice(&attribute.value),
                        b"Target" => target = xml.decode(&attribute.value).into_owned(),
                        _ => (),
                    }
                }
                by_id.insert(id, target);
            }
            Event::End(ref end) if end.local_name() == b"Relationships" => break,
            Event::Eof => return Err(XlsxError::XmlEof("Relationships")),
            _ => (),
        }
    }
    Ok(by_id)
}
}
/// Reads one worksheet part and builds a `Range` from its cells.
///
/// The `<dimension ref="…">` element, when present, is only used as a
/// capacity hint for the cell vector. The actual cells are produced by
/// `read_data`, which is called once when `<sheetData>` starts and receives
/// the shared strings, the XML reader and the cell vector to fill. Reading
/// stops after `<sheetData>` or at the end of the part.
///
/// # Errors
///
/// Returns `XlsxError::UnexpectedNode("dimension")` when a `<dimension>`
/// element has no `ref` attribute, and propagates XML errors, dimension
/// parsing errors and whatever `read_data` returns.
fn worksheet<T, F>(
    strings: &[String],
    mut xml: XlsReader,
    read_data: &mut F,
) -> Result<Range<T>, XlsxError>
where
    T: Default + Clone + PartialEq,
    F: FnMut(&[String], &mut XlsReader, &mut Vec<Cell<T>>) -> Result<(), XlsxError>,
{
    let mut cells = Vec::new();
    let mut buf = Vec::new();
    'xml: loop {
        buf.clear();
        match xml.read_event(&mut buf) {
            Ok(Event::Start(ref e)) => match e.local_name() {
                b"dimension" => {
                    for a in e.attributes() {
                        if let Attribute {
                            key: b"ref",
                            value: rdim,
                        } = a?
                        {
                            let (start, end) = get_dimension(&rdim)?;
                            // The dimension comes from the file and is only a
                            // hint: compute the cell count in u64 with
                            // saturating arithmetic so that an inverted range
                            // or a full-sheet range (2^20 rows x 2^14 columns)
                            // can neither underflow nor overflow.
                            let rows = u64::from(end.0.saturating_sub(start.0)) + 1;
                            let cols = u64::from(end.1.saturating_sub(start.1)) + 1;
                            let len = rows.saturating_mul(cols);
                            if len < 1_000_000 {
                                cells.reserve(len as usize);
                            }
                            continue 'xml;
                        }
                    }
                    return Err(XlsxError::UnexpectedNode("dimension"));
                }
                b"sheetData" => {
                    read_data(strings, &mut xml, &mut cells)?;
                    break;
                }
                _ => (),
            },
            Ok(Event::Eof) => break,
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }
    Ok(Range::from_sparse(cells))
}
impl<RS: Read + Seek> Reader for Xlsx<RS> {
type RS = RS;
type Error = XlsxError;
/// Opens `reader` as a zip archive and eagerly loads the shared strings, the
/// workbook relationships and the workbook itself (sheet list and defined
/// names), in that order.
fn new(reader: RS) -> Result<Self, XlsxError>
where
    RS: Read + Seek,
{
    let zip = ZipArchive::new(reader)?;
    let mut workbook = Xlsx {
        zip,
        strings: Vec::new(),
        sheets: Vec::new(),
        metadata: Metadata::default(),
    };

    workbook.read_shared_strings()?;
    let rels = workbook.read_relationships()?;
    workbook.read_workbook(&rels)?;

    Ok(workbook)
}
/// Builds the VBA project from `xl/vbaProject.bin`.
///
/// Returns `None` when that entry cannot be opened from the archive, and
/// `Some(Err(XlsxError::Vba(..)))` when the project itself fails to load.
fn vba_project(&mut self) -> Option<Result<Cow<VbaProject>, XlsxError>> {
    match self.zip.by_name("xl/vbaProject.bin") {
        Ok(mut entry) => {
            let len = entry.size() as usize;
            let project = VbaProject::new(&mut entry, len)
                .map(Cow::Owned)
                .map_err(XlsxError::Vba);
            Some(project)
        }
        Err(_) => None,
    }
}
/// Returns the workbook metadata (defined names and sheet names) that
/// `read_workbook` stored when the workbook was opened.
fn metadata(&self) -> &Metadata {
&self.metadata
}
/// Reads the cell values of the sheet called `name`.
///
/// Returns `None` when no sheet has that name or when the sheet's part is
/// missing from the archive; otherwise returns the outcome of parsing it.
fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, XlsxError>> {
    let path = match self.sheets.iter().find(|sheet| sheet.0 == name) {
        Some(sheet) => &sheet.1,
        None => return None,
    };
    let reader = match xml_reader(&mut self.zip, path) {
        Some(reader) => reader,
        None => return None,
    };
    let strings = &self.strings;
    Some(reader.and_then(|xml| {
        worksheet(strings, xml, &mut |shared, xml, cells| {
            read_sheet_data(xml, shared, cells)
        })
    }))
}
/// Reads the formulas of the sheet called `name`.
///
/// Only non-empty `<f>` texts produce a cell; `<v>` and `<is>` children are
/// skipped, and any other child of a cell is reported as
/// `XlsxError::UnexpectedNode`. Returns `None` when no sheet has that name or
/// when the sheet's part is missing from the archive.
fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, XlsxError>> {
    let path = match self.sheets.iter().find(|sheet| sheet.0 == name) {
        Some(sheet) => &sheet.1,
        None => return None,
    };
    let reader = match xml_reader(&mut self.zip, path) {
        Some(reader) => reader,
        None => return None,
    };
    let strings = &self.strings;
    Some(reader.and_then(|xml| {
        worksheet(strings, xml, &mut |_, xml, cells| {
            read_sheet(xml, cells, &mut |cells, xml, e, pos, _| {
                match e.local_name() {
                    b"f" => {
                        let formula = xml.read_text(e.name(), &mut Vec::new())?;
                        if !formula.is_empty() {
                            cells.push(Cell::new(pos, formula));
                        }
                        Ok(())
                    }
                    // Values are irrelevant here: skip to the closing tag.
                    b"is" | b"v" => xml
                        .read_to_end(e.name(), &mut Vec::new())
                        .map_err(XlsxError::from),
                    _ => Err(XlsxError::UnexpectedNode("v, f, or is")),
                }
            })
        })
    }))
}
}
/// Opens the archive entry at `path` and wraps it in a configured XML reader.
///
/// Returns `None` when the entry does not exist, `Some(Err(..))` for any
/// other zip error, and `Some(Ok(reader))` otherwise. The reader is set up
/// with `check_end_names(false)`, `trim_text(false)`,
/// `check_comments(false)` and `expand_empty_elements(true)`.
fn xml_reader<'a, RS>(
    zip: &'a mut ZipArchive<RS>,
    path: &str,
) -> Option<Result<XlsReader<'a>, XlsxError>>
where
    RS: Read + Seek,
{
    let entry = match zip.by_name(path) {
        Ok(entry) => entry,
        Err(ZipError::FileNotFound) => return None,
        Err(other) => return Some(Err(other.into())),
    };

    let mut reader = XmlReader::from_reader(BufReader::new(entry));
    reader
        .check_end_names(false)
        .trim_text(false)
        .check_comments(false)
        .expand_empty_elements(true);
    Some(Ok(reader))
}
/// Returns the raw value of the first attribute whose key equals `n`.
///
/// Only values that borrow from the underlying buffer can be returned with
/// lifetime `'a`; an attribute with a matching key but an owned value is
/// skipped. The first attribute that fails to parse aborts the search with
/// `XlsxError::Xml`.
fn get_attribute<'a>(atts: Attributes<'a>, n: &[u8]) -> Result<Option<&'a [u8]>, XlsxError> {
    for attribute in atts {
        let attribute = attribute.map_err(XlsxError::Xml)?;
        if attribute.key == n {
            if let Cow::Borrowed(value) = attribute.value {
                return Ok(Some(value));
            }
        }
    }
    Ok(None)
}
fn read_sheet<T, F>(
xml: &mut XlsReader,
cells: &mut Vec<Cell<T>>,
push_cell: &mut F,
) -> Result<(), XlsxError>
where
T: Clone + Default + PartialEq,
F: FnMut(
&mut Vec<Cell<T>>,
&mut XlsReader,
&BytesStart,
(u32, u32),
&BytesStart,
) -> Result<(), XlsxError>,
{
let mut buf = Vec::new();
let mut cell_buf = Vec::new();
loop {
buf.clear();
match xml.read_event(&mut buf) {
Ok(Event::Start(ref c_element)) if c_element.local_name() == b"c" => {
let pos = get_attribute(c_element.attributes(), b"r")
.and_then(|o| o.ok_or(XlsxError::CellRAttribute))
.and_then(get_row_column)?;
loop {
cell_buf.clear();
match xml.read_event(&mut cell_buf) {
Ok(Event::Start(ref e)) => push_cell(cells, xml, e, pos, c_element)?,
Ok(Event::End(ref e)) if e.local_name() == b"c" => break,
Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")),
Err(e) => return Err(XlsxError::Xml(e)),
_ => (),
}
}
}
Ok(Event::End(ref e)) if e.local_name() == b"sheetData" => return Ok(()),
Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
Err(e) => return Err(XlsxError::Xml(e)),
_ => (),
}
}
}
fn read_sheet_data(
xml: &mut XlsReader,
strings: &[String],
cells: &mut Vec<Cell<DataType>>,
) -> Result<(), XlsxError> {
fn read_value<'a>(
v: String,
strings: &[String],
atts: Attributes<'a>,
) -> Result<DataType, XlsxError> {
match get_attribute(atts, b"t")? {
Some(b"s") => {
let idx: usize = v.parse()?;
Ok(DataType::String(strings[idx].clone()))
}
Some(b"b") => {
Ok(DataType::Bool(v != "0"))
}
Some(b"e") => {
Ok(DataType::Error(v.parse()?))
}
Some(b"d") => {
Ok(DataType::String(v))
}
Some(b"str") => {
v.parse()
.map(DataType::Float)
.map_err(XlsxError::ParseFloat)
.or_else::<XlsxError, _>(|_| Ok(DataType::String(v)))
}
Some(b"n") => {
v.parse()
.map(DataType::Float)
.map_err(XlsxError::ParseFloat)
}
None => {
v.parse()
.map(DataType::Float)
.map_err(XlsxError::ParseFloat)
.or_else::<XlsxError, _>(|_| Ok(DataType::String(v)))
}
Some(b"is") => {
return Err(XlsxError::Unexpected(
"called read_value on a cell of type inlineStr",
));
}
Some(t) => {
let t = ::std::str::from_utf8(t)
.unwrap_or("<utf8 error>")
.to_string();
return Err(XlsxError::CellTAttribute(t));
}
}
}
read_sheet(xml, cells, &mut |cells, xml, e, pos, c_element| {
match e.local_name() {
b"is" => {
if let Some(s) = read_string(xml, e.name())? {
cells.push(Cell::new(pos, DataType::String(s)));
}
}
b"v" => {
let v = xml.read_text(e.name(), &mut Vec::new())?;
match read_value(v, strings, c_element.attributes())? {
DataType::Empty => (),
v => cells.push(Cell::new(pos, v)),
}
}
b"f" => xml.read_to_end(e.name(), &mut Vec::new())?,
_n => return Err(XlsxError::UnexpectedNode("v, f, or is")),
}
Ok(())
})
}
/// Parses a dimension reference such as `C2:D35` into its zero-based
/// `((row, column), (row, column))` corners.
///
/// A reference without `:` denotes a single cell and yields that cell as
/// both corners. Every `:`-separated part is parsed with `get_row_column`
/// first (the first failure is returned); only then is the number of parts
/// checked, and anything other than one or two parts is rejected with
/// `XlsxError::DimensionCount`.
fn get_dimension(dimension: &[u8]) -> Result<((u32, u32), (u32, u32)), XlsxError> {
    let mut corners = Vec::new();
    for part in dimension.split(|byte| *byte == b':') {
        corners.push(get_row_column(part)?);
    }
    match corners.as_slice() {
        &[only] => Ok((only, only)),
        &[start, end] => Ok((start, end)),
        other => Err(XlsxError::DimensionCount(other.len())),
    }
}
/// Parses a cell reference such as `C107` into a zero-based `(row, column)`.
///
/// The reference is scanned from the right: first the decimal row number,
/// then the base-26 column letters (upper or lower case).
///
/// # Errors
///
/// * `XlsxError::NumericColumn` when a digit appears to the left of a letter.
/// * `XlsxError::Alphanumeric` when a byte is neither an ASCII letter nor an
///   ASCII digit.
/// * `XlsxError::Unexpected` when the row or the column is missing or zero
///   (e.g. `A`, `12`, `A0`, empty input) or does not fit in a `u32`. These
///   inputs used to underflow or overflow the `u32` arithmetic.
fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> {
    // Accumulate in u64 with saturating arithmetic: the input comes from the
    // file, so no digit run may be able to overflow. A saturated value is
    // rejected by the range check below, while leading zeros stay harmless
    // because they contribute nothing whatever the current power is.
    let (mut row, mut col) = (0u64, 0u64);
    let mut pow = 1u64;
    let mut readrow = true;
    for &c in range.iter().rev() {
        match c {
            b'0'..=b'9' => {
                if !readrow {
                    return Err(XlsxError::NumericColumn(c));
                }
                row = row.saturating_add(u64::from(c - b'0').saturating_mul(pow));
                pow = pow.saturating_mul(10);
            }
            b'A'..=b'Z' | b'a'..=b'z' => {
                if readrow {
                    // First letter seen: switch from the row to the column.
                    pow = 1;
                    readrow = false;
                }
                let digit = u64::from(c.to_ascii_uppercase() - b'A') + 1;
                col = col.saturating_add(digit.saturating_mul(pow));
                pow = pow.saturating_mul(26);
            }
            _ => return Err(XlsxError::Alphanumeric(c)),
        }
    }
    if row == 0 || col == 0 {
        return Err(XlsxError::Unexpected(
            "cell reference needs a column and a row number starting at 1",
        ));
    }
    let max = u64::from(::std::u32::MAX);
    if row - 1 > max || col - 1 > max {
        return Err(XlsxError::Unexpected("cell reference is out of range"));
    }
    Ok(((row - 1) as u32, (col - 1) as u32))
}
/// Reads the text of a string item (`<si>` or `<is>`) up to its closing tag.
///
/// `closing` is the name of the element being read. A plain item returns the
/// text of its first `<t>`; an item made of rich-text runs (`<r>`) returns
/// the concatenation of the `<t>` texts seen. `None` is returned when the
/// item closes without any `<r>` or `<t>`.
///
/// Phonetic runs (`<rPh>`) are skipped entirely: they carry pronunciation
/// hints in their own `<t>` children, which must not be appended to the
/// string's actual content.
///
/// # Errors
///
/// Returns `XlsxError::XmlEof("")` when the XML ends before the closing tag
/// (the closing name is not `'static`, so it cannot be carried by the error),
/// and propagates XML errors.
fn read_string(xml: &mut XlsReader, closing: &[u8]) -> Result<Option<String>, XlsxError> {
    let mut buf = Vec::new();
    let mut val_buf = Vec::new();
    let mut rich_buffer: Option<String> = None;
    loop {
        buf.clear();
        match xml.read_event(&mut buf) {
            Ok(Event::Start(ref e)) if e.local_name() == b"r" => {
                if rich_buffer.is_none() {
                    // Rich text: accumulate the text of every run.
                    rich_buffer = Some(String::new());
                }
            }
            Ok(Event::Start(ref e)) if e.local_name() == b"rPh" => {
                // Skip the whole phonetic run, including its `<t>` children.
                val_buf.clear();
                xml.read_to_end(e.name(), &mut val_buf)?;
            }
            Ok(Event::End(ref e)) if e.local_name() == closing => {
                return Ok(rich_buffer);
            }
            Ok(Event::Start(ref e)) if e.local_name() == b"t" => {
                val_buf.clear();
                let value = xml.read_text(e.name(), &mut val_buf)?;
                if let Some(ref mut s) = rich_buffer {
                    s.push_str(&value);
                } else {
                    // Plain text: consume the rest of the item and return.
                    xml.read_to_end(closing, &mut val_buf)?;
                    return Ok(Some(value));
                }
            }
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }
}
/// Checks cell-reference and dimension parsing on a few known inputs.
#[test]
fn test_dimensions() {
    let single_cells = [(&b"A1"[..], (0, 0)), (&b"C107"[..], (106, 2))];
    for &(reference, expected) in single_cells.iter() {
        assert_eq!(get_row_column(reference).unwrap(), expected);
    }

    let corners = get_dimension(b"C2:D35").unwrap();
    assert_eq!(corners, ((1, 2), (34, 3)));
}
/// Checks that every supported cell error literal parses to its variant.
#[test]
fn test_parse_error() {
    let expectations = [
        ("#DIV/0!", CellErrorType::Div0),
        ("#N/A", CellErrorType::NA),
        ("#NAME?", CellErrorType::Name),
        ("#NULL!", CellErrorType::Null),
        ("#NUM!", CellErrorType::Num),
        ("#REF!", CellErrorType::Ref),
        ("#VALUE!", CellErrorType::Value),
    ];
    for &(literal, ref expected) in expectations.iter() {
        assert_eq!(&CellErrorType::from_str(literal).unwrap(), expected);
    }
}