#![deny(missing_docs)]
extern crate zip;
extern crate quick_xml;
extern crate encoding;
extern crate byteorder;
#[macro_use]
extern crate error_chain;
#[macro_use]
extern crate log;
mod errors;
mod utils;
mod xlsb;
mod xlsx;
mod xls;
mod cfb;
pub mod vba;
use std::path::Path;
use std::collections::HashMap;
use std::fs::File;
use std::slice::Chunks;
use std::str::FromStr;
use std::borrow::Cow;
pub use errors::*;
use vba::VbaProject;
/// The kinds of error value a worksheet cell can hold.
///
/// Values of this type are carried by `DataType::Error`.
#[derive(Debug, Clone, PartialEq)]
pub enum CellErrorType {
/// Division by zero, written `#DIV/0!`
Div0,
/// Value not available, written `#N/A`
NA,
/// Invalid name, written `#NAME?`
Name,
/// Null value, written `#NULL!`
Null,
/// Invalid number, written `#NUM!`
Num,
/// Invalid cell reference, written `#REF!`
Ref,
/// Invalid value, written `#VALUE!`
Value,
/// Data is still being fetched (the name suggests Excel's `#GETTING_DATA`)
GettingData,
}
impl FromStr for CellErrorType {
    type Err = errors::Error;

    /// Parses the textual form of an Excel cell error (e.g. `"#DIV/0!"`).
    ///
    /// The comparison is exact: no trimming and no case folding is performed.
    ///
    /// # Errors
    ///
    /// Returns an error with the message `"<s> is not an excel error"` when
    /// `s` is not one of the known error literals.
    fn from_str(s: &str) -> Result<Self> {
        let kind = match s {
            "#DIV/0!" => CellErrorType::Div0,
            "#N/A" => CellErrorType::NA,
            "#NAME?" => CellErrorType::Name,
            "#NULL!" => CellErrorType::Null,
            "#NUM!" => CellErrorType::Num,
            "#REF!" => CellErrorType::Ref,
            "#VALUE!" => CellErrorType::Value,
            // Every variant of the enum must be reachable from its literal,
            // otherwise a cell holding this error cannot be decoded.
            "#GETTING_DATA" => CellErrorType::GettingData,
            _ => return Err(format!("{} is not an excel error", s).into()),
        };
        Ok(kind)
    }
}
/// The value held by a single worksheet cell.
///
/// A `Range` stores one `DataType` per cell.
#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
/// Signed 64-bit integer
Int(i64),
/// 64-bit floating point number
Float(f64),
/// Text
String(String),
/// Boolean
Bool(bool),
/// Error value, see `CellErrorType`
Error(CellErrorType),
/// No value; this is what `Range::new` fills every cell with
Empty,
}
/// The concrete reader backing an `Excel` value.
///
/// `Excel::open` picks the variant from the file extension; the `inner!`
/// macro dispatches method calls to whichever reader is stored.
enum FileType {
/// Reader selected for the `xls` and `xla` extensions
Xls(xls::Xls),
/// Reader selected for the `xlsx`, `xlsm` and `xlam` extensions
Xlsx(xlsx::Xlsx),
/// Reader selected for the `xlsb` extension
Xlsb(xlsb::Xlsb),
}
/// A handle on an opened workbook, wrapping the format-specific reader
/// together with data cached from it.
///
/// The caches start empty in `Excel::open` and are filled on first use by
/// `worksheet_range` / `sheet_names`.
pub struct Excel {
/// Format-specific reader that performs the actual parsing
file: FileType,
/// Cache of the result of `read_shared_strings`
strings: Vec<String>,
/// Cache of the result of `read_relationships`
relationships: HashMap<Vec<u8>, String>,
/// Cache of the result of `read_sheets_names`: looked up by sheet name, the
/// value is handed to `read_worksheet_range` as the sheet's path
sheets: HashMap<String, String>,
}
// Forwards a method call to whichever concrete reader `$s.file` holds.
//
// `$s` must be an expression with a `file: FileType` field (in this file it
// is always `self`). The reader is bound with `ref mut`, so `$s` has to be
// mutably accessible. Arguments are passed through untouched, which lets a
// call borrow other fields of `$s` (e.g. `&self.strings`) at the same time.
macro_rules! inner {
// Call without arguments, e.g. `inner!(self, has_vba())`.
($s:expr, $func:ident()) => {{
match $s.file {
FileType::Xls(ref mut f) => f.$func(),
FileType::Xlsx(ref mut f) => f.$func(),
FileType::Xlsb(ref mut f) => f.$func(),
}
}};
// Call with one or more comma-separated arguments,
// e.g. `inner!(self, read_worksheet_range(p, &self.strings))`.
($s:expr, $func:ident($first_arg:expr $(, $args:expr)*)) => {{
match $s.file {
FileType::Xls(ref mut f) => f.$func($first_arg $(, $args)*),
FileType::Xlsx(ref mut f) => f.$func($first_arg $(, $args)*),
FileType::Xlsb(ref mut f) => f.$func($first_arg $(, $args)*),
}
}};
}
impl Excel {
    /// Opens the workbook at `path`, choosing the reader from the file
    /// extension.
    ///
    /// Recognized extensions are `xls`/`xla`, `xlsx`/`xlsm`/`xlam` and
    /// `xlsb`. They are compared case-insensitively, so `BOOK.XLSX` is
    /// handled like `book.xlsx`.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened, when the path has no extension
    /// (or one that is not valid UTF-8), when the extension is not
    /// recognized, or when the selected reader rejects the file.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Excel> {
        // Open first: an unreadable file is reported before any complaint
        // about the extension.
        let f = try!(File::open(&path));
        let ext = match path.as_ref().extension().and_then(|s| s.to_str()) {
            Some(e) => e,
            None => return Err("expecting a file with an extension".into()),
        };
        // Match on a lowercased copy, but keep `ext` as written for the
        // error message.
        let file = match &*ext.to_lowercase() {
            "xls" | "xla" => FileType::Xls(try!(xls::Xls::new(f))),
            "xlsx" | "xlsm" | "xlam" => FileType::Xlsx(try!(xlsx::Xlsx::new(f))),
            "xlsb" => FileType::Xlsb(try!(xlsb::Xlsb::new(f))),
            _ => return Err(format!("unrecognized extension: {:?}", ext).into()),
        };
        Ok(Excel {
            file: file,
            strings: vec![],
            relationships: HashMap::new(),
            sheets: HashMap::new(),
        })
    }

    /// Reads the cells of the worksheet called `name`.
    ///
    /// Shared strings, relationships and the sheet index are read from the
    /// file on first use and cached on `self`. A cache that is still empty
    /// after loading is read again on the next call.
    ///
    /// # Errors
    ///
    /// Fails when any of the underlying reads fails, or with
    /// `Sheet '<name>' does not exist` when no sheet has that name.
    pub fn worksheet_range(&mut self, name: &str) -> Result<Range> {
        if self.strings.is_empty() {
            self.strings = try!(inner!(self, read_shared_strings()));
        }
        try!(self.load_sheet_paths());
        match self.sheets.get(name) {
            Some(path) => inner!(self, read_worksheet_range(path, &self.strings)),
            None => Err(format!("Sheet '{}' does not exist", name).into()),
        }
    }

    /// Asks the underlying reader whether the workbook has a VBA project.
    pub fn has_vba(&mut self) -> bool {
        inner!(self, has_vba())
    }

    /// Returns the VBA project as provided by the underlying reader.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the reader reports.
    pub fn vba_project(&mut self) -> Result<Cow<VbaProject>> {
        inner!(self, vba_project())
    }

    /// Returns the names of all sheets of the workbook.
    ///
    /// The names are the keys of a `HashMap`, so their order is unspecified.
    ///
    /// # Errors
    ///
    /// Fails when reading the relationships or the sheet index fails.
    pub fn sheet_names(&mut self) -> Result<Vec<String>> {
        try!(self.load_sheet_paths());
        Ok(self.sheets.keys().cloned().collect())
    }

    /// Fills the `relationships` and `sheets` caches if they are empty.
    fn load_sheet_paths(&mut self) -> Result<()> {
        if self.relationships.is_empty() {
            self.relationships = try!(inner!(self, read_relationships()));
        }
        if self.sheets.is_empty() {
            self.sheets = try!(inner!(self, read_sheets_names(&self.relationships)));
        }
        Ok(())
    }
}
/// The operations every format-specific reader provides.
///
/// `Excel` calls these methods on whichever reader it wraps and caches the
/// results of the `read_shared_strings`, `read_relationships` and
/// `read_sheets_names` calls.
pub trait ExcelReader: Sized {
/// Creates a reader from an already opened file.
fn new(f: File) -> Result<Self>;
/// Tells whether the workbook has a VBA project.
fn has_vba(&mut self) -> bool;
/// Returns the VBA project of the workbook.
fn vba_project(&mut self) -> Result<Cow<VbaProject>>;
/// Reads the shared strings; the result is later passed to
/// `read_worksheet_range`.
fn read_shared_strings(&mut self) -> Result<Vec<String>>;
/// Reads the sheet index, given the map returned by `read_relationships`.
///
/// `Excel` looks the result up by sheet name and passes the value to
/// `read_worksheet_range` as `path`.
fn read_sheets_names(&mut self, relationships: &HashMap<Vec<u8>, String>) -> Result<HashMap<String, String>>;
/// Reads the workbook relationships.
///
/// NOTE(review): presumably maps a relationship id to its target path;
/// confirm against the implementations.
fn read_relationships(&mut self) -> Result<HashMap<Vec<u8>, String>>;
/// Reads the cells of the sheet stored at `path`, using `strings` as the
/// shared strings.
fn read_worksheet_range(&mut self, path: &str, strings: &[String]) -> Result<Range>;
}
/// A rectangular block of cells.
///
/// The default value has a zero position, a zero size and no cells.
#[derive(Debug, Default, Clone)]
pub struct Range {
/// Absolute position of the first cell; `set_value` subtracts it from the
/// position it is given
position: (u32, u32),
/// Dimensions of the block; `size.1` is the row width used for indexing
size: (usize, usize),
/// The cells, stored row after row (`index = i * size.1 + j`)
inner: Vec<DataType>,
}
/// An iterator over the rows of a `Range`, created by `Range::rows`.
///
/// Each item is a slice of the cells of one row.
pub struct Rows<'a> {
/// Chunked view of the range's cell storage, one chunk per row
inner: Chunks<'a, DataType>,
}
impl Range {
    /// Creates a range whose first cell is at `position` and which holds
    /// `size.0` rows of `size.1` cells, all set to `DataType::Empty`.
    pub fn new(position: (u32, u32), size: (usize, usize)) -> Range {
        Range {
            position: position,
            size: size,
            inner: vec![DataType::Empty; size.0 * size.1],
        }
    }

    /// Returns the position of the first cell of the range.
    pub fn get_position(&self) -> (u32, u32) {
        self.position
    }

    /// Returns the dimensions of the range.
    pub fn get_size(&self) -> (usize, usize) {
        self.size
    }

    /// Stores `value` in the cell at `pos`.
    ///
    /// `pos` is expressed in the same coordinates as the range position,
    /// i.e. it is *not* relative to the first cell of the range.
    ///
    /// # Panics
    ///
    /// Panics when either coordinate of `pos` is smaller than the matching
    /// coordinate of the range position, or when the resulting index lies
    /// outside the cell storage.
    pub fn set_value(&mut self, pos: (u32, u32), value: DataType) {
        // Compare per coordinate: a tuple comparison is lexicographic and
        // would accept a column left of the range on any later row.
        assert!(self.position.0 <= pos.0 && self.position.1 <= pos.1,
                "position is before the first cell of the range");
        // Index in `usize` so large sheets cannot overflow `u32` arithmetic.
        let row = (pos.0 - self.position.0) as usize;
        let col = (pos.1 - self.position.1) as usize;
        self.inner[row * self.size.1 + col] = value;
    }

    /// Returns the cell at row `i` and column `j`, both relative to the
    /// first cell of the range.
    ///
    /// # Panics
    ///
    /// Panics when `i >= size.0` or `j >= size.1`.
    pub fn get_value(&self, i: usize, j: usize) -> &DataType {
        // Compare per coordinate: with a lexicographic tuple comparison an
        // out-of-range column would silently address a cell of another row.
        assert!(i < self.size.0 && j < self.size.1,
                "cell index is outside of the range");
        &self.inner[i * self.size.1 + j]
    }

    /// Returns an iterator over the rows of the range.
    ///
    /// A range without cells (such as `Range::default()`) yields no rows.
    pub fn rows(&self) -> Rows {
        // `chunks` panics on a chunk size of 0. A zero-width range built by
        // `new` or `default` has no cells, so any non-zero size yields the
        // same empty iterator.
        let width = if self.size.1 == 0 { 1 } else { self.size.1 };
        Rows { inner: self.inner.chunks(width) }
    }
}
impl<'a> Iterator for Rows<'a> {
    type Item = &'a [DataType];

    /// Returns the cells of the next row, or `None` once all rows have been
    /// visited.
    fn next(&mut self) -> Option<&'a [DataType]> {
        self.inner.next()
    }

    /// Forwards the size hint of the underlying chunk iterator, so that
    /// consumers such as `collect` know how many rows remain.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
/// Every error literal understood by `CellErrorType::from_str` parses to
/// its matching variant.
#[test]
fn test_parse_error() {
    let cases = [
        ("#DIV/0!", CellErrorType::Div0),
        ("#N/A", CellErrorType::NA),
        ("#NAME?", CellErrorType::Name),
        ("#NULL!", CellErrorType::Null),
        ("#NUM!", CellErrorType::Num),
        ("#REF!", CellErrorType::Ref),
        ("#VALUE!", CellErrorType::Value),
    ];
    for &(literal, ref expected) in cases.iter() {
        let parsed = CellErrorType::from_str(literal).unwrap();
        assert_eq!(&parsed, expected);
    }
}
/// Cell references and dimension strings are decoded by the `utils`
/// helpers into the expected tuples.
#[test]
fn test_dimensions() {
    let first_cell = utils::get_row_column(b"A1").unwrap();
    assert_eq!(first_cell, (1, 1));

    let far_cell = utils::get_row_column(b"C107").unwrap();
    assert_eq!(far_cell, (107, 3));

    let dimension = utils::get_dimension(b"C2:D35").unwrap();
    assert_eq!(dimension, ((2, 3), (34, 2)));
}