#![deny(missing_docs)]
extern crate zip;
extern crate quick_xml;
extern crate encoding_rs;
extern crate byteorder;
#[macro_use]
extern crate error_chain;
#[macro_use]
extern crate log;
mod errors;
mod utils;
mod xlsb;
mod xlsx;
mod xls;
mod cfb;
pub mod vba;
use std::path::Path;
use std::collections::HashMap;
use std::fs::File;
use std::str::FromStr;
use std::borrow::Cow;
pub use errors::*;
use vba::VbaProject;
/// Kind of error a cell can hold.
///
/// The textual forms quoted on the variants are the strings accepted by this
/// type's `FromStr` implementation.
#[derive(Debug, Clone, PartialEq)]
pub enum CellErrorType {
/// Parsed from `#DIV/0!`.
Div0,
/// Parsed from `#N/A`.
NA,
/// Parsed from `#NAME?`.
Name,
/// Parsed from `#NULL!`.
Null,
/// Parsed from `#NUM!`.
Num,
/// Parsed from `#REF!`.
Ref,
/// Parsed from `#VALUE!`.
Value,
/// Getting-data error.
///
/// NOTE(review): `FromStr` has no textual form for this variant; presumably
/// it is only produced by the format-specific readers — confirm.
GettingData,
}
impl FromStr for CellErrorType {
    type Err = errors::Error;

    /// Parses the textual form of a cell error (for instance `#DIV/0!`).
    ///
    /// The comparison is exact: no trimming and no case folding is applied.
    ///
    /// # Errors
    ///
    /// Returns an error carrying the message `"<s> is not an excel error"`
    /// when `s` is not one of the seven recognised strings.
    fn from_str(s: &str) -> Result<Self> {
        let kind = match s {
            "#DIV/0!" => CellErrorType::Div0,
            "#N/A" => CellErrorType::NA,
            "#NAME?" => CellErrorType::Name,
            "#NULL!" => CellErrorType::Null,
            "#NUM!" => CellErrorType::Num,
            "#REF!" => CellErrorType::Ref,
            "#VALUE!" => CellErrorType::Value,
            // Anything else is reported back verbatim in the error message.
            _ => return Err(format!("{} is not an excel error", s).into()),
        };
        Ok(kind)
    }
}
/// The value stored in a single cell.
#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
/// Signed 64-bit integer value.
Int(i64),
/// 64-bit floating point value.
Float(f64),
/// Text value.
String(String),
/// Boolean value.
Bool(bool),
/// Error value, see `CellErrorType`.
Error(CellErrorType),
/// No value. `Range` uses this variant to fill cells that were never set.
Empty,
}
/// Format-specific reader backing an `Excel`.
///
/// The variant is chosen by `Excel::open` from the file extension, and the
/// `inner!` macro forwards method calls to whichever reader is stored.
enum FileType {
/// Reader used for the `xls` and `xla` extensions.
Xls(xls::Xls),
/// Reader used for the `xlsx`, `xlsm` and `xlam` extensions.
Xlsx(xlsx::Xlsx),
/// Reader used for the `xlsb` extension.
Xlsb(xlsb::Xlsb),
}
/// A workbook opened from a file.
///
/// Only the reader is created by `Excel::open`; the other fields start empty
/// and are filled by `initialize` the first time sheet data is requested.
pub struct Excel {
// Format-specific reader every operation is forwarded to.
file: FileType,
// Result of the reader's `read_shared_strings`; passed to
// `read_worksheet_range` on every sheet read.
strings: Vec<String>,
// Result of the reader's `read_relationships`; passed to `read_sheets_names`.
relationships: HashMap<Vec<u8>, String>,
// Result of the reader's `read_sheets_names`. The first element of each pair
// is compared with the sheet name requested by the caller, the second one is
// handed to `read_worksheet_range` as its `path` argument.
sheets: Vec<(String, String)>,
}
/// Forwards a method call to whichever reader is stored in `$s.file`.
///
/// `inner!(x, method(a, b))` expands to a `match` on `x.file` that calls
/// `method(a, b)` on the reader of the active `FileType` variant, borrowed
/// mutably. Any number of arguments (including none) is accepted.
macro_rules! inner {
    ($s:expr, $func:ident($($args:expr),*)) => {{
        match $s.file {
            FileType::Xls(ref mut f) => f.$func($($args),*),
            FileType::Xlsx(ref mut f) => f.$func($($args),*),
            FileType::Xlsb(ref mut f) => f.$func($($args),*),
        }
    }};
}
impl Excel {
    /// Opens the workbook stored at `path`.
    ///
    /// The reader is selected from the file extension, compared
    /// case-sensitively: `xls`/`xla`, `xlsx`/`xlsm`/`xlam`, or `xlsb`.
    /// The file is opened before the extension is inspected.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened, when the selected reader's
    /// constructor fails, or with `ErrorKind::InvalidExtension` when the
    /// extension is not listed above (an empty string is reported when the
    /// path has no extension or the extension is not valid UTF-8).
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Excel> {
        let handle = File::open(&path)?;
        let extension = path.as_ref().extension().and_then(|ext| ext.to_str());
        let reader = match extension {
            Some("xls") | Some("xla") => FileType::Xls(xls::Xls::new(handle)?),
            Some("xlsx") | Some("xlsm") | Some("xlam") => {
                FileType::Xlsx(xlsx::Xlsx::new(handle)?)
            }
            Some("xlsb") => FileType::Xlsb(xlsb::Xlsb::new(handle)?),
            unsupported => {
                let shown = unsupported.unwrap_or("").to_string();
                return Err(ErrorKind::InvalidExtension(shown).into());
            }
        };
        Ok(Excel {
            file: reader,
            strings: Vec::new(),
            relationships: HashMap::new(),
            sheets: Vec::new(),
        })
    }

    /// Reads the content of the worksheet called `name`.
    ///
    /// # Errors
    ///
    /// Fails when the workbook metadata cannot be loaded, with
    /// `ErrorKind::WorksheetName` when no sheet has that exact name, or when
    /// the reader fails to read the sheet.
    pub fn worksheet_range(&mut self, name: &str) -> Result<Range> {
        self.initialize()?;
        let path = match self.sheets.iter().find(|sheet| sheet.0 == name) {
            Some(sheet) => &sheet.1,
            None => return Err(ErrorKind::WorksheetName(name.to_string()).into()),
        };
        inner!(self, read_worksheet_range(path, &self.strings))
    }

    /// Reads the content of the worksheet at position `idx` in the sheet list
    /// (zero-based, in the order returned by the reader).
    ///
    /// # Errors
    ///
    /// Fails when the workbook metadata cannot be loaded, with
    /// `ErrorKind::WorksheetIndex` when `idx` is past the last sheet, or when
    /// the reader fails to read the sheet.
    pub fn worksheet_range_by_index(&mut self, idx: usize) -> Result<Range> {
        self.initialize()?;
        let path = match self.sheets.get(idx) {
            Some(&(_, ref sheet_path)) => sheet_path,
            None => return Err(ErrorKind::WorksheetIndex(idx).into()),
        };
        inner!(self, read_worksheet_range(path, &self.strings))
    }

    /// Loads shared strings, relationships and sheet names from the reader.
    ///
    /// Each collection is only read while it is still empty. Note that
    /// "empty" doubles as "not loaded yet", so a workbook for which the
    /// reader legitimately returns an empty collection is queried again on
    /// every call.
    fn initialize(&mut self) -> Result<()> {
        if self.strings.is_empty() {
            let shared = inner!(self, read_shared_strings())?;
            self.strings = shared;
        }
        if self.relationships.is_empty() {
            let relationships = inner!(self, read_relationships())?;
            self.relationships = relationships;
        }
        // Sheet names are resolved through the relationships, hence read last.
        if self.sheets.is_empty() {
            let sheets = inner!(self, read_sheets_names(&self.relationships))?;
            self.sheets = sheets;
        }
        Ok(())
    }

    /// Asks the reader whether the workbook contains a VBA project.
    pub fn has_vba(&mut self) -> bool {
        inner!(self, has_vba())
    }

    /// Returns the workbook's VBA project as provided by the reader.
    ///
    /// # Errors
    ///
    /// Propagates the reader's error.
    pub fn vba_project(&mut self) -> Result<Cow<VbaProject>> {
        inner!(self, vba_project())
    }

    /// Returns the names of all sheets, in the order returned by the reader.
    ///
    /// # Errors
    ///
    /// Fails when the workbook metadata cannot be loaded.
    pub fn sheet_names(&mut self) -> Result<Vec<String>> {
        self.initialize()?;
        let names = self.sheets.iter().map(|sheet| sheet.0.clone()).collect();
        Ok(names)
    }
}
/// Operations a format-specific reader provides to `Excel`.
///
/// `Excel` forwards its calls to the reader of the opened file through these
/// methods.
pub trait ExcelReader: Sized {
/// Creates a reader from an already opened file.
fn new(f: File) -> Result<Self>;
/// Tells whether the workbook contains a VBA project.
fn has_vba(&mut self) -> bool;
/// Returns the workbook's VBA project.
fn vba_project(&mut self) -> Result<Cow<VbaProject>>;
/// Returns the strings that `Excel` later passes back as the `strings`
/// argument of `read_worksheet_range`.
fn read_shared_strings(&mut self) -> Result<Vec<String>>;
/// Returns one `(name, path)` pair per sheet.
///
/// `relationships` is the map returned by `read_relationships`. `Excel`
/// matches the first element of a pair against the requested sheet name and
/// passes the second one to `read_worksheet_range`.
fn read_sheets_names(&mut self,
relationships: &HashMap<Vec<u8>, String>)
-> Result<Vec<(String, String)>>;
/// Returns the map that `Excel` passes to `read_sheets_names`.
///
/// NOTE(review): presumably maps a relationship id to its target path —
/// confirm against the reader implementations.
fn read_relationships(&mut self) -> Result<HashMap<Vec<u8>, String>>;
/// Reads the cells of the sheet identified by `path`.
///
/// `path` is the second element of a pair returned by `read_sheets_names`
/// and `strings` is the list returned by `read_shared_strings`.
fn read_worksheet_range(&mut self, path: &str, strings: &[String]) -> Result<Range>;
}
/// A value together with its position, used to build a `Range` from a sparse
/// list of cells (see `Range::from_sparse`).
#[derive(Debug, Clone)]
pub struct Cell {
// `(row, column)` position; `Range::from_sparse` reads `.0` as the row and
// `.1` as the column.
pos: (u32, u32),
// Value held by the cell.
val: DataType,
}
impl Cell {
    /// Builds a cell holding `value` at `position`, a `(row, column)` pair.
    pub fn new(position: (u32, u32), value: DataType) -> Cell {
        Cell { pos: position, val: value }
    }

    /// Returns the `(row, column)` position of the cell.
    pub fn get_position(&self) -> (u32, u32) {
        let (row, column) = self.pos;
        (row, column)
    }

    /// Borrows the value held by the cell.
    pub fn get_value(&self) -> &DataType {
        let Cell { ref val, .. } = *self;
        val
    }
}
/// A rectangular block of cells.
///
/// Positions are `(row, column)` pairs and both corners are inclusive.
#[derive(Debug, Default, Clone)]
pub struct Range {
// `(row, column)` of the top-left cell.
start: (u32, u32),
// `(row, column)` of the bottom-right cell (inclusive).
end: (u32, u32),
// Cell values stored row after row; the cell at `(row, column)` lives at
// index `(row - start.0) * width + (column - start.1)`.
inner: Vec<DataType>,
}
impl Range {
    /// Creates a range spanning `start` to `end` (both inclusive, as
    /// `(row, column)` pairs) with every cell set to `DataType::Empty`.
    ///
    /// When `end` lies above or to the left of `start` the range holds no
    /// cell at all and `is_empty` returns `true`.
    pub fn new(start: (u32, u32), end: (u32, u32)) -> Range {
        let cells = if start.0 > end.0 || start.1 > end.1 {
            Vec::new()
        } else {
            // Count in `usize` so that the product cannot overflow `u32`.
            let height = (end.0 - start.0) as usize + 1;
            let width = (end.1 - start.1) as usize + 1;
            vec![DataType::Empty; width * height]
        };
        Range {
            start: start,
            end: end,
            inner: cells,
        }
    }

    /// Builds the smallest range containing every cell of `cells`.
    ///
    /// Cells may come in any order. Positions not mentioned in `cells` are
    /// filled with `DataType::Empty`; when a position appears several times
    /// the last occurrence wins. An empty `cells` gives an empty range
    /// anchored at `(0, 0)`.
    pub fn from_sparse(cells: Vec<Cell>) -> Range {
        if cells.is_empty() {
            return Range {
                start: (0, 0),
                end: (0, 0),
                inner: Vec::new(),
            };
        }

        // Bounding box over all cells, so the input need not be sorted.
        let (mut row_start, mut col_start) = cells[0].pos;
        let (mut row_end, mut col_end) = (row_start, col_start);
        for cell in &cells {
            let (row, col) = cell.pos;
            if row < row_start {
                row_start = row;
            }
            if row > row_end {
                row_end = row;
            }
            if col < col_start {
                col_start = col;
            }
            if col > col_end {
                col_end = col;
            }
        }

        let width = (col_end - col_start) as usize + 1;
        let height = (row_end - row_start) as usize + 1;
        let mut values = vec![DataType::Empty; width * height];
        for cell in cells {
            let row = (cell.pos.0 - row_start) as usize;
            let col = (cell.pos.1 - col_start) as usize;
            values[row * width + col] = cell.val;
        }

        Range {
            start: (row_start, col_start),
            end: (row_end, col_end),
            inner: values,
        }
    }

    /// Returns the `(row, column)` position of the top-left cell.
    pub fn start(&self) -> (u32, u32) {
        self.start
    }

    /// Returns the `(row, column)` position of the bottom-right cell.
    pub fn end(&self) -> (u32, u32) {
        self.end
    }

    /// Returns the number of columns, `0` for an empty range.
    pub fn width(&self) -> usize {
        if self.inner.is_empty() {
            0
        } else {
            (self.end.1 - self.start.1) as usize + 1
        }
    }

    /// Returns the number of rows, `0` for an empty range.
    pub fn height(&self) -> usize {
        if self.inner.is_empty() {
            0
        } else {
            (self.end.0 - self.start.0) as usize + 1
        }
    }

    /// Returns the size of the range as `(height, width)`.
    pub fn get_size(&self) -> (usize, usize) {
        (self.height(), self.width())
    }

    /// Tells whether the range holds no cell at all.
    ///
    /// This looks at the stored cells rather than at the corners, because an
    /// empty range built by `from_sparse` or `Default` has `start == end`.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Stores `value` at the absolute `(row, column)` position `pos`.
    ///
    /// The range grows downwards and/or to the right when `pos` lies past its
    /// current end; new cells are `DataType::Empty`. On a range without cells
    /// the grid spanning `start` to `pos` is allocated.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::CellOutOfRange` when `pos` is above or to the left
    /// of the range start: the range never grows in those directions.
    pub fn set_value(&mut self, pos: (u32, u32), value: DataType) -> Result<()> {
        // Both coordinates are checked: comparing the tuples would order them
        // lexicographically and let a too-small column slip through.
        if pos.0 < self.start.0 || pos.1 < self.start.1 {
            return Err(ErrorKind::CellOutOfRange(pos, self.start).into());
        }
        let row_offset = (pos.0 - self.start.0) as usize;
        let col_offset = (pos.1 - self.start.1) as usize;

        if self.inner.is_empty() {
            // No cell yet: allocate the grid from `start` to `pos`.
            self.inner = vec![DataType::Empty; (row_offset + 1) * (col_offset + 1)];
            self.end = pos;
        } else {
            match (self.end.0 < pos.0, self.end.1 < pos.1) {
                // Position already inside the range.
                (false, false) => (),
                // Only rows are missing: append exactly the missing rows.
                (true, false) => {
                    let new_len = (row_offset + 1) * self.width();
                    self.inner.resize(new_len, DataType::Empty);
                    self.end.0 = pos.0;
                }
                // Columns are missing (and maybe rows): every existing row
                // has to be widened, so the storage is rebuilt.
                (grow_rows, true) => {
                    let old_width = self.width();
                    let width = col_offset + 1;
                    let height = if grow_rows {
                        row_offset + 1
                    } else {
                        self.height()
                    };
                    let mut data = Vec::with_capacity(width * height);
                    for row in self.inner.chunks(old_width) {
                        data.extend_from_slice(row);
                        let padded_len = data.len() + (width - old_width);
                        data.resize(padded_len, DataType::Empty);
                    }
                    // Add the rows below the old end, if any.
                    data.resize(width * height, DataType::Empty);
                    self.end = if grow_rows {
                        pos
                    } else {
                        (self.end.0, pos.1)
                    };
                    self.inner = data;
                }
            }
        }

        let idx = row_offset * self.width() + col_offset;
        self.inner[idx] = value;
        Ok(())
    }

    /// Borrows the value at the absolute `(row, column)` position `pos`.
    ///
    /// # Panics
    ///
    /// Panics when `pos` is outside of the range (including when the range is
    /// empty).
    pub fn get_value(&self, pos: (u32, u32)) -> &DataType {
        // Component-wise check; a lexicographic tuple comparison would accept
        // a column past the end and return a cell from another row.
        assert!(
            !self.inner.is_empty()
                && self.start.0 <= pos.0
                && pos.0 <= self.end.0
                && self.start.1 <= pos.1
                && pos.1 <= self.end.1,
            "cell {:?} is outside of range {:?}..={:?}",
            pos,
            self.start,
            self.end
        );
        let row = (pos.0 - self.start.0) as usize;
        let col = (pos.1 - self.start.1) as usize;
        &self.inner[row * self.width() + col]
    }

    /// Returns an iterator over the rows of the range, each row being a slice
    /// of `width()` cells. An empty range yields no row.
    pub fn rows(&self) -> Rows {
        if self.inner.is_empty() {
            // `chunks` must not be called with a zero width.
            Rows { inner: None }
        } else {
            let width = self.width();
            Rows { inner: Some(self.inner.chunks(width)) }
        }
    }

    /// Returns an iterator over the cells that are not `DataType::Empty`.
    ///
    /// Items are `(row, column, value)` where row and column are offsets
    /// relative to the range start.
    pub fn used_cells(&self) -> UsedCells {
        UsedCells {
            width: self.width(),
            inner: self.inner.iter().enumerate(),
        }
    }
}
/// Iterator over the cells of a `Range` that are not `DataType::Empty`,
/// created by `Range::used_cells`.
#[derive(Debug)]
pub struct UsedCells<'a> {
// Number of cells per row, used to turn a flat index into (row, column).
width: usize,
// Every cell of the range together with its flat index.
inner: ::std::iter::Enumerate<::std::slice::Iter<'a, DataType>>,
}
impl<'a> Iterator for UsedCells<'a> {
    type Item = (usize, usize, &'a DataType);

    /// Advances to the next cell that is not `DataType::Empty` and returns it
    /// as `(row, column, value)`, where row and column are derived from the
    /// cell's flat index and the row width.
    fn next(&mut self) -> Option<Self::Item> {
        let width = self.width;
        for (offset, value) in &mut self.inner {
            if *value != DataType::Empty {
                return Some((offset / width, offset % width, value));
            }
        }
        None
    }
}
/// Iterator over the rows of a `Range`, created by `Range::rows`.
#[derive(Debug)]
pub struct Rows<'a> {
// Row-sized chunks of the range's cells; `None` when the range has no cell,
// in which case the iterator yields nothing.
inner: Option<::std::slice::Chunks<'a, DataType>>,
}
impl<'a> Iterator for Rows<'a> {
type Item = &'a [DataType];
fn next(&mut self) -> Option<Self::Item> {
self.inner.as_mut().and_then(|c| c.next())
}
}
// Checks that every textual error form parses to its matching variant.
#[test]
fn test_parse_error() {
    let expectations = [
        ("#DIV/0!", CellErrorType::Div0),
        ("#N/A", CellErrorType::NA),
        ("#NAME?", CellErrorType::Name),
        ("#NULL!", CellErrorType::Null),
        ("#NUM!", CellErrorType::Num),
        ("#REF!", CellErrorType::Ref),
        ("#VALUE!", CellErrorType::Value),
    ];
    for &(text, ref expected) in expectations.iter() {
        assert_eq!(&CellErrorType::from_str(text).unwrap(), expected);
    }
}