1use quick_xml::events::BytesStart;
2use zip::read::{ZipArchive, ZipFile};
3use indicatif::ProgressBar;
4use std::fs::File;
5use std::fmt;
6use std::io::BufReader;
7use quick_xml::{
8 Reader,
9 events::{
10 Event, BytesText,
11 attributes::Attribute
12 },
13};
14use ndarray::{Array2, Array, s, Axis, ArrayView};
15use crate::{
16 evaluate::{
17 value::Value,
18 evaluate_expr_with_context,
19 ensure_non_range
20 },
21 utils::adjust_formula,
22 dependency::{CellId, DependencyTree},
23 utils::excel_to_date,
24 reference::Reference,
25 parser::{
26 parse_str,
27 ast::Expr
28 },
29 cell::Cell,
30 errors::Error
31};
32
33pub type ZipType = ZipArchive<File>;
34
35pub struct Book {
36 zip: Option<ZipType>,
37 pub sheets: Vec<Sheet>,
38 shared_strings: Vec<SharedString>,
39 styles: Vec<Style>,
40 pub current_sheet: usize,
41 pub dependencies: DependencyTree,
42 }
44
45impl From<String> for Book {
46 fn from(s: String) -> Self {
47 let zip = Self::zip_from_path(&s);
48 Book { zip: Some(zip), sheets: vec![], shared_strings: vec![], styles: vec![], current_sheet: 0, dependencies: DependencyTree::new() }
49 }
50}
51
52impl From<&str> for Book {
53 fn from(s: &str) -> Self {
54 Book::from(s.to_string())
55 }
56}
57
58impl Default for Book {
59 fn default() -> Self {
60 Self::new()
61 }
62}
63
64impl Book {
65 pub fn new() -> Book {
66 Book { zip: None, sheets: vec![], shared_strings: vec![], styles: vec![], current_sheet: 0, dependencies: DependencyTree::new() }
67 }
68
69 pub fn load(&mut self, progress: bool) -> Result<(), Error> {
70 self.load_sheet_names()?;
71 self.load_shared_strings()?;
72 self.load_styles()?;
73 self.load_sheets_dimensions()?;
74 self.load_sheets(progress)?;
75 Ok(())
76 }
77
78 pub fn load_shared_strings(&mut self) -> Result<(), Error> {
79 let mut buf = Vec::new();
80 if let Ok(f) = self.zip.as_mut().unwrap().by_name("xl/sharedStrings.xml") {
81 let mut reader: Reader<BufReader<ZipFile>> = Reader::<BufReader<ZipFile>>::from_reader(BufReader::new(f));
82 let mut is_shared_string: bool = false;
83 loop {
84 match reader.read_event(&mut buf) {
85 Ok(Event::Start(ref e)) if e.name() == b"t" => {
86 is_shared_string = true;
87 },
88 Ok(Event::Text(ref e)) => {
89 if is_shared_string {
90 let decoded_text: String = Self::decode_text_event(&reader, e);
91 if !decoded_text.is_empty() {
92 self.shared_strings.push(
93 SharedString(Self::decode_text_event(&reader, e))
94 )
95 }
96 }
97 },
98 Ok(Event::Eof) => break,
99 _ => {}
100 }
101 buf.clear();
102 }
103 }
104 Ok(())
105 }
106
107 pub fn load_styles(&mut self) -> Result<(), Error> {
108 let mut buf = Vec::new();
109 if let Ok(f) = self.zip.as_mut().unwrap().by_name("xl/styles.xml") {
110 let mut reader: Reader<BufReader<ZipFile>> = Reader::<BufReader<ZipFile>>::from_reader(BufReader::new(f));
111 let mut is_cell_xfs: bool = false;
112 loop {
113 match reader.read_event(&mut buf) {
114 Ok(Event::Start(ref e)) if e.name() == b"cellXfs" || e.name() == b"xf" => {
115 match e.name() {
116 b"cellXfs" => { is_cell_xfs = true; },
117 b"xf" => {
118 if is_cell_xfs {
119 self.styles.push(Book::decode_style(&reader, e));
120 }
121 },
122 _ => {}
123 }
124 },
125 Ok(Event::Empty(ref e)) if e.name() == b"xf" => {
126 if is_cell_xfs {
127 self.styles.push(Book::decode_style(&reader, e));
128 }
129 },
130 Ok(Event::End(ref e)) if e.name() == b"cellXfs" => { is_cell_xfs = false; },
131 Ok(Event::Eof) => break,
132 _ => {}
133 }
134 buf.clear();
135 }
136 }
137 Ok(())
138 }
139
140 pub fn load_sheet_names(&mut self) -> Result<(), Error> {
141 let mut buf = Vec::new();
142 if let Ok(f) = self.zip.as_mut().unwrap().by_name("xl/workbook.xml") {
143 let mut reader: Reader<BufReader<ZipFile>> = Reader::<BufReader<ZipFile>>::from_reader(BufReader::new(f));
144 loop {
145 match reader.read_event(&mut buf) {
146 Ok(Event::Empty(ref e)) if e.local_name() == b"sheet" => {
147 for a in e.attributes() {
148 let a = a.unwrap();
149 if let b"name" = a.key {
150 let name = a.unescape_and_decode_value(&reader).unwrap();
151 self.sheets.push(Sheet::from(name));
152 }
153 }
154 },
155 Ok(Event::Eof) => break,
156 _ => {}
157 }
158 buf.clear();
159 }
160 }
161 Ok(())
162 }
163
164 pub fn load_sheets(&mut self, progress: bool) -> Result<(), Error> {
165 for sheet_id in 0..self.sheets.len() {
166 self.load_sheet(sheet_id, progress)?;
167 }
168 Ok(())
169 }
170
171 pub fn load_sheets_dimensions(&mut self) -> Result<(), Error> {
172 for sheet_id in 0..self.sheets.len() {
173 self.load_sheet_dimensions(sheet_id)?;
174 }
175 Ok(())
176 }
177
178 pub fn load_sheet_dimensions(&mut self, sheet_idx: usize) -> Result<(), Error> {
179 let mut buf = Vec::new();
180 if let Ok(f) = self.zip.as_mut().unwrap().by_name(&format!("xl/worksheets/sheet{}.xml", sheet_idx + 1)) {
181 let mut reader: Reader<BufReader<ZipFile>> = Reader::<BufReader<ZipFile>>::from_reader(BufReader::new(f));
182 loop {
183 match reader.read_event(&mut buf) {
184 Ok(Event::Empty(ref e)) if e.name() == b"dimension" => {
185 for a in e.attributes() {
186 let a = a.unwrap();
187 if let b"ref" = a.key {
188 let dimension: String = a.unescape_and_decode_value(&reader).unwrap();
189 let (row, column, num_rows, num_cols) = Reference::from(dimension.clone()).get_dimensions();
190 let sheet: &mut Sheet = self.sheets.get_mut(sheet_idx).unwrap();
191 sheet.cells = Array::from_elem((num_rows + row, num_cols + column), Value::Empty);
192 sheet.max_rows = num_rows + row;
193 sheet.max_columns = num_cols + column;
194 }
195 }
196 break
197 },
198 _ => {}
199 }
200 }
201 }
202 Ok(())
203 }
204
205 pub fn load_sheet(&mut self, sheet_idx: usize, progress: bool) -> Result<(), Error> {
206 let mut buf = Vec::new();
207 let max_rows = self.get_sheet_by_idx(sheet_idx).max_rows.clone();
208 let max_columns = self.get_sheet_by_idx(sheet_idx).max_columns.clone();
209 let pb = match progress {
210 true => ProgressBar::new((max_rows * max_columns) as u64),
211 false => ProgressBar::hidden()
212 };
213 if let Ok(f) = self.zip.as_mut().unwrap().by_name(&format!("xl/worksheets/sheet{}.xml", sheet_idx + 1)) {
214 let mut reader: Reader<BufReader<ZipFile>> = Reader::<BufReader<ZipFile>>::from_reader(BufReader::new(f));
215 let mut flags = SheetFlags::new();
216 loop {
217 match reader.read_event(&mut buf) {
218 Ok(Event::Start(ref e)) if e.name() == b"c" => {
219 for a in e.attributes() {
220 let a = a.unwrap();
221 match a.key {
222 b"r" => {
223 flags.reset();
225 flags.current_cell_reference = a.unescape_and_decode_value(&reader).unwrap();
226 },
227 b"t" => {
228 let a_value = a.unescape_and_decode_value(&reader).unwrap();
230 if a_value == *"s" {
231 flags.is_string = true;
232 }
233 },
234 b"s" => {
235 let cell_style_idx: usize = a.unescape_and_decode_value(&reader).unwrap().parse::<usize>().unwrap();
237 let style: &Style = self.styles.get(cell_style_idx).expect("Could not find style index");
238 if style.number_format_id >= 14 && style.number_format_id <= 22 && style.apply_number_format {
239 flags.is_date = true;
240 }
241 },
242 _ => {}
243 }
244 }
245 },
246 Ok(Event::Start(ref e)) if e.name() == b"f" => {
247 flags.is_formula = true;
249 for a in e.attributes() {
250 let a = a.unwrap();
251 if a.key == b"ref" {
252 flags.is_shared_formula = true;
253 }
254 }
255 },
256 Ok(Event::Empty(ref e)) if e.name() == b"f" => {
257 for a in e.attributes() {
259 let a = a.unwrap();
260 if let b"si" = a.key {
261 let formula_index: usize = a.unescape_and_decode_value(&reader).unwrap().parse::<usize>().unwrap();
262 let (start_cell, formula_text): &(Cell, String) = flags.shared_formulas.get(formula_index).unwrap();
263 let base_reference = Reference::from(start_cell.as_tuple());
264 let current_cell = Cell::from(flags.current_cell_reference.clone());
265 let current_reference = Reference::from(current_cell.as_tuple());
266 let adjusted_formula: Value = Value::Formula(format!("={}", adjust_formula(base_reference, current_reference, formula_text.clone())?));
267 let sheet = self.sheets.get_mut(sheet_idx).unwrap();
268 let (row, column): (usize, usize) = current_cell.as_tuple();
269 sheet.cells[[row-1, column-1]] = adjusted_formula.clone();
270 let cell_id = CellId::from((sheet_idx, row, column, 1, 1, Some(false)));
271 self.dependencies.add_formula(cell_id, &adjusted_formula.to_string(), &self.sheets)?;
272 flags.reset();
273 }
274 }
275 },
276 Ok(Event::Start(ref e)) if e.name() == b"v" => {
277 flags.is_value = true;
279 },
280 Ok(Event::Text(ref e)) => {
281 let cell_text = Book::decode_text_event(&reader, e);
282 if !cell_text.is_empty() && !flags.current_cell_reference.is_empty() {
283 let cell_text = Book::decode_text_event(&reader, e);
284 let value: Value;
285 if flags.is_formula {
286 value = Value::Formula(format!("={}", &cell_text.replace("_xlfn.", "").to_owned()));
287 if flags.is_shared_formula {
288 flags.shared_formulas.push(
289 (Cell::from(flags.current_cell_reference.clone()), cell_text.clone())
290 )
291 }
292 } else if flags.is_string {
293 let shared_string_idx: usize = cell_text.parse::<usize>().unwrap();
294 let SharedString(s) = self.shared_strings.get(shared_string_idx).unwrap();
295 value = Value::from(s.clone());
296 } else if flags.is_date {
297 value = Value::from(excel_to_date(cell_text.parse::<f64>().unwrap()));
298 } else if !cell_text.is_empty() {
299 value = match &*cell_text {
300 "TRUE" => Value::Bool(true),
301 "FALSE" => Value::Bool(false),
302 _ => {
303 Value::Num(cell_text.parse::<f64>().expect("Unable to parse to number"))
304 }
305 };
306 } else {
307 value = Value::Empty;
308 }
309 let cell = Cell::from(flags.current_cell_reference.clone());
310 let (row, column): (usize, usize) = cell.as_tuple();
311
312 if value.is_formula() {
313 let cell_id = CellId::from((sheet_idx, row, column, 1, 1, Some(false)));
314 self.dependencies.add_formula(cell_id, &value.to_string(), &self.sheets)?;
315 }
316
317 let sheet = self.sheets.get_mut(sheet_idx).unwrap();
318 sheet.cells[[row-1, column-1]] = value;
319 pb.set_position((row * max_columns + column) as u64);
320 flags.reset();
321 }
322 },
323 Ok(Event::Eof) => break,
324 _ => {
325 }
326 }
327 }
328 }
329 Ok(())
330 }
331
332 pub fn zip_from_path(path: &str) -> ZipType {
333 let file: File = File::open(path).expect("Unable to find file");
334 zip::ZipArchive::new(file).expect("Unable to create zip")
335 }
336
337 pub fn decode_text_event(reader: &Reader<BufReader<ZipFile>>, e: &BytesText) -> String {
338 e.unescape_and_decode(reader).unwrap()
339 }
340
341 pub fn decode_attribute_usize(reader: &Reader<BufReader<ZipFile>>, a: Attribute) -> usize {
342 a.unescape_and_decode_value(reader)
343 .unwrap()
344 .parse::<usize>()
345 .unwrap()
346 }
347
348 pub fn decode_style(reader: &Reader<BufReader<ZipFile>>, e: &BytesStart) -> Style {
349 let mut number_format_id : usize = 0;
350 let mut apply_number_format: bool = false;
351 for a in e.attributes() {
352 let a = a.unwrap();
353 match a.key {
354 b"numFmtId" => {
355 number_format_id = Book::decode_attribute_usize(reader, a);
356 },
357 b"applyNumberFormat" => {
358 apply_number_format = Book::decode_attribute_usize(reader, a) != 0;
359 },
360 _ => {}
361 }
362 }
363 Style { number_format_id, apply_number_format }
364 }
365
366 pub fn get_mut_sheet_by_name<'a>(&'a mut self, s: &'a str) -> &'a mut Sheet {
367 let idx = self.sheets.iter().position(|x| x.name == s).unwrap();
368 self.get_mut_sheet_by_idx(idx)
369 }
370
371 pub fn get_mut_sheet_by_idx(&mut self, idx: usize) -> &mut Sheet {
372 self.sheets.get_mut(idx).unwrap()
373 }
374
375 pub fn get_sheet_by_name(&self, s: String) -> &Sheet {
376 let idx = self.sheets.iter().position(|x| x.name == s.as_str()).unwrap();
377 self.get_sheet_by_idx(idx)
378 }
379
380 pub fn get_sheet_by_idx(&self, idx: usize) -> &Sheet {
381 self.sheets.get(idx).unwrap()
382 }
383
384 pub fn resolve_str_ref(&self, s: &str) -> Result<Array2<Value>, Error> {
385 let expr: Expr = parse_str(s)?;
386 if matches!(expr, Expr::Reference { sheet: _, reference: _}) {
387 self.resolve_ref(expr)
388 } else {
389 panic!("Could not resolve {} to a reference", s);
390 }
391 }
392
393 pub fn resolve_ref(&self, expr: Expr) -> Result<Array2<Value>, Error> {
394 if let Expr::Reference {sheet, reference} = expr {
395 let (mut row, mut col, mut num_rows, mut num_cols) = Reference::from(reference).get_dimensions();
396 let sheet: &Sheet = match sheet {
397 Some(s) => self.get_sheet_by_name(s),
398 None => self.get_sheet_by_idx(self.current_sheet)
399 };
400 if num_rows == usize::MAX {
401 num_rows = sheet.cells.dim().0;
402 row = 1; }
404 if num_cols == usize::MAX {
405 num_cols = sheet.cells.dim().0;
406 col = 1; }
408 let row_idx_start: usize = sheet.cells.dim().0.min(row-1);
409 let row_idx_end: usize = sheet.cells.dim().0.min(row+num_rows-1);
410 let rows_append: usize = num_rows - (row_idx_end - row_idx_start);
411 let col_idx_start: usize = sheet.cells.dim().1.min(col-1);
412 let col_idx_end: usize = sheet.cells.dim().1.min(col+num_cols-1);
413 let cols_append: usize = num_cols - (col_idx_end - col_idx_start);
414 let mut output: Array2<Value> = sheet.cells.slice(s![row_idx_start..row_idx_end, col_idx_start..col_idx_end]).into_owned();
415 if rows_append > 0 {
416 for _ in 0..rows_append {
417 output.push(Axis(0), ArrayView::from(&Array::from_elem(output.dim().1, Value::Empty))).unwrap();
418 }
419 }
420 if cols_append > 0 {
421 for _ in 0..cols_append {
422 output.push(Axis(1), ArrayView::from(&Array::from_elem(output.dim().0, Value::Empty))).unwrap();
423 }
424 }
425 Ok(output)
426 } else {
427 panic!("Can only resolve a reference expression.")
428 }
429 }
430
431 pub fn calculate_cell(&mut self, cell_id: &CellId, debug: bool) -> Result<(), Error> {
432 if ! cell_id.calculated.unwrap_or(true) {
433 if debug {
434 println!("======= Calculating cell: {}.{}", cell_id.sheet, Reference::from((cell_id.row, cell_id.column)));
435 }
436 let sheet: &Sheet = self.get_sheet_by_idx(cell_id.sheet);
437 let cell_value = &sheet.cells[[cell_id.row-1, cell_id.column-1]];
438 if let Value::Formula(formula_text) = cell_value.clone() {
439 self.current_sheet = cell_id.sheet;
440 let mut chars = formula_text.chars(); chars.next();
442 let expr: Expr = parse_str(chars.as_str())?;
443 let new_value_result = evaluate_expr_with_context(expr, self, debug);
444 match new_value_result {
445 Ok(new_value) => {
446 let sheet: &mut Sheet = self.get_mut_sheet_by_idx(cell_id.sheet);
447 sheet.cells[[cell_id.row-1, cell_id.column-1]] = ensure_non_range(new_value).ensure_single();
448 return Ok(());
449 },
450 Err(e) => {
451 return match e {
452 Error::Volatile(_) => Err(e),
453 _ => Err(Error::Calculation(cell_id.clone(), Box::new(e)))
454
455 };
456 }
457 };
458 }
459 }
460 Ok(())
461 }
462
463 pub fn is_calculated(&self, expr: Expr) -> bool {
464 let value = self.resolve_ref(expr).unwrap();
465 value.into_raw_vec().iter().all(|x| ! x.is_formula())
466 }
467
468 pub fn calculate(&mut self, debug: bool, progress: bool) -> Result<(), Error> {
469 loop {
470 let mut calculated = true;
471 let order: Vec<CellId> = self.dependencies.get_order();
472 let pb = match progress {
473 true => ProgressBar::new(order.len() as u64),
474 false => ProgressBar::hidden()
475 };
476 for cell_id in self.dependencies.get_order().iter_mut() {
477 pb.inc(1);
478 match self.calculate_cell(cell_id, debug) {
479 Ok(()) => {
480 cell_id.calculated = Some(true)
481 },
482 Err(err) => {
483 match err {
484 Error::Volatile(new_expr) => {
485 self.dependencies.add_expression(*cell_id, *new_expr, &self.sheets)?;
486 calculated = false;
487 break },
489 _ => return Err(Error::Calculation(*cell_id, Box::new(err)))
490 }
491 }
492 }
493 }
494 if calculated {
495 break
496 }
497 }
498 Ok(())
499 }
500}
501
502#[derive(Debug, PartialEq, Eq, Clone)]
503pub struct Sheet {
504 pub name: String,
505 pub max_rows: usize,
506 pub max_columns: usize,
507 pub cells: Array2<Value>
508}
509
510impl From<&str> for Sheet {
511 fn from(s: &str) -> Sheet {
512 Sheet::from(s.to_string())
513 }
514}
515
516impl From<String> for Sheet {
517 fn from(s: String) -> Sheet {
518 Sheet {
519 name: s,
520 max_rows: 0,
521 max_columns: 0,
522 cells: Array::from_elem((0, 0), Value::Empty)
523 }
524 }
525}
526
527impl fmt::Display for Sheet {
528 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
529 write!(f, "'{}'!", self.name)
530 }
531}
532
533#[derive(Debug)]
534pub struct SharedString(String);
535
536#[derive(Default, Debug)]
537pub struct Style {
538 pub number_format_id: usize,
539 pub apply_number_format: bool
540}
541
542impl Style {
543 pub fn new() -> Style {
544 Default::default()
545 }
546}
547
548#[derive(Debug)]
549struct SheetFlags {
550 is_shared_formula: bool,
551 is_date: bool,
552 is_formula: bool,
553 is_string: bool,
554 is_value: bool,
555 current_cell_reference: String,
556 shared_formulas: Vec<(Cell, String)>, }
558
559impl SheetFlags {
560 fn new() -> SheetFlags {
561 SheetFlags {
562 is_shared_formula: false,
563 is_date: false,
564 is_formula: false,
565 is_string: false,
566 is_value: false,
567 current_cell_reference: String::new(),
568 shared_formulas: vec![]
569 }
570 }
571
572 fn reset(&mut self) {
573 self.is_shared_formula = false;
574 self.is_date = false;
575 self.is_formula = false;
576 self.is_string = false;
577 self.is_value = false;
578 self.current_cell_reference = String::new();
579 }
580}
581
582#[cfg(test)]
583mod tests {
584 use crate::workbook::{Sheet, Book};
585 use crate::evaluate::value::Value;
586 use crate::parser::parse_str;
587 use crate::errors::Error;
588 use ndarray::arr2;
589
590 fn get_cell<'a>(book: &'a Book, sheet_name: &'a str, row: usize, column: usize) -> Value {
591 let sheet: &Sheet = book.get_sheet_by_name(sheet_name.to_string());
592 sheet.cells[[row, column]].clone()
593 }
594
595 #[test]
596 fn test_sheet_names() {
597 let mut book = Book::from("assets/data_types.xlsx");
598 book.load(false).expect("Could not load workbook");
599 assert_eq!(&book.sheets[0].name, "test 1");
600 assert_eq!(&book.sheets[1].name, "test 2");
601 assert_eq!(&book.sheets[2].name, "test 3");
602 }
603
604 #[test]
605 fn test_cells() {
606 let mut book = Book::from("assets/data_types.xlsx");
607 book.load(false).expect("Could not load workbook");
608 assert_eq!(get_cell(&book, "test 1", 0, 0), Value::from("Text"));
609 assert_eq!(get_cell(&book, "test 1", 1, 0), Value::from("a"));
610 assert_eq!(get_cell(&book, "test 1", 2, 0), Value::from("b"));
611 assert_eq!(get_cell(&book, "test 1", 3, 0), Value::from("c"));
612 assert_eq!(get_cell(&book, "test 1", 1, 4), Value::Formula(String::from("=B2+1")));
613 assert_eq!(get_cell(&book, "test 1", 2, 4), Value::Formula(String::from("=B3+1")));
614 assert_eq!(get_cell(&book, "test 1", 3, 4), Value::Formula(String::from("=(B4+1)")));
615 }
616
617 #[test]
618 fn test_resolve_ref() -> Result<(), Error> {
619 let mut book = Book::from("assets/basic.xlsx");
620 book.load(false).expect("Could not load workbook");
621 book.calculate(false, false)?;
622 assert_eq!(book.resolve_ref(parse_str("Sheet2!B2")?)?, arr2(&[[Value::from(55.0)]]));
623 assert_eq!(book.resolve_ref(parse_str("Sheet2!A1:B2")?)?, arr2(&
624 [[Value::Empty, Value::Empty],
625 [Value::Empty, Value::from(55.0)]]
626 ));
627 assert_eq!(book.resolve_ref(parse_str("Sheet2!C5:D6")?)?, arr2(&
628 [[Value::Empty, Value::Empty],
629 [Value::Empty, Value::Empty]]
630 ));
631 assert_eq!(book.resolve_ref(parse_str("Sheet2!B:B")?)?, arr2(&
632 [[Value::Empty],
633 [Value::from(55.0)],
634 [Value::Empty]
635 ]
636 ));
637 Ok(())
638 }
639}
640