xlsx_batch_reader/read/
mod.rs

1use std::{cmp::max, collections::{HashMap, HashSet}, fs::File, io::BufReader, path::Path};
2use anyhow::{anyhow, Result};
3use zip::{ZipArchive, read::ZipFile};
4use chrono::{Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
5use quick_xml::{events::Event, reader::Reader};
6
7use lazy_static::lazy_static;
8use crate::{get_num_from_ord, get_tuple_from_ord, CellValue, ColNum, Date32, MergedRange, RowNum, Timesecond, Timestamp, MAX_COL_NUM};
9
10#[cfg(feature = "cached")]
11use crate::is_merged_cell;
12
13// ooxml: http://www.officeopenxml.com/
14
15macro_rules! get_attr_val {
16    ($e:expr, $tag:expr) => {
17        match $e.try_get_attribute($tag)? {
18            Some(v) => {v.unescape_value()?},
19            None => return Err(anyhow!("attribute {} not exist", $tag))
20        }
21    };
22    ($e:expr, $tag:expr, parse) => {
23        match $e.try_get_attribute($tag)? {
24            Some(v) => {v.unescape_value()?.parse()?},
25            None => return Err(anyhow!("attribute {} not exist", $tag))
26        }
27    };
28    ($e:expr, $tag:expr, to_string) => {
29        match $e.try_get_attribute($tag)? {
30            Some(v) => {v.unescape_value()?.to_string()},
31            None => return Err(anyhow!("attribute {} not exist", $tag))
32        }
33    };
34}
35
36/// xlsx book reader
37pub struct XlsxBook {
38    ini_share: bool,
39    str_share: Vec<String>,
40    shts_hidden: Vec<String>,
41    shts_visible: Vec<String>,
42    map_style: HashMap<u32, u32>,
43    map_sheet: HashMap<String, String>,
44    zip_archive: ZipArchive<BufReader<File>>,
45    datetime_fmts: HashMap<u32, u8>,
46}
47
48impl XlsxBook {
49    /// load_share: if set to false, you should call load_share_strings before reading data. it should usually be true. If you only need to obtain the sheet names, you can set it false to open the file faster.
50    pub fn new<T: AsRef<Path>>(path: T, load_share: bool) -> Result<XlsxBook> {
51        // zip压缩文件
52        let mut zip_archive = {
53            let file = File::open(path)?;
54            let zipreader = BufReader::new(file);
55            ZipArchive::new(zipreader)?
56        };
57
58        let book_refs = {
59            let file = zip_archive.by_name("xl/_rels/workbook.xml.rels")?;
60            
61            let mut buf = Vec::new();
62            let mut refs = HashMap::new();
63            let mut reader =  Reader::from_reader(BufReader::new(file));
64            loop {
65                match reader.read_event_into(&mut buf) {
66                    Ok(Event::Empty(ref e)) => {
67                        if e.name().as_ref() == b"Relationship"{
68                            refs.insert(get_attr_val!(e, "Id", to_string), get_attr_val!(e, "Target", to_string));
69                        };
70                    },
71                    Ok(Event::Start(ref e)) => {   // 解析 <sheet ..></sheet> 模式
72                        if e.name().as_ref() == b"Relationship"{
73                            refs.insert(get_attr_val!(e, "Id", to_string), get_attr_val!(e, "Target", to_string));
74                        };
75                    },
76                    Ok(Event::Eof) => break, // exits the loop when reaching end of file
77                    Err(e) => return Err(anyhow!("workbook.xml.refs broken: {:?}", e)),
78                    _ => ()                  // There are several other `Event`s we do not consider here
79                }
80                buf.clear();
81            };
82            refs
83        };
84
85
86        // 初始化sheet列表
87        let mut shts_hidden = Vec::<String>::new();
88        let mut shts_visible = Vec::<String>::new();
89        let map_sheet = {
90            let file = zip_archive.by_name("xl/workbook.xml")?;
91            let mut reader =  Reader::from_reader(BufReader::new(file));
92            // reader.trim_text(true);
93
94            let mut buf = Vec::new();
95            let mut map_share: HashMap<String, String> = HashMap::new();
96            loop {
97                match reader.read_event_into(&mut buf) {
98                    Ok(Event::Empty(ref e)) => {
99                        if e.name().as_ref() == b"sheet"{
100                            let name = get_attr_val!(e, "name", to_string);
101                            let rid = get_attr_val!(e, "r:id", to_string);
102                            let sheet = if book_refs.contains_key(&rid) {
103                                if book_refs[&rid].starts_with('/') {
104                                    format!("{}", book_refs[&rid].trim_start_matches('/'))
105                                } else {
106                                    format!("xl/{}", book_refs[&rid])
107                                }
108                            } else {
109                                return Err(anyhow!("Relationship of sheet-{rid} not found"))
110                            };
111                            match e.try_get_attribute("state").unwrap_or(None) {
112                                Some(attr) => {
113                                    if attr.unescape_value()?.as_bytes() == b"hidden" {
114                                        shts_hidden.push(name.clone());
115                                    } else {
116                                        shts_visible.push(name.clone());
117                                    };
118                                },
119                                _ => {shts_visible.push(name.clone());}
120                            };
121                            map_share.insert(name, sheet);  // sheet名,对应的真是xml文件
122                        };
123                    },
124                    Ok(Event::Start(ref e)) => {   // 解析 <sheet ..></sheet> 模式
125                        if e.name().as_ref() == b"sheet"{
126                            let name = get_attr_val!(e, "name", to_string);
127                            let rid = get_attr_val!(e, "r:id", to_string);
128                            let sheet = if book_refs.contains_key(&rid) {
129                                if book_refs[&rid].starts_with('/') {
130                                    format!("{}", book_refs[&rid].trim_start_matches('/'))
131                                } else {
132                                    format!("/xl/{}", book_refs[&rid])
133                                }
134                            } else {
135                                return Err(anyhow!("Relationship of sheet-rid not found!"))
136                            };
137                            match e.try_get_attribute("state").unwrap_or(None) {
138                                Some(attr) => {
139                                    if attr.unescape_value()?.as_bytes() != b"hidden" {
140                                        shts_visible.push(name.clone());
141                                    };
142                                },
143                                _ => {shts_visible.push(name.clone());}
144                            };
145                            map_share.insert(name, sheet);  // sheet名,对应的真是xml文件
146                        };
147                    },
148                    Ok(Event::Eof) => break, // exits the loop when reaching end of file
149                    Err(e) => return Err(anyhow!("workbook.xml is broken: {:?}", e)),
150                    _ => ()                  // There are several other `Event`s we do not consider here
151                }
152                buf.clear();
153            };
154            map_share
155        };
156
157        // 初始化单元格格式
158        let mut datetime_fmts = DATETIME_FMTS.clone();
159        let map_style = {
160            match zip_archive.by_name("xl/styles.xml") {
161                Ok(file) => {
162                    let mut reader =  Reader::from_reader(BufReader::new(file));
163                    // reader.trim_text(true);
164
165                    let mut inx: u32 = 0;
166                    let mut act = false;
167                    let mut buf = Vec::new();
168                    let mut map_style: HashMap<u32, u32> = HashMap::new();
169                    loop {
170                        match reader.read_event_into(&mut buf) {
171                            Ok(Event::Start(ref e)) => {
172                                if e.name().as_ref() == b"cellXfs" || e.name().as_ref() == b"numFmts" {
173                                    act = true;
174                                } else if act && (e.name().as_ref() == b"numFmt"){
175                                    let code = get_attr_val!(e, "formatCode", to_string);
176                                    if code.contains("yy") {
177                                        if code.contains("h") || code.contains("ss") {
178                                            datetime_fmts.insert(get_attr_val!(e, "numFmtId", parse), FMT_DATETIME);
179                                        } else {
180                                            datetime_fmts.insert(get_attr_val!(e, "numFmtId", parse), FMT_DATE);
181                                        }
182                                    } else if code.contains("ss") {
183                                        datetime_fmts.insert(get_attr_val!(e, "numFmtId", parse), FMT_TIME);
184                                    };
185                                } else if act && (e.name().as_ref() == b"xf"){
186                                    map_style.insert(inx, get_attr_val!(e, "numFmtId", parse));
187                                    inx += 1;
188                                };
189                            },
190                            Ok(Event::Empty(ref e)) => {
191                                if act && (e.name().as_ref() == b"numFmt"){
192                                    let code = get_attr_val!(e, "formatCode", to_string);
193                                    if code.contains("yy") {
194                                        if code.contains("h") || code.contains("ss") {
195                                            datetime_fmts.insert(get_attr_val!(e, "numFmtId", parse), FMT_DATETIME);
196                                        } else {
197                                            datetime_fmts.insert(get_attr_val!(e, "numFmtId", parse), FMT_DATE);
198                                        }
199                                    } else if code.contains("ss") {
200                                        datetime_fmts.insert(get_attr_val!(e, "numFmtId", parse), FMT_TIME);
201                                    };
202                                } else if act && (e.name().as_ref() == b"xf"){
203                                    map_style.insert(inx, get_attr_val!(e, "numFmtId", parse));
204                                    inx += 1;
205                                };
206                            },
207                            Ok(Event::End(ref e)) => {
208                                if e.name().as_ref() == b"numFmts" {
209                                    act = false;
210                                } else if e.name().as_ref() == b"cellXfs" {
211                                    break;
212                                };
213                            },
214                            Ok(Event::Eof) => break, // exits the loop when reaching end of file
215                            Err(e) => return Err(anyhow!("styles.xml is broken: {:?}", e)),
216                            _ => ()                  // There are several other `Event`s we do not consider here
217                        }
218                        buf.clear();
219                    };
220                    map_style
221                },
222                Err(_) => {
223                    HashMap::new()
224                }
225            }
226        };
227        
228        let mut book = XlsxBook{
229                ini_share: false,
230                str_share: Vec::new(),
231                map_style,
232                map_sheet,
233                shts_hidden,
234                shts_visible,
235                zip_archive,
236                datetime_fmts,
237            };
238        if load_share {
239            book.load_share_strings()?;
240        };
241        Ok(book)
242    }
243    /// get hidden sheets
244    pub fn get_hidden_sheets(&self) -> &Vec<String> {
245        &self.shts_hidden
246    } 
247    /// get visible sheets
248    pub fn get_visible_sheets(&self) -> &Vec<String> {
249        &self.shts_visible
250    }
251    /// if set load_share to false, you should call load_share_strings before reading data
252    pub fn load_share_strings(&mut self) -> Result<()>{
253        if self.ini_share {
254            return Ok(());
255        };
256        let str_share = {
257            match self.zip_archive.by_name("xl/sharedStrings.xml") {
258                Ok(file) => {
259                    let mut reader =  Reader::from_reader(BufReader::new(file));
260                    // reader.trim_text(true);
261
262                    let mut buf = Vec::with_capacity(3069);
263                    let cap = loop {    // 获取ShareString容量
264                        match reader.read_event_into(&mut buf) {
265                            Ok(Event::Start(ref e)) => {
266                                if e.name().as_ref() == b"sst"{
267                                    let cnt: usize =  {
268                                        match e.try_get_attribute("uniqueCount")? {
269                                            Some(a) => {a.unescape_value()?.parse()?},
270                                            None => {get_attr_val!(e, "count", parse)}
271                                        }
272                                    };
273                                    break cnt
274                                }
275                            }
276                            Ok(Event::Eof) => {return Ok(())}, // exits the loop when reaching end of file
277                            Err(e) => return Err(anyhow!("sharedStrings.xml is broken: {:?}", e)),
278                            _ => (),                     // There are several other `Event`s we do not consider here
279                        }
280                    };
281
282                    let mut insert = false;
283                    let mut shstring = String::new(); 
284                    let mut vec_share: Vec<String> = Vec::with_capacity(cap);
285                    loop {
286                        match reader.read_event_into(&mut buf) {
287                            Ok(Event::Start(ref e)) => {
288                                match e.name().as_ref() {
289                                    b"si" => {shstring.clear()},
290                                    b"t" => {insert = true},
291                                    _ => {insert = false},
292                                }
293                            },
294                            Ok(Event::Text(ref t)) => {
295                                if insert {
296                                    shstring += &String::from_utf8(t.to_vec())?;
297                                }
298                            },
299                            Ok(Event::End(ref e)) => {
300                                if e.name().as_ref() == b"si" {
301                                    vec_share.push(shstring.clone());
302                                }
303                            },
304                            Ok(Event::Eof) => break, // exits the loop when reaching end of file
305                            Err(e) => return Err(anyhow!("sharedStrings.xml is broken: {:?}", e)),
306                            _ => ()                  // There are several other `Event`s we do not consider here
307                        }
308                        buf.clear();
309                    };
310                    if cap != vec_share.len() {  
311                        return Err(anyhow!("shareString-lenth check error!!"));
312                    };
313                    vec_share
314                },
315                Err(_) => {
316                    Vec::<String>::new()
317                }
318            }
319        };
320        self.ini_share = true;
321        self.str_share = str_share;
322        Ok(())
323    }
324    /// sht_name: sheet name  
325    /// iter_batch: The number of rows per batch  
326    /// skip_rows: number of skipped rows  
327    /// left_ncol: Starting column (included), with 1 as the starting value  
328    /// right_ncol: Terminate columns (including), MAX-COL_NUM to get non fixed termination columns  
329    pub fn get_sheet_by_name<'a, 'b>(&'a mut self, sht_name: &'b String, iter_batch: usize, skip_rows: u32, left_ncol: ColNum, right_ncol: ColNum, first_row_is_header: bool) -> Result<XlsxSheet<'a>> {
330        for (k, v) in self.map_sheet.clone() {
331            if k.eq(sht_name) {
332                if !self.ini_share {
333                    self.load_share_strings()?;
334                };
335
336                match self.zip_archive.by_name(v.as_str()) {
337                    Ok(file) => {
338                        let reader = Reader::from_reader(BufReader::new(file));
339                        // reader.trim_text(true);
340            
341                        return Ok(XlsxSheet {
342                            reader,
343                            skip_rows,
344                            left_ncol: left_ncol-1,
345                            right_ncol,
346                            iter_batch,
347                            first_row_is_header,
348                            first_row: None,
349                            key: k,
350                            buf: Vec::with_capacity(8*1024),
351                            status: 1,
352                            currow: 0,
353                            str_share: &self.str_share,
354                            map_style: &self.map_style,
355                            datetime_fmts: &self.datetime_fmts,
356                            max_size: None,
357                            merged_rects: None,
358                            skip_until: None,
359                            skip_matched: None,
360                            skip_matched_check_by_and: true,
361                            read_before: None,
362                            addr_captures: None,
363                            vals_captures: HashMap::new(),
364                        });
365                    },
366                    Err(_) => {
367                        return Err(anyhow!("sheet {} - {} lost!", k.as_str(), v.as_str()));
368                    }
369                };
370            };
371        };
372        Err(anyhow!(format!("{} sheet not found!", sht_name)))
373    }
374    /// get cached sheet by name, all data will be cached in memory when sheet created
375    #[cfg(feature = "cached")]
376    pub fn get_cached_sheet_by_name(&mut self, sht_name: &String, iter_batch: usize, skip_rows: u32, left_ncol: ColNum, right_ncol: ColNum, first_row_is_header: bool) -> Result<CachedSheet> {
377        Ok(self.get_sheet_by_name(sht_name, iter_batch, skip_rows, left_ncol, right_ncol, first_row_is_header)?.into_cached_sheet()?)
378    }
379    pub fn get_sheets_maps(&self) -> &HashMap<String, String> {
380        &self.map_sheet
381    }
382}
383
384/// batch sheet reader
385pub struct XlsxSheet<'a> {
386    key: String,
387    str_share: &'a Vec<String>,
388    map_style: &'a HashMap<u32, u32>,
389    buf: Vec<u8>,
390    status: u8,   // 0-closed; 1-new; 2-active; 3-get_cell; 4-skip_cell; 初始为1
391    currow: RowNum,  //  当前行号
392    reader: Reader<BufReader<ZipFile<'a, BufReader<File>>>>,
393    iter_batch: usize,
394    skip_rows: u32,
395    max_size: Option<(RowNum, ColNum)>,
396    left_ncol: ColNum,
397    right_ncol: ColNum,
398    first_row_is_header: bool,    //  标识是否需要把读取到的第一行作为标题,读取到标题行以后,会被设置为false
399    first_row: Option<(u32, Vec<CellValue<'a>>)>,
400    datetime_fmts: &'a HashMap<u32, u8>,
401    merged_rects: Option<Vec<((RowNum, ColNum), (RowNum, ColNum))>>,
402    skip_until: Option<HashMap<usize, String>>,
403    skip_matched: Option<HashMap<usize, String>>,
404    skip_matched_check_by_and: bool,
405    read_before: Option<HashMap<usize, String>>,
406    addr_captures: Option<HashSet<String>>,
407    vals_captures: HashMap<String, CellValue<'a>>
408}
409
410impl<'a> XlsxSheet<'a> {
411    /// into cached sheet
412    #[cfg(feature = "cached")]
413    fn into_cached_sheet(mut self) -> Result<CachedSheet<'a>> {
414        let (data, bottom_nrow) =  match self.get_next_row() {
415            Ok(Some((r, d))) => {
416                let mut data = if let Some((rn, _)) = self.max_size {
417                    HashMap::with_capacity(rn as usize)
418                } else {
419                    HashMap::new()
420                };
421                data.insert(r, d);
422                let mut last_nrow = r;
423                loop {
424                    match self.get_next_row() {
425                        Ok(Some((r, d))) => {
426                            last_nrow = r;
427                            data.insert(r, d);
428                        },
429                        Ok(None) => {
430                            break;
431                        },
432                        Err(e) => {
433                            return Err(e);
434                        }
435                    };
436                };
437                (data, last_nrow)
438            },
439            Ok(None) => {(HashMap::new(), 0)},
440            Err(e) => {return Err(e);}
441        };
442        let merged_rects = self.get_merged_ranges()?.to_owned();
443        let right_ncol = if self.right_ncol == MAX_COL_NUM {
444            if let Some((_mr, mc)) = self.max_size {
445                mc
446            } else {
447                self.right_ncol
448            }
449        } else {
450            self.right_ncol
451        };
452        let empty = self.is_empty()?;
453        let top_nrow = if self.first_row_is_header {self.skip_rows+2} else {self.skip_rows+1};
454        Ok(CachedSheet {
455            data,
456            merged_rects,
457            key: self.key,
458            current: top_nrow,
459            empty,
460            keep_empty: false,
461            iter_batch: self.iter_batch,
462            top_nrow,
463            bottom_nrow,
464            left_ncol: self.left_ncol + 1,
465            right_ncol,
466            header_row: self.first_row,
467        })
468    }
469    /// get sheet name
470    pub fn sheet_name(&self) -> &String {
471        &self.key
472    }
473    /// skip until a row matched,this function should be called before reading(the matched row will be included)   
474    pub fn with_skip_until(&mut self, checks: &HashMap<String, String>) {
475        let mut maps = HashMap::new();
476        for (c, v) in checks {
477            let col = get_num_from_ord(c.as_bytes()).unwrap_or(0);
478            if col > self.left_ncol && col <= self.right_ncol {
479                maps.insert((col-self.left_ncol-1) as usize, v.clone());
480            }
481        }
482        if maps.len() > 0 {
483            self.skip_until = Some(maps);
484        } else {
485            self.skip_until = None;
486        }
487    }
488    /// skip the matched row, this function should be called before reading(the matched row will be skiped)
489    /// check_by_and - true: all cells should be matched
490    /// check_by_and - false: any cell should be matched
491    pub fn with_skip_matched(&mut self, checks: &HashMap<String, String>, check_by_and: bool) {
492        let mut maps = HashMap::new();
493        for (c, v) in checks {
494            let col = get_num_from_ord(c.as_bytes()).unwrap_or(0);
495            if col > self.left_ncol && col <= self.right_ncol {
496                maps.insert((col-self.left_ncol-1) as usize, v.clone());
497            }
498        }
499        if maps.len() > 0 {
500            self.skip_matched = Some(maps);
501            self.skip_matched_check_by_and = check_by_and;
502        } else {
503            self.skip_matched = None;
504        }
505    }
506    /// read before a row matched,this function should be called before reading(the matched row will not be included)
507    pub fn with_read_before(&mut self, checks: &HashMap<String, String>) {
508        let mut maps = HashMap::new();
509        for (c, v) in checks {
510            let col = get_num_from_ord(c.as_bytes()).unwrap_or(0);
511            if col > self.left_ncol && col <= self.right_ncol {
512                maps.insert((col-self.left_ncol-1) as usize, v.clone());
513            }
514        }
515        if maps.len() > 0 {
516            self.read_before = Some(maps);
517        } else {
518            self.read_before = None;
519        }
520    }
521    /// capture values by address
522    pub fn with_capture_vals(&mut self, captures: HashSet<String>) {
523        if captures.len() > 0 {
524            self.addr_captures = Some(captures);
525        } else {
526            self.addr_captures = None;
527        };
528        self.vals_captures = HashMap::new();
529    }
530    /// get cell captured values,  required:   
531    /// 1. with_capture_vals must be called before this function    
532    /// 2. first_row_is_header must be true     
533    /// 3. the captured values must be after skip_rows(excluded, passed to get_sheet_by_name) and before header row(included)
534    pub fn get_captured_vals(&mut self) -> Result<&HashMap<String, CellValue<'a>>> {
535        if self.addr_captures.is_none() {
536            Ok(&self.vals_captures)
537        } else if self.first_row_is_header {
538            self.get_header_row()?;
539            Ok(&self.vals_captures)
540        } else {
541            Err(anyhow!("get_captured_vals error: first_row_is_header must be true"))
542        }
543    }
544    /// check whether the sheet is empty, should be called after at least one row has been read
545    pub fn is_empty(&self) -> Result<bool> {
546        if self.currow > 0 {
547            Ok(false)
548        } else if self.status == 0 {
549            Ok(true)
550        } else {
551            Err(anyhow!("is_empty should be called after at least one row has been read"))
552        }
553    }
554    /// get column range, v0.1.7 the start column number included (start from 1)
555    pub fn column_range(&self) -> (ColNum, ColNum) {
556        (self.left_ncol+1, self.right_ncol)
557    }
558    /// get next row
559    fn get_next_row(&mut self) -> Result<Option<(u32, Vec<CellValue<'a>>)>> {
560        fn is_matched_row(row: &Vec<CellValue<'_>>, checks: &HashMap<usize, String>, check_by_and: bool) -> bool {
561            if check_by_and {
562                for (i, v) in checks {
563                    if let Some(cell) = row.get(*i) {
564                        if let Ok(Some(s)) = cell.get::<String>() {
565                            if s != *v {
566                                return false;
567                            }
568                        } else {
569                            return false;
570                        }
571                    } else {
572                        return false;
573                    }
574                }
575                true
576            } else {
577                for (i, v) in checks {
578                    if let Some(cell) = row.get(*i) {
579                        if let Ok(Some(s)) = cell.get::<String>() {
580                            if s == *v {
581                                return true;
582                            }
583                        }
584                    }
585                }
586                false
587            }
588        }
589        let mut col: ColNum = 0;
590        let mut cell_addr = "".into();
591        let mut cell_type = vec![];
592        let mut prev_head = vec![];
593        let mut col_index: ColNum = 1;    // 当前需增加cell的col_index
594        // let mut row_num: u32 = 0;     //  sheet中增加currow储存当前行号
595        let mut row_value: Vec<CellValue<'_>> = Vec::new();
596        let mut num_fmt_id: u32 = 0;
597        if self.status == 0 {
598            return Ok(None)
599        }  //  已关闭的sheet直接返回None
600        loop {
601            match self.reader.read_event_into(&mut self.buf) {
602                Ok(Event::Start(ref e)) => {
603                    prev_head = e.name().as_ref().to_vec();
604                    if self.status == 0 {
605                        break Ok(None)
606                    } else if self.status == 1 {
607                        if prev_head == b"dimension" {
608                            let attr = get_attr_val!(e, "ref", to_string);
609                            let dim: Vec<&str> = attr.split(':').collect();
610                            if let Some(x) = dim.get(1) {
611                                self.max_size = Some(get_tuple_from_ord(x.as_bytes())?);
612                            };
613                        } else if prev_head == b"sheetData" {
614                            self.status = 2;
615                        } else if prev_head == b"mergeCells" {
616                            let cnt: usize = get_attr_val!(e, "count", parse);
617                            self.process_merged_cells(cnt)?;
618                        }; 
619                    } else {
620                        if prev_head == b"c" {
621                            match e.try_get_attribute("t")? {
622                                Some(attr) => {
623                                    cell_type = attr.unescape_value()?.as_bytes().to_owned();
624                                },
625                                _ => {
626                                    cell_type = b"n".to_vec();
627                                }
628                            };
629                            match e.try_get_attribute("s")? {
630                                Some(attr) => {
631                                    num_fmt_id = self.map_style[&attr.unescape_value()?.parse::<u32>()?];
632                                },
633                                _ => {
634                                    num_fmt_id = 0;
635                                }
636                            };
637                            cell_addr = get_attr_val!(e, "r").to_string();   //  单元格地址
638                            col = get_num_from_ord(cell_addr.as_bytes()).unwrap_or(0);
639                            
640                            if self.currow > self.skip_rows && col > self.left_ncol && col <= self.right_ncol {
641                                self.status = 3;   // 3-get_cell; 4-skip_cell;
642                            } else {
643                                self.status = 4;   // 3-get_cell; 4-skip_cell;
644                            }
645                        } else if prev_head == b"row" {
646                            self.currow = get_attr_val!(e, "r", parse);
647                            let cap = {
648                                if self.right_ncol == MAX_COL_NUM {
649                                    match e.try_get_attribute("spans") {
650                                        Ok(Some(spans)) => {
651                                            if let Some(x) = spans.unescape_value()?.as_ref().split(":").last() {
652                                                x.parse()?
653                                            } else {
654                                                1
655                                            }
656                                        },
657                                        _ => {
658                                            1
659                                        }
660                                    }
661                                    // if let Some(x) = get_attr_val!(e, "spans").as_ref().split(":").last() {
662                                    //     x.parse()?
663                                    // } else {
664                                    //     1
665                                    // }
666                                } else {
667                                    self.right_ncol
668                                }
669                            } - self.left_ncol;
670                            row_value = Vec::with_capacity(cap.into());
671                            col_index = 1;         // 当前需增加cell的col_index
672                            // row_value.push(CellValue::Number(row_num as f64));  // 行号单独返回
673                        }; 
674                    };
675                },
676                Ok(Event::Empty(ref e)) => {
677                    prev_head = e.name().as_ref().to_vec();
678                    if self.status == 1 && prev_head == b"dimension" {
679                        let attr = get_attr_val!(e, "ref", to_string);
680                        let dim: Vec<&str> = attr.split(':').collect();
681                        if let Some(x) = dim.get(1) {
682                            self.max_size = Some(get_tuple_from_ord(x.as_bytes())?);
683                        };
684                    } else if prev_head == b"sheetData" {
685                        self.status = 0;
686                        break Ok(None)
687                    }
688                },
689                Ok(Event::Text(ref t)) => {
690                    // b for boolean
691                    // d for date
692                    // e for error
693                    // inlineStr for an inline string (i.e., not stored in the shared strings part, but directly in the cell)
694                    // n for number
695                    // s for shared string (so stored in the shared strings part and not in the cell)
696                    // str for a formula (a string representing the formula)
697                    if self.status == 3 && (prev_head == b"v" || prev_head == b"t") {
698                        while col_index + self.left_ncol < col {
699                            row_value.push(CellValue::Blank);
700                            col_index += 1;
701                        }
702                        let cel_val = if cell_type == b"inlineStr" && prev_head == b"t" { 
703                            CellValue::String(String::from_utf8(t.to_vec())?)
704                        } else if prev_head == b"v" {
705                            if cell_type == b"s" {
706                                CellValue::Shared(&self.str_share[String::from_utf8(t.to_vec())?.parse::<usize>()?])
707                            } else if cell_type == b"n" {
708                                let fmt = self.datetime_fmts.get(&num_fmt_id).unwrap_or(&FMT_DEFAULT);
709                                if *fmt == FMT_DATE {
710                                    CellValue::Date(String::from_utf8(t.to_vec())?.parse::<f64>()?)
711                                } else if *fmt == FMT_DATETIME {
712                                    CellValue::Datetime(String::from_utf8(t.to_vec())?.parse::<f64>()?)
713                                } else if *fmt == FMT_TIME {
714                                    CellValue::Time(String::from_utf8(t.to_vec())?.parse::<f64>()?)
715                                } else {
716                                    CellValue::Number(String::from_utf8(t.to_vec())?.parse::<f64>()?)
717                                }
718                            } else if cell_type == b"b" {
719                                if String::from_utf8(t.to_vec())?.parse::<usize>() == Ok(1) {
720                                    CellValue::Bool(true)
721                                } else {
722                                    CellValue::Bool(false)
723                                }
724                            } else if cell_type == b"d" {
725                                CellValue::String(String::from_utf8(t.to_vec())?)
726                            } else if cell_type == b"e" {
727                                CellValue::Error(String::from_utf8(t.to_vec())?)
728                            } else if cell_type == b"str" {
729                                CellValue::String(String::from_utf8(t.to_vec())?)
730                            } else{
731                                CellValue::Blank
732                            }
733                        } else {
734                            CellValue::Error("Unknown cell type".into())
735                        };
736                        if let Some(addrs) = &mut self.addr_captures {
737                            if let Some(key) = addrs.take(&cell_addr) {
738                                self.vals_captures.insert(key, cel_val.clone());
739                            }
740                        }
741                        col_index += 1;
742                        row_value.push(cel_val);
743                    }
744                },
745                Ok(Event::End(ref e)) => {
746                    // 0-closed; 1-new; 2-active;
747                    if (e.name().as_ref() == b"row") && self.status > 1 && row_value.len() > 0 {
748                        if let Some(skip_until) = &self.skip_until {
749                            if is_matched_row(&row_value, skip_until, true) {
750                                self.skip_until = None;
751                            } else {
752                                // col = 0;   //  reset each cell
753                                // cell_type = Vec::new();   // reset each cell
754                                // num_fmt_id = 0;   // reset each cell
755                                // prev_head = Vec::new();    reset each tag
756                                // col_index = 1;    // 当前需增加cell的col_index  // reset each row
757                                // row_num = 0;       //  reset each row
758                                // row_value = Vec::new();    // reset each row
759                                continue;
760                            }   //  读取到初始行前继续读取
761                        } else if let Some(read_before) = &self.read_before {
762                            if is_matched_row(&row_value, read_before, true) {
763                                self.status = 0; 
764                                self.read_before = None;
765                                break Ok(None);
766                            }  //  读取到结尾行后不再继续读取,且抛弃结尾行
767                        };
768                        if !self.first_row_is_header {    //  不跳过标题行
769                            if let Some(skip_matched) = &self.skip_matched {
770                                if is_matched_row(&row_value, skip_matched, self.skip_matched_check_by_and) {
771                                    continue;    //   如果当前行满足条件,忽略当前行; 
772                                }
773                            } 
774                        };
775                        if self.right_ncol != MAX_COL_NUM {
776                            while row_value.len() < row_value.capacity() {
777                                row_value.push(CellValue::Blank);
778                            };
779                        }
780                        self.addr_captures = None;    //  返回首行时,不再匹配captures
781                        break Ok(Some((self.currow, row_value)))
782                    }else if e.name().as_ref() == b"sheetData" {
783                        self.status = 0; 
784                        break Ok(None)
785                    }
786                },
787                Ok(Event::Eof) => {
788                    self.status = 0; 
789                    break Ok(None)
790                },   // exits the loop when reaching end of file
791                Err(e) => {
792                    return Err(anyhow!("sheet data is broken: {:?}", e));
793                },
794                _ => ()                  // There are several other `Event`s we do not consider here
795            }
796            self.buf.clear();
797        }
798    }
799    /// get header if first_row_is_header is true
800    pub fn get_header_row(&mut self) -> Result<(u32, Vec<CellValue<'a>>)> {
801        if self.first_row_is_header {
802            match self.get_next_row() {
803                Ok(Some(v)) => {
804                    self.first_row = Some(v);
805                    self.first_row_is_header = false;
806                },
807                Ok(None) => {},
808                Err(e) => {return Err(e)}
809            }
810        }
811        match &self.first_row {
812            Some(v) => Ok(v.clone()),
813            None => Err(anyhow!("no header row!"))
814        }
815    }
816    fn process_merged_cells(&mut self, count: usize) -> Result<()> {
817        if self.status == 1 || self.status == 0 {
818            if self.merged_rects.is_none() {
819                self.merged_rects = Some(vec![]);
820            }
821            loop {
822                match self.reader.read_event_into(&mut self.buf) {
823                    Ok(Event::Start(ref e)) => {
824                        if e.name().as_ref() == b"mergeCell" {
825                            let attr = get_attr_val!(e, "ref", to_string);
826                            let dim: Vec<&str> = attr.split(':').collect();
827                            if let Some(x) = dim.get(0) {
828                                let left_top = get_tuple_from_ord(x.as_bytes())?;
829                                let right_end =  if let Some(x) = dim.get(1) {
830                                    get_tuple_from_ord(x.as_bytes())?
831                                } else {
832                                    return Err(anyhow!("mergeCell error:{}", attr));
833                                };
834                                if let Some(ref mut mgs) = self.merged_rects {
835                                    mgs.push((left_top, right_end))
836                                };
837                            } else {
838                                return Err(anyhow!("mergeCell error:{}", attr));
839                            }; 
840                        }
841                    },
842                    Ok(Event::Empty(ref e)) => {
843                        if e.name().as_ref() == b"mergeCell" {
844                            let attr = get_attr_val!(e, "ref", to_string);
845                            let dim: Vec<&str> = attr.split(':').collect();
846                            if let Some(x) = dim.get(0) {
847                                let left_top = get_tuple_from_ord(x.as_bytes())?;
848                                let right_end =  if let Some(x) = dim.get(1) {
849                                    get_tuple_from_ord(x.as_bytes())?
850                                } else {
851                                    return Err(anyhow!("mergeCell error:{}", attr));
852                                };
853                                if let Some(ref mut mgs) = self.merged_rects {
854                                    mgs.push((left_top, right_end))
855                                };
856                            } else {
857                                return Err(anyhow!("mergeCell error:{}", attr));
858                            }; 
859                        }
860                    },
861                    Ok(Event::End(ref e)) => {
862                        if e.name().as_ref() != b"mergeCells" {
863                            break;
864                        }
865                        else if e.name().as_ref() != b"mergeCell" {
866                            break;
867                        }
868                    },
869                    Ok(Event::Eof) => {
870                        break;
871                    }, // exits the loop when reaching end of file
872                    _ => {}
873                }
874            };
875            if let Some(ref rects) = self.merged_rects {
876                if rects.len() != count {
877                    self.merged_rects = None;
878                    return Err(anyhow!("the number of merged ranges is not equal to the number of rows"));
879                };
880            }
881        }
882        Ok(())
883    }
884    /// get merged ranges, call after all data getched
885    pub fn get_merged_ranges(&mut self) -> Result<&Vec<MergedRange>> {
886        if self.merged_rects.is_none() {
887            if self.status == 0 {  // 已关闭的情况下读取合并单元格
888                loop {
889                    match self.reader.read_event_into(&mut self.buf) {
890                        Ok(Event::Start(ref e)) => {
891                            if e.name().as_ref() == b"mergeCells" {
892                                let cnt: usize = get_attr_val!(e, "count", parse);
893                                self.process_merged_cells(cnt)?;
894                                break;
895                            };
896                        },
897                        _ => {}
898                    }
899                };
900            } else {
901                return Err(anyhow!("finish fetching data first"));
902            }
903        };
904        if let Some(ref rects) = self.merged_rects {
905            Ok(rects)
906        } else {
907            return Err(anyhow!("merged_rects error"));
908        }
909    }
910    /// Get all the remaining data
911    pub fn get_remaining_cells(&mut self) -> Result<Option<(Vec<u32>, Vec<Vec<CellValue<'_>>>)>> {
912        match self.get_next_row() {
913            Ok(Some((r, d))) => {
914                let (mut rows, mut data) = if let Some((rn, _)) = self.max_size {
915                    (Vec::with_capacity(max(1, rn-r+1) as usize), Vec::with_capacity(rn as usize))
916                } else {
917                    (Vec::new(), Vec::new())
918                };
919                rows.push(r);
920                data.push(d);
921                loop {
922                    match self.get_next_row() {
923                        Ok(Some((r, d))) => {
924                            rows.push(r);
925                            data.push(d);
926                        },
927                        Ok(None) => {
928                            break Ok(Some((rows, data)));
929                        },
930                        Err(e) => {
931                            break Err(e);
932                        }
933                    };
934                }
935            },
936            Ok(None) => {
937                Ok(None)
938            },
939            Err(e) => {Err(e)}
940        }
941    }
942}
943
944impl<'a> Iterator for XlsxSheet<'a> {
945    type Item = Result<(Vec<u32>, Vec<Vec<CellValue<'a>>>)>;
946    fn next(&mut self) -> Option<Self::Item> {
947        let mut nums = Vec::with_capacity(self.iter_batch);
948        let mut data = Vec::with_capacity(self.iter_batch);
949        loop {
950            match self.get_next_row() {
951                Ok(Some(v)) => {
952                    if self.first_row_is_header {
953                        self.first_row = Some(v);
954                        self.first_row_is_header = false;
955                    } else {
956                        nums.push(v.0);
957                        data.push(v.1);
958                        if nums.len() >= self.iter_batch { 
959                            break Some(Ok((nums, data)))
960                        }
961                    }
962                },
963                Ok(None) => {
964                    if nums.len() > 0 {
965                        break Some(Ok((nums, data)))
966                    } else {
967                        break None
968                    }
969                },
970                Err(e) => {
971                    break Some(Err(e));
972                }
973            }
974        }
975        
976    }
977}
978
979/// cached sheet reader
980#[cfg(feature = "cached")]
981pub struct CachedSheet<'a> {
982    data: HashMap<RowNum, Vec<CellValue<'a>>>,
983    key: String,
984    current: RowNum,
985    empty: bool,
986    keep_empty: bool,
987    iter_batch: usize,
988    top_nrow: RowNum,
989    bottom_nrow: RowNum,
990    left_ncol: ColNum,
991    right_ncol: ColNum,
992    header_row: Option<(u32, Vec<CellValue<'a>>)>,
993    merged_rects: Vec<((RowNum, ColNum), (RowNum, ColNum))>
994}
995
996#[cfg(feature = "cached")]
997impl <'a> CachedSheet<'a> {
998    /// whether keep empty rows when iter (default: skip empty rows)
999    pub fn with_empty_rows(mut self, keep_empty: bool) -> Self {
1000        self.keep_empty = keep_empty;
1001        self
1002    }
1003    /// get sheet name
1004    pub fn sheet_name(&self) -> &String {
1005        &self.key
1006    }
1007    /// check whether the sheet is empty
1008    pub fn is_empty(&self) -> bool {
1009        self.empty
1010    }
1011    /// get row range
1012    pub fn row_range(&self) -> (RowNum, RowNum) {
1013        (self.top_nrow, self.bottom_nrow)
1014    }
1015    /// get column range
1016    pub fn column_range(&self) -> (ColNum, ColNum) {
1017        (self.left_ncol, self.right_ncol)
1018    }
1019    /// get header if first_row_is_header is true
1020    pub fn get_header_row(&self) -> Result<(u32, Vec<CellValue<'a>>)> {
1021        match &self.header_row {
1022            Some(v) => Ok(v.clone()),
1023            None => Err(anyhow!("no header row!"))
1024        }
1025    }
1026    /// get merged ranges, call as any time
1027    pub fn get_merged_ranges(&self) -> &Vec<MergedRange> {
1028        &self.merged_rects
1029    }
1030    /// Get all data
1031    pub fn get_all_cells(&self) -> &HashMap<RowNum, Vec<CellValue<'_>>> {
1032        &self.data
1033    }
1034    /// get cell value by address, if the cell is not exist, return &CellValue::Blank
1035    pub fn get_cell_value<A: AsRef<str>>(&self, addr: A) -> Result<&CellValue<'a>> {
1036        let (row, col) = get_tuple_from_ord(addr.as_ref().as_bytes())?;
1037        if row >= self.top_nrow && row <= self.bottom_nrow
1038            && col >= self.left_ncol && col <= self.right_ncol {
1039            if self.data.contains_key(&row) {
1040                Ok(self.data[&row].get((col-1) as usize).unwrap_or(&CellValue::Blank))
1041            } else {
1042                Ok(&CellValue::Blank)
1043            }
1044        } else {
1045            Err(anyhow!("Invalid address - out of range"))
1046        }
1047    }
1048    /// get cell value by address, if the cell is not exist, return &CellValue::Blank
1049    pub fn get_cell_value_with_merge_info<A: AsRef<str>>(&self, addr: A) -> Result<(&CellValue<'a>, (bool, Option<(RowNum, ColNum)>))> {
1050        let (row, col) = get_tuple_from_ord(addr.as_ref().as_bytes())?;
1051        if row >= self.top_nrow && row <= self.bottom_nrow
1052            && col >= self.left_ncol && col <= self.right_ncol {
1053            let (merge, spans) = is_merged_cell(&self.merged_rects, row, col);
1054            if self.data.contains_key(&row) {
1055                Ok((self.data[&row].get((col-1) as usize).unwrap_or(&CellValue::Blank), (merge, spans)))
1056            } else {
1057                Ok((&CellValue::Blank, (merge, spans)))
1058            }
1059        } else {
1060            Err(anyhow!("Invalid address - out of range"))
1061        }
1062    }
1063}
1064
1065#[cfg(feature = "cached")]
1066impl<'a> Iterator for CachedSheet<'a> {
1067    type Item = (Vec<u32>, Vec<Vec<CellValue<'a>>>);
1068    fn next(&mut self) -> Option<Self::Item> {
1069        let mut nrow = Vec::with_capacity(self.iter_batch);
1070        let mut data = Vec::with_capacity(self.iter_batch);
1071        while nrow.len() < self.iter_batch && self.current <= self.bottom_nrow {
1072            if self.data.contains_key(&self.current) {
1073                nrow.push(self.current);
1074                data.push(self.data[&self.current].to_owned());
1075            } else if self.keep_empty {
1076                nrow.push(self.current);
1077                data.push(vec![]);
1078            };
1079            self.current += 1;
1080        }
1081        Some((nrow, data))
1082    }
1083}
1084
1085/// get another type of data from cell value
1086pub trait FromCellValue {
1087    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> 
1088        where Self: Sized;
1089}
1090
1091impl FromCellValue for String {
1092    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1093        match val {
1094            CellValue::Number(n) => Ok(Some(n.to_string())),
1095            CellValue::Date(n) => {
1096                Ok(Some((BASE_DATE.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid date"))?)).to_string()))
1097            },
1098            CellValue::Time(n) => {
1099                Ok(Some(NaiveTime::from_num_seconds_from_midnight_opt(((*n-n.trunc()) * 86400.0) as u32, 0).unwrap().format("%H:%M:%S").to_string()))
1100            }
1101            CellValue::Datetime(n) => {
1102                Ok(Some((BASE_DATETIME.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid date"))?)+(Duration::try_seconds(((*n-n.trunc()) * 86400.0) as i64).ok_or(anyhow!("invalid date"))?)).to_string()))
1103            },
1104            CellValue::Shared(s) => Ok(Some((**s).to_owned())),
1105            CellValue::String(s) => Ok(Some((*s).to_owned())),
1106            CellValue::Error(s) => Ok(Some((*s).to_string())),
1107            CellValue::Bool(b) => Ok(Some(if *b {"true".to_string()}else{"false".to_string()})),
1108            CellValue::Blank => Ok(Some("".to_string())),
1109        }
1110    }
1111}
1112
1113impl FromCellValue for f64 {
1114    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1115        match val {
1116            CellValue::Number(n) => Ok(Some(*n)),
1117            CellValue::Date(n) => Ok(Some(*n)),
1118            CellValue::Time(n) => Ok(Some(*n)),
1119            CellValue::Datetime(n) => Ok(Some(*n)),
1120            CellValue::Shared(s) => {
1121                match s.parse::<f64>() {
1122                    Ok(n) => Ok(Some(n)),
1123                    Err(_) => {
1124                        if NULL_STRING.contains(*s) {
1125                            Ok(None)
1126                        } else if let Ok(n) = s.replace(',', "").parse::<f64>() {
1127                            Ok(Some(n))
1128                        } else {
1129                            Err(anyhow!(format!("invalid value-{:?}", val)))
1130                        }
1131                    }
1132                }
1133            },
1134            CellValue::String(s) => {
1135                match s.parse::<f64>() {
1136                    Ok(n) => Ok(Some(n)),
1137                    Err(_) => {
1138                        if NULL_STRING.contains(s) {
1139                            Ok(None)
1140                        } else if let Ok(n) = s.replace(',', "").parse::<f64>() {
1141                            Ok(Some(n))
1142                        } else {
1143                            Err(anyhow!(format!("invalid value-{:?}", val)))
1144                        }
1145                    }
1146                }
1147            },
1148            CellValue::Error(_) => Err(anyhow!(format!("invalid value-{:?}", val))),
1149            CellValue::Bool(b) => Ok(Some(if *b {1.0}else{0.0})),
1150            CellValue::Blank => Ok(None),
1151        }
1152    }
1153}
1154
1155impl FromCellValue for i64 {
1156    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1157        match val {
1158            CellValue::Number(n) => Ok(Some(*n as i64)),
1159            CellValue::Date(n) => Ok(Some(*n as i64)),
1160            CellValue::Time(n) => Ok(Some(*n as i64)),
1161            CellValue::Datetime(n) => Ok(Some(*n as i64)),
1162            CellValue::Shared(s) => {
1163                match s.parse::<i64>() {
1164                    Ok(n) => Ok(Some(n)),
1165                    Err(_) => {
1166                        if NULL_STRING.contains(*s) {
1167                            Ok(None)
1168                        } else if let Ok(n) = s.replace(',', "").parse::<i64>() {
1169                            Ok(Some(n))
1170                        } else {
1171                            Err(anyhow!(format!("invalid value-{:?}", val)))
1172                        }
1173                    }
1174                }
1175            },
1176            CellValue::String(s) => {
1177                match s.parse::<i64>() {
1178                    Ok(n) => Ok(Some(n)),
1179                    Err(_) => {
1180                        if NULL_STRING.contains(s) {
1181                            Ok(None)
1182                        } else if let Ok(n) = s.replace(',', "").parse::<i64>() {
1183                            Ok(Some(n))
1184                        } else {
1185                            Err(anyhow!(format!("invalid value-{:?}", val)))
1186                        }
1187                    }
1188                }
1189            },
1190            CellValue::Error(_) => Err(anyhow!(format!("invalid value-{:?}", val))),
1191            CellValue::Bool(b) => Ok(Some(if *b {1}else{0})),
1192            CellValue::Blank => Ok(None),
1193        }
1194    }
1195}
1196
1197impl FromCellValue for bool {
1198    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1199        match val {
1200            CellValue::Number(n) => {if (*n).abs() > 0.009 {Ok(Some(true))} else {Ok(Some(false))}},
1201            CellValue::Date(n) => {if (*n).abs() > 0.009 {Ok(Some(true))} else {Ok(Some(false))}},
1202            CellValue::Time(n) => {if (*n).abs() > 0.009 {Ok(Some(true))} else {Ok(Some(false))}},
1203            CellValue::Datetime(n) => {if (*n).abs() > 0.009 {Ok(Some(true))} else {Ok(Some(false))}},
1204            CellValue::Shared(s) => {
1205                match s.parse::<bool>() {
1206                    Ok(n) => Ok(Some(n)),
1207                    Err(_) => {
1208                        if NULL_STRING.contains(*s) {
1209                            Ok(None)
1210                        } else {
1211                            Err(anyhow!(format!("invalid value-{:?}", val)))
1212                        }
1213                    }
1214                }
1215            },
1216            CellValue::String(s) => {
1217                match s.parse::<bool>() {
1218                    Ok(n) => Ok(Some(n)),
1219                    Err(_) => {
1220                        if NULL_STRING.contains(s) {
1221                            Ok(None)
1222                        } else {
1223                            Err(anyhow!(format!("invalid value-{:?}", val)))
1224                        }
1225                    }
1226                }
1227            },
1228            CellValue::Error(_) => Err(anyhow!(format!("invalid value-{:?}", val))),
1229            CellValue::Bool(b) => Ok(Some(*b)),
1230            CellValue::Blank => Ok(None),
1231        }
1232    }
1233}
1234
1235impl FromCellValue for NaiveDate {
1236    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1237        match val {
1238            CellValue::Number(n) => Ok(Some(BASE_DATE.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid datetime"))?))),
1239            CellValue::Date(n) => Ok(Some(BASE_DATE.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid datetime"))?))),
1240            CellValue::Time(n) => Ok(Some(BASE_DATE.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid datetime"))?))),
1241            CellValue::Datetime(n) => Ok(Some(BASE_DATE.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid datetime"))?))),
1242            CellValue::Shared(s) => {
1243                match NaiveDate::parse_from_str(*s, "%Y-%m-%d") {
1244                    Ok(v) => Ok(Some(v)),
1245                    Err(_) => {
1246                        match NaiveDate::parse_from_str(*s, "%Y/%m/%d") {
1247                            Ok(v) => Ok(Some(v)),
1248                            Err(_) => {
1249                                if NULL_STRING.contains(*s) {
1250                                    Ok(None)
1251                                } else {
1252                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1253                                }
1254                            }
1255                        }
1256                    }
1257                }
1258            },
1259            CellValue::String(s) => {
1260                match NaiveDate::parse_from_str(s, "%Y-%m-%d") {
1261                    Ok(v) => Ok(Some(v)),
1262                    Err(_) => {
1263                        match NaiveDate::parse_from_str(s, "%Y/%m/%d") {
1264                            Ok(v) => Ok(Some(v)),
1265                            Err(_) => {
1266                                if NULL_STRING.contains(s) {
1267                                    Ok(None)
1268                                } else {
1269                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1270                                }
1271                            }
1272                        }
1273                    }
1274                }
1275            },
1276            CellValue::Error(_) => Err(anyhow!(format!("invalid datetime{:?}", val))),
1277            CellValue::Bool(_) => Err(anyhow!(format!("invalid datetime{:?}", val))),
1278            CellValue::Blank => Ok(None),
1279        }
1280    }
1281}
1282
1283impl FromCellValue for NaiveDateTime {
1284    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1285        match val {
1286            CellValue::Number(n) => {
1287                Ok(Some(BASE_DATETIME.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid date"))?)+(Duration::try_seconds(((*n-n.trunc()) * 86400.0) as i64).ok_or(anyhow!("invalid date"))?)))
1288            },
1289            CellValue::Date(n) => {
1290                Ok(Some(BASE_DATETIME.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid date"))?)+(Duration::try_seconds(((*n-n.trunc()) * 86400.0) as i64).ok_or(anyhow!("invalid date"))?)))
1291            },
1292            CellValue::Time(n) => {
1293                Ok(Some(BASE_DATETIME.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid date"))?)+(Duration::try_seconds(((*n-n.trunc()) * 86400.0) as i64).ok_or(anyhow!("invalid date"))?)))
1294            },
1295            CellValue::Datetime(n) => {
1296                Ok(Some(BASE_DATETIME.clone()+(Duration::try_days(*n as i64).ok_or(anyhow!("invalid date"))?)+(Duration::try_seconds(((*n-n.trunc()) * 86400.0) as i64).ok_or(anyhow!("invalid date"))?)))
1297            },
1298            CellValue::Shared(s) => {
1299                match NaiveDateTime::parse_from_str(*s, "%Y-%m-%d %H:%M:%S") {
1300                    Ok(v) => Ok(Some(v)),
1301                    Err(_) => {
1302                        match NaiveDateTime::parse_from_str(*s, "%Y/%m/%d %H:%M:%S") {
1303                            Ok(v) => Ok(Some(v)),
1304                            Err(_) => {
1305                                if NULL_STRING.contains(*s) {
1306                                    Ok(None)
1307                                } else {
1308                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1309                                }
1310                            }
1311                        }
1312                    }
1313                }
1314            },
1315            CellValue::String(s) => {
1316                match NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
1317                    Ok(v) => Ok(Some(v)),
1318                    Err(_) => {
1319                        match NaiveDateTime::parse_from_str(s, "%Y/%m/%d %H:%M:%S") {
1320                            Ok(v) => Ok(Some(v)),
1321                            Err(_) => {
1322                                if NULL_STRING.contains(s) {
1323                                    Ok(None)
1324                                } else {
1325                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1326                                }
1327                            }
1328                        }
1329                    }
1330                }
1331            },
1332            CellValue::Error(_) => Err(anyhow!(format!("invalid datetime{:?}", val))),
1333            CellValue::Bool(_) => Err(anyhow!(format!("invalid datetime{:?}", val))),
1334            CellValue::Blank => Ok(None),
1335        }
1336    }
1337}
1338
1339impl FromCellValue for NaiveTime {
1340    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1341        match val {
1342            CellValue::Number(n) => {
1343                Ok(Some(NaiveTime::from_num_seconds_from_midnight_opt(((*n-n.trunc()) * 86400.0) as u32, 0).ok_or(anyhow!("invalid time"))?))
1344            },
1345            CellValue::Date(n) => {
1346                Ok(Some(NaiveTime::from_num_seconds_from_midnight_opt(((*n-n.trunc()) * 86400.0) as u32, 0).ok_or(anyhow!("invalid time"))?))
1347            },
1348            CellValue::Time(n) => {
1349                Ok(Some(NaiveTime::from_num_seconds_from_midnight_opt(((*n-n.trunc()) * 86400.0) as u32, 0).ok_or(anyhow!("invalid time"))?))
1350            },
1351            CellValue::Datetime(n) => {
1352                Ok(Some(NaiveTime::from_num_seconds_from_midnight_opt(((*n-n.trunc()) * 86400.0) as u32, 0).ok_or(anyhow!("invalid time"))?))
1353            },
1354            CellValue::Shared(s) => {
1355                match NaiveTime::parse_from_str(*s, "%H:%M:%S") {
1356                    Ok(v) => Ok(Some(v)),
1357                    Err(_) => {
1358                        match NaiveTime::parse_from_str(*s, "%H:%M:%S") {
1359                            Ok(v) => Ok(Some(v)),
1360                            Err(_) => {
1361                                if NULL_STRING.contains(*s) {
1362                                    Ok(None)
1363                                } else {
1364                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1365                                }
1366                            }
1367                        }
1368                    }
1369                }
1370            },
1371            CellValue::String(s) => {
1372                match NaiveTime::parse_from_str(s, "%H:%M:%S") {
1373                    Ok(v) => Ok(Some(v)),
1374                    Err(_) => {
1375                        match NaiveTime::parse_from_str(s, "%H:%M:%S") {
1376                            Ok(v) => Ok(Some(v)),
1377                            Err(_) => {
1378                                if NULL_STRING.contains(s) {
1379                                    Ok(None)
1380                                } else {
1381                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1382                                }
1383                            }
1384                        }
1385                    }
1386                }
1387            },
1388            CellValue::Error(_) => Err(anyhow!(format!("invalid time{:?}", val))),
1389            CellValue::Bool(_) => Err(anyhow!(format!("invalid time{:?}", val))),
1390            CellValue::Blank => Ok(None),
1391        }
1392    }
1393}
1394
1395impl FromCellValue for Date32 {
1396    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1397        match val {
1398            // 1970-01-01的Excel值为25569
1399            CellValue::Number(n) => Ok(Some((*n as i32)-25569)),
1400            CellValue::Date(n) => Ok(Some((*n as i32)-25569)),
1401            CellValue::Time(n) => Ok(Some((*n as i32)-25569)),
1402            CellValue::Datetime(n) => Ok(Some((*n as i32)-25569)),
1403            CellValue::Shared(s) => {
1404                match NaiveDate::parse_from_str(*s, "%Y-%m-%d") {
1405                    Ok(v) => Ok(Some((v - UNIX_DATE.clone()).num_days() as i32)),
1406                    Err(_) => {
1407                        match NaiveDate::parse_from_str(*s, "%Y/%m/%d") {
1408                            Ok(v) => Ok(Some((v - UNIX_DATE.clone()).num_days() as i32)),
1409                            Err(_) => {
1410                                if NULL_STRING.contains(*s) {
1411                                    Ok(None)
1412                                } else {
1413                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1414                                }
1415                            }
1416                        }
1417                    }
1418                }
1419            },
1420            CellValue::String(s) => {
1421                match NaiveDate::parse_from_str(s, "%Y-%m-%d") {
1422                    Ok(v) => Ok(Some((v - UNIX_DATE.clone()).num_days() as i32)),
1423                    Err(_) => {
1424                        match NaiveDate::parse_from_str(s, "%Y/%m/%d") {
1425                            Ok(v) => Ok(Some((v - UNIX_DATE.clone()).num_days() as i32)),
1426                            Err(_) => {
1427                                if NULL_STRING.contains(s) {
1428                                    Ok(None)
1429                                } else {
1430                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1431                                }
1432                            }
1433                        }
1434                    }
1435                }
1436            },
1437            CellValue::Error(_) => Err(anyhow!(format!("invalid date32-{:?}", val))),
1438            CellValue::Bool(_) => Err(anyhow!(format!("invalid date32-{:?}", val))),
1439            CellValue::Blank => Ok(None),
1440        }
1441    }
1442}
1443
1444impl FromCellValue for Timestamp {
1445    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1446        match val {
1447            // 1970-01-01的Excel值为25569
1448            CellValue::Number(n) => Ok(Some(((*n - 25569.0) * 86400.0).into())),
1449            CellValue::Date(n) => Ok(Some(((*n - 25569.0) * 86400.0).into())),
1450            CellValue::Time(n) => Ok(Some(((*n - 25569.0) * 86400.0).into())),
1451            CellValue::Datetime(n) => Ok(Some(((*n - 25569.0) * 86400.0).into())),
1452            CellValue::Shared(s) => {
1453                match NaiveDateTime::parse_from_str(*s, "%Y-%m-%d %H:%M:%S") {
1454                    Ok(v) => Ok(Some(v.and_utc().timestamp().into())),
1455                    Err(_) => {
1456                        match NaiveDateTime::parse_from_str(*s, "%Y-%m-%d %H:%M:%S") {
1457                            Ok(v) => Ok(Some(v.and_utc().timestamp().into())),
1458                            Err(_) => {
1459                                if NULL_STRING.contains(*s) {
1460                                    Ok(None)
1461                                } else {
1462                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1463                                }
1464                            }
1465                        }
1466                    }
1467                }
1468            },
1469            CellValue::String(s) => {
1470                match NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
1471                    Ok(v) => Ok(Some(v.and_utc().timestamp().into())),
1472                    Err(_) => {
1473                        match NaiveDateTime::parse_from_str(s, "%Y/%m/%d %H:%M:%S") {
1474                            Ok(v) => Ok(Some(v.and_utc().timestamp().into())),
1475                            Err(_) => {
1476                                if NULL_STRING.contains(s) {
1477                                    Ok(None)
1478                                } else {
1479                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1480                                }
1481                            }
1482                        }
1483                    }
1484                }
1485            },
1486            CellValue::Error(_) => Err(anyhow!(format!("invalid timestamp-{:?}", val))),
1487            CellValue::Bool(_) => Err(anyhow!(format!("invalid timestamp-{:?}", val))),
1488            CellValue::Blank => Ok(None),
1489        }
1490    }
1491}
1492
1493impl FromCellValue for Timesecond {
1494    fn try_from_cval(val: &CellValue<'_>) -> Result<Option<Self>> {
1495        match val {
1496            CellValue::Number(n) => {
1497                Ok(Some((((*n-n.trunc()) * 86400.0) as i32).into()))
1498            },
1499            CellValue::Date(n) => {
1500                Ok(Some((((*n-n.trunc()) * 86400.0) as i32).into()))
1501            },
1502            CellValue::Time(n) => {
1503                Ok(Some((((*n-n.trunc()) * 86400.0) as i32).into()))
1504            },
1505            CellValue::Datetime(n) => {
1506                Ok(Some((((*n-n.trunc()) * 86400.0) as i32).into()))
1507            },
1508            CellValue::Shared(s) => {
1509                match NaiveTime::parse_from_str(*s, "%H:%M:%S") {
1510                    Ok(v) => {Ok(Some((v.num_seconds_from_midnight() as i32).into()))},
1511                    Err(_) => {
1512                        match NaiveTime::parse_from_str(*s, "%H:%M:%S") {
1513                            Ok(v) =>Ok(Some((v.num_seconds_from_midnight() as i32).into())),
1514                            Err(_) => {
1515                                if NULL_STRING.contains(*s) {
1516                                    Ok(None)
1517                                } else {
1518                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1519                                }
1520                            }
1521                        }
1522                    }
1523                }
1524            },
1525            CellValue::String(s) => {
1526                match NaiveTime::parse_from_str(s, "%H:%M:%S") {
1527                    Ok(v) => {Ok(Some((v.num_seconds_from_midnight() as i32).into()))},
1528                    Err(_) => {
1529                        match NaiveTime::parse_from_str(s, "%H:%M:%S") {
1530                            Ok(v) =>Ok(Some((v.num_seconds_from_midnight() as i32).into())),
1531                            Err(_) => {
1532                                if NULL_STRING.contains(s) {
1533                                    Ok(None)
1534                                } else {
1535                                    Err(anyhow!(format!("invalid value-{:?}", val)))
1536                                }
1537                            }
1538                        }
1539                    }
1540                }
1541            },
1542            CellValue::Error(_) => Err(anyhow!(format!("invalid time{:?}", val))),
1543            CellValue::Bool(_) => Err(anyhow!(format!("invalid time{:?}", val))),
1544            CellValue::Blank => Ok(None),
1545        }
1546    }
1547}
1548
1549/// Into CellValue
1550impl Into<CellValue<'_>> for String {
1551    fn into(self) -> CellValue<'static> {
1552        CellValue::String(self)
1553    }
1554}
1555
1556impl Into<CellValue<'_>> for f64 {
1557    fn into(self) -> CellValue<'static> {
1558        CellValue::Number(self)
1559    }
1560}
1561
1562impl Into<CellValue<'_>> for i64 {
1563    fn into(self) -> CellValue<'static> {
1564        CellValue::Number(self as f64)
1565    }
1566}
1567
1568impl Into<CellValue<'_>> for bool {
1569    fn into(self) -> CellValue<'static> {
1570        CellValue::Bool(self)
1571    }
1572}
1573
1574/// make another type of data into cell value
1575pub trait IntoCellValue {
1576    fn try_into_cval(self) -> Result<CellValue<'static>>;
1577}
1578
1579impl IntoCellValue for NaiveDate {
1580    fn try_into_cval(self) -> Result<CellValue<'static>> {
1581        Ok(CellValue::Date((self.signed_duration_since(*BASE_DATE).num_days()) as f64))
1582    }
1583}
1584
1585impl IntoCellValue for NaiveDateTime {
1586    fn try_into_cval(self) -> Result<CellValue<'static>> {
1587        let (dt, tm) = (self.date(), self.time());
1588        Ok(CellValue::Datetime(((dt.signed_duration_since(*BASE_DATE).num_days()) as f64) + ((tm.num_seconds_from_midnight() as f64) / 86400.0)))
1589    }
1590}
1591
1592impl IntoCellValue for NaiveTime {
1593    fn try_into_cval(self) -> Result<CellValue<'static>> {
1594        Ok(CellValue::Time((self.num_seconds_from_midnight() as f64) / 86400.0))
1595    }
1596}
1597
1598impl IntoCellValue for Date32 {
1599    fn try_into_cval(self) -> Result<CellValue<'static>> {
1600        Ok(CellValue::Date((self + 25569) as f64))
1601    }
1602}
1603
1604// utc time-zone only
1605impl IntoCellValue for Timestamp {
1606    fn try_into_cval(self) -> Result<CellValue<'static>> {
1607        if let Some(v) = BASE_DATETIME.checked_add_signed(Duration::seconds(self.0)) {
1608            v.try_into_cval()
1609        } else {
1610            Ok(CellValue::Error(format!("Invalid Timestamp-{}", self.0)))
1611        }
1612    }
1613}
1614
1615impl IntoCellValue for Timesecond {
1616    fn try_into_cval(self) -> Result<CellValue<'static>> {
1617        Ok(CellValue::Time(self.0 as f64 / 86400.0))
1618    }
1619}
1620
1621// datetime sign
1622static FMT_DATE: u8 = 0;
1623static FMT_TIME: u8 = 1;
1624static FMT_DATETIME: u8 = 2;
1625static FMT_DEFAULT: u8 = 255;
1626
1627lazy_static! {
1628    static ref BASE_DATE: NaiveDate = NaiveDate::from_ymd_opt(1899, 12,30).unwrap();
1629    static ref BASE_DATETIME: NaiveDateTime = BASE_DATE.and_hms_opt(0, 0, 0).unwrap();
1630    static ref BASE_TIME: NaiveTime = NaiveTime::from_num_seconds_from_midnight_opt(0, 0).unwrap();
1631    static ref UNIX_DATE: NaiveDate = NaiveDate::from_ymd_opt(1970,  1, 1).unwrap();
1632    static ref NULL_STRING: HashSet<String> = {
1633        let mut v = HashSet::new();
1634        v.insert("".into());
1635        v.insert("-".into());
1636        v.insert("--".into());
1637        v.insert("#N/A".into());
1638        v
1639    };
1640    static ref DATETIME_FMTS: HashMap<u32, u8> = {
1641        let mut v = HashMap::new();
1642        v.extend((14..18).map(|n| (n, FMT_DATE)));
1643        v.extend((27..32).map(|n| (n, FMT_DATE)));
1644        v.extend((34..37).map(|n| (n, FMT_DATE)));
1645        v.extend((50..59).map(|n| (n, FMT_DATE)));  // FMT_DATE - 0
1646        v.extend((18..22).map(|n| (n, FMT_TIME)));
1647        v.extend((32..34).map(|n| (n, FMT_TIME)));
1648        v.extend((45..48).map(|n| (n, FMT_TIME)));  // FMT_TIME - 1
1649        v.insert(22, FMT_DATETIME);                 // FMT_DATETIME - 2
1650        v
1651    };
1652    static ref NUM_FMTS: HashMap<u32, String> = {
1653        let mut map: HashMap<u32, String> = HashMap::new();
1654        // General
1655        map.insert(0, "General".to_string());
1656        map.insert(1, "0".to_string());
1657        map.insert(2, "0.00".to_string());
1658        map.insert(3, "#,##0".to_string());
1659        map.insert(4, "#,##0.00".to_string());
1660
1661        map.insert(9, "0%".to_string());
1662        map.insert(10, "0.00%".to_string());
1663        map.insert(11, "0.00E+00".to_string());
1664        map.insert(12, "# ?/?".to_string());
1665        map.insert(13, "# ??/??".to_string());
1666        map.insert(14, "m/d/yyyy".to_string()); // Despite ECMA 'mm-dd-yy");
1667        map.insert(15, "d-mmm-yy".to_string());
1668        map.insert(16, "d-mmm".to_string());
1669        map.insert(17, "mmm-yy".to_string());
1670        map.insert(18, "h:mm AM/PM".to_string());
1671        map.insert(19, "h:mm:ss AM/PM".to_string());
1672        map.insert(20, "h:mm".to_string());
1673        map.insert(21, "h:mm:ss".to_string());
1674        map.insert(22, "m/d/yyyy h:mm".to_string()); // Despite ECMA 'm/d/yy h:mm");
1675
1676        map.insert(37, "#,##0_);(#,##0)".to_string()); //  Despite ECMA '#,##0 ;(#,##0)");
1677        map.insert(38, "#,##0_);[Red](#,##0)".to_string()); //  Despite ECMA '#,##0 ;[Red](#,##0)");
1678        map.insert(39, "#,##0.00_);(#,##0.00)".to_string()); //  Despite ECMA '#,##0.00;(#,##0.00)");
1679        map.insert(40, "#,##0.00_);[Red](#,##0.00)".to_string()); //  Despite ECMA '#,##0.00;[Red](#,##0.00)");
1680
1681        map.insert(44, r###"_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)"###.to_string());
1682        map.insert(45, "mm:ss".to_string());
1683        map.insert(46, "[h]:mm:ss".to_string());
1684        map.insert(47, "mm:ss.0".to_string()); //  Despite ECMA 'mmss.0");
1685        map.insert(48, "##0.0E+0".to_string());
1686        map.insert(49, "@".to_string());
1687
1688        // CHT
1689        map.insert(27, "[$-404]e/m/d".to_string());
1690        map.insert(30, "m/d/yy".to_string());
1691        map.insert(36, "[$-404]e/m/d".to_string());
1692        map.insert(50, "[$-404]e/m/d".to_string());
1693        map.insert(57, "[$-404]e/m/d".to_string());
1694
1695        // THA
1696        map.insert(59, "t0".to_string());
1697        map.insert(60, "t0.00".to_string());
1698        map.insert(61, "t#,##0".to_string());
1699        map.insert(62, "t#,##0.00".to_string());
1700        map.insert(67, "t0%".to_string());
1701        map.insert(68, "t0.00%".to_string());
1702        map.insert(69, "t# ?/?".to_string());
1703        map.insert(70, "t# ??/??".to_string());
1704
1705        // JPN
1706        map.insert(28, r###"[$-411]ggge"年"m"月"d"日""###.to_string());
1707        map.insert(29, r###"[$-411]ggge"年"m"月"d"日""###.to_string());
1708        map.insert(31, r###"yyyy"年"m"月"d"日""###.to_string());
1709        map.insert(32, r###"h"時"mm"分""###.to_string());
1710        map.insert(33, r###"h"時"mm"分"ss"秒""###.to_string());
1711        map.insert(34, r###"yyyy"年"m"月""###.to_string());
1712        map.insert(35, r###"m"月"d"日""###.to_string());
1713        map.insert(51, r###"[$-411]ggge"年"m"月"d"日""###.to_string());
1714        map.insert(52, r###"yyyy"年"m"月""###.to_string());
1715        map.insert(53, r###"m"月"d"日""###.to_string());
1716        map.insert(54, r###"[$-411]ggge"年"m"月"d"日""###.to_string());
1717        map.insert(55, r###"yyyy"年"m"月""###.to_string());
1718        map.insert(56, r###"m"月"d"日""###.to_string());
1719        map.insert(58, r###"[$-411]ggge"年"m"月"d"日""###.to_string());
1720
1721        map
1722    };
1723    static ref EMP_CELLS: Vec<CellValue<'static>> = vec![];
1724}