Skip to main content

calamine_styles/
xls.rs

1// SPDX-License-Identifier: MIT
2//
3// Copyright 2016-2025, Johann Tuffe.
4
5use std::cmp::min;
6use std::collections::BTreeMap;
7use std::fmt::{self, Write};
8use std::io::{Read, Seek, SeekFrom};
9
10use log::debug;
11
12use crate::cfb::{Cfb, XlsEncoding};
13use crate::formats::{
14    builtin_format_by_code, detect_custom_number_format, format_excel_f64, format_excel_i64,
15    CellFormat,
16};
17#[cfg(feature = "picture")]
18use crate::utils::read_usize;
19use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32};
20use crate::vba::VbaProject;
21use crate::{
22    Cell, CellErrorType, Data, Dimensions, HeaderRow, Metadata, Range, Reader, Sheet, SheetType,
23    SheetVisible, StyleRange, WorksheetLayout,
24};
25
#[derive(Debug)]
/// Errors that can be produced while opening or parsing an xls (BIFF in CFB) workbook.
///
/// The first three variants wrap errors coming from lower layers and are exposed through
/// `std::error::Error::source`; all other variants describe problems found in the xls
/// data itself.
pub enum XlsError {
    /// Underlying I/O error
    Io(std::io::Error),
    /// Error raised by the compound file (CFB) layer
    Cfb(crate::cfb::CfbError),
    /// Error raised while reading the VBA project
    Vba(crate::vba::VbaError),

    /// Cannot parse formula, stack is too short
    StackLen,
    /// A field holds a value that the parser does not recognize
    Unrecognized {
        /// name of the field being decoded
        typ: &'static str,
        /// raw value found (displayed in hexadecimal)
        val: u8,
    },
    /// Workbook is password protected
    Password,
    /// A record or field is shorter than the parser requires
    Len {
        /// minimum length expected
        expected: usize,
        /// length actually found
        found: usize,
        /// name of the record or field whose length is invalid
        typ: &'static str,
    },
    /// Continue record is too short while reading an extended string
    ContinueRecordTooShort,
    /// End of stream reached; the payload names the stream
    EoStream(&'static str),

    /// Invalid Formula
    InvalidFormula {
        /// stack size when the formula was rejected
        stack_size: usize,
    },
    /// Invalid or unknown iftab (displayed in hexadecimal)
    IfTab(usize),
    /// Invalid etpg (displayed in hexadecimal)
    Etpg(u8),
    /// No vba project
    NoVba,

    /// Invalid OfficeArt Record.
    #[cfg(feature = "picture")]
    #[cfg_attr(docsrs, doc(cfg(feature = "picture")))]
    Art(&'static str),

    /// No worksheet with the requested name exists; the payload is that name
    WorksheetNotFound(String),
    /// Invalid iFmt value
    InvalidFormat {
        /// iFmt value, See 2.4.126 Format
        ifmt: u16,
    },
}
86
// Tie each lower-level error type to the `XlsError` variant that wraps it.
// NOTE(review): `from_err!` is defined elsewhere; it presumably generates the `From`
// impls that the `?` operator relies on throughout this file — confirm.
from_err!(std::io::Error, XlsError, Io);
from_err!(crate::cfb::CfbError, XlsError, Cfb);
from_err!(crate::vba::VbaError, XlsError, Vba);
90
91impl std::fmt::Display for XlsError {
92    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93        match self {
94            XlsError::Io(e) => write!(f, "I/O error: {e}"),
95            XlsError::Cfb(e) => write!(f, "Cfb error: {e}"),
96            XlsError::Vba(e) => write!(f, "Vba error: {e}"),
97            XlsError::StackLen => write!(f, "Invalid stack length"),
98            XlsError::Unrecognized { typ, val } => write!(f, "Unrecognized {typ}: 0x{val:0X}"),
99            XlsError::Password => write!(f, "Workbook is password protected"),
100            XlsError::Len {
101                expected,
102                found,
103                typ,
104            } => write!(
105                f,
106                "Invalid {typ} length, expected at least {expected}, found {found}",
107            ),
108            XlsError::ContinueRecordTooShort => write!(
109                f,
110                "Continued record too short while reading extended string"
111            ),
112            XlsError::EoStream(s) => write!(f, "End of stream '{s}'"),
113            XlsError::InvalidFormula { stack_size } => {
114                write!(f, "Invalid formula (stack size: {stack_size})")
115            }
116            XlsError::IfTab(iftab) => write!(f, "Invalid iftab {iftab:X}"),
117            XlsError::Etpg(etpg) => write!(f, "Invalid etpg {etpg:X}"),
118            XlsError::NoVba => write!(f, "No VBA project"),
119            #[cfg(feature = "picture")]
120            XlsError::Art(s) => write!(f, "Invalid art record '{s}'"),
121            XlsError::WorksheetNotFound(name) => write!(f, "Worksheet '{name}' not found"),
122            XlsError::InvalidFormat { ifmt } => write!(f, "Invalid ifmt value: '{ifmt}'"),
123        }
124    }
125}
126
127impl std::error::Error for XlsError {
128    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
129        match self {
130            XlsError::Io(e) => Some(e),
131            XlsError::Cfb(e) => Some(e),
132            XlsError::Vba(e) => Some(e),
133            _ => None,
134        }
135    }
136}
137
/// Options to perform specialized parsing.
///
/// The struct is `#[non_exhaustive]`: start from `XlsOptions::default()` and then set the
/// fields you need.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct XlsOptions {
    /// Force a spreadsheet to be interpreted using a particular code page.
    ///
    /// XLS files can contain [code page] identifiers. If this identifier is missing or incorrect,
    /// strings in the parsed spreadsheet may be decoded incorrectly. Setting this field causes
    /// `calamine::Xls` to interpret strings using the specified code page, which may allow such
    /// spreadsheets to be decoded properly.
    ///
    /// When this is `None`, code page 1200 is used until the workbook declares its own.
    ///
    /// [code page]: https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
    pub force_codepage: Option<u16>,
    /// Row to use as header.
    ///
    /// With `HeaderRow::Row(n)`, ranges returned by `worksheet_range` start at row `n`;
    /// with `HeaderRow::FirstNonEmptyRow` the parsed range is returned unchanged.
    pub header_row: HeaderRow,
}
154
/// Everything that is kept in memory for one parsed worksheet.
struct SheetData {
    // Cell values of the sheet.
    range: Range<Data>,
    // Formula text of the sheet, one entry per Formula record.
    formula: Range<String>,
    // Merged regions collected from the sheet's MergeCells records.
    merge_cells: Vec<Dimensions>,
}
160
/// A struct representing an old xls format file (CFB)
///
/// All worksheets are parsed when the value is constructed; the accessors then return
/// clones of the parsed data.
pub struct Xls<RS> {
    // Parsed worksheets, keyed by sheet name.
    sheets: BTreeMap<String, SheetData>,
    // Sheet list and defined names gathered while parsing the workbook.
    metadata: Metadata,
    // Compound file container; still needed after construction to read the VBA project.
    cfb: Cfb,
    // Underlying reader, kept for the same reason as `cfb`.
    reader: RS,
    // Parsing options supplied by the caller.
    options: XlsOptions,
    // One cell format per XF record, in record order; cell records index into it.
    formats: Vec<CellFormat>,
    // Set when the workbook's Date1904 record is 1.
    is_1904: bool,
    // Pictures extracted from the drawing group, if any were found.
    #[cfg(feature = "picture")]
    pictures: Option<Vec<(String, Vec<u8>)>>,
}
173
174fn cfb<RS: Seek + Read>(reader: &mut RS) -> Result<Cfb, XlsError> {
175    let offset_end = reader.seek(SeekFrom::End(0))? as usize;
176    reader.seek(SeekFrom::Start(0))?;
177    let cfb = Cfb::new(reader, offset_end)?;
178    Ok(cfb)
179}
180
181impl<RS: Read + Seek> Xls<RS> {
182    /// Creates a new instance using `Options` to inform parsing.
183    ///
184    /// ```
185    /// use calamine::{Xls,XlsOptions};
186    /// # use std::io::Cursor;
187    /// # const BYTES: &[u8] = b"";
188    ///
189    /// # fn run() -> Result<Xls<Cursor<&'static [u8]>>, calamine::XlsError> {
190    /// # let reader = std::io::Cursor::new(BYTES);
191    /// let mut options = XlsOptions::default();
192    /// // ...set options...
193    /// let workbook = Xls::new_with_options(reader, options)?;
194    /// # Ok(workbook) }
195    /// # fn main() { assert!(run().is_err()); }
196    /// ```
197    pub fn new_with_options(mut reader: RS, options: XlsOptions) -> Result<Self, XlsError> {
198        let cfb = cfb(&mut reader)?;
199
200        debug!("cfb loaded");
201
202        let mut xls = Xls {
203            sheets: BTreeMap::new(),
204            cfb,
205            reader,
206            metadata: Metadata::default(),
207            options,
208            is_1904: false,
209            formats: Vec::new(),
210            #[cfg(feature = "picture")]
211            pictures: None,
212        };
213
214        xls.parse_workbook()?;
215
216        debug!("xls parsed");
217
218        Ok(xls)
219    }
220
221    /// Gets the worksheet merge cell dimensions
222    pub fn worksheet_merge_cells(&self, name: &str) -> Option<Vec<Dimensions>> {
223        self.sheets.get(name).map(|r| r.merge_cells.clone())
224    }
225
226    /// Get the nth worksheet. Shortcut for getting the nth
227    /// sheet name, then the corresponding worksheet.
228    pub fn worksheet_merge_cells_at(&self, n: usize) -> Option<Vec<Dimensions>> {
229        let sheet = self.metadata().sheets.get(n)?;
230
231        self.worksheet_merge_cells(&sheet.name)
232    }
233}
234
235impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
236    type Error = XlsError;
237
238    fn new(reader: RS) -> Result<Self, XlsError> {
239        Self::new_with_options(reader, XlsOptions::default())
240    }
241
242    fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
243        self.options.header_row = header_row;
244        self
245    }
246
247    fn vba_project(&mut self) -> Result<Option<VbaProject>, XlsError> {
248        // Reads vba once for all (better than reading all worksheets once for all)
249        if !self.cfb.has_directory("_VBA_PROJECT_CUR") {
250            return Ok(None);
251        }
252        let vba = VbaProject::from_cfb(&mut self.reader, &mut self.cfb)?;
253        Ok(Some(vba))
254    }
255
256    /// Parses Workbook stream, no need for the relationships variable
257    fn metadata(&self) -> &Metadata {
258        &self.metadata
259    }
260
261    fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, XlsError> {
262        let sheet = self
263            .sheets
264            .get(name)
265            .map(|r| r.range.clone())
266            .ok_or_else(|| XlsError::WorksheetNotFound(name.into()))?;
267
268        match self.options.header_row {
269            HeaderRow::FirstNonEmptyRow => Ok(sheet),
270            HeaderRow::Row(header_row_idx) => {
271                // If `header_row` is a row index, adjust the range
272                if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
273                    Ok(sheet.range((header_row_idx, start.1), end))
274                } else {
275                    Ok(sheet)
276                }
277            }
278        }
279    }
280
281    fn worksheet_style(&mut self, _name: &str) -> Result<StyleRange, XlsError> {
282        // TODO: Implement XLS style parsing
283        Ok(StyleRange::empty())
284    }
285
286    fn worksheet_layout(&mut self, _name: &str) -> Result<WorksheetLayout, XlsError> {
287        // XLS doesn't support column width/row height information in the same way as XLSX
288        Ok(WorksheetLayout::new())
289    }
290
291    fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
292        self.sheets
293            .iter()
294            .map(|(name, sheet)| (name.to_owned(), sheet.range.clone()))
295            .collect()
296    }
297
298    fn worksheet_formula(&mut self, name: &str) -> Result<Range<String>, XlsError> {
299        self.sheets
300            .get(name)
301            .ok_or_else(|| XlsError::WorksheetNotFound(name.into()))
302            .map(|r| r.formula.clone())
303    }
304
305    #[cfg(feature = "picture")]
306    fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
307        self.pictures.to_owned()
308    }
309}
310
/// One 6-byte entry of an ExternSheet record (three little-endian 16-bit fields).
#[derive(Debug, Clone, Copy)]
struct Xti {
    // First field of the entry; currently unused.
    _isup_book: u16,
    // Index of the first sheet referenced; used to look up the sheet name.
    itab_first: i16,
    // Third field of the entry; currently unused.
    _itab_last: i16,
}
317
318impl<RS: Read + Seek> Xls<RS> {
319    fn parse_workbook(&mut self) -> Result<(), XlsError> {
320        // gets workbook and worksheets stream, or early exit
321        let stream = self
322            .cfb
323            .get_stream("Workbook", &mut self.reader)
324            .or_else(|_| self.cfb.get_stream("Book", &mut self.reader))?;
325
326        let mut sheet_names = Vec::new();
327        let mut strings = Vec::new();
328        let mut defined_names = Vec::new();
329        let mut xtis = Vec::new();
330        let mut formats = BTreeMap::new();
331        let mut xfs = Vec::new();
332        let mut biff = Biff::Biff8; // Binary Interchange File Format (BIFF) version
333        let codepage = self.options.force_codepage.unwrap_or(1200);
334        let mut encoding = XlsEncoding::from_codepage(codepage)?;
335        #[cfg(feature = "picture")]
336        let mut draw_group: Vec<u8> = Vec::new();
337        {
338            let wb = &stream;
339            let records = RecordIter { stream: wb };
340            for record in records {
341                let mut r = record?;
342                match r.typ {
343                    // 2.4.117 FilePass
344                    0x002F if read_u16(r.data) != 0 => return Err(XlsError::Password),
345                    // CodePage
346                    0x0042 => {
347                        if self.options.force_codepage.is_none() {
348                            encoding = XlsEncoding::from_codepage(read_u16(r.data))?;
349                        }
350                    }
351                    0x013D => {
352                        let sheet_len = r.data.len() / 2;
353                        sheet_names.reserve(sheet_len);
354                        self.metadata.sheets.reserve(sheet_len);
355                    }
356                    // Date1904
357                    0x0022 => {
358                        if read_u16(r.data) == 1 {
359                            self.is_1904 = true;
360                        }
361                    }
362                    // 2.4.126 FORMATTING
363                    0x041E => match parse_format(&mut r, &encoding, biff) {
364                        Ok((idx, format)) => {
365                            formats.insert(idx, format);
366                        }
367                        Err(e) => log::warn!("{e}"),
368                    },
369                    // XFS
370                    0x00E0 => {
371                        xfs.push(parse_xf(&r)?);
372                    }
373                    // RRTabId
374                    0x0085 => {
375                        let (pos, sheet) = parse_sheet_metadata(&mut r, &encoding, biff)?;
376                        self.metadata.sheets.push(sheet.clone());
377                        sheet_names.push((pos, sheet.name)); // BoundSheet8
378                    }
379                    // BOF
380                    0x0809 => {
381                        let bof = parse_bof(&mut r)?;
382                        biff = bof.biff;
383                    }
384                    0x0018 => {
385                        // Lbl for defined_names
386                        let cch = r.data[3] as usize;
387                        let cce = read_u16(&r.data[4..]) as usize;
388                        let mut name = String::new();
389                        read_unicode_string_no_cch(&encoding, &r.data[14..], &cch, &mut name);
390                        let rgce = &r.data[r.data.len() - cce..];
391                        let formula = parse_defined_names(rgce)?;
392                        defined_names.push((name, formula));
393                    }
394                    0x0017 => {
395                        // ExternSheet
396                        let cxti = read_u16(r.data) as usize;
397                        xtis.extend(r.data[2..].chunks(6).take(cxti).map(|xti| Xti {
398                            _isup_book: read_u16(&xti[..2]),
399                            itab_first: read_i16(&xti[2..4]),
400                            _itab_last: read_i16(&xti[4..]),
401                        }));
402                    }
403                    0x00FC => strings = parse_sst(&mut r, &encoding)?, // SST
404                    #[cfg(feature = "picture")]
405                    0x00EB => {
406                        // MsoDrawingGroup
407                        draw_group.extend(r.data);
408                        draw_group.extend(r.cont.iter().flat_map(|v| *v));
409                    }
410                    0x000A => break, // EOF,
411                    _ => (),
412                }
413            }
414        }
415
416        self.formats = xfs
417            .into_iter()
418            .map(|fmt| match formats.get(&fmt) {
419                Some(s) => *s,
420                _ => builtin_format_by_code(fmt),
421            })
422            .collect();
423
424        debug!("formats: {:?}", self.formats);
425
426        let defined_names = defined_names
427            .into_iter()
428            .map(|(name, (i, mut f))| {
429                if let Some(i) = i {
430                    let sh = xtis
431                        .get(i)
432                        .and_then(|xti| sheet_names.get(xti.itab_first as usize))
433                        .map_or("#REF", |sh| &sh.1);
434                    f = format!("{sh}!{f}");
435                }
436                (name, f)
437            })
438            .collect::<Vec<_>>();
439
440        debug!("defined_names: {defined_names:?}");
441
442        let mut sheets = BTreeMap::new();
443        let fmla_sheet_names = sheet_names
444            .iter()
445            .map(|(_, n)| n.clone())
446            .collect::<Vec<_>>();
447        for (pos, name) in sheet_names {
448            let sh = &stream[pos..];
449            let records = RecordIter { stream: sh };
450            let mut cells = Vec::new();
451            let mut formulas = Vec::new();
452            let mut fmla_pos = (0, 0);
453            let mut merge_cells = Vec::new();
454            for record in records {
455                let r = record?;
456                match r.typ {
457                    // 512: Dimensions
458                    0x0200 => {
459                        let Dimensions { start, end } = parse_dimensions(r.data)?;
460                        let rows = (end.0 - start.0 + 1) as usize;
461                        let cols = (end.1 - start.1 + 1) as usize;
462                        cells.reserve(rows.saturating_mul(cols));
463                    }
464                    //0x0201 => cells.push(parse_blank(r.data)?), // 513: Blank
465                    0x0203 => cells.push(parse_number(r.data, &self.formats, self.is_1904)?), // 515: Number
466                    0x0204 => cells.push(parse_label(r.data, &encoding, biff)?), // 516: Label [MS-XLS 2.4.148]
467                    0x0205 => cells.push(parse_bool_err(r.data)?),               // 517: BoolErr
468                    0x0207 => {
469                        // 519 String (formula value)
470                        let val = Data::String(parse_string(r.data, &encoding, biff)?);
471                        cells.push(Cell::new(fmla_pos, val));
472                    }
473                    0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk
474                    0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst
475                    0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk
476                    0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells
477                    0x000A => break,                                        // 10: EOF,
478                    0x0006 => {
479                        // 6: Formula
480                        if r.data.len() < 20 {
481                            return Err(XlsError::Len {
482                                expected: 20,
483                                found: r.data.len(),
484                                typ: "Formula",
485                            });
486                        }
487                        let row = read_u16(r.data);
488                        let col = read_u16(&r.data[2..]);
489                        fmla_pos = (row as u32, col as u32);
490                        if let Some(val) = parse_formula_value(&r.data[6..14])? {
491                            // If the value is a string
492                            // it will appear in 0x0207 record coming next
493                            cells.push(Cell::new(fmla_pos, val));
494                        }
495                        let fmla = parse_formula(
496                            &r.data[20..],
497                            &fmla_sheet_names,
498                            &defined_names,
499                            &xtis,
500                            &encoding,
501                        )
502                        .unwrap_or_else(|e| {
503                            debug!("{e}");
504                            format!(
505                                "Unrecognised formula \
506                                 for cell ({row}, {col}): {e:?}"
507                            )
508                        });
509                        formulas.push(Cell::new(fmla_pos, fmla));
510                    }
511                    _ => (),
512                }
513            }
514            let range = Range::from_sparse(cells);
515            let formula = Range::from_sparse(formulas);
516            sheets.insert(
517                name,
518                SheetData {
519                    range,
520                    formula,
521                    merge_cells,
522                },
523            );
524        }
525
526        self.sheets = sheets;
527        self.metadata.names = defined_names;
528
529        #[cfg(feature = "picture")]
530        if !draw_group.is_empty() {
531            let pics = parse_pictures(&draw_group)?;
532            if !pics.is_empty() {
533                self.pictures = Some(pics);
534            }
535        }
536
537        Ok(())
538    }
539}
540
/// Information extracted from a BOF (beginning of file) record.
///
/// <https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/4d6a3d1e-d7c5-405f-bbae-d01e9cb79366>
struct Bof {
    /// Binary Interchange File Format version announced by the record
    biff: Biff,
}
546
/// Binary Interchange File Format (BIFF) version of a stream; it selects how strings
/// are laid out when records are decoded.
///
/// <https://www.loc.gov/preservation/digital/formats/fdd/fdd000510.shtml#notes>
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Biff {
    // BOF version 0x0200, 0x0002 or 0x0007.
    Biff2,
    // BOF version 0x0300.
    Biff3,
    // BOF version 0x0400.
    Biff4,
    // BOF version 0x0500, or version 0 with a `dt` field of 0x1000.
    Biff5,
    // BOF version 0x0600; also the fallback for any other version value.
    Biff8,
    // Used by MS-XLSB Workbook(2.1.7.61) or Worksheet(2.1.7.61) which are not supported yet.
    // Biff12,
}
558
559/// BOF [MS-XLS] 2.4.21
560fn parse_bof(r: &mut Record<'_>) -> Result<Bof, XlsError> {
561    let mut dt = 0;
562    let biff_version = read_u16(&r.data[..2]);
563
564    if r.data.len() >= 4 {
565        dt = read_u16(&r.data[2..]);
566    }
567
568    let biff = match biff_version {
569        0x0200 | 0x0002 | 0x0007 => Biff::Biff2,
570        0x0300 => Biff::Biff3,
571        0x0400 => Biff::Biff4,
572        0x0500 => Biff::Biff5,
573        0x0600 => Biff::Biff8,
574        0 => {
575            if dt == 0x1000 {
576                Biff::Biff5
577            } else {
578                Biff::Biff8
579            }
580        }
581        _ => Biff::Biff8,
582    };
583
584    Ok(Bof { biff })
585}
586
587/// `BoundSheet8` [MS-XLS 2.4.28]
588fn parse_sheet_metadata(
589    r: &mut Record<'_>,
590    encoding: &XlsEncoding,
591    biff: Biff,
592) -> Result<(usize, Sheet), XlsError> {
593    let pos = read_u32(r.data) as usize;
594    let visible = match r.data[4] & 0b0011_1111 {
595        0x00 => SheetVisible::Visible,
596        0x01 => SheetVisible::Hidden,
597        0x02 => SheetVisible::VeryHidden,
598        e => {
599            return Err(XlsError::Unrecognized {
600                typ: "BoundSheet8:hsState",
601                val: e,
602            });
603        }
604    };
605    let typ = match r.data[5] {
606        0x00 => SheetType::WorkSheet,
607        0x01 => SheetType::MacroSheet,
608        0x02 => SheetType::ChartSheet,
609        0x06 => SheetType::Vba,
610        e => {
611            return Err(XlsError::Unrecognized {
612                typ: "BoundSheet8:dt",
613                val: e,
614            });
615        }
616    };
617    r.data = &r.data[6..];
618    let mut name = parse_short_string(r, encoding, biff)?;
619    name.retain(|c| c != '\0');
620    Ok((pos, Sheet { name, typ, visible }))
621}
622
623fn parse_number(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result<Cell<Data>, XlsError> {
624    if r.len() < 14 {
625        return Err(XlsError::Len {
626            typ: "number",
627            expected: 14,
628            found: r.len(),
629        });
630    }
631    let row = read_u16(r) as u32;
632    let col = read_u16(&r[2..]) as u32;
633    let v = read_f64(&r[6..]);
634    let format = formats.get(read_u16(&r[4..]) as usize);
635
636    Ok(Cell::new((row, col), format_excel_f64(v, format, is_1904)))
637}
638
639fn parse_bool_err(r: &[u8]) -> Result<Cell<Data>, XlsError> {
640    if r.len() < 8 {
641        return Err(XlsError::Len {
642            typ: "BoolErr",
643            expected: 8,
644            found: r.len(),
645        });
646    }
647    let row = read_u16(r);
648    let col = read_u16(&r[2..]);
649    let pos = (row as u32, col as u32);
650    match r[7] {
651        0x00 => Ok(Cell::new(pos, Data::Bool(r[6] != 0))),
652        0x01 => Ok(Cell::new(pos, parse_err(r[6])?)),
653        e => Err(XlsError::Unrecognized {
654            typ: "fError",
655            val: e,
656        }),
657    }
658}
659
660fn parse_err(e: u8) -> Result<Data, XlsError> {
661    match e {
662        0x00 => Ok(Data::Error(CellErrorType::Null)),
663        0x07 => Ok(Data::Error(CellErrorType::Div0)),
664        0x0F => Ok(Data::Error(CellErrorType::Value)),
665        0x17 => Ok(Data::Error(CellErrorType::Ref)),
666        0x1D => Ok(Data::Error(CellErrorType::Name)),
667        0x24 => Ok(Data::Error(CellErrorType::Num)),
668        0x2A => Ok(Data::Error(CellErrorType::NA)),
669        0x2B => Ok(Data::Error(CellErrorType::GettingData)),
670        e => Err(XlsError::Unrecognized {
671            typ: "error",
672            val: e,
673        }),
674    }
675}
676
677fn parse_rk(r: &[u8], formats: &[CellFormat], is_1904: bool) -> Result<Cell<Data>, XlsError> {
678    if r.len() < 10 {
679        return Err(XlsError::Len {
680            typ: "rk",
681            expected: 10,
682            found: r.len(),
683        });
684    }
685    let row = read_u16(r);
686    let col = read_u16(&r[2..]);
687
688    Ok(Cell::new(
689        (row as u32, col as u32),
690        rk_num(&r[4..10], formats, is_1904),
691    ))
692}
693
694fn parse_merge_cells(r: &[u8], merge_cells: &mut Vec<Dimensions>) -> Result<(), XlsError> {
695    let count = read_u16(r);
696
697    for i in 0..count {
698        let offset: usize = (2 + i * 8).into();
699
700        let rf = read_u16(&r[offset..]);
701        let rl = read_u16(&r[offset + 2..]);
702        let cf = read_u16(&r[offset + 4..]);
703        let cl = read_u16(&r[offset + 6..]);
704
705        merge_cells.push(Dimensions {
706            start: (rf.into(), cf.into()),
707            end: (rl.into(), cl.into()),
708        });
709    }
710
711    Ok(())
712}
713
714fn parse_mul_rk(
715    r: &[u8],
716    cells: &mut Vec<Cell<Data>>,
717    formats: &[CellFormat],
718    is_1904: bool,
719) -> Result<(), XlsError> {
720    if r.len() < 6 {
721        return Err(XlsError::Len {
722            typ: "rk",
723            expected: 6,
724            found: r.len(),
725        });
726    }
727
728    let row = read_u16(r);
729    let col_first = read_u16(&r[2..]);
730    let col_last = read_u16(&r[r.len() - 2..]);
731
732    if r.len() != 6 + 6 * (col_last - col_first + 1) as usize {
733        return Err(XlsError::Len {
734            typ: "rk",
735            expected: 6 + 6 * (col_last - col_first + 1) as usize,
736            found: r.len(),
737        });
738    }
739
740    let mut col = col_first as u32;
741
742    for rk in r[4..r.len() - 2].chunks(6) {
743        cells.push(Cell::new((row as u32, col), rk_num(rk, formats, is_1904)));
744        col += 1;
745    }
746    Ok(())
747}
748
749fn rk_num(rk: &[u8], formats: &[CellFormat], is_1904: bool) -> Data {
750    let d100 = (rk[2] & 1) != 0;
751    let is_int = (rk[2] & 2) != 0;
752    let format = formats.get(read_u16(rk) as usize);
753
754    let mut v = [0u8; 8];
755    v[4..].copy_from_slice(&rk[2..]);
756    v[4] &= 0xFC;
757    if is_int {
758        let v = (read_i32(&v[4..8]) >> 2) as i64;
759        if d100 && v % 100 != 0 {
760            format_excel_f64(v as f64 / 100.0, format, is_1904)
761        } else {
762            format_excel_i64(if d100 { v / 100 } else { v }, format, is_1904)
763        }
764    } else {
765        let v = read_f64(&v);
766        format_excel_f64(if d100 { v / 100.0 } else { v }, format, is_1904)
767    }
768}
769
770/// `ShortXLUnicodeString` [MS-XLS 2.5.240]
771fn parse_short_string(
772    r: &mut Record<'_>,
773    encoding: &XlsEncoding,
774    biff: Biff,
775) -> Result<String, XlsError> {
776    if r.data.len() < 2 {
777        return Err(XlsError::Len {
778            typ: "short string",
779            expected: 2,
780            found: r.data.len(),
781        });
782    }
783
784    let cch = r.data[0] as usize;
785    r.data = &r.data[1..];
786    let mut high_byte = None;
787
788    if matches!(biff, Biff::Biff8) {
789        high_byte = Some(r.data[0] & 0x1 != 0);
790        r.data = &r.data[1..];
791    }
792
793    let mut s = String::with_capacity(cch);
794    encoding.decode_to(r.data, cch, &mut s, high_byte);
795    Ok(s)
796}
797
798/// `XLUnicodeString` [MS-XLS 2.5.294]
799fn parse_string(r: &[u8], encoding: &XlsEncoding, biff: Biff) -> Result<String, XlsError> {
800    let (mut high_byte, expected) = match biff {
801        Biff::Biff2 | Biff::Biff3 | Biff::Biff4 | Biff::Biff5 => (None, 2),
802        Biff::Biff8 => (Some(false), 3),
803    };
804    if r.len() < expected {
805        if 2 == r.len() && read_u16(r) == 0 {
806            // tests/OOM_alloc2.xls
807            return Ok(String::new());
808        }
809        return Err(XlsError::Len {
810            typ: "string",
811            expected,
812            found: r.len(),
813        });
814    }
815    // delay populating Some(_) variant until length checks guarantee r[2] can't crash
816    high_byte = high_byte.map(|_| r[2] & 0x1 != 0);
817
818    let cch = read_u16(r) as usize;
819    let mut s = String::with_capacity(cch);
820    encoding.decode_to(&r[expected..], cch, &mut s, high_byte);
821    Ok(s)
822}
823
824fn parse_label(r: &[u8], encoding: &XlsEncoding, biff: Biff) -> Result<Cell<Data>, XlsError> {
825    if r.len() < 6 {
826        return Err(XlsError::Len {
827            typ: "label",
828            expected: 6,
829            found: r.len(),
830        });
831    }
832    let row = read_u16(r);
833    let col = read_u16(&r[2..]);
834    let _ixfe = read_u16(&r[4..]);
835    Ok(Cell::new(
836        (row as u32, col as u32),
837        Data::String(parse_string(&r[6..], encoding, biff)?),
838    ))
839}
840
841fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<Data>>, XlsError> {
842    if r.len() < 10 {
843        return Err(XlsError::Len {
844            typ: "label sst",
845            expected: 10,
846            found: r.len(),
847        });
848    }
849    let row = read_u16(r);
850    let col = read_u16(&r[2..]);
851    let i = read_u32(&r[6..]) as usize;
852    if let Some(s) = strings.get(i) {
853        if !s.is_empty() {
854            return Ok(Some(Cell::new(
855                (row as u32, col as u32),
856                Data::String(s.clone()),
857            )));
858        }
859    }
860    Ok(None)
861}
862
863fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
864    let (rf, rl, mut cf, cl) = match r.len() {
865        10 => (
866            read_u16(&r[0..2]) as u32,
867            read_u16(&r[2..4]) as u32,
868            read_u16(&r[4..6]) as u32,
869            read_u16(&r[6..8]) as u32,
870        ),
871        14 => (
872            read_u32(&r[0..4]),
873            read_u32(&r[4..8]),
874            read_u16(&r[8..10]) as u32,
875            read_u16(&r[10..12]) as u32,
876        ),
877        _ => {
878            return Err(XlsError::Len {
879                typ: "dimensions",
880                expected: 14,
881                found: r.len(),
882            });
883        }
884    };
885    // 2.5.53 ColU must be <= 0xFF, if larger, reasonable to assume
886    // starts at 0
887    // tests/OOM_alloc2.xls
888    if 0xFF < cf || cl < cf {
889        cf = 0;
890    }
891    if 1 <= rl && 1 <= cl {
892        Ok(Dimensions {
893            start: (rf, cf),
894            end: (rl - 1, cl - 1),
895        })
896    } else {
897        Ok(Dimensions {
898            start: (rf, cf),
899            end: (rf, cf),
900        })
901    }
902}
903
904// Parse the Excel xls Shared String Table (SST). See [MS-XLS] 2.4.265.
905//
906// https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/b6231b92-d32e-4626-badd-c3310a672bab
907fn parse_sst(r: &mut Record<'_>, encoding: &XlsEncoding) -> Result<Vec<String>, XlsError> {
908    if r.data.len() < 8 {
909        return Err(XlsError::Len {
910            typ: "sst",
911            expected: 8,
912            found: r.data.len(),
913        });
914    }
915    let mut sst = vec![];
916
917    // Skip cstTotal and cstUnique headers in SST record.
918    r.data = &r.data[8..];
919
920    while !r.data.is_empty() || r.continue_record() {
921        sst.push(read_rich_extended_string(r, encoding)?);
922    }
923
924    Ok(sst)
925}
926
927/// Decode XF (extract only ifmt - Format identifier)
928///
929/// See: <https://learn.microsoft.com/ru-ru/openspecs/office_file_formats/ms-xls/993d15c4-ec04-43e9-ba36-594dfb336c6d>
930fn parse_xf(r: &Record<'_>) -> Result<u16, XlsError> {
931    if r.data.len() < 4 {
932        return Err(XlsError::Len {
933            typ: "xf",
934            expected: 4,
935            found: r.data.len(),
936        });
937    }
938
939    Ok(read_u16(&r.data[2..]))
940}
941
942/// Decode Format [MS-XLS 2.4.126]
943///
944/// See: <https://learn.microsoft.com/ru-ru/openspecs/office_file_formats/ms-xls/300280fd-e4fe-4675-a924-4d383af48d3b>
945fn parse_format(
946    r: &mut Record<'_>,
947    encoding: &XlsEncoding,
948    biff: Biff,
949) -> Result<(u16, CellFormat), XlsError> {
950    if r.data.len() < 2 {
951        return Err(XlsError::Len {
952            typ: "format",
953            expected: 2,
954            found: r.data.len(),
955        });
956    }
957    let ifmt = read_u16(r.data);
958    match ifmt {
959        5..=8 | 23..=26 | 41..=44 | 63..=66 | 164..=382 => (),
960        _ => return Err(XlsError::InvalidFormat { ifmt }),
961    }
962
963    let s = parse_string(&r.data[2..], encoding, biff)?;
964    Ok((ifmt, detect_custom_number_format(&s)))
965}
966
967/// Decode `XLUnicodeRichExtendedString` [MS-XLS 2.5.293].
968///
969/// See: <https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/173d9f51-e5d3-43da-8de2-be7f22e119b9>
970fn read_rich_extended_string(
971    r: &mut Record<'_>,
972    encoding: &XlsEncoding,
973) -> Result<String, XlsError> {
974    if r.data.is_empty() {
975        // spec violation: at very least cch and flags should be present
976        return Ok(String::new());
977    }
978    if r.data.len() < 3 {
979        return Err(XlsError::Len {
980            typ: "rich extended string",
981            expected: 3,
982            found: r.data.len(),
983        });
984    }
985
986    let cch = read_u16(r.data) as usize;
987    let flags = r.data[2];
988
989    r.data = &r.data[3..];
990
991    let high_byte = flags & 0x1 != 0;
992
993    // how many FormatRun in rgRun data block
994    let mut c_run = 0;
995
996    // how many bytes in ExtRst data block
997    let mut cb_ext_rst = 0;
998
999    // if flag fRichSt exists, read cRun and forward.
1000    if flags & 0x8 != 0 {
1001        c_run = read_u16(r.data) as usize;
1002        r.data = &r.data[2..];
1003    }
1004
1005    // if flag fExtSt exists, read cbExtRst and forward.
1006    if flags & 0x4 != 0 {
1007        cb_ext_rst = read_i32(r.data) as usize;
1008        r.data = &r.data[4..];
1009    }
1010
1011    // read rgb data block for the string we want
1012    let s = read_dbcs(encoding, cch, r, high_byte)?;
1013
1014    // skip rgRun data block. Note: each FormatRun contain 4 bytes.
1015    r.skip(c_run * 4)?;
1016
1017    // skip ExtRst data block.
1018    r.skip(cb_ext_rst)?;
1019
1020    Ok(s)
1021}
1022
1023fn read_dbcs(
1024    encoding: &XlsEncoding,
1025    mut len: usize,
1026    r: &mut Record<'_>,
1027    mut high_byte: bool,
1028) -> Result<String, XlsError> {
1029    let mut s = String::with_capacity(len);
1030    while len > 0 {
1031        let (l, at) = encoding.decode_to(r.data, len, &mut s, Some(high_byte));
1032        r.data = &r.data[at..];
1033        len -= l;
1034        if len > 0 {
1035            if r.continue_record() {
1036                high_byte = r.data[0] & 0x1 != 0;
1037                r.data = &r.data[1..];
1038            } else {
1039                return Err(XlsError::EoStream("dbcs"));
1040            }
1041        }
1042    }
1043    Ok(s)
1044}
1045
1046fn read_unicode_string_no_cch(encoding: &XlsEncoding, buf: &[u8], len: &usize, s: &mut String) {
1047    encoding.decode_to(&buf[1..=*len], *len, s, Some(buf[0] & 0x1 != 0));
1048}
1049
1050struct Record<'a> {
1051    typ: u16,
1052    data: &'a [u8],
1053    cont: Vec<&'a [u8]>,
1054}
1055
1056impl<'a> Record<'a> {
1057    fn continue_record(&mut self) -> bool {
1058        if self.cont.is_empty() {
1059            false
1060        } else {
1061            self.data = self.cont.remove(0);
1062            true
1063        }
1064    }
1065
1066    fn skip(&mut self, mut len: usize) -> Result<(), XlsError> {
1067        while len > 0 {
1068            if self.data.is_empty() && !self.continue_record() {
1069                return Err(XlsError::ContinueRecordTooShort);
1070            }
1071            let l = min(len, self.data.len());
1072            let (_, next) = self.data.split_at(l);
1073            self.data = next;
1074            len -= l;
1075        }
1076        Ok(())
1077    }
1078}
1079
1080// Simple Debug impl to dump record data in hex format.
1081impl fmt::Debug for Record<'_> {
1082    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1083        writeln!(
1084            f,
1085            "\nRecord = 0x{:04X}, Length = 0x{:04X}, {}",
1086            self.typ,
1087            self.data.len(),
1088            self.data.len()
1089        )?;
1090
1091        let mut iter = self.data.chunks(16);
1092        for chunk in iter.by_ref() {
1093            for byte in chunk {
1094                write!(f, "{byte:02X} ")?;
1095            }
1096            writeln!(f)?;
1097        }
1098
1099        Ok(())
1100    }
1101}
1102
1103struct RecordIter<'a> {
1104    stream: &'a [u8],
1105}
1106
1107impl<'a> Iterator for RecordIter<'a> {
1108    type Item = Result<Record<'a>, XlsError>;
1109    fn next(&mut self) -> Option<Self::Item> {
1110        if self.stream.len() < 4 {
1111            return if self.stream.is_empty() {
1112                None
1113            } else {
1114                Some(Err(XlsError::EoStream("record type and length")))
1115            };
1116        }
1117        let t = read_u16(self.stream);
1118        let mut len = read_u16(&self.stream[2..]) as usize;
1119        if self.stream.len() < len + 4 {
1120            return Some(Err(XlsError::EoStream("record length")));
1121        }
1122        let (data, next) = self.stream.split_at(len + 4);
1123        self.stream = next;
1124        let d = &data[4..];
1125
1126        // Append next record data if it is a Continue record
1127        let mut cont = Vec::new();
1128        if next.len() > 4 && read_u16(next) == 0x003C {
1129            while self.stream.len() > 4 && read_u16(self.stream) == 0x003C {
1130                len = read_u16(&self.stream[2..]) as usize;
1131                if self.stream.len() < len + 4 {
1132                    return Some(Err(XlsError::EoStream("continue record length")));
1133                }
1134                let sp = self.stream.split_at(len + 4);
1135                cont.push(&sp.0[4..]);
1136                self.stream = sp.1;
1137            }
1138        }
1139
1140        Some(Ok(Record {
1141            typ: t,
1142            data: d,
1143            cont,
1144        }))
1145    }
1146}
1147
1148/// Formula parsing
1149///
1150/// Does not implement ALL possibilities, only Area are parsed
1151fn parse_defined_names(rgce: &[u8]) -> Result<(Option<usize>, String), XlsError> {
1152    if rgce.is_empty() {
1153        // TODO: do something better here ...
1154        return Ok((None, "empty rgce".to_string()));
1155    }
1156    let ptg = rgce[0];
1157    let res = match ptg {
1158        0x3a | 0x5a | 0x7a => {
1159            // PtgRef3d
1160            let ixti = read_u16(&rgce[1..3]) as usize;
1161            let mut f = String::new();
1162            // TODO: check with relative columns
1163            f.push('$');
1164            push_column(read_u16(&rgce[5..7]) as u32, &mut f);
1165            f.push('$');
1166            f.push_str(&format!("{}", read_u16(&rgce[3..5]) as u32 + 1));
1167            (Some(ixti), f)
1168        }
1169        0x3b | 0x5b | 0x7b => {
1170            // PtgArea3d
1171            let ixti = read_u16(&rgce[1..3]) as usize;
1172            let mut f = String::new();
1173            // TODO: check with relative columns
1174            f.push('$');
1175            push_column(read_u16(&rgce[7..9]) as u32, &mut f);
1176            f.push('$');
1177            write!(&mut f, "{}", read_u16(&rgce[3..5]) as u32 + 1).unwrap();
1178            f.push(':');
1179            f.push('$');
1180            push_column(read_u16(&rgce[9..11]) as u32, &mut f);
1181            f.push('$');
1182            write!(&mut f, "{}", read_u16(&rgce[5..7]) as u32 + 1).unwrap();
1183            (Some(ixti), f)
1184        }
1185        0x3c | 0x5c | 0x7c | 0x3d | 0x5d | 0x7d => {
1186            // PtgAreaErr3d or PtfRefErr3d
1187            let ixti = read_u16(&rgce[1..3]) as usize;
1188            (Some(ixti), "#REF!".to_string())
1189        }
1190        _ => (None, format!("Unsupported ptg: {ptg:x}")),
1191    };
1192    Ok(res)
1193}
1194
1195/// Formula parsing
1196///
1197/// `CellParsedFormula` [MS-XLS 2.5.198.3]
1198fn parse_formula(
1199    mut rgce: &[u8],
1200    sheets: &[String],
1201    names: &[(String, String)],
1202    xtis: &[Xti],
1203    encoding: &XlsEncoding,
1204) -> Result<String, XlsError> {
1205    let mut stack = Vec::new();
1206    let mut formula = String::with_capacity(rgce.len());
1207    let cce = read_u16(rgce) as usize;
1208    rgce = &rgce[2..2 + cce];
1209    while !rgce.is_empty() {
1210        let ptg = rgce[0];
1211        rgce = &rgce[1..];
1212        match ptg {
1213            0x3a | 0x5a | 0x7a => {
1214                // PtgRef3d
1215                let ixti = read_u16(&rgce[0..2]);
1216                let rowu = read_u16(&rgce[2..]);
1217                let colu = read_u16(&rgce[4..]);
1218                let sh = xtis
1219                    .get(ixti as usize)
1220                    .and_then(|xti| sheets.get(xti.itab_first as usize))
1221                    .map_or("#REF", |sh| sh);
1222                stack.push(formula.len());
1223                formula.push_str(sh);
1224                formula.push('!');
1225                let col = colu << 2; // first 14 bits only
1226                if colu & 2 != 0 {
1227                    formula.push('$');
1228                }
1229                push_column(col as u32, &mut formula);
1230                if colu & 1 != 0 {
1231                    formula.push('$');
1232                }
1233                write!(&mut formula, "{}", rowu + 1).unwrap();
1234                rgce = &rgce[6..];
1235            }
1236            0x3b | 0x5b | 0x7b => {
1237                // PtgArea3d
1238                let ixti = read_u16(&rgce[0..2]);
1239                stack.push(formula.len());
1240                formula.push_str(sheets.get(ixti as usize).map_or("#REF", |s| &**s));
1241                formula.push('!');
1242                // TODO: check with relative columns
1243                formula.push('$');
1244                push_column(read_u16(&rgce[6..8]) as u32, &mut formula);
1245                write!(&mut formula, "${}:$", read_u16(&rgce[2..4]) as u32 + 1).unwrap();
1246                push_column(read_u16(&rgce[8..10]) as u32, &mut formula);
1247                write!(&mut formula, "${}", read_u16(&rgce[4..6]) as u32 + 1).unwrap();
1248                rgce = &rgce[10..];
1249            }
1250            0x3c | 0x5c | 0x7c => {
1251                // PtfRefErr3d
1252                let ixti = read_u16(&rgce[0..2]);
1253                stack.push(formula.len());
1254                formula.push_str(sheets.get(ixti as usize).map_or("#REF", |s| &**s));
1255                formula.push('!');
1256                formula.push_str("#REF!");
1257                rgce = &rgce[6..];
1258            }
1259            0x3d | 0x5d | 0x7d => {
1260                // PtgAreaErr3d
1261                let ixti = read_u16(&rgce[0..2]);
1262                stack.push(formula.len());
1263                formula.push_str(sheets.get(ixti as usize).map_or("#REF", |s| &**s));
1264                formula.push('!');
1265                formula.push_str("#REF!");
1266                rgce = &rgce[10..];
1267            }
1268            0x01 => {
1269                // PtgExp: array/shared formula, ignore
1270                debug!("ignoring PtgExp array/shared formula");
1271                stack.push(formula.len());
1272                rgce = &rgce[4..];
1273            }
1274            0x03..=0x11 => {
1275                // binary operation
1276                let e2 = stack.pop().ok_or(XlsError::StackLen)?;
1277                // imaginary 'e1' will actually already be the start of the binary op
1278                let op = match ptg {
1279                    0x03 => "+",
1280                    0x04 => "-",
1281                    0x05 => "*",
1282                    0x06 => "/",
1283                    0x07 => "^",
1284                    0x08 => "&",
1285                    0x09 => "<",
1286                    0x0A => "<=",
1287                    0x0B => "=",
1288                    0x0C => ">",
1289                    0x0D => ">=",
1290                    0x0E => "<>",
1291                    0x0F => " ",
1292                    0x10 => ",",
1293                    0x11 => ":",
1294                    _ => unreachable!(),
1295                };
1296                let e2 = formula.split_off(e2);
1297                write!(&mut formula, "{op}{e2}").unwrap();
1298            }
1299            0x12 => {
1300                let e = stack.last().ok_or(XlsError::StackLen)?;
1301                formula.insert(*e, '+');
1302            }
1303            0x13 => {
1304                let e = stack.last().ok_or(XlsError::StackLen)?;
1305                formula.insert(*e, '-');
1306            }
1307            0x14 => {
1308                formula.push('%');
1309            }
1310            0x15 => {
1311                let e = stack.last().ok_or(XlsError::StackLen)?;
1312                formula.insert(*e, '(');
1313                formula.push(')');
1314            }
1315            0x16 => {
1316                stack.push(formula.len());
1317            }
1318            0x17 => {
1319                stack.push(formula.len());
1320                formula.push('\"');
1321                let cch = rgce[0] as usize;
1322                read_unicode_string_no_cch(encoding, &rgce[1..], &cch, &mut formula);
1323                formula.push('\"');
1324                rgce = &rgce[2 + cch..];
1325            }
1326            0x18 => {
1327                rgce = &rgce[5..];
1328            }
1329            0x19 => {
1330                let etpg = rgce[0];
1331                rgce = &rgce[1..];
1332                match etpg {
1333                    0x01 | 0x02 | 0x08 | 0x20 | 0x21 => rgce = &rgce[2..],
1334                    0x04 => {
1335                        // PtgAttrChoose
1336                        let n = read_u16(&rgce[..2]) as usize + 1;
1337                        rgce = &rgce[2 + 2 * n..]; // ignore
1338                    }
1339                    0x10 => {
1340                        rgce = &rgce[2..];
1341                        let e = *stack.last().ok_or(XlsError::StackLen)?;
1342                        let e = formula.split_off(e);
1343                        write!(&mut formula, "SUM({e})").unwrap();
1344                    }
1345                    0x40 | 0x41 => {
1346                        // PtfAttrSpace
1347                        let e = *stack.last().ok_or(XlsError::StackLen)?;
1348                        let space = match rgce[0] {
1349                            0x00 | 0x02 | 0x04 | 0x06 => ' ',
1350                            0x01 | 0x03 | 0x05 => '\r',
1351                            val => {
1352                                return Err(XlsError::Unrecognized {
1353                                    typ: "PtgAttrSpaceType",
1354                                    val,
1355                                });
1356                            }
1357                        };
1358                        let cch = rgce[1];
1359                        for _ in 0..cch {
1360                            formula.insert(e, space);
1361                        }
1362                        rgce = &rgce[2..];
1363                    }
1364                    e => return Err(XlsError::Etpg(e)),
1365                }
1366            }
1367            0x1C => {
1368                stack.push(formula.len());
1369                let err = rgce[0];
1370                rgce = &rgce[1..];
1371                match err {
1372                    0x00 => formula.push_str("#NULL!"),
1373                    0x07 => formula.push_str("#DIV/0!"),
1374                    0x0F => formula.push_str("#VALUE!"),
1375                    0x17 => formula.push_str("#REF!"),
1376                    0x1D => formula.push_str("#NAME?"),
1377                    0x24 => formula.push_str("#NUM!"),
1378                    0x2A => formula.push_str("#N/A"),
1379                    0x2B => formula.push_str("#GETTING_DATA"),
1380                    e => {
1381                        return Err(XlsError::Unrecognized {
1382                            typ: "BErr",
1383                            val: e,
1384                        });
1385                    }
1386                }
1387            }
1388            0x1D => {
1389                stack.push(formula.len());
1390                formula.push_str(if rgce[0] == 0 { "FALSE" } else { "TRUE" });
1391                rgce = &rgce[1..];
1392            }
1393            0x1E => {
1394                stack.push(formula.len());
1395                write!(&mut formula, "{}", read_u16(rgce)).unwrap();
1396                rgce = &rgce[2..];
1397            }
1398            0x1F => {
1399                stack.push(formula.len());
1400                write!(&mut formula, "{}", read_f64(rgce)).unwrap();
1401                rgce = &rgce[8..];
1402            }
1403            0x20 | 0x40 | 0x60 => {
1404                // PtgArray: ignore
1405                stack.push(formula.len());
1406                formula.push_str("{PtgArray}");
1407                rgce = &rgce[7..];
1408            }
1409            0x21 | 0x22 | 0x41 | 0x42 | 0x61 | 0x62 => {
1410                let (iftab, argc) = match ptg {
1411                    0x22 | 0x42 | 0x62 => {
1412                        let iftab = read_u16(&rgce[1..]) as usize;
1413                        let argc = rgce[0] as usize;
1414                        rgce = &rgce[3..];
1415                        (iftab, argc)
1416                    }
1417                    _ => {
1418                        let iftab = read_u16(rgce) as usize;
1419                        if iftab > crate::utils::FTAB_LEN {
1420                            return Err(XlsError::IfTab(iftab));
1421                        }
1422                        rgce = &rgce[2..];
1423                        let argc = crate::utils::FTAB_ARGC[iftab] as usize;
1424                        (iftab, argc)
1425                    }
1426                };
1427                if stack.len() < argc {
1428                    return Err(XlsError::StackLen);
1429                }
1430                if argc > 0 {
1431                    let args_start = stack.len() - argc;
1432                    let mut args = stack.split_off(args_start);
1433                    let start = args[0];
1434                    for s in &mut args {
1435                        *s -= start;
1436                    }
1437                    let fargs = formula.split_off(start);
1438                    stack.push(formula.len());
1439                    args.push(fargs.len());
1440                    formula.push_str(
1441                        crate::utils::FTAB
1442                            .get(iftab)
1443                            .ok_or(XlsError::IfTab(iftab))?,
1444                    );
1445                    formula.push('(');
1446                    for w in args.windows(2) {
1447                        formula.push_str(&fargs[w[0]..w[1]]);
1448                        formula.push(',');
1449                    }
1450                    formula.pop();
1451                    formula.push(')');
1452                } else {
1453                    stack.push(formula.len());
1454                    formula.push_str(crate::utils::FTAB[iftab]);
1455                    formula.push_str("()");
1456                }
1457            }
1458            0x23 | 0x43 | 0x63 => {
1459                let iname = read_u32(rgce) as usize - 1; // one-based
1460                stack.push(formula.len());
1461                formula.push_str(names.get(iname).map_or("#REF!", |n| &*n.0));
1462                rgce = &rgce[4..];
1463            }
1464            0x24 | 0x44 | 0x64 => {
1465                stack.push(formula.len());
1466                let row = read_u16(rgce) + 1;
1467                let col = read_u16(&[rgce[2], rgce[3] & 0x3F]);
1468                if rgce[3] & 0x80 != 0x80 {
1469                    formula.push('$');
1470                }
1471                push_column(col as u32, &mut formula);
1472                if rgce[3] & 0x40 != 0x40 {
1473                    formula.push('$');
1474                }
1475                formula.push_str(&format!("{row}"));
1476                rgce = &rgce[4..];
1477            }
1478            0x25 | 0x45 | 0x65 => {
1479                stack.push(formula.len());
1480                formula.push('$');
1481                push_column(read_u16(&rgce[4..6]) as u32, &mut formula);
1482                write!(&mut formula, "${}:$", read_u16(&rgce[0..2]) as u32 + 1).unwrap();
1483                push_column(read_u16(&rgce[6..8]) as u32, &mut formula);
1484                write!(&mut formula, "${}", read_u16(&rgce[2..4]) as u32 + 1).unwrap();
1485                rgce = &rgce[8..];
1486            }
1487            0x2A | 0x4A | 0x6A => {
1488                stack.push(formula.len());
1489                formula.push_str("#REF!");
1490                rgce = &rgce[4..];
1491            }
1492            0x2B | 0x4B | 0x6B => {
1493                stack.push(formula.len());
1494                formula.push_str("#REF!");
1495                rgce = &rgce[8..];
1496            }
1497            0x39 | 0x59 => {
1498                // PfgNameX
1499                stack.push(formula.len());
1500                formula.push_str("[PtgNameX]");
1501                rgce = &rgce[6..];
1502            }
1503            _ => {
1504                return Err(XlsError::Unrecognized {
1505                    typ: "ptg",
1506                    val: ptg,
1507                });
1508            }
1509        }
1510    }
1511    if stack.len() == 1 {
1512        Ok(formula)
1513    } else {
1514        Err(XlsError::InvalidFormula {
1515            stack_size: stack.len(),
1516        })
1517    }
1518}
1519
1520/// `FormulaValue` [MS-XLS 2.5.133]
1521fn parse_formula_value(r: &[u8]) -> Result<Option<Data>, XlsError> {
1522    match *r {
1523        // String, value should be in next record
1524        [0x00, .., 0xFF, 0xFF] => Ok(None),
1525        [0x01, _, b, .., 0xFF, 0xFF] => Ok(Some(Data::Bool(b != 0))),
1526        [0x02, _, e, .., 0xFF, 0xFF] => parse_err(e).map(Some),
1527        // ignore, return blank string value
1528        [0x03, _, .., 0xFF, 0xFF] => Ok(Some(Data::String("".to_string()))),
1529        [e, .., 0xFF, 0xFF] => Err(XlsError::Unrecognized {
1530            typ: "error",
1531            val: e,
1532        }),
1533        _ => Ok(Some(Data::Float(read_f64(r)))),
1534    }
1535}
1536
/// OfficeArtRecord [MS-ODRAW 1.3.1].
#[cfg(feature = "picture")]
struct ArtRecord<'a> {
    /// Upper 12 bits of the first header word.
    instance: u16,
    /// Record type; the iterator only yields types `0xF000..=0xFFFF`.
    typ: u16,
    /// Record payload, without the 8-byte header.
    data: &'a [u8],
}

/// Iterator over the OfficeArt records laid out back to back in `stream`.
#[cfg(feature = "picture")]
struct ArtRecordIter<'a> {
    /// Bytes that have not been turned into records yet.
    stream: &'a [u8],
}

#[cfg(feature = "picture")]
impl<'a> Iterator for ArtRecordIter<'a> {
    type Item = Result<ArtRecord<'a>, XlsError>;

    /// Yield the next record. The 8-byte header holds the version/instance
    /// word, the record type and the payload length.
    fn next(&mut self) -> Option<Self::Item> {
        const HEADER_LEN: usize = 8;

        if self.stream.len() < HEADER_LEN {
            return if self.stream.is_empty() {
                None
            } else {
                Some(Err(XlsError::EoStream("art record header")))
            };
        }
        let ver_ins = read_u16(self.stream);
        let instance = ver_ins >> 4;
        let typ = read_u16(&self.stream[2..]);
        if typ < 0xF000 {
            return Some(Err(XlsError::Art("type range 0xF000 - 0xFFFF")));
        }
        let len = read_usize(&self.stream[4..]);
        // Compare against the bytes left after the header rather than
        // computing `len + 8`, which could overflow for a huge declared
        // length where `usize` is 32 bits wide.
        if self.stream.len() - HEADER_LEN < len {
            return Some(Err(XlsError::EoStream("art record length")));
        }
        let (record, next) = self.stream.split_at(HEADER_LEN + len);
        self.stream = next;

        Some(Ok(ArtRecord {
            instance,
            typ,
            data: &record[HEADER_LEN..],
        }))
    }
}
1582
/// Parsing pictures.
///
/// Walks the OfficeArt records in `stream`, recursing into containers, and
/// returns every embedded picture as `(file extension, raw picture bytes)`.
///
/// # Errors
///
/// * Any error produced while iterating the records.
/// * [`XlsError::Art`] when a record is too short for its fixed fields or a
///   blip record carries an instance value that is not handled here (both
///   cases used to panic).
#[cfg(feature = "picture")]
fn parse_pictures(stream: &[u8]) -> Result<Vec<(String, Vec<u8>)>, XlsError> {
    let mut pics = Vec::new();
    let records = ArtRecordIter { stream };
    for record in records {
        let r = record?;
        match r.typ {
            // OfficeArtDggContainer [MS-ODRAW 2.2.12]
            // OfficeArtBStoreContainer [MS-ODRAW 2.2.20]
            0xF000 | 0xF001 => pics.extend(parse_pictures(r.data)?),
            // OfficeArtFBSE [MS-ODRAW 2.2.32]
            0xF007 => {
                // Byte 33 is added to the 36 fixed bytes to find where the
                // embedded blip record starts.
                let cb_name = *r
                    .data
                    .get(33)
                    .ok_or(XlsError::Art("FBSE record too short"))?;
                let skip = 36 + cb_name as usize;
                let embedded = r
                    .data
                    .get(skip..)
                    .ok_or(XlsError::Art("FBSE record too short"))?;
                pics.extend(parse_pictures(embedded)?);
            }
            // OfficeArtBlip [MS-ODRAW 2.2.23]
            0xF01A | 0xF01B | 0xF01C | 0xF01D | 0xF01E | 0xF01F | 0xF029 | 0xF02A => {
                // The instance value selects how many bytes precede the
                // picture data for each blip type.
                let (ext, skip) = match (r.typ, r.instance) {
                    // OfficeArtBlipEMF [MS-ODRAW 2.2.24]
                    (0xF01A, 0x3D4) => ("emf", 50usize),
                    (0xF01A, 0x3D5) => ("emf", 66),
                    // OfficeArtBlipWMF [MS-ODRAW 2.2.25]
                    (0xF01B, 0x216) => ("wmf", 50),
                    (0xF01B, 0x217) => ("wmf", 66),
                    // OfficeArtBlipPICT [MS-ODRAW 2.2.26]
                    (0xF01C, 0x542) => ("pict", 50),
                    (0xF01C, 0x543) => ("pict", 66),
                    // OfficeArtBlipJPEG [MS-ODRAW 2.2.27]
                    (0xF01D | 0xF02A, 0x46A | 0x6E2) => ("jpg", 17),
                    (0xF01D | 0xF02A, 0x46B | 0x6E3) => ("jpg", 33),
                    // OfficeArtBlipPNG [MS-ODRAW 2.2.28]
                    (0xF01E, 0x6E0) => ("png", 17),
                    (0xF01E, 0x6E1) => ("png", 33),
                    // OfficeArtBlipDIB [MS-ODRAW 2.2.29]
                    (0xF01F, 0x7A8) => ("dib", 17),
                    (0xF01F, 0x7A9) => ("dib", 33),
                    // OfficeArtBlipTIFF [MS-ODRAW 2.2.30]
                    (0xF029, 0x6E4) => ("tiff", 17),
                    (0xF029, 0x6E5) => ("tiff", 33),
                    _ => return Err(XlsError::Art("unexpected blip instance")),
                };
                let picture = r
                    .data
                    .get(skip..)
                    .ok_or(XlsError::Art("blip record too short"))?;
                pics.push((ext.to_string(), picture.to_vec()));
            }
            _ => {}
        }
    }
    Ok(pics)
}
1675
#[cfg(test)]
mod tests {
    use super::*;

    /// `parse_string` is expected to fail on the two-byte input `[0, 1]`.
    #[test]
    fn test_parse_string() {
        let encoding = XlsEncoding::from_codepage(1252).unwrap();
        let outcome = parse_string(&[0, 1], &encoding, Biff::Biff8);
        assert!(outcome.is_err());
    }
}