Skip to main content

calamine_styles/xlsx/
mod.rs

1// SPDX-License-Identifier: MIT
2//
3// Copyright 2016-2025, Johann Tuffe.
4
5#![warn(missing_docs)]
6
7mod cells_reader;
8mod style_parser;
9
10use std::borrow::Cow;
11use std::collections::BTreeMap;
12use std::collections::HashMap;
13use std::io::BufReader;
14use std::io::{Read, Seek};
15use std::str::FromStr;
16
17use log::warn;
18use quick_xml::events::attributes::{AttrError, Attribute, Attributes};
19use quick_xml::events::BytesStart;
20use quick_xml::events::Event;
21use quick_xml::name::QName;
22use quick_xml::Decoder;
23use quick_xml::Reader as XmlReader;
24use zip::read::{ZipArchive, ZipFile};
25use zip::result::ZipError;
26
27use crate::datatype::DataRef;
28use crate::formats::{builtin_format_by_id, detect_custom_number_format, CellFormat};
29use crate::style::{
30    ColumnWidth, Font, FontStyle, FontWeight, RichText, RowHeight, StyleRange, TextRun,
31    UnderlineStyle, WorksheetLayout,
32};
33use crate::utils::{unescape_entity_to_buffer, unescape_xml};
34use crate::vba::VbaProject;
35use crate::{
36    Cell, CellErrorType, Data, Dimensions, HeaderRow, Metadata, Range, Reader, ReaderRef, Sheet,
37    SheetType, SheetVisible, Style, Table,
38};
39pub use cells_reader::XlsxCellReader;
40
41pub(crate) type XlReader<'a, RS> = XmlReader<BufReader<ZipFile<'a, RS>>>;
42
43/// Maximum number of rows allowed in an XLSX file.
44pub const MAX_ROWS: u32 = 1_048_576;
45
46/// Maximum number of columns allowed in an XLSX file.
47pub const MAX_COLUMNS: u32 = 16_384;
48
49/// An enum for Xlsx specific errors.
50#[derive(Debug)]
51pub enum XlsxError {
52    /// A wrapper for a variety of [`std::io::Error`] errors such as file
53    /// permissions when reading an XLSX file. This can be caused by a
54    /// non-existent file or parent directory or, commonly on Windows, if the
55    /// file is already open in Excel.
56    Io(std::io::Error),
57
58    /// A wrapper for a variety of [`zip::result::ZipError`] errors from
59    /// [`zip::ZipWriter`]. These relate to errors arising from reading the XLSX
60    /// file zip container.
61    Zip(zip::result::ZipError),
62
63    /// A general error when reading a VBA project from an XLSX file.
64    Vba(crate::vba::VbaError),
65
66    /// A wrapper for a variety of [`quick_xml::Error`] XML parsing errors, but
67    /// most commonly for missing data in the target file.
68    Xml(quick_xml::Error),
69
70    /// A wrapper for a variety of [`quick_xml::events::attributes::AttrError`]
71    /// errors related to attributes in XML elements.
72    XmlAttr(quick_xml::events::attributes::AttrError),
73
74    /// A wrapper for a variety of [`std::string::ParseError`] errors when
75    /// parsing strings.
76    Parse(std::string::ParseError),
77
78    /// A wrapper for a variety of [`std::num::ParseFloatError`] errors when
79    /// parsing floats.
80    ParseFloat(std::num::ParseFloatError),
81
82    /// A wrapper for a variety of [`std::num::ParseIntError`] errors when
83    /// parsing integers.
84    ParseInt(std::num::ParseIntError),
85
86    /// Unexpected end of XML file, usually when an end tag is missing.
87    XmlEof(&'static str),
88
89    /// Unexpected node in XML.
90    UnexpectedNode(&'static str),
91
92    /// XML file not found in XLSX container.
93    FileNotFound(String),
94
95    /// Relationship file not found in XLSX container.
96    RelationshipNotFound,
97
98    /// Non alphanumeric character found when parsing `A1` style range string.
99    Alphanumeric(u8),
100
101    /// Error when parsing the column name in a `A1` style range string.
102    NumericColumn(u8),
103
104    /// Missing column name when parsing an `A1` style range string.
105    RangeWithoutColumnComponent,
106
107    /// Missing row number when parsing an `A1` style range string.
108    RangeWithoutRowComponent,
109
110    /// Column number exceeds maximum allowed columns.
111    ColumnNumberOverflow,
112
113    /// Row number exceeds maximum allowed rows.
114    RowNumberOverflow,
115
116    /// Error when parsing dimensions of a worksheet.
117    DimensionCount(usize),
118
119    /// Unknown cell type (`t`) attribute.
120    CellTAttribute(String),
121
122    /// Unexpected XML element or attribute error.
123    Unexpected(&'static str),
124
125    /// Unrecognized worksheet type or state.
126    Unrecognized {
127        /// The data type.
128        typ: &'static str,
129
130        /// The value found.
131        val: String,
132    },
133
134    /// Unrecognized cell error type.
135    CellError(String),
136
137    /// Workbook is password protected.
138    Password,
139
140    /// Specified worksheet was not found.
141    WorksheetNotFound(String),
142
143    /// Specified worksheet Table was not found.
144    TableNotFound(String),
145
146    /// The specified sheet is not a worksheet.
147    NotAWorksheet(String),
148
149    /// A wrapper for a variety of [`quick_xml::encoding::EncodingError`]
150    /// encoding errors.
151    Encoding(quick_xml::encoding::EncodingError),
152
153    /// Specified Pivot Table was not found on worksheet.
154    PivotTableNotFound(String),
155}
156
157from_err!(std::io::Error, XlsxError, Io);
158from_err!(zip::result::ZipError, XlsxError, Zip);
159from_err!(crate::vba::VbaError, XlsxError, Vba);
160from_err!(quick_xml::Error, XlsxError, Xml);
161from_err!(std::num::ParseFloatError, XlsxError, ParseFloat);
162from_err!(std::num::ParseIntError, XlsxError, ParseInt);
163from_err!(quick_xml::encoding::EncodingError, XlsxError, Encoding);
164from_err!(quick_xml::events::attributes::AttrError, XlsxError, XmlAttr);
165
166impl std::fmt::Display for XlsxError {
167    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
168        match self {
169            XlsxError::Io(e) => write!(f, "I/O error: {e}"),
170            XlsxError::Zip(e) => write!(f, "Zip error: {e}"),
171            XlsxError::Xml(e) => write!(f, "Xml error: {e}"),
172            XlsxError::XmlAttr(e) => write!(f, "Xml attribute error: {e}"),
173            XlsxError::Vba(e) => write!(f, "Vba error: {e}"),
174            XlsxError::Parse(e) => write!(f, "Parse string error: {e}"),
175            XlsxError::ParseInt(e) => write!(f, "Parse integer error: {e}"),
176            XlsxError::ParseFloat(e) => write!(f, "Parse float error: {e}"),
177
178            XlsxError::XmlEof(e) => write!(f, "Unexpected end of xml, expecting '</{e}>'"),
179            XlsxError::UnexpectedNode(e) => write!(f, "Expecting '{e}' node"),
180            XlsxError::FileNotFound(e) => write!(f, "File not found '{e}'"),
181            XlsxError::RelationshipNotFound => write!(f, "Relationship not found"),
182            XlsxError::Alphanumeric(e) => {
183                write!(f, "Expecting alphanumeric character, got {e:X}")
184            }
185            XlsxError::NumericColumn(e) => write!(
186                f,
187                "Numeric character is not allowed for column name, got {e}",
188            ),
189            XlsxError::DimensionCount(e) => {
190                write!(f, "Range dimension must be lower than 2. Got {e}")
191            }
192            XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {e:?}"),
193            XlsxError::RangeWithoutColumnComponent => {
194                write!(f, "Range is missing the expected column component.")
195            }
196            XlsxError::RangeWithoutRowComponent => {
197                write!(f, "Range is missing the expected row component.")
198            }
199            XlsxError::ColumnNumberOverflow => write!(f, "column number overflow"),
200            XlsxError::RowNumberOverflow => write!(f, "row number overflow"),
201            XlsxError::Unexpected(e) => write!(f, "{e}"),
202            XlsxError::Unrecognized { typ, val } => write!(f, "Unrecognized {typ}: {val}"),
203            XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{e}'"),
204            XlsxError::WorksheetNotFound(n) => write!(f, "Worksheet '{n}' not found"),
205            XlsxError::Password => write!(f, "Workbook is password protected"),
206            XlsxError::TableNotFound(n) => write!(f, "Table '{n}' not found"),
207            XlsxError::NotAWorksheet(typ) => write!(f, "Expecting a worksheet, got {typ}"),
208            XlsxError::Encoding(e) => write!(f, "XML encoding error: {e}"),
209            XlsxError::PivotTableNotFound(pt) => {
210                write!(f, "Pivot Table '{pt}' was not found on worksheet")
211            }
212        }
213    }
214}
215
216impl std::error::Error for XlsxError {
217    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
218        match self {
219            XlsxError::Io(e) => Some(e),
220            XlsxError::Zip(e) => Some(e),
221            XlsxError::Xml(e) => Some(e),
222            XlsxError::Vba(e) => Some(e),
223            XlsxError::Parse(e) => Some(e),
224            XlsxError::ParseInt(e) => Some(e),
225            XlsxError::ParseFloat(e) => Some(e),
226            XlsxError::Encoding(e) => Some(e),
227            _ => None,
228        }
229    }
230}
231
232impl FromStr for CellErrorType {
233    type Err = XlsxError;
234    fn from_str(s: &str) -> Result<Self, Self::Err> {
235        match s {
236            "#DIV/0!" => Ok(CellErrorType::Div0),
237            "#N/A" => Ok(CellErrorType::NA),
238            "#NAME?" => Ok(CellErrorType::Name),
239            "#NULL!" => Ok(CellErrorType::Null),
240            "#NUM!" => Ok(CellErrorType::Num),
241            "#REF!" => Ok(CellErrorType::Ref),
242            "#VALUE!" => Ok(CellErrorType::Value),
243            _ => Err(XlsxError::CellError(s.into())),
244        }
245    }
246}
247
248type Tables = Option<Vec<(String, String, Vec<String>, Dimensions)>>;
249
250/// A struct representing xml zipped excel file
251/// Xlsx, Xlsm, Xlam
252pub struct Xlsx<RS> {
253    zip: ZipArchive<RS>,
254    /// Shared strings (can be plain strings or rich text)
255    strings: Vec<Data>,
256    /// Sheets paths
257    sheets: Vec<(String, String)>,
258    /// Tables: Name, Sheet, Columns, Data dimensions
259    tables: Tables,
260    /// Cell (number) formats
261    formats: Vec<CellFormat>,
262    /// Cell styles
263    pub styles: Vec<Style>,
264    /// 1904 datetime system
265    is_1904: bool,
266    /// Metadata
267    metadata: Metadata,
268    /// Pictures
269    #[cfg(feature = "picture")]
270    pictures: Option<Vec<(String, Vec<u8>)>>,
271    /// Merged Regions: Name, Sheet, Merged Dimensions
272    merged_regions: Option<Vec<(String, String, Dimensions)>>,
273    /// Reader options
274    options: XlsxOptions,
275}
276
277/// Xlsx reader options
278#[derive(Debug, Default)]
279#[non_exhaustive]
280struct XlsxOptions {
281    pub header_row: HeaderRow,
282}
283
284impl<RS: Read + Seek> Xlsx<RS> {
285    fn read_shared_strings(&mut self) -> Result<(), XlsxError> {
286        let mut xml = match xml_reader(&mut self.zip, "xl/sharedStrings.xml") {
287            None => return Ok(()),
288            Some(x) => x?,
289        };
290        let mut buf = Vec::with_capacity(1024);
291        loop {
292            buf.clear();
293            match xml.read_event_into(&mut buf) {
294                Ok(Event::Start(e)) if e.local_name().as_ref() == b"si" => {
295                    if let Some(data) = read_rich_string(&mut xml, e.name())? {
296                        self.strings.push(data);
297                    }
298                }
299                Ok(Event::End(e)) if e.local_name().as_ref() == b"sst" => break,
300                Ok(Event::Eof) => return Err(XlsxError::XmlEof("sst")),
301                Err(e) => return Err(XlsxError::Xml(e)),
302                _ => (),
303            }
304        }
305        Ok(())
306    }
307
308    fn read_styles(&mut self) -> Result<(), XlsxError> {
309        let mut xml = match xml_reader(&mut self.zip, "xl/styles.xml") {
310            None => return Ok(()),
311            Some(x) => x?,
312        };
313
314        let mut number_formats = BTreeMap::new();
315        let mut fonts = Vec::new();
316        let mut fills = Vec::new();
317        let mut borders = Vec::new();
318
319        let mut buf = Vec::with_capacity(1024);
320        let mut inner_buf = Vec::with_capacity(1024);
321        loop {
322            buf.clear();
323            match xml.read_event_into(&mut buf) {
324                Ok(Event::Start(e)) if e.local_name().as_ref() == b"numFmts" => loop {
325                    inner_buf.clear();
326                    match xml.read_event_into(&mut inner_buf) {
327                        Ok(Event::Start(e)) if e.local_name().as_ref() == b"numFmt" => {
328                            let mut id = Vec::new();
329                            let mut format = String::new();
330                            for a in e.attributes() {
331                                let a = a?;
332                                match a {
333                                    Attribute {
334                                        key: QName(b"numFmtId"),
335                                        value: v,
336                                    } => id.extend_from_slice(&v),
337                                    Attribute {
338                                        key: QName(b"formatCode"),
339                                        ..
340                                    } => {
341                                        let format_code = a
342                                            .decode_and_unescape_value(xml.decoder())?
343                                            .into_owned();
344                                        // Excel format codes use backslashes to escape special characters
345                                        // Remove escape backslashes (backslash followed by any character becomes just the character)
346                                        let mut unescaped = String::new();
347                                        let mut chars = format_code.chars().peekable();
348                                        while let Some(ch) = chars.next() {
349                                            if ch == '\\' {
350                                                // If there's a next character, use it without the backslash
351                                                if let Some(next_ch) = chars.next() {
352                                                    unescaped.push(next_ch);
353                                                } else {
354                                                    // Trailing backslash, keep it
355                                                    unescaped.push(ch);
356                                                }
357                                            } else {
358                                                unescaped.push(ch);
359                                            }
360                                        }
361                                        format = unescaped;
362                                    }
363                                    _ => (),
364                                }
365                            }
366                            if !format.is_empty() {
367                                number_formats.insert(id, format);
368                            }
369                        }
370                        Ok(Event::End(e)) if e.local_name().as_ref() == b"numFmts" => break,
371                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("numFmts")),
372                        Err(e) => return Err(XlsxError::Xml(e)),
373                        _ => (),
374                    }
375                },
376                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"fonts" => loop {
377                    inner_buf.clear();
378                    match xml.read_event_into(&mut inner_buf) {
379                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"font" => {
380                            let font = style_parser::parse_font(&mut xml, e)?;
381                            fonts.push(font);
382                        }
383                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"fonts" => break,
384                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("fonts")),
385                        Err(e) => return Err(XlsxError::Xml(e)),
386                        _ => (),
387                    }
388                },
389                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"fills" => loop {
390                    inner_buf.clear();
391                    match xml.read_event_into(&mut inner_buf) {
392                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"fill" => {
393                            let fill = style_parser::parse_fill(&mut xml, e)?;
394                            fills.push(fill);
395                        }
396                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"fills" => break,
397                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("fills")),
398                        Err(e) => return Err(XlsxError::Xml(e)),
399                        _ => (),
400                    }
401                },
402                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"borders" => loop {
403                    inner_buf.clear();
404                    match xml.read_event_into(&mut inner_buf) {
405                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"border" => {
406                            let border = style_parser::parse_border(&mut xml, e)?;
407                            borders.push(border);
408                        }
409                        Ok(Event::End(ref e)) if e.local_name().as_ref() == b"borders" => break,
410                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("borders")),
411                        Err(e) => return Err(XlsxError::Xml(e)),
412                        _ => (),
413                    }
414                },
415                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"cellXfs" => loop {
416                    inner_buf.clear();
417                    match xml.read_event_into(&mut inner_buf) {
418                        Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"xf" => {
419                            // Parse the style by building it from referenced components
420                            let mut style = Style::new();
421                            let mut num_fmt_id_bytes: Option<Vec<u8>> = None;
422
423                            // Parse attributes to get references to fonts, fills, borders
424                            for a in e.attributes() {
425                                let a = a?;
426                                match a.key.as_ref() {
427                                    b"fontId" => {
428                                        if let Ok(font_id) =
429                                            xml.decoder().decode(&a.value)?.parse::<usize>()
430                                        {
431                                            if let Some(font) = fonts.get(font_id) {
432                                                style = style.with_font(font.clone());
433                                            }
434                                        }
435                                    }
436                                    b"fillId" => {
437                                        if let Ok(fill_id) =
438                                            xml.decoder().decode(&a.value)?.parse::<usize>()
439                                        {
440                                            if let Some(fill) = fills.get(fill_id) {
441                                                style = style.with_fill(fill.clone());
442                                            }
443                                        }
444                                    }
445                                    b"borderId" => {
446                                        if let Ok(border_id) =
447                                            xml.decoder().decode(&a.value)?.parse::<usize>()
448                                        {
449                                            if let Some(border) = borders.get(border_id) {
450                                                style = style.with_borders(border.clone());
451                                            }
452                                        }
453                                    }
454                                    b"numFmtId" => {
455                                        // Store for both Style and CellFormat
456                                        num_fmt_id_bytes = Some(a.value.to_vec());
457
458                                        if let Ok(num_fmt_id) =
459                                            xml.decoder().decode(&a.value)?.parse::<u32>()
460                                        {
461                                            let mut fmt_id_bytes = Vec::new();
462                                            fmt_id_bytes.extend_from_slice(&a.value);
463                                            let format_code = match number_formats
464                                                .get(&fmt_id_bytes)
465                                            {
466                                                Some(fmt) => fmt.clone(),
467                                                None => {
468                                                    // Use built-in format
469                                                    match num_fmt_id {
470                                                        0 => "General".to_string(),
471                                                        1 => "0".to_string(),
472                                                        2 => "0.00".to_string(),
473                                                        3 => "#,##0".to_string(),
474                                                        4 => "#,##0.00".to_string(),
475                                                        9 => "0%".to_string(),
476                                                        10 => "0.00%".to_string(),
477                                                        11 => "0.00E+00".to_string(),
478                                                        12 => "# ?/?".to_string(),
479                                                        13 => "# ??/??".to_string(),
480                                                        14 => "mm-dd-yy".to_string(),
481                                                        15 => "d-mmm-yy".to_string(),
482                                                        16 => "d-mmm".to_string(),
483                                                        17 => "mmm-yy".to_string(),
484                                                        18 => "h:mm AM/PM".to_string(),
485                                                        19 => "h:mm:ss AM/PM".to_string(),
486                                                        20 => "h:mm".to_string(),
487                                                        21 => "h:mm:ss".to_string(),
488                                                        22 => "m/d/yy h:mm".to_string(),
489                                                        37 => "#,##0 ;(#,##0)".to_string(),
490                                                        38 => "#,##0 ;[Red](#,##0)".to_string(),
491                                                        39 => "#,##0.00;(#,##0.00)".to_string(),
492                                                        40 => "#,##0.00;[Red](#,##0.00)".to_string(),
493                                                        41 => "_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)".to_string(),
494                                                        42 => "_($* #,##0_);_($* (#,##0);_($* \"-\"_);_(@_)".to_string(),
495                                                        43 => "_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)".to_string(),
496                                                        44 => "_($* #,##0.00_);_($* (#,##0.00);_($* \"-\"??_);_(@_)".to_string(),
497                                                        45 => "mm:ss".to_string(),
498                                                        46 => "[h]:mm:ss".to_string(),
499                                                        47 => "mmss.0".to_string(),
500                                                        48 => "##0.0E+0".to_string(),
501                                                        49 => "@".to_string(),
502                                                        _ => "General".to_string(),
503                                                    }
504                                                }
505                                            };
506
507                                            use crate::style::NumberFormat;
508                                            let number_format =
509                                                NumberFormat::new(format_code).with_id(num_fmt_id);
510                                            style = style.with_number_format(number_format);
511                                        }
512                                    }
513                                    _ => {}
514                                }
515                            }
516
517                            // Also parse any nested elements like alignment and protection
518                            let mut nested_buf = Vec::with_capacity(512);
519                            loop {
520                                nested_buf.clear();
521                                match xml.read_event_into(&mut nested_buf) {
522                                    Ok(Event::Start(ref nested_e)) => match nested_e
523                                        .local_name()
524                                        .as_ref()
525                                    {
526                                        b"alignment" => {
527                                            let alignment =
528                                                style_parser::parse_alignment(&mut xml, nested_e)?;
529                                            style = style.with_alignment(alignment);
530                                        }
531                                        b"protection" => {
532                                            let protection =
533                                                style_parser::parse_protection(&mut xml, nested_e)?;
534                                            style = style.with_protection(protection);
535                                        }
536                                        _ => {
537                                            // Skip unknown nested elements
538                                            xml.read_to_end_into(nested_e.name(), &mut Vec::new())?;
539                                        }
540                                    },
541                                    Ok(Event::End(ref end_e))
542                                        if end_e.local_name().as_ref() == b"xf" =>
543                                    {
544                                        break
545                                    }
546                                    Ok(Event::Eof) => return Err(XlsxError::XmlEof("xf")),
547                                    Err(e) => return Err(XlsxError::Xml(e)),
548                                    _ => {}
549                                }
550                            }
551
552                            self.styles.push(style);
553
554                            // Also add format for backward compatibility
555                            self.formats.push(num_fmt_id_bytes.map_or(
556                                CellFormat::Other,
557                                |id_bytes| match number_formats.get(&id_bytes) {
558                                    Some(fmt) => detect_custom_number_format(fmt),
559                                    None => builtin_format_by_id(&id_bytes),
560                                },
561                            ));
562                        }
563                        Ok(Event::End(e)) if e.local_name().as_ref() == b"cellXfs" => break,
564                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("cellXfs")),
565                        Err(e) => return Err(XlsxError::Xml(e)),
566                        _ => (),
567                    }
568                },
569                Ok(Event::End(e)) if e.local_name().as_ref() == b"styleSheet" => break,
570                Ok(Event::Eof) => return Err(XlsxError::XmlEof("styleSheet")),
571                Err(e) => return Err(XlsxError::Xml(e)),
572                _ => (),
573            }
574        }
575        Ok(())
576    }
577
578    fn read_workbook(
579        &mut self,
580        relationships: &BTreeMap<Vec<u8>, String>,
581    ) -> Result<(), XlsxError> {
582        let mut xml = match xml_reader(&mut self.zip, "xl/workbook.xml") {
583            None => return Ok(()),
584            Some(x) => x?,
585        };
586        let mut defined_names = Vec::new();
587        let mut buf = Vec::with_capacity(1024);
588        let mut val_buf = Vec::with_capacity(1024);
589        loop {
590            buf.clear();
591            match xml.read_event_into(&mut buf) {
592                Ok(Event::Start(e)) if e.local_name().as_ref() == b"sheet" => {
593                    let mut name = String::new();
594                    let mut path = String::new();
595                    let mut visible = SheetVisible::Visible;
596                    for a in e.attributes() {
597                        let a = a?;
598                        match a {
599                            Attribute {
600                                key: QName(b"name"),
601                                ..
602                            } => {
603                                name = a.decode_and_unescape_value(xml.decoder())?.to_string();
604                            }
605                            Attribute {
606                                key: QName(b"state"),
607                                ..
608                            } => {
609                                visible = match a.decode_and_unescape_value(xml.decoder())?.as_ref()
610                                {
611                                    "visible" => SheetVisible::Visible,
612                                    "hidden" => SheetVisible::Hidden,
613                                    "veryHidden" => SheetVisible::VeryHidden,
614                                    v => {
615                                        return Err(XlsxError::Unrecognized {
616                                            typ: "sheet:state",
617                                            val: v.to_string(),
618                                        })
619                                    }
620                                }
621                            }
622                            Attribute {
623                                key: QName(b"r:id" | b"relationships:id"),
624                                value: v,
625                            } => {
626                                let r = &relationships
627                                    .get(&*v)
628                                    .ok_or(XlsxError::RelationshipNotFound)?[..];
629                                // target may have prepended "/xl/" or "xl/" path;
630                                // strip if present
631                                path = if r.starts_with("/xl/") {
632                                    r[1..].to_string()
633                                } else if r.starts_with("xl/") {
634                                    r.to_string()
635                                } else {
636                                    format!("xl/{r}")
637                                };
638                            }
639                            _ => (),
640                        }
641                    }
642                    let typ = match path.split('/').nth(1) {
643                        Some("worksheets") => SheetType::WorkSheet,
644                        Some("chartsheets") => SheetType::ChartSheet,
645                        Some("dialogsheets") => SheetType::DialogSheet,
646                        _ => {
647                            return Err(XlsxError::Unrecognized {
648                                typ: "sheet:type",
649                                val: path.to_string(),
650                            })
651                        }
652                    };
653                    self.metadata.sheets.push(Sheet {
654                        name: name.to_string(),
655                        typ,
656                        visible,
657                    });
658                    self.sheets.push((name, path));
659                }
660                Ok(Event::Start(e)) if e.name().as_ref() == b"workbookPr" => {
661                    self.is_1904 = match e.try_get_attribute("date1904")? {
662                        Some(c) => ["1", "true"].contains(
663                            &c.decode_and_unescape_value(xml.decoder())
664                                .map_err(XlsxError::Xml)?
665                                .as_ref(),
666                        ),
667                        None => false,
668                    };
669                }
670                Ok(Event::Start(e)) if e.local_name().as_ref() == b"definedName" => {
671                    if let Some(a) = e
672                        .attributes()
673                        .filter_map(std::result::Result::ok)
674                        .find(|a| a.key == QName(b"name"))
675                    {
676                        let name = a.decode_and_unescape_value(xml.decoder())?.to_string();
677                        val_buf.clear();
678                        let mut value = String::new();
679                        loop {
680                            match xml.read_event_into(&mut val_buf)? {
681                                Event::Text(t) => value.push_str(&t.xml10_content()?),
682                                Event::GeneralRef(e) => unescape_entity_to_buffer(&e, &mut value)?,
683                                Event::End(end) if end.name() == e.name() => break,
684                                Event::Eof => return Err(XlsxError::XmlEof("workbook")),
685                                _ => (),
686                            }
687                        }
688                        defined_names.push((name, value));
689                    }
690                }
691                Ok(Event::End(e)) if e.local_name().as_ref() == b"workbook" => break,
692                Ok(Event::Eof) => return Err(XlsxError::XmlEof("workbook")),
693                Err(e) => return Err(XlsxError::Xml(e)),
694                _ => (),
695            }
696        }
697        self.metadata.names = defined_names;
698        Ok(())
699    }
700
701    fn read_relationships(&mut self) -> Result<BTreeMap<Vec<u8>, String>, XlsxError> {
702        let mut xml = match xml_reader(&mut self.zip, "xl/_rels/workbook.xml.rels") {
703            None => {
704                return Err(XlsxError::FileNotFound(
705                    "xl/_rels/workbook.xml.rels".to_string(),
706                ));
707            }
708            Some(x) => x?,
709        };
710        let mut relationships = BTreeMap::new();
711        let mut buf = Vec::with_capacity(64);
712        loop {
713            buf.clear();
714            match xml.read_event_into(&mut buf) {
715                Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => {
716                    let mut id = Vec::new();
717                    let mut target = String::new();
718                    for a in e.attributes() {
719                        match a? {
720                            Attribute {
721                                key: QName(b"Id"),
722                                value: v,
723                            } => id.extend_from_slice(&v),
724                            Attribute {
725                                key: QName(b"Target"),
726                                value: v,
727                            } => target = xml.decoder().decode(&v)?.into_owned(),
728                            _ => (),
729                        }
730                    }
731                    relationships.insert(id, target);
732                }
733                Ok(Event::End(e)) if e.local_name().as_ref() == b"Relationships" => break,
734                Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
735                Err(e) => return Err(XlsxError::Xml(e)),
736                _ => (),
737            }
738        }
739        Ok(relationships)
740    }
741
742    // sheets must be added before this is called!!
743    fn read_table_metadata(&mut self) -> Result<(), XlsxError> {
744        let mut new_tables = Vec::new();
745        for (sheet_name, sheet_path) in &self.sheets {
746            let last_folder_index = sheet_path.rfind('/').expect("should be in a folder");
747            let (base_folder, file_name) = sheet_path.split_at(last_folder_index);
748            let rel_path = format!("{base_folder}/_rels{file_name}.rels");
749
750            let mut table_locations = Vec::new();
751            let mut buf = Vec::with_capacity(64);
752            // we need another mutable borrow of self.zip later so we enclose this borrow within braces
753            {
754                let mut xml = match xml_reader(&mut self.zip, &rel_path) {
755                    None => continue,
756                    Some(x) => x?,
757                };
758                loop {
759                    buf.clear();
760                    match xml.read_event_into(&mut buf) {
761                        Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => {
762                            let mut id = Vec::new();
763                            let mut target = String::new();
764                            let mut table_type = false;
765                            for a in e.attributes() {
766                                match a? {
767                                    Attribute {
768                                        key: QName(b"Id"),
769                                        value: v,
770                                    } => id.extend_from_slice(&v),
771                                    Attribute {
772                                        key: QName(b"Target"),
773                                        value: v,
774                                    } => target = xml.decoder().decode(&v)?.into_owned(),
775                                    Attribute {
776                                        key: QName(b"Type"),
777                                        value: v,
778                                    } => table_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"[..],
779                                    _ => (),
780                                }
781                            }
782                            if table_type {
783                                if target.starts_with("../") {
784                                    // Relative path.
785                                    let new_index =
786                                        base_folder.rfind('/').expect("Must be a parent folder");
787                                    let full_path =
788                                        format!("{}{}", &base_folder[..new_index], &target[2..]);
789                                    table_locations.push(full_path);
790                                } else if let Some(stripped) = target.strip_prefix('/') {
791                                    // Absolute path.
792                                    table_locations.push(stripped.to_string());
793                                } else if !target.is_empty() {
794                                    // Assume absolute path without leading slash.
795                                    table_locations.push(target);
796                                }
797                            }
798                        }
799                        Ok(Event::End(e)) if e.local_name().as_ref() == b"Relationships" => break,
800                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
801                        Err(e) => return Err(XlsxError::Xml(e)),
802                        _ => (),
803                    }
804                }
805            }
806            for table_file in table_locations {
807                let mut xml = match xml_reader(&mut self.zip, &table_file) {
808                    None => continue,
809                    Some(x) => x?,
810                };
811                let mut column_names = Vec::new();
812                let mut table_meta = InnerTableMetadata::new();
813                loop {
814                    buf.clear();
815                    match xml.read_event_into(&mut buf) {
816                        Ok(Event::Start(e)) if e.local_name().as_ref() == b"table" => {
817                            for a in e.attributes() {
818                                match a? {
819                                    Attribute {
820                                        key: QName(b"displayName"),
821                                        value: v,
822                                    } => {
823                                        table_meta.display_name =
824                                            xml.decoder().decode(&v)?.into_owned();
825                                    }
826                                    Attribute {
827                                        key: QName(b"ref"),
828                                        value: v,
829                                    } => {
830                                        table_meta.ref_cells =
831                                            xml.decoder().decode(&v)?.into_owned();
832                                    }
833                                    Attribute {
834                                        key: QName(b"headerRowCount"),
835                                        value: v,
836                                    } => {
837                                        table_meta.header_row_count =
838                                            xml.decoder().decode(&v)?.parse()?;
839                                    }
840                                    Attribute {
841                                        key: QName(b"totalsRowCount"),
842                                        value: v,
843                                    } => {
844                                        table_meta.totals_row_count =
845                                            xml.decoder().decode(&v)?.parse()?;
846                                    }
847                                    _ => (),
848                                }
849                            }
850                        }
851                        Ok(Event::Start(e)) if e.local_name().as_ref() == b"tableColumn" => {
852                            for a in e.attributes().flatten() {
853                                if let Attribute {
854                                    key: QName(b"name"),
855                                    value: v,
856                                } = a
857                                {
858                                    column_names.push(xml.decoder().decode(&v)?.into_owned());
859                                }
860                            }
861                        }
862                        Ok(Event::End(e)) if e.local_name().as_ref() == b"table" => break,
863                        Ok(Event::Eof) => return Err(XlsxError::XmlEof("Table")),
864                        Err(e) => return Err(XlsxError::Xml(e)),
865                        _ => (),
866                    }
867                }
868                let mut dims = get_dimension(table_meta.ref_cells.as_bytes())?;
869                if table_meta.header_row_count != 0 {
870                    dims.start.0 += table_meta.header_row_count;
871                }
872                if table_meta.totals_row_count != 0 {
873                    dims.end.0 -= table_meta.header_row_count;
874                }
875
876                new_tables.push((
877                    table_meta.display_name,
878                    sheet_name.clone(),
879                    column_names,
880                    dims,
881                ));
882            }
883        }
884        self.tables = Some(new_tables);
885        Ok(())
886    }
887
888    // Read pictures.
889    #[cfg(feature = "picture")]
890    fn read_pictures(&mut self) -> Result<(), XlsxError> {
891        let mut pics = Vec::new();
892        for i in 0..self.zip.len() {
893            let mut zfile = self.zip.by_index(i)?;
894            let zname = zfile.name();
895            if zname.starts_with("xl/media") {
896                if let Some(ext) = zname.split('.').next_back() {
897                    if [
898                        "emf", "wmf", "pict", "jpeg", "jpg", "png", "dib", "gif", "tiff", "eps",
899                        "bmp", "wpg",
900                    ]
901                    .contains(&ext)
902                    {
903                        let ext = ext.to_string();
904                        let mut buf: Vec<u8> = Vec::new();
905                        zfile.read_to_end(&mut buf)?;
906                        pics.push((ext, buf));
907                    }
908                }
909            }
910        }
911        if !pics.is_empty() {
912            self.pictures = Some(pics);
913        }
914        Ok(())
915    }
916
917    /// Get all Pivot Tables in a workbook.
918    ///
919    /// # Note
920    ///
921    /// This function is required before working with Pivot Table Data due to reliance on metadata in `PivotTableRef`.
922    pub fn pivot_tables(&mut self) -> Result<PivotTables, XlsxError>
923    where
924        RS: Read + Seek,
925    {
926        let mut pivot_tables = PivotTables::new();
927        for (sheet_name, sheet_path) in self.sheets.iter() {
928            for pivot_path in find_pivot_table_paths_from_sheet(&mut self.zip, sheet_path)?.iter() {
929                let name = find_pivot_name_from_pivot_path(&mut self.zip, pivot_path)?;
930                let definition_cache_path =
931                    find_pivot_cache_definitions_from_pivot(&mut self.zip, pivot_path)?;
932                let record_cache_path = find_pivot_cache_records_from_definitions(
933                    &mut self.zip,
934                    &definition_cache_path,
935                )?;
936
937                pivot_tables.push(PivotTableRef::new(
938                    name,
939                    sheet_name.to_string(),
940                    record_cache_path,
941                    definition_cache_path,
942                ));
943            }
944        }
945        Ok(pivot_tables)
946    }
947
948    /// Get an iterator over a pivot table's cached data.
949    ///
950    /// Invalid Pivot Table names will return None.
951    ///
952    /// # Examples
953    ///
954    /// An example of retrieving pivot data for a Pivot Table named PivotTable1 on sheet PivotSheet1.
955    ///
956    /// ```
957    /// use calamine::{open_workbook, Error, Xlsx};
958    ///
959    /// fn main() -> Result<(), Error> {
960    ///
961    ///     let path = "tests/pivots.xlsx";
962    ///
963    ///     // Open the workbook.
964    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
965    ///
966    ///     // Must retrieve necessary metadata before reading Pivot Table data.
967    ///     let pivot_tables = workbook.pivot_tables()?;
968    ///
969    ///    // Get the Pivot Table data by referencing the pivot table name and the worksheet it resides.
970    ///     for row in workbook.pivot_table_data(&pivot_tables, "PivotSheet1", "PivotTable1")? {
971    ///             // Do something.
972    ///     }
973    ///
974    ///     Ok(())
975    /// }
976    /// ```
977    ///
978    pub fn pivot_table_data(
979        &'_ mut self,
980        pivot_tables: &PivotTables,
981        sheet_name: &str,
982        pivot_table_name: &str,
983    ) -> Result<PivotCacheIter<'_, RS>, XlsxError> {
984        match pivot_tables.0.iter().find(|pivot_table| {
985            pivot_table.name() == pivot_table_name && pivot_table.sheet() == sheet_name
986        }) {
987            Some(pt_ref) => get_pivot_cache_iter(self, pt_ref),
988            None => Err(XlsxError::PivotTableNotFound(pivot_table_name.to_string())),
989        }
990    }
991
992    // sheets must be added before this is called!!
993    fn read_merged_regions(&mut self) -> Result<(), XlsxError> {
994        let mut regions = Vec::new();
995        for (sheet_name, sheet_path) in &self.sheets {
996            // we need another mutable borrow of self.zip later so we enclose this borrow within braces
997            {
998                let mut xml = match xml_reader(&mut self.zip, sheet_path) {
999                    None => continue,
1000                    Some(x) => x?,
1001                };
1002                let mut buf = Vec::new();
1003                loop {
1004                    buf.clear();
1005                    match xml.read_event_into(&mut buf) {
1006                        Ok(Event::Start(e)) if e.local_name() == QName(b"mergeCell").into() => {
1007                            if let Some(attr) = get_attribute(e.attributes(), QName(b"ref"))? {
1008                                let dimension = get_dimension(attr)?;
1009                                regions.push((
1010                                    sheet_name.to_string(),
1011                                    sheet_path.to_string(),
1012                                    dimension,
1013                                ));
1014                            }
1015                        }
1016                        Ok(Event::Eof) => break,
1017                        Err(e) => return Err(XlsxError::Xml(e)),
1018                        _ => (),
1019                    }
1020                }
1021            }
1022        }
1023        self.merged_regions = Some(regions);
1024        Ok(())
1025    }
1026
1027    #[inline]
1028    fn get_table_meta(&self, table_name: &str) -> Result<TableMetadata, XlsxError> {
1029        let match_table_meta = self
1030            .tables
1031            .as_ref()
1032            .expect("Tables must be loaded before they are referenced")
1033            .iter()
1034            .find(|(table, ..)| table == table_name)
1035            .ok_or_else(|| XlsxError::TableNotFound(table_name.into()))?;
1036
1037        let name = match_table_meta.0.to_owned();
1038        let sheet_name = match_table_meta.1.clone();
1039        let columns = match_table_meta.2.clone();
1040        let dimensions = Dimensions {
1041            start: match_table_meta.3.start,
1042            end: match_table_meta.3.end,
1043        };
1044
1045        Ok(TableMetadata {
1046            name,
1047            sheet_name,
1048            columns,
1049            dimensions,
1050        })
1051    }
1052
1053    /// Load the merged regions in the workbook.
1054    ///
1055    /// A merged region in Excel is a range of cells that have been merged to
1056    /// act as a single cell. It is often used to create headers or titles that
1057    /// span multiple columns or rows.
1058    ///
1059    /// This method must be called before accessing the merged regions using the
1060    /// methods:
1061    ///
1062    /// - [`Xlsx::merged_regions()`].
1063    /// - [`Xlsx::merged_regions_by_sheet()`].
1064    ///
1065    /// These methods are explained below.
1066    ///
1067    /// # Errors
1068    ///
1069    /// - [`XlsxError::Xml`].
1070    ///
1071    pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> {
1072        if self.merged_regions.is_none() {
1073            self.read_merged_regions()
1074        } else {
1075            Ok(())
1076        }
1077    }
1078
1079    /// Get the merged regions for all the worksheets in a workbook.
1080    ///
1081    /// The function returns a ref to a vector of tuples containing the sheet
1082    /// name, the sheet path, and the [`Dimensions`] of the merged region. The
1083    /// middle element of the tuple can generally be ignored.
1084    ///
1085    /// The [`Xlsx::load_merged_regions()`] method must be called before calling
1086    /// this method.
1087    ///
1088    /// # Examples
1089    ///
1090    /// An example of getting all the merged regions in an Excel workbook.
1091    ///
1092    /// ```
1093    /// use calamine::{open_workbook, Error, Xlsx};
1094    ///
1095    /// fn main() -> Result<(), Error> {
1096    ///     let path = "tests/merged_range.xlsx";
1097    ///
1098    ///     // Open the workbook.
1099    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1100    ///
1101    ///     // Load the merged regions in the workbook.
1102    ///     workbook.load_merged_regions()?;
1103    ///
1104    ///     // Get all the merged regions in the workbook.
1105    ///     let merged_regions = workbook.merged_regions();
1106    ///
1107    ///     // Print the sheet name and dimensions of each merged region.
1108    ///     for (sheet_name, _, dimensions) in merged_regions {
1109    ///         println!("{sheet_name}: {dimensions:?}");
1110    ///     }
1111    ///
1112    ///     Ok(())
1113    /// }
1114    ///
1115    /// ```
1116    ///
1117    /// Output:
1118    ///
1119    /// ```text
1120    /// Sheet1: Dimensions { start: (0, 7), end: (1, 7) }
1121    /// Sheet1: Dimensions { start: (0, 0), end: (1, 0) }
1122    /// Sheet1: Dimensions { start: (0, 1), end: (1, 1) }
1123    /// Sheet1: Dimensions { start: (0, 2), end: (1, 3) }
1124    /// Sheet1: Dimensions { start: (2, 2), end: (2, 3) }
1125    /// Sheet1: Dimensions { start: (3, 2), end: (3, 3) }
1126    /// Sheet1: Dimensions { start: (0, 4), end: (1, 4) }
1127    /// Sheet1: Dimensions { start: (0, 5), end: (1, 5) }
1128    /// Sheet1: Dimensions { start: (0, 6), end: (1, 6) }
1129    /// Sheet2: Dimensions { start: (0, 0), end: (3, 0) }
1130    /// Sheet2: Dimensions { start: (2, 2), end: (3, 3) }
1131    /// Sheet2: Dimensions { start: (0, 5), end: (3, 7) }
1132    /// Sheet2: Dimensions { start: (0, 1), end: (1, 1) }
1133    /// Sheet2: Dimensions { start: (0, 2), end: (1, 3) }
1134    /// Sheet2: Dimensions { start: (0, 4), end: (1, 4) }
1135    /// ```
1136    ///
1137    pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> {
1138        self.merged_regions
1139            .as_ref()
1140            .expect("Merged Regions must be loaded before the are referenced")
1141    }
1142
1143    /// Get the merged regions in a workbook by the sheet name.
1144    ///
1145    /// The function returns a vector of tuples containing the sheet name, the
1146    /// sheet path, and the [`Dimensions`] of the merged region. The first two
1147    /// elements of the tuple can generally be ignored.
1148    ///
1149    /// The [`Xlsx::load_merged_regions()`] method must be called before calling
1150    /// this method.
1151    ///
1152    /// # Parameters
1153    ///
1154    /// - `sheet_name`: The name of the worksheet to get the merged regions from.
1155    ///
1156    /// # Examples
1157    ///
1158    /// An example of getting the merged regions in an Excel workbook, by individual
1159    /// worksheet.
1160    ///
1161    /// ```
1162    /// use calamine::{open_workbook, Error, Reader, Xlsx};
1163    ///
1164    /// fn main() -> Result<(), Error> {
1165    ///     let path = "tests/merged_range.xlsx";
1166    ///
1167    ///     // Open the workbook.
1168    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1169    ///
1170    ///     // Get the names of all the sheets in the workbook.
1171    ///     let sheet_names = workbook.sheet_names();
1172    ///
1173    ///     // Load the merged regions in the workbook.
1174    ///     workbook.load_merged_regions()?;
1175    ///
1176    ///     for sheet_name in &sheet_names {
1177    ///         println!("{sheet_name}: ");
1178    ///
1179    ///         // Get the merged regions in the current sheet.
1180    ///         let merged_regions = workbook.merged_regions_by_sheet(sheet_name);
1181    ///
1182    ///         for (_, _, dimensions) in &merged_regions {
1183    ///             // Print the dimensions of each merged region.
1184    ///             println!("    {dimensions:?}");
1185    ///         }
1186    ///     }
1187    ///
1188    ///     Ok(())
1189    /// }
1190    ///
1191    /// ```
1192    ///
1193    /// Output:
1194    ///
1195    ///
1196    /// ```text
1197    /// Sheet1:
1198    ///     Dimensions { start: (0, 7), end: (1, 7) }
1199    ///     Dimensions { start: (0, 0), end: (1, 0) }
1200    ///     Dimensions { start: (0, 1), end: (1, 1) }
1201    ///     Dimensions { start: (0, 2), end: (1, 3) }
1202    ///     Dimensions { start: (2, 2), end: (2, 3) }
1203    ///     Dimensions { start: (3, 2), end: (3, 3) }
1204    ///     Dimensions { start: (0, 4), end: (1, 4) }
1205    ///     Dimensions { start: (0, 5), end: (1, 5) }
1206    ///     Dimensions { start: (0, 6), end: (1, 6) }
1207    /// Sheet2:
1208    ///     Dimensions { start: (0, 0), end: (3, 0) }
1209    ///     Dimensions { start: (2, 2), end: (3, 3) }
1210    ///     Dimensions { start: (0, 5), end: (3, 7) }
1211    ///     Dimensions { start: (0, 1), end: (1, 1) }
1212    ///     Dimensions { start: (0, 2), end: (1, 3) }
1213    ///     Dimensions { start: (0, 4), end: (1, 4) }
1214    /// ```
1215    ///
1216    pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> {
1217        self.merged_regions()
1218            .iter()
1219            .filter(|s| s.0 == name)
1220            .map(|(name, sheet, region)| (name, sheet, region))
1221            .collect()
1222    }
1223
1224    /// Load the worksheet tables from the XLSX file.
1225    ///
1226    /// Tables in Excel are a way of grouping a range of cells into a single
1227    /// entity that has common formatting or that can be referenced in formulas.
1228    /// In `calamine`, tables can be read as a [`Table`] object and converted to
1229    /// a data [`Range`] for further processing.
1230    ///
1231    /// Calamine does not automatically load table data from a workbook to avoid
1232    /// unnecessary overhead. Instead you must explicitly load the table data
1233    /// using the `Xlsx::load_tables()` method. Once the tables have been loaded
1234    /// the following methods can be used to extract and work with individual
1235    /// tables:
1236    ///
1237    /// - [`Xlsx::table_by_name()`].
1238    /// - [`Xlsx::table_by_name_ref()`].
1239    /// - [`Xlsx::table_names()`].
1240    /// - [`Xlsx::table_names_in_sheet()`].
1241    ///
1242    /// These methods are explained below. See also the [`Table`] struct for
1243    /// additional methods that can be used when working with tables.
1244    ///
1245    /// # Errors
1246    ///
1247    /// - [`XlsxError::XmlAttr`].
1248    /// - [`XlsxError::XmlEof`].
1249    /// - [`XlsxError::Xml`].
1250    ///
1251    ///
1252    pub fn load_tables(&mut self) -> Result<(), XlsxError> {
1253        if self.tables.is_none() {
1254            self.read_table_metadata()
1255        } else {
1256            Ok(())
1257        }
1258    }
1259
1260    /// Get the names of all the tables in the workbook.
1261    ///
1262    /// Read all the table names in the workbook. This can be used in
1263    /// conjunction with [`Xlsx::table_by_name()`] to iterate over the tables in
1264    /// the workbook.
1265    ///
1266    /// # Panics
1267    ///
1268    /// Panics if tables have not been loaded via [`Xlsx::load_tables()`].
1269    ///
1270    /// # Examples
1271    ///
1272    /// An example of getting the names of all the tables in an Excel workbook.
1273    ///
1274    /// ```
1275    /// use calamine::{open_workbook, Error, Xlsx};
1276    ///
1277    /// fn main() -> Result<(), Error> {
1278    ///     let path = "tests/table-multiple.xlsx";
1279    ///
1280    ///     // Open the workbook.
1281    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1282    ///
1283    ///     // Load the tables in the workbook.
1284    ///     workbook.load_tables()?;
1285    ///
1286    ///     // Get all the table names in the workbook.
1287    ///     let table_names = workbook.table_names();
1288    ///
1289    ///     // Check the table names.
1290    ///     assert_eq!(
1291    ///         table_names,
1292    ///         vec!["Inventory", "Pricing", "Sales_Bob", "Sales_Alice"]
1293    ///     );
1294    ///
1295    ///     Ok(())
1296    /// }
1297    /// ```
1298    ///
1299    pub fn table_names(&self) -> Vec<&String> {
1300        self.tables
1301            .as_ref()
1302            .expect("Tables must be loaded before they are referenced")
1303            .iter()
1304            .map(|(name, ..)| name)
1305            .collect()
1306    }
1307
1308    /// Get the names of all the tables in a worksheet.
1309    ///
1310    /// Read all the table names in a worksheet. This can be used in conjunction
1311    /// with [`Xlsx::table_by_name()`] to iterate over the tables in the
1312    /// worksheet.
1313    ///
1314    /// # Parameters
1315    ///
1316    /// - `sheet_name`: The name of the worksheet to get the table names from.
1317    ///
1318    /// # Panics
1319    ///
1320    /// Panics if tables have not been loaded via [`Xlsx::load_tables()`].
1321    ///
1322    /// # Examples
1323    ///
1324    /// An example of getting the names of all the tables in an Excel workbook,
1325    /// sheet by sheet.
1326    ///
1327    /// ```
1328    /// use calamine::{open_workbook, Error, Reader, Xlsx};
1329    ///
1330    /// fn main() -> Result<(), Error> {
1331    ///     let path = "tests/table-multiple.xlsx";
1332    ///
1333    ///     // Open the workbook.
1334    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1335    ///
1336    ///     // Get the names of all the sheets in the workbook.
1337    ///     let sheet_names = workbook.sheet_names();
1338    ///
1339    ///     // Load the tables in the workbook.
1340    ///     workbook.load_tables()?;
1341    ///
1342    ///     for sheet_name in &sheet_names {
1343    ///         // Get the table names in the current sheet.
1344    ///         let table_names = workbook.table_names_in_sheet(sheet_name);
1345    ///
1346    ///         // Print the associated table names.
1347    ///         println!("{sheet_name} contains tables: {table_names:?}");
1348    ///     }
1349    ///
1350    ///     Ok(())
1351    /// }
1352    /// ```
1353    ///
1354    /// Output:
1355    ///
1356    /// ```text
1357    /// Sheet1 contains tables: ["Inventory"]
1358    /// Sheet2 contains tables: ["Pricing"]
1359    /// Sheet3 contains tables: ["Sales_Bob", "Sales_Alice"]
1360    /// ```
1361    ///
1362    pub fn table_names_in_sheet(&self, sheet_name: &str) -> Vec<&String> {
1363        self.tables
1364            .as_ref()
1365            .expect("Tables must be loaded before they are referenced")
1366            .iter()
1367            .filter(|(_, sheet, ..)| sheet == sheet_name)
1368            .map(|(name, ..)| name)
1369            .collect()
1370    }
1371
1372    /// Get a worksheet table by name.
1373    ///
1374    /// This method retrieves a [`Table`] from the workbook by its name. The
1375    /// table will contain an owned copy of the worksheet data in the table
1376    /// range.
1377    ///
1378    /// # Parameters
1379    ///
1380    /// - `table_name`: The name of the table to retrieve.
1381    ///
1382    /// # Errors
1383    ///
1384    /// - [`XlsxError::TableNotFound`].
1385    /// - [`XlsxError::NotAWorksheet`].
1386    ///
1387    /// # Panics
1388    ///
1389    /// Panics if tables have not been loaded via [`Xlsx::load_tables()`].
1390    ///
1391    /// # Examples
1392    ///
1393    /// An example of getting an Excel worksheet table by its name. The file in
1394    /// this example contains 4 tables spread across 3 worksheets. This example
1395    /// gets an owned copy of the worksheet data in the table area.
1396    ///
1397    /// ```
1398    /// use calamine::{open_workbook, Data, Error, Xlsx};
1399    ///
1400    /// fn main() -> Result<(), Error> {
1401    ///     let path = "tests/table-multiple.xlsx";
1402    ///
1403    ///     // Open the workbook.
1404    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1405    ///
1406    ///     // Load the tables in the workbook.
1407    ///     workbook.load_tables()?;
1408    ///
1409    ///     // Get the table by name.
1410    ///     let table = workbook.table_by_name("Inventory")?;
1411    ///
1412    ///     // Get the data range of the table. The data type is `&Range<Data>`.
1413    ///     let data_range = table.data();
1414    ///
1415    ///     // Do something with the data using the `Range` APIs. In this case
1416    ///     // we will just check for a cell value.
1417    ///     assert_eq!(
1418    ///         data_range.get((0, 1)),
1419    ///         Some(&Data::String("Apple".to_string()))
1420    ///     );
1421    ///
1422    ///     Ok(())
1423    /// }
1424    /// ```
1425    ///
1426    pub fn table_by_name(&mut self, table_name: &str) -> Result<Table<Data>, XlsxError> {
1427        let TableMetadata {
1428            name,
1429            sheet_name,
1430            columns,
1431            dimensions,
1432        } = self.get_table_meta(table_name)?;
1433        let Dimensions { start, end } = dimensions;
1434        let range = self.worksheet_range(&sheet_name)?;
1435        let tbl_rng = range.range(start, end);
1436
1437        Ok(Table {
1438            name,
1439            sheet_name,
1440            columns,
1441            data: tbl_rng,
1442        })
1443    }
1444
1445    /// Get a worksheet table by name, with referenced data.
1446    ///
1447    /// This method retrieves a [`Table`] from the workbook by its name. The
1448    /// table will contain an borrowed/referenced copy of the worksheet data in
1449    /// the table range. This is more efficient than [`Xlsx::table_by_name()`]
1450    /// for large tables.
1451    ///
1452    /// # Parameters
1453    ///
1454    /// - `table_name`: The name of the table to retrieve.
1455    ///
1456    /// # Errors
1457    ///
1458    /// - [`XlsxError::TableNotFound`].
1459    /// - [`XlsxError::NotAWorksheet`].
1460    ///
1461    /// # Panics
1462    ///
1463    /// Panics if tables have not been loaded via [`Xlsx::load_tables()`].
1464    ///
1465    /// # Examples
1466    ///
1467    /// An example of getting an Excel worksheet table by its name. The file in
1468    /// this example contains 4 tables spread across 3 worksheets. This example
1469    /// gets a borrowed/referenced copy of the worksheet data in the table area.
1470    ///
1471    /// ```
1472    /// use calamine::{open_workbook, DataRef, Error, Xlsx};
1473    ///
1474    /// fn main() -> Result<(), Error> {
1475    ///     let path = "tests/table-multiple.xlsx";
1476    ///
1477    ///     // Open the workbook.
1478    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1479    ///
1480    ///     // Load the tables in the workbook.
1481    ///     workbook.load_tables()?;
1482    ///
1483    ///     // Get the table by name.
1484    ///     let table = workbook.table_by_name_ref("Inventory")?;
1485    ///
1486    ///     // Get the data range of the table. The data type is `&Range<DataRef<'_>>`.
1487    ///     let data_range = table.data();
1488    ///
1489    ///     // Do something with the data using the `Range` APIs. In this case
1490    ///     // we will just check for a cell value.
1491    ///     assert_eq!(
1492    ///         data_range.get((0, 1)),
1493    ///         Some(&DataRef::SharedString("Apple"))
1494    ///     );
1495    ///
1496    ///     Ok(())
1497    /// }
1498    /// ```
1499    ///
1500    pub fn table_by_name_ref(&mut self, table_name: &str) -> Result<Table<DataRef<'_>>, XlsxError> {
1501        let TableMetadata {
1502            name,
1503            sheet_name,
1504            columns,
1505            dimensions,
1506        } = self.get_table_meta(table_name)?;
1507        let Dimensions { start, end } = dimensions;
1508        let range = self.worksheet_range_ref(&sheet_name)?;
1509        let tbl_rng = range.range(start, end);
1510
1511        Ok(Table {
1512            name,
1513            sheet_name,
1514            columns,
1515            data: tbl_rng,
1516        })
1517    }
1518
1519    /// Get the merged cells/regions in a workbook by the sheet name.
1520    ///
1521    /// Merged cells in Excel are a range of cells that have been merged to act
1522    /// as a single cell. It is often used to create headers or titles that span
1523    /// multiple columns or rows.
1524    ///
1525    /// The function returns a vector of [`Dimensions`] of the merged region.
1526    /// This is wrapped in a [`Result`] and an [`Option`].
1527    ///
1528    /// # Parameters
1529    ///
1530    /// - `sheet_name`: The name of the worksheet to get the merged regions
1531    ///   from.
1532    ///
1533    /// # Errors
1534    ///
1535    /// - [`XlsxError::Xml`].
1536    ///
1537    /// # Examples
1538    ///
1539    /// An example of getting the merged regions/cells in an Excel workbook, by
1540    /// individual worksheet.
1541    ///
1542    /// ```
1543    /// use calamine::{open_workbook, Error, Reader, Xlsx};
1544    ///
1545    /// fn main() -> Result<(), Error> {
1546    ///     let path = "tests/merged_range.xlsx";
1547    ///
1548    ///     // Open the workbook.
1549    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1550    ///
1551    ///     // Get the names of all the sheets in the workbook.
1552    ///     let sheet_names = workbook.sheet_names();
1553    ///
1554    ///     for sheet_name in &sheet_names {
1555    ///         println!("{sheet_name}: ");
1556    ///
1557    ///         // Get the merged cells in the current sheet.
1558    ///         let merge_cells = workbook.worksheet_merge_cells(sheet_name);
1559    ///
1560    ///         if let Some(dimensions) = merge_cells {
1561    ///             let dimensions = dimensions?;
1562    ///
1563    ///             // Print the dimensions of each merged region.
1564    ///             for dimension in &dimensions {
1565    ///                 println!("    {dimension:?}");
1566    ///             }
1567    ///         }
1568    ///     }
1569    ///
1570    ///     Ok(())
1571    /// }
1572    ///
1573    /// ```
1574    ///
1575    /// Output:
1576    ///
1577    /// ```text
1578    /// Sheet1:
1579    ///     Dimensions { start: (0, 7), end: (1, 7) }
1580    ///     Dimensions { start: (0, 0), end: (1, 0) }
1581    ///     Dimensions { start: (0, 1), end: (1, 1) }
1582    ///     Dimensions { start: (0, 2), end: (1, 3) }
1583    ///     Dimensions { start: (2, 2), end: (2, 3) }
1584    ///     Dimensions { start: (3, 2), end: (3, 3) }
1585    ///     Dimensions { start: (0, 4), end: (1, 4) }
1586    ///     Dimensions { start: (0, 5), end: (1, 5) }
1587    ///     Dimensions { start: (0, 6), end: (1, 6) }
1588    /// Sheet2:
1589    ///     Dimensions { start: (0, 0), end: (3, 0) }
1590    ///     Dimensions { start: (2, 2), end: (3, 3) }
1591    ///     Dimensions { start: (0, 5), end: (3, 7) }
1592    ///     Dimensions { start: (0, 1), end: (1, 1) }
1593    ///     Dimensions { start: (0, 2), end: (1, 3) }
1594    ///     Dimensions { start: (0, 4), end: (1, 4) }
1595    /// ```
1596    ///
1597    pub fn worksheet_merge_cells(
1598        &mut self,
1599        name: &str,
1600    ) -> Option<Result<Vec<Dimensions>, XlsxError>> {
1601        let (_, path) = self.sheets.iter().find(|(n, _)| n == name)?;
1602        let xml = xml_reader(&mut self.zip, path);
1603
1604        xml.map(|xml| {
1605            let mut xml = xml?;
1606            let mut merge_cells = Vec::new();
1607            let mut buffer = Vec::new();
1608
1609            loop {
1610                buffer.clear();
1611
1612                match xml.read_event_into(&mut buffer) {
1613                    Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCells" => {
1614                        if let Ok(cells) = read_merge_cells(&mut xml) {
1615                            merge_cells = cells;
1616                        }
1617
1618                        break;
1619                    }
1620                    Ok(Event::Eof) => break,
1621                    Err(e) => return Err(XlsxError::Xml(e)),
1622                    _ => (),
1623                }
1624            }
1625
1626            Ok(merge_cells)
1627        })
1628    }
1629
1630    /// Get the merged cells/regions in a workbook by the sheet index.
1631    ///
1632    /// Merged cells in Excel are a range of cells that have been merged to act
1633    /// as a single cell. It is often used to create headers or titles that span
1634    /// multiple columns or rows.
1635    ///
1636    /// The function returns a vector of [`Dimensions`] of the merged region.
1637    /// This is wrapped in a [`Result`] and an [`Option`].
1638    ///
1639    /// # Parameters
1640    ///
1641    /// - `sheet_index`: The zero index of the worksheet to get the merged
1642    ///   regions from.
1643    ///
1644    /// # Errors
1645    ///
1646    /// - [`XlsxError::Xml`].
1647    ///
1648    /// # Examples
1649    ///
1650    /// An example of getting the merged regions/cells in an Excel workbook, by
1651    /// worksheet index.
1652    ///
1653    /// ```
1654    /// use calamine::{open_workbook, Error, Xlsx};
1655    ///
1656    /// fn main() -> Result<(), Error> {
1657    ///     let path = "tests/merged_range.xlsx";
1658    ///
1659    ///     // Open the workbook.
1660    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1661    ///
1662    ///     // Get the merged cells in the first worksheet.
1663    ///     let merge_cells = workbook.worksheet_merge_cells_at(0);
1664    ///
1665    ///     if let Some(dimensions) = merge_cells {
1666    ///         let dimensions = dimensions?;
1667    ///
1668    ///         // Print the dimensions of each merged region.
1669    ///         for dimension in &dimensions {
1670    ///             println!("{dimension:?}");
1671    ///         }
1672    ///     }
1673    ///
1674    ///     Ok(())
1675    /// }
1676    ///
1677    /// ```
1678    ///
1679    /// Output:
1680    ///
1681    /// ```text
1682    /// Dimensions { start: (0, 7), end: (1, 7) }
1683    /// Dimensions { start: (0, 0), end: (1, 0) }
1684    /// Dimensions { start: (0, 1), end: (1, 1) }
1685    /// Dimensions { start: (0, 2), end: (1, 3) }
1686    /// Dimensions { start: (2, 2), end: (2, 3) }
1687    /// Dimensions { start: (3, 2), end: (3, 3) }
1688    /// Dimensions { start: (0, 4), end: (1, 4) }
1689    /// Dimensions { start: (0, 5), end: (1, 5) }
1690    /// Dimensions { start: (0, 6), end: (1, 6) }
1691    /// ```
1692    ///
1693    pub fn worksheet_merge_cells_at(
1694        &mut self,
1695        sheet_index: usize,
1696    ) -> Option<Result<Vec<Dimensions>, XlsxError>> {
1697        let name = self
1698            .metadata()
1699            .sheets
1700            .get(sheet_index)
1701            .map(|sheet| sheet.name.clone())?;
1702
1703        self.worksheet_merge_cells(&name)
1704    }
1705
1706    /// Get the cells reader for a worksheet.
1707    ///
1708    /// This function returns a [`XlsxCellReader`] for the specified worksheet.
1709    /// The reader can be used to iterate over the cells in the worksheet.
1710    ///
1711    /// # Parameters
1712    ///
1713    /// - `name`: The name of the worksheet to get the cells reader for.
1714    ///
1715    /// # Errors
1716    ///
1717    /// - [`XlsxError::WorksheetNotFound`].
1718    ///
1719    /// # Examples
1720    ///
1721    /// An example of getting the cells reader for a worksheet.
1722    ///
1723    /// ```
1724    /// use calamine::{open_workbook, Error, Xlsx};
1725    ///
1726    /// fn main() -> Result<(), Error> {
1727    ///     let path = "tests/merged_range.xlsx";
1728    ///
1729    ///     // Open the workbook.
1730    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
1731    ///
1732    ///     // Get the cells reader for the first worksheet.
1733    ///     let mut reader = workbook.worksheet_cells_reader("Sheet1")?;
1734    ///
1735    ///     // Iterate over the cells in the worksheet.
1736    ///     while let Some(cell) = reader.next_cell()? {
1737    ///         println!("{:?}", cell);
1738    ///     }
1739    ///
1740    ///     Ok(())
1741    /// }
1742    /// ```
1743    ///
1744    /// Output:
1745    ///
1746    /// ```text
1747    /// Cell {
1748    ///     row: 0,
1749    ///     col: 0,
1750    ///     val: "Hello, world!".to_string(),
1751    ///     err: None,
1752    ///     typ: DataType::String,
1753    /// }
1754    /// ```
1755    pub fn worksheet_cells_reader<'a>(
1756        &'a mut self,
1757        name: &str,
1758    ) -> Result<XlsxCellReader<'a, RS>, XlsxError> {
1759        let (_, path) = self
1760            .sheets
1761            .iter()
1762            .find(|&(n, _)| n == name)
1763            .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))?;
1764        let xml = xml_reader(&mut self.zip, path)
1765            .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))??;
1766        let is_1904 = self.is_1904;
1767        let strings = &self.strings;
1768        let formats = &self.formats;
1769        let styles = &self.styles;
1770        XlsxCellReader::new(xml, strings, formats, styles, is_1904)
1771    }
1772
1773    /// Get the styles for a worksheet.
1774    ///
1775    /// Get worksheet styles as an RLE-compressed [`StyleRange`].
1776    ///
1777    /// This function returns styles for all cells with explicit formatting,
1778    /// stored in run-length encoded format for memory efficiency.
1779    ///
1780    /// # Parameters
1781    ///
1782    /// - `name`: The name of the worksheet to get the styles for.
1783    ///
1784    /// # Example
1785    ///
1786    /// ```ignore
1787    /// let styles = xlsx.worksheet_style("Sheet1")?;
1788    /// println!("Unique styles: {}", styles.unique_style_count());
1789    /// println!("Compression ratio: {:.1}x", styles.compression_ratio());
1790    /// for (row, col, style) in styles.cells() {
1791    ///     // process style
1792    /// }
1793    /// ```
1794    pub fn worksheet_style(&mut self, name: &str) -> Result<StyleRange, XlsxError> {
1795        let mut cell_reader = match self.worksheet_cells_reader(name) {
1796            Ok(reader) => reader,
1797            Err(XlsxError::NotAWorksheet(typ)) => {
1798                warn!("'{typ}' not a worksheet");
1799                return Ok(StyleRange::empty());
1800            }
1801            Err(e) => return Err(e),
1802        };
1803
1804        let len = cell_reader.dimensions().len();
1805        let mut cells = Vec::new();
1806        if len < 100_000 {
1807            cells.reserve(len as usize);
1808        }
1809
1810        // Use zero-copy path: collect (row, col, style_id) without cloning styles
1811        while let Some((row, col, style_id)) = cell_reader.next_style_id()? {
1812            cells.push((row, col, style_id));
1813        }
1814
1815        // Get the palette from the cell_reader (clone once, not per cell)
1816        let palette = cell_reader.styles().to_vec();
1817
1818        Ok(StyleRange::from_style_ids(cells, palette))
1819    }
1820
1821    /// Get the layout for a worksheet.
1822    ///
1823    /// This function returns a [`WorksheetLayout`] for the specified worksheet.
1824    /// The layout contains the column widths and row heights for the cells in the worksheet.
1825    ///
1826    /// # Parameters
1827    ///
1828    /// - `name`: The name of the worksheet to get the layout for.
1829    ///
1830    pub fn worksheet_layout(&mut self, name: &str) -> Result<WorksheetLayout, XlsxError> {
1831        let (_, path) = self
1832            .sheets
1833            .iter()
1834            .find(|&(n, _)| n == name)
1835            .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))?;
1836
1837        let mut xml = xml_reader(&mut self.zip, path)
1838            .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))??;
1839
1840        let mut layout = WorksheetLayout::new();
1841        let mut buf = Vec::with_capacity(1024);
1842
1843        loop {
1844            buf.clear();
1845            match xml.read_event_into(&mut buf) {
1846                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"sheetFormatPr" => {
1847                    // Parse default column width and row height
1848                    for attr in e.attributes() {
1849                        let attr = attr.map_err(XlsxError::XmlAttr)?;
1850                        match attr.key.as_ref() {
1851                            b"defaultColWidth" => {
1852                                if let Ok(width_str) = xml.decoder().decode(&attr.value) {
1853                                    if let Ok(width) = width_str.parse::<f64>() {
1854                                        layout = layout.with_default_column_width(width);
1855                                    }
1856                                }
1857                            }
1858                            b"defaultRowHeight" => {
1859                                if let Ok(height_str) = xml.decoder().decode(&attr.value) {
1860                                    if let Ok(height) = height_str.parse::<f64>() {
1861                                        layout = layout.with_default_row_height(height);
1862                                    }
1863                                }
1864                            }
1865                            _ => {}
1866                        }
1867                    }
1868                }
1869                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"cols" => {
1870                    // Parse column definitions
1871                    loop {
1872                        buf.clear();
1873                        match xml.read_event_into(&mut buf) {
1874                            Ok(Event::Start(ref col_e))
1875                                if col_e.local_name().as_ref() == b"col" =>
1876                            {
1877                                let mut col_info = None;
1878                                let mut width = 0.0;
1879                                let mut custom_width = false;
1880                                let mut hidden = false;
1881                                let mut best_fit = false;
1882
1883                                for attr in col_e.attributes() {
1884                                    let attr = attr.map_err(XlsxError::XmlAttr)?;
1885                                    match attr.key.as_ref() {
1886                                        b"min" => {
1887                                            if let Ok(min_str) = xml.decoder().decode(&attr.value) {
1888                                                if let Ok(min_col) = min_str.parse::<u32>() {
1889                                                    col_info = Some(min_col - 1);
1890                                                    // Convert to 0-based
1891                                                }
1892                                            }
1893                                        }
1894                                        b"width" => {
1895                                            if let Ok(width_str) = xml.decoder().decode(&attr.value)
1896                                            {
1897                                                if let Ok(w) = width_str.parse::<f64>() {
1898                                                    width = w;
1899                                                }
1900                                            }
1901                                        }
1902                                        b"customWidth" => {
1903                                            custom_width = attr.value.as_ref() != b"0";
1904                                        }
1905                                        b"hidden" => {
1906                                            hidden = attr.value.as_ref() != b"0";
1907                                        }
1908                                        b"bestFit" => {
1909                                            best_fit = attr.value.as_ref() != b"0";
1910                                        }
1911                                        _ => {}
1912                                    }
1913                                }
1914
1915                                if let Some(col) = col_info {
1916                                    let column_width = ColumnWidth::new(col, width)
1917                                        .with_custom_width(custom_width)
1918                                        .with_hidden(hidden)
1919                                        .with_best_fit(best_fit);
1920                                    layout = layout.add_column_width(column_width);
1921                                }
1922                            }
1923                            Ok(Event::End(ref end_e)) if end_e.local_name().as_ref() == b"cols" => {
1924                                break;
1925                            }
1926                            Ok(Event::Eof) => return Err(XlsxError::XmlEof("cols")),
1927                            Err(e) => return Err(XlsxError::Xml(e)),
1928                            _ => {}
1929                        }
1930                    }
1931                }
1932                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"sheetData" => {
1933                    // Parse row definitions
1934                    loop {
1935                        buf.clear();
1936                        match xml.read_event_into(&mut buf) {
1937                            Ok(Event::Start(ref row_e))
1938                                if row_e.local_name().as_ref() == b"row" =>
1939                            {
1940                                let mut row_num = None;
1941                                let mut height = 0.0;
1942                                let mut custom_height = false;
1943                                let mut hidden = false;
1944                                let mut thick_top = false;
1945                                let mut thick_bottom = false;
1946
1947                                for attr in row_e.attributes() {
1948                                    let attr = attr.map_err(XlsxError::XmlAttr)?;
1949                                    match attr.key.as_ref() {
1950                                        b"r" => {
1951                                            if let Ok(row_str) = xml.decoder().decode(&attr.value) {
1952                                                if let Ok(r) = row_str.parse::<u32>() {
1953                                                    row_num = Some(r - 1); // Convert to 0-based
1954                                                }
1955                                            }
1956                                        }
1957                                        b"ht" => {
1958                                            if let Ok(height_str) =
1959                                                xml.decoder().decode(&attr.value)
1960                                            {
1961                                                if let Ok(h) = height_str.parse::<f64>() {
1962                                                    height = h;
1963                                                }
1964                                            }
1965                                        }
1966                                        b"customHeight" => {
1967                                            custom_height = attr.value.as_ref() != b"0";
1968                                        }
1969                                        b"hidden" => {
1970                                            hidden = attr.value.as_ref() != b"0";
1971                                        }
1972                                        b"thickTop" => {
1973                                            thick_top = attr.value.as_ref() != b"0";
1974                                        }
1975                                        b"thickBot" => {
1976                                            thick_bottom = attr.value.as_ref() != b"0";
1977                                        }
1978                                        _ => {}
1979                                    }
1980                                }
1981
1982                                // Only add row height if it's custom or has special properties
1983                                if let Some(row) = row_num {
1984                                    if custom_height
1985                                        || hidden
1986                                        || thick_top
1987                                        || thick_bottom
1988                                        || height > 0.0
1989                                    {
1990                                        let row_height = RowHeight::new(row, height)
1991                                            .with_custom_height(custom_height)
1992                                            .with_hidden(hidden)
1993                                            .with_thick_top(thick_top)
1994                                            .with_thick_bottom(thick_bottom);
1995                                        layout = layout.add_row_height(row_height);
1996                                    }
1997                                }
1998
1999                                // Skip to the end of this row element
2000                                xml.read_to_end_into(row_e.name(), &mut Vec::new())?;
2001                            }
2002                            Ok(Event::End(ref end_e))
2003                                if end_e.local_name().as_ref() == b"sheetData" =>
2004                            {
2005                                break;
2006                            }
2007                            Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
2008                            Err(e) => return Err(XlsxError::Xml(e)),
2009                            _ => {}
2010                        }
2011                    }
2012                    break; // We're done after processing sheetData
2013                }
2014                Ok(Event::Eof) => break,
2015                Err(e) => return Err(XlsxError::Xml(e)),
2016                _ => {}
2017            }
2018        }
2019
2020        Ok(layout)
2021    }
2022
2023    /// Get all worksheets in the workbook.
2024    ///
2025    /// This function returns a vector of tuples, where each tuple contains the name of a worksheet and the range of cells in the worksheet.
2026    ///
2027    /// # Returns
2028    ///
2029    /// A vector of tuples, where each tuple contains the name of a worksheet and the range of cells in the worksheet.
2030    ///
2031    pub fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
2032        let names = self
2033            .sheets
2034            .iter()
2035            .map(|(n, _)| n.clone())
2036            .collect::<Vec<_>>();
2037        names
2038            .into_iter()
2039            .filter_map(|n| {
2040                let rge = self.worksheet_range(&n).ok()?;
2041                Some((n, rge))
2042            })
2043            .collect()
2044    }
2045
2046    #[cfg(feature = "picture")]
2047    fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
2048        self.pictures.to_owned()
2049    }
2050}
2051
2052struct TableMetadata {
2053    name: String,
2054    sheet_name: String,
2055    columns: Vec<String>,
2056    dimensions: Dimensions,
2057}
2058
2059struct InnerTableMetadata {
2060    display_name: String,
2061    ref_cells: String,
2062    header_row_count: u32,
2063    totals_row_count: u32,
2064}
2065
2066impl InnerTableMetadata {
2067    fn new() -> Self {
2068        Self {
2069            display_name: String::new(),
2070            ref_cells: String::new(),
2071            header_row_count: 1,
2072            totals_row_count: 0,
2073        }
2074    }
2075}
2076
2077impl<RS: Read + Seek> Reader<RS> for Xlsx<RS> {
2078    type Error = XlsxError;
2079
2080    fn new(mut reader: RS) -> Result<Self, XlsxError> {
2081        check_for_password_protected(&mut reader)?;
2082
2083        let mut xlsx = Xlsx {
2084            zip: ZipArchive::new(reader)?,
2085            strings: Vec::new(),
2086            formats: Vec::new(),
2087            styles: Vec::new(),
2088            is_1904: false,
2089            sheets: Vec::new(),
2090            tables: None,
2091            metadata: Metadata::default(),
2092            #[cfg(feature = "picture")]
2093            pictures: None,
2094            merged_regions: None,
2095            options: XlsxOptions::default(),
2096        };
2097        xlsx.read_shared_strings()?;
2098        xlsx.read_styles()?;
2099        let relationships = xlsx.read_relationships()?;
2100        xlsx.read_workbook(&relationships)?;
2101        #[cfg(feature = "picture")]
2102        xlsx.read_pictures()?;
2103
2104        Ok(xlsx)
2105    }
2106
2107    fn with_header_row(&mut self, header_row: HeaderRow) -> &mut Self {
2108        self.options.header_row = header_row;
2109        self
2110    }
2111
2112    fn vba_project(&mut self) -> Result<Option<VbaProject>, XlsxError> {
2113        let Some(mut f) = self.zip.by_name("xl/vbaProject.bin").ok() else {
2114            return Ok(None);
2115        };
2116        let len = f.size() as usize;
2117        let vba = VbaProject::new(&mut f, len)?;
2118        Ok(Some(vba))
2119    }
2120
2121    fn metadata(&self) -> &Metadata {
2122        &self.metadata
2123    }
2124
2125    fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, XlsxError> {
2126        let rge = self.worksheet_range_ref(name)?;
2127        let inner = rge.inner.into_iter().map(|v| v.into()).collect();
2128        Ok(Range {
2129            start: rge.start,
2130            end: rge.end,
2131            inner,
2132        })
2133    }
2134
2135    fn worksheet_formula(&mut self, name: &str) -> Result<Range<String>, XlsxError> {
2136        let mut cell_reader = match self.worksheet_cells_reader(name) {
2137            Ok(reader) => reader,
2138            Err(XlsxError::NotAWorksheet(typ)) => {
2139                warn!("'{typ}' not a worksheet");
2140                return Ok(Range::default());
2141            }
2142            Err(e) => return Err(e),
2143        };
2144        let len = cell_reader.dimensions().len();
2145        let mut cells = Vec::new();
2146        if len < 100_000 {
2147            cells.reserve(len as usize);
2148        }
2149        while let Some(cell) = cell_reader.next_formula()? {
2150            if !cell.val.is_empty() {
2151                cells.push(cell);
2152            }
2153        }
2154        Ok(Range::from_sparse(cells))
2155    }
2156
2157    fn worksheet_style(&mut self, name: &str) -> Result<StyleRange, XlsxError> {
2158        Xlsx::worksheet_style(self, name)
2159    }
2160
2161    fn worksheet_layout(&mut self, name: &str) -> Result<WorksheetLayout, XlsxError> {
2162        let (_, path) = self
2163            .sheets
2164            .iter()
2165            .find(|&(n, _)| n == name)
2166            .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))?;
2167
2168        let mut xml = xml_reader(&mut self.zip, path)
2169            .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))??;
2170
2171        let mut layout = WorksheetLayout::new();
2172        let mut buf = Vec::with_capacity(1024);
2173
2174        loop {
2175            buf.clear();
2176            match xml.read_event_into(&mut buf) {
2177                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"sheetFormatPr" => {
2178                    // Parse default column width and row height
2179                    for attr in e.attributes() {
2180                        let attr = attr.map_err(XlsxError::XmlAttr)?;
2181                        match attr.key.as_ref() {
2182                            b"defaultColWidth" => {
2183                                if let Ok(width_str) = xml.decoder().decode(&attr.value) {
2184                                    if let Ok(width) = width_str.parse::<f64>() {
2185                                        layout = layout.with_default_column_width(width);
2186                                    }
2187                                }
2188                            }
2189                            b"defaultRowHeight" => {
2190                                if let Ok(height_str) = xml.decoder().decode(&attr.value) {
2191                                    if let Ok(height) = height_str.parse::<f64>() {
2192                                        layout = layout.with_default_row_height(height);
2193                                    }
2194                                }
2195                            }
2196                            _ => {}
2197                        }
2198                    }
2199                }
2200                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"cols" => {
2201                    // Parse column definitions
2202                    loop {
2203                        buf.clear();
2204                        match xml.read_event_into(&mut buf) {
2205                            Ok(Event::Start(ref col_e))
2206                                if col_e.local_name().as_ref() == b"col" =>
2207                            {
2208                                let mut col_info = None;
2209                                let mut width = 0.0;
2210                                let mut custom_width = false;
2211                                let mut hidden = false;
2212                                let mut best_fit = false;
2213
2214                                for attr in col_e.attributes() {
2215                                    let attr = attr.map_err(XlsxError::XmlAttr)?;
2216                                    match attr.key.as_ref() {
2217                                        b"min" => {
2218                                            if let Ok(min_str) = xml.decoder().decode(&attr.value) {
2219                                                if let Ok(min_col) = min_str.parse::<u32>() {
2220                                                    col_info = Some(min_col - 1);
2221                                                    // Convert to 0-based
2222                                                }
2223                                            }
2224                                        }
2225                                        b"width" => {
2226                                            if let Ok(width_str) = xml.decoder().decode(&attr.value)
2227                                            {
2228                                                if let Ok(w) = width_str.parse::<f64>() {
2229                                                    width = w;
2230                                                }
2231                                            }
2232                                        }
2233                                        b"customWidth" => {
2234                                            custom_width = attr.value.as_ref() != b"0";
2235                                        }
2236                                        b"hidden" => {
2237                                            hidden = attr.value.as_ref() != b"0";
2238                                        }
2239                                        b"bestFit" => {
2240                                            best_fit = attr.value.as_ref() != b"0";
2241                                        }
2242                                        _ => {}
2243                                    }
2244                                }
2245
2246                                if let Some(col) = col_info {
2247                                    let column_width = ColumnWidth::new(col, width)
2248                                        .with_custom_width(custom_width)
2249                                        .with_hidden(hidden)
2250                                        .with_best_fit(best_fit);
2251                                    layout = layout.add_column_width(column_width);
2252                                }
2253                            }
2254                            Ok(Event::End(ref end_e)) if end_e.local_name().as_ref() == b"cols" => {
2255                                break;
2256                            }
2257                            Ok(Event::Eof) => return Err(XlsxError::XmlEof("cols")),
2258                            Err(e) => return Err(XlsxError::Xml(e)),
2259                            _ => {}
2260                        }
2261                    }
2262                }
2263                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"sheetData" => {
2264                    // Parse row definitions
2265                    loop {
2266                        buf.clear();
2267                        match xml.read_event_into(&mut buf) {
2268                            Ok(Event::Start(ref row_e))
2269                                if row_e.local_name().as_ref() == b"row" =>
2270                            {
2271                                let mut row_num = None;
2272                                let mut height = 0.0;
2273                                let mut custom_height = false;
2274                                let mut hidden = false;
2275                                let mut thick_top = false;
2276                                let mut thick_bottom = false;
2277
2278                                for attr in row_e.attributes() {
2279                                    let attr = attr.map_err(XlsxError::XmlAttr)?;
2280                                    match attr.key.as_ref() {
2281                                        b"r" => {
2282                                            if let Ok(row_str) = xml.decoder().decode(&attr.value) {
2283                                                if let Ok(r) = row_str.parse::<u32>() {
2284                                                    row_num = Some(r - 1); // Convert to 0-based
2285                                                }
2286                                            }
2287                                        }
2288                                        b"ht" => {
2289                                            if let Ok(height_str) =
2290                                                xml.decoder().decode(&attr.value)
2291                                            {
2292                                                if let Ok(h) = height_str.parse::<f64>() {
2293                                                    height = h;
2294                                                }
2295                                            }
2296                                        }
2297                                        b"customHeight" => {
2298                                            custom_height = attr.value.as_ref() != b"0";
2299                                        }
2300                                        b"hidden" => {
2301                                            hidden = attr.value.as_ref() != b"0";
2302                                        }
2303                                        b"thickTop" => {
2304                                            thick_top = attr.value.as_ref() != b"0";
2305                                        }
2306                                        b"thickBot" => {
2307                                            thick_bottom = attr.value.as_ref() != b"0";
2308                                        }
2309                                        _ => {}
2310                                    }
2311                                }
2312
2313                                // Only add row height if it's custom or has special properties
2314                                if let Some(row) = row_num {
2315                                    if custom_height
2316                                        || hidden
2317                                        || thick_top
2318                                        || thick_bottom
2319                                        || height > 0.0
2320                                    {
2321                                        let row_height = RowHeight::new(row, height)
2322                                            .with_custom_height(custom_height)
2323                                            .with_hidden(hidden)
2324                                            .with_thick_top(thick_top)
2325                                            .with_thick_bottom(thick_bottom);
2326                                        layout = layout.add_row_height(row_height);
2327                                    }
2328                                }
2329
2330                                // Skip to the end of this row element
2331                                xml.read_to_end_into(row_e.name(), &mut Vec::new())?;
2332                            }
2333                            Ok(Event::End(ref end_e))
2334                                if end_e.local_name().as_ref() == b"sheetData" =>
2335                            {
2336                                break;
2337                            }
2338                            Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
2339                            Err(e) => return Err(XlsxError::Xml(e)),
2340                            _ => {}
2341                        }
2342                    }
2343                    break; // We're done after processing sheetData
2344                }
2345                Ok(Event::Eof) => break,
2346                Err(e) => return Err(XlsxError::Xml(e)),
2347                _ => {}
2348            }
2349        }
2350
2351        Ok(layout)
2352    }
2353
2354    fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
2355        let names = self
2356            .sheets
2357            .iter()
2358            .map(|(n, _)| n.clone())
2359            .collect::<Vec<_>>();
2360        names
2361            .into_iter()
2362            .filter_map(|n| {
2363                let rge = self.worksheet_range(&n).ok()?;
2364                Some((n, rge))
2365            })
2366            .collect()
2367    }
2368
2369    #[cfg(feature = "picture")]
2370    fn pictures(&self) -> Option<Vec<(String, Vec<u8>)>> {
2371        self.pictures.to_owned()
2372    }
2373}
2374
2375impl<RS: Read + Seek> ReaderRef<RS> for Xlsx<RS> {
2376    fn worksheet_range_ref<'a>(&'a mut self, name: &str) -> Result<Range<DataRef<'a>>, XlsxError> {
2377        let header_row = self.options.header_row;
2378        let mut cell_reader = match self.worksheet_cells_reader(name) {
2379            Ok(reader) => reader,
2380            Err(XlsxError::NotAWorksheet(typ)) => {
2381                log::warn!("'{typ}' not a valid worksheet");
2382                return Ok(Range::default());
2383            }
2384            Err(e) => return Err(e),
2385        };
2386        let len = cell_reader.dimensions().len();
2387        let mut cells = Vec::new();
2388        if len < 100_000 {
2389            cells.reserve(len as usize);
2390        }
2391
2392        match header_row {
2393            HeaderRow::FirstNonEmptyRow => {
2394                // the header row is the row of the first non-empty cell
2395                loop {
2396                    match cell_reader.next_cell() {
2397                        Ok(Some(Cell {
2398                            val: DataRef::Empty,
2399                            ..
2400                        })) => (),
2401                        Ok(Some(cell)) => cells.push(cell),
2402                        Ok(None) => break,
2403                        Err(e) => return Err(e),
2404                    }
2405                }
2406            }
2407            HeaderRow::Row(header_row_idx) => {
2408                // If `header_row` is a row index, we only add non-empty cells after this index.
2409                loop {
2410                    match cell_reader.next_cell() {
2411                        Ok(Some(Cell {
2412                            val: DataRef::Empty,
2413                            ..
2414                        })) => (),
2415                        Ok(Some(cell)) => {
2416                            if cell.pos.0 >= header_row_idx {
2417                                cells.push(cell);
2418                            }
2419                        }
2420                        Ok(None) => break,
2421                        Err(e) => return Err(e),
2422                    }
2423                }
2424
2425                // If `header_row` is set and the first non-empty cell is not at the `header_row`, we add
2426                // an empty cell at the beginning with row `header_row` and same column as the first non-empty cell.
2427                if cells.first().is_some_and(|c| c.pos.0 != header_row_idx) {
2428                    cells.insert(
2429                        0,
2430                        Cell {
2431                            pos: (
2432                                header_row_idx,
2433                                cells.first().expect("cells should not be empty").pos.1,
2434                            ),
2435                            val: DataRef::Empty,
2436                            style: None,
2437                        },
2438                    );
2439                }
2440            }
2441        }
2442
2443        Ok(Range::from_sparse(cells))
2444    }
2445}
2446
2447fn xml_reader<'a, RS: Read + Seek>(
2448    zip: &'a mut ZipArchive<RS>,
2449    path: &str,
2450) -> Option<Result<XlReader<'a, RS>, XlsxError>> {
2451    let zip_path = path_to_zip_path(zip, path);
2452
2453    match zip.by_name(&zip_path) {
2454        Ok(f) => {
2455            let mut r = XmlReader::from_reader(BufReader::new(f));
2456            let config = r.config_mut();
2457            config.check_end_names = false;
2458            config.trim_text(false);
2459            config.check_comments = false;
2460            config.expand_empty_elements = true;
2461            Some(Ok(r))
2462        }
2463        Err(ZipError::FileNotFound) => None,
2464        Err(e) => Some(Err(e.into())),
2465    }
2466}
2467
2468/// search through an Element's attributes for the named one
2469pub(crate) fn get_attribute<'a>(
2470    atts: Attributes<'a>,
2471    n: QName,
2472) -> Result<Option<&'a [u8]>, XlsxError> {
2473    for a in atts {
2474        match a {
2475            Ok(Attribute {
2476                key,
2477                value: Cow::Borrowed(value),
2478            }) if key == n => return Ok(Some(value)),
2479            Err(e) => return Err(XlsxError::XmlAttr(e)),
2480            _ => {} // ignore other attributes
2481        }
2482    }
2483    Ok(None)
2484}
2485
2486/// converts a text representation (e.g. "A6:G67") of a dimension into integers
2487/// - top left (row, column),
2488/// - bottom right (row, column)
2489pub(crate) fn get_dimension(dimension: &[u8]) -> Result<Dimensions, XlsxError> {
2490    let parts: Vec<_> = dimension
2491        .split(|c| *c == b':')
2492        .map(get_row_column)
2493        .collect::<Result<Vec<_>, XlsxError>>()?;
2494
2495    match parts.len() {
2496        0 => Err(XlsxError::DimensionCount(0)),
2497        1 => Ok(Dimensions {
2498            start: parts[0],
2499            end: parts[0],
2500        }),
2501        2 => {
2502            let rows = parts[1].0 - parts[0].0;
2503            let columns = parts[1].1 - parts[0].1;
2504            if rows > MAX_ROWS {
2505                warn!("xlsx has more than maximum number of rows ({rows} > {MAX_ROWS})");
2506            }
2507            if columns > MAX_COLUMNS {
2508                warn!("xlsx has more than maximum number of columns ({columns} > {MAX_COLUMNS})");
2509            }
2510            Ok(Dimensions {
2511                start: parts[0],
2512                end: parts[1],
2513            })
2514        }
2515        len => Err(XlsxError::DimensionCount(len)),
2516    }
2517}
2518
2519/// Converts a text range name into its position (row, column) (0 based index).
2520/// If the row or column component in the range is missing, an Error is returned.
2521pub(crate) fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> {
2522    let (row, col) = get_row_and_optional_column(range)?;
2523    let col = col.ok_or(XlsxError::RangeWithoutColumnComponent)?;
2524    Ok((row, col))
2525}
2526
2527/// Converts a text row name into its position (0 based index).
2528/// If the row component in the range is missing, an Error is returned.
2529/// If the text row name also contains a column component, it is ignored.
2530pub(crate) fn get_row(range: &[u8]) -> Result<u32, XlsxError> {
2531    get_row_and_optional_column(range).map(|(row, _)| row)
2532}
2533
2534/// Converts a text range name into its position (row, column) (0 based index).
2535/// If the row component in the range is missing, an Error is returned.
2536/// If the column component in the range is missing, an None is returned for the column.
2537fn get_row_and_optional_column(range: &[u8]) -> Result<(u32, Option<u32>), XlsxError> {
2538    let (mut row, mut col) = (0, 0);
2539    let mut pow = 1;
2540    let mut readrow = true;
2541    for c in range.iter().rev() {
2542        match *c {
2543            c @ b'0'..=b'9' => {
2544                if readrow {
2545                    row += ((c - b'0') as u32) * pow;
2546                    pow *= 10;
2547                } else {
2548                    return Err(XlsxError::NumericColumn(c));
2549                }
2550            }
2551            c @ b'A'..=b'Z' => {
2552                if readrow {
2553                    if row == 0 {
2554                        return Err(XlsxError::RangeWithoutRowComponent);
2555                    }
2556                    pow = 1;
2557                    readrow = false;
2558                }
2559                col += ((c - b'A') as u32 + 1) * pow;
2560                pow *= 26;
2561            }
2562            c @ b'a'..=b'z' => {
2563                if readrow {
2564                    if row == 0 {
2565                        return Err(XlsxError::RangeWithoutRowComponent);
2566                    }
2567                    pow = 1;
2568                    readrow = false;
2569                }
2570                col += ((c - b'a') as u32 + 1) * pow;
2571                pow *= 26;
2572            }
2573            _ => return Err(XlsxError::Alphanumeric(*c)),
2574        }
2575    }
2576    let row = row
2577        .checked_sub(1)
2578        .ok_or(XlsxError::RangeWithoutRowComponent)?;
2579    Ok((row, col.checked_sub(1)))
2580}
2581
2582/// attempts to read either a simple or richtext string
2583pub(crate) fn read_string<RS>(
2584    xml: &mut XlReader<'_, RS>,
2585    closing: QName,
2586) -> Result<Option<String>, XlsxError>
2587where
2588    RS: Read + Seek,
2589{
2590    let mut buf = Vec::with_capacity(1024);
2591    let mut val_buf = Vec::with_capacity(1024);
2592    let mut rich_buffer: Option<String> = None;
2593    let mut is_phonetic_text = false;
2594    loop {
2595        buf.clear();
2596        match xml.read_event_into(&mut buf) {
2597            Ok(Event::Start(e)) if e.local_name().as_ref() == b"r" => {
2598                if rich_buffer.is_none() {
2599                    // use a buffer since richtext has multiples <r> and <t> for the same cell
2600                    rich_buffer = Some(String::new());
2601                }
2602            }
2603            Ok(Event::Start(e)) if e.local_name().as_ref() == b"rPh" => {
2604                is_phonetic_text = true;
2605            }
2606            Ok(Event::End(e)) if e.name() == closing => {
2607                if rich_buffer.is_none() {
2608                    // An empty <s></s> element, without <t> or other
2609                    // subelements, is treated as a valid empty string in Excel.
2610                    rich_buffer = Some(String::new());
2611                }
2612
2613                return Ok(rich_buffer);
2614            }
2615            Ok(Event::End(e)) if e.local_name().as_ref() == b"rPh" => {
2616                is_phonetic_text = false;
2617            }
2618            Ok(Event::Start(e)) if e.local_name().as_ref() == b"t" && !is_phonetic_text => {
2619                val_buf.clear();
2620                let mut value = String::new();
2621                loop {
2622                    match xml.read_event_into(&mut val_buf)? {
2623                        Event::Text(t) => value.push_str(&unescape_xml(&t.xml10_content()?)),
2624                        Event::GeneralRef(e) => unescape_entity_to_buffer(&e, &mut value)?,
2625                        Event::End(end) if end.name() == e.name() => break,
2626                        Event::Eof => return Err(XlsxError::XmlEof("t")),
2627                        _ => (),
2628                    }
2629                }
2630                if let Some(s) = &mut rich_buffer {
2631                    s.push_str(&value);
2632                } else {
2633                    // consume any remaining events up to expected closing tag
2634                    xml.read_to_end_into(closing, &mut val_buf)?;
2635                    return Ok(Some(value));
2636                }
2637            }
2638            Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
2639            Err(e) => return Err(XlsxError::Xml(e)),
2640            _ => (),
2641        }
2642    }
2643}
2644
2645/// Result of parsing run properties - includes both the font and whether there's "rich" formatting
2646struct RunPropertiesResult {
2647    font: Font,
2648    /// Whether the font has "rich" formatting (bold, italic, underline, strikethrough, color)
2649    /// that would make this text run distinct from plain text
2650    has_rich_formatting: bool,
2651}
2652
2653/// Parse run properties (rPr) to extract font formatting for rich text
2654fn parse_run_properties<RS>(
2655    xml: &mut XlReader<'_, RS>,
2656    closing: QName,
2657) -> Result<Option<RunPropertiesResult>, XlsxError>
2658where
2659    RS: Read + Seek,
2660{
2661    let mut font = Font::new();
2662    let mut has_any_props = false;
2663    let mut has_rich_formatting = false;
2664    let mut buf = Vec::with_capacity(256);
2665
2666    loop {
2667        buf.clear();
2668        match xml.read_event_into(&mut buf) {
2669            Ok(Event::Start(e) | Event::Empty(e)) => {
2670                match e.local_name().as_ref() {
2671                    b"b" => {
2672                        // Bold - check for val attribute
2673                        let mut is_bold = true;
2674                        for attr in e.attributes().flatten() {
2675                            if attr.key.as_ref() == b"val" {
2676                                let val = String::from_utf8_lossy(&attr.value);
2677                                is_bold = val != "0" && val != "false";
2678                            }
2679                        }
2680                        if is_bold {
2681                            font = font.with_weight(FontWeight::Bold);
2682                            has_any_props = true;
2683                            has_rich_formatting = true;
2684                        }
2685                    }
2686                    b"i" => {
2687                        // Italic - check for val attribute
2688                        let mut is_italic = true;
2689                        for attr in e.attributes().flatten() {
2690                            if attr.key.as_ref() == b"val" {
2691                                let val = String::from_utf8_lossy(&attr.value);
2692                                is_italic = val != "0" && val != "false";
2693                            }
2694                        }
2695                        if is_italic {
2696                            font = font.with_style(FontStyle::Italic);
2697                            has_any_props = true;
2698                            has_rich_formatting = true;
2699                        }
2700                    }
2701                    b"u" => {
2702                        // Underline
2703                        let mut underline = UnderlineStyle::Single;
2704                        for attr in e.attributes().flatten() {
2705                            if attr.key.as_ref() == b"val" {
2706                                let val = String::from_utf8_lossy(&attr.value);
2707                                underline = match val.as_ref() {
2708                                    "double" => UnderlineStyle::Double,
2709                                    "singleAccounting" => UnderlineStyle::SingleAccounting,
2710                                    "doubleAccounting" => UnderlineStyle::DoubleAccounting,
2711                                    "none" => UnderlineStyle::None,
2712                                    _ => UnderlineStyle::Single,
2713                                };
2714                            }
2715                        }
2716                        if underline != UnderlineStyle::None {
2717                            font = font.with_underline(underline);
2718                            has_any_props = true;
2719                            has_rich_formatting = true;
2720                        }
2721                    }
2722                    b"strike" => {
2723                        // Strikethrough
2724                        let mut is_strike = true;
2725                        for attr in e.attributes().flatten() {
2726                            if attr.key.as_ref() == b"val" {
2727                                let val = String::from_utf8_lossy(&attr.value);
2728                                is_strike = val != "0" && val != "false";
2729                            }
2730                        }
2731                        if is_strike {
2732                            font = font.with_strikethrough(true);
2733                            has_any_props = true;
2734                            has_rich_formatting = true;
2735                        }
2736                    }
2737                    b"sz" => {
2738                        // Font size - captured but doesn't count as "rich" formatting
2739                        for attr in e.attributes().flatten() {
2740                            if attr.key.as_ref() == b"val" {
2741                                if let Ok(size) =
2742                                    String::from_utf8_lossy(&attr.value).parse::<f64>()
2743                                {
2744                                    font = font.with_size(size);
2745                                    has_any_props = true;
2746                                }
2747                            }
2748                        }
2749                    }
2750                    b"color" => {
2751                        // Font color - counts as rich formatting
2752                        if let Some(color) = parse_run_color(&e)? {
2753                            font = font.with_color(color);
2754                            has_any_props = true;
2755                            has_rich_formatting = true;
2756                        }
2757                    }
2758                    b"rFont" => {
2759                        // Font name - captured but doesn't count as "rich" formatting
2760                        for attr in e.attributes().flatten() {
2761                            if attr.key.as_ref() == b"val" {
2762                                font = font
2763                                    .with_name(String::from_utf8_lossy(&attr.value).to_string());
2764                                has_any_props = true;
2765                            }
2766                        }
2767                    }
2768                    b"family" => {
2769                        // Font family - captured but doesn't count as "rich" formatting
2770                        for attr in e.attributes().flatten() {
2771                            if attr.key.as_ref() == b"val" {
2772                                font = font
2773                                    .with_family(String::from_utf8_lossy(&attr.value).to_string());
2774                                has_any_props = true;
2775                            }
2776                        }
2777                    }
2778                    _ => {}
2779                }
2780            }
2781            Ok(Event::End(e)) if e.name() == closing => break,
2782            Ok(Event::Eof) => return Err(XlsxError::XmlEof("rPr")),
2783            Err(e) => return Err(XlsxError::Xml(e)),
2784            _ => {}
2785        }
2786    }
2787
2788    Ok(if has_any_props {
2789        Some(RunPropertiesResult {
2790            font,
2791            has_rich_formatting,
2792        })
2793    } else {
2794        None
2795    })
2796}
2797
2798/// Parse color from a run properties color element
2799fn parse_run_color(
2800    e: &quick_xml::events::BytesStart<'_>,
2801) -> Result<Option<crate::Color>, XlsxError> {
2802    use crate::Color;
2803
2804    for attr in e.attributes().flatten() {
2805        match attr.key.as_ref() {
2806            b"rgb" => {
2807                let rgb_str = attr.value.as_ref();
2808                if rgb_str.len() == 6 {
2809                    let r = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[0..2]), 16)
2810                        .map_err(|_| XlsxError::Unexpected("Invalid red color value"))?;
2811                    let g = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[2..4]), 16)
2812                        .map_err(|_| XlsxError::Unexpected("Invalid green color value"))?;
2813                    let b = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[4..6]), 16)
2814                        .map_err(|_| XlsxError::Unexpected("Invalid blue color value"))?;
2815                    return Ok(Some(Color::rgb(r, g, b)));
2816                } else if rgb_str.len() == 8 {
2817                    let a = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[0..2]), 16)
2818                        .map_err(|_| XlsxError::Unexpected("Invalid alpha color value"))?;
2819                    let r = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[2..4]), 16)
2820                        .map_err(|_| XlsxError::Unexpected("Invalid red color value"))?;
2821                    let g = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[4..6]), 16)
2822                        .map_err(|_| XlsxError::Unexpected("Invalid green color value"))?;
2823                    let b = u8::from_str_radix(&String::from_utf8_lossy(&rgb_str[6..8]), 16)
2824                        .map_err(|_| XlsxError::Unexpected("Invalid blue color value"))?;
2825                    return Ok(Some(Color::new(a, r, g, b)));
2826                }
2827            }
2828            b"theme" => {
2829                // Theme colors - using a simplified palette
2830                let theme_str = String::from_utf8_lossy(&attr.value);
2831                if let Ok(theme) = theme_str.parse::<u8>() {
2832                    let color = match theme {
2833                        0 => Color::rgb(255, 255, 255), // Light 1
2834                        1 => Color::rgb(0, 0, 0),       // Dark 1
2835                        2 => Color::rgb(68, 84, 106),   // Light 2
2836                        3 => Color::rgb(31, 73, 125),   // Dark 2
2837                        4 => Color::rgb(79, 129, 189),  // Accent 1
2838                        5 => Color::rgb(192, 80, 77),   // Accent 2
2839                        6 => Color::rgb(155, 187, 89),  // Accent 3
2840                        7 => Color::rgb(128, 100, 162), // Accent 4
2841                        8 => Color::rgb(75, 172, 198),  // Accent 5
2842                        9 => Color::rgb(247, 150, 70),  // Accent 6
2843                        _ => Color::rgb(0, 0, 0),
2844                    };
2845                    return Ok(Some(color));
2846                }
2847            }
2848            b"indexed" => {
2849                // Indexed colors
2850                let idx_str = String::from_utf8_lossy(&attr.value);
2851                if let Ok(idx) = idx_str.parse::<u8>() {
2852                    let color = match idx {
2853                        1 => Color::rgb(0, 0, 0),
2854                        2 => Color::rgb(255, 255, 255),
2855                        3 => Color::rgb(255, 0, 0),
2856                        4 => Color::rgb(0, 255, 0),
2857                        5 => Color::rgb(0, 0, 255),
2858                        6 => Color::rgb(255, 255, 0),
2859                        7 => Color::rgb(255, 0, 255),
2860                        8 => Color::rgb(0, 255, 255),
2861                        _ => Color::rgb(0, 0, 0),
2862                    };
2863                    return Ok(Some(color));
2864                }
2865            }
2866            _ => {}
2867        }
2868    }
2869    Ok(None)
2870}
2871
2872/// Read a string from shared strings, preserving rich text formatting if present.
2873/// Returns Data::String for plain text or Data::RichText for formatted text.
2874fn read_rich_string<RS>(
2875    xml: &mut XlReader<'_, RS>,
2876    closing: QName,
2877) -> Result<Option<Data>, XlsxError>
2878where
2879    RS: Read + Seek,
2880{
2881    let mut buf = Vec::with_capacity(1024);
2882    let mut val_buf = Vec::with_capacity(1024);
2883    let mut runs: Vec<TextRun> = Vec::new();
2884    let mut current_props: Option<RunPropertiesResult> = None;
2885    let mut is_rich_text = false;
2886    let mut is_phonetic_text = false;
2887    let mut plain_text: Option<String> = None;
2888    let mut has_any_rich_formatting = false;
2889
2890    loop {
2891        buf.clear();
2892        match xml.read_event_into(&mut buf) {
2893            Ok(Event::Start(e)) if e.local_name().as_ref() == b"r" => {
2894                // Start of a rich text run
2895                is_rich_text = true;
2896                current_props = None;
2897            }
2898            Ok(Event::End(e)) if e.local_name().as_ref() == b"r" => {
2899                // End of a rich text run
2900                current_props = None;
2901            }
2902            Ok(Event::Start(e)) if e.local_name().as_ref() == b"rPr" => {
2903                // Run properties (formatting)
2904                current_props = parse_run_properties(xml, e.name())?;
2905                if let Some(ref props) = current_props {
2906                    if props.has_rich_formatting {
2907                        has_any_rich_formatting = true;
2908                    }
2909                }
2910            }
2911            Ok(Event::Start(e)) if e.local_name().as_ref() == b"rPh" => {
2912                is_phonetic_text = true;
2913            }
2914            Ok(Event::End(e)) if e.local_name().as_ref() == b"rPh" => {
2915                is_phonetic_text = false;
2916            }
2917            Ok(Event::End(e)) if e.name() == closing => {
2918                // End of string item
2919                if is_rich_text && !runs.is_empty() {
2920                    // Only return RichText if there's actual rich formatting (bold, italic, etc.)
2921                    if has_any_rich_formatting {
2922                        return Ok(Some(Data::RichText(RichText::from_runs(runs))));
2923                    } else {
2924                        // All runs are plain text or only have non-rich style info, concatenate them
2925                        let text: String = runs.iter().map(|r| r.text.as_str()).collect();
2926                        return Ok(Some(Data::String(text)));
2927                    }
2928                } else if let Some(text) = plain_text {
2929                    return Ok(Some(Data::String(text)));
2930                } else if is_rich_text {
2931                    // Rich text with no runs means empty string
2932                    return Ok(Some(Data::String(String::new())));
2933                } else {
2934                    // Empty element
2935                    return Ok(Some(Data::String(String::new())));
2936                }
2937            }
2938            Ok(Event::Start(e)) if e.local_name().as_ref() == b"t" && !is_phonetic_text => {
2939                // Text content
2940                val_buf.clear();
2941                let mut value = String::new();
2942                loop {
2943                    match xml.read_event_into(&mut val_buf)? {
2944                        Event::Text(t) => value.push_str(&unescape_xml(&t.xml10_content()?)),
2945                        Event::GeneralRef(e) => unescape_entity_to_buffer(&e, &mut value)?,
2946                        Event::End(end) if end.name() == e.name() => break,
2947                        Event::Eof => return Err(XlsxError::XmlEof("t")),
2948                        _ => (),
2949                    }
2950                }
2951
2952                if is_rich_text {
2953                    // Add as a run with optional formatting
2954                    // Only include font if there's rich formatting (bold, italic, etc.)
2955                    let font = current_props
2956                        .take()
2957                        .filter(|p| p.has_rich_formatting)
2958                        .map(|p| p.font);
2959                    runs.push(TextRun { text: value, font });
2960                } else {
2961                    // Plain text (not in a run)
2962                    if let Some(ref mut pt) = plain_text {
2963                        pt.push_str(&value);
2964                    } else {
2965                        plain_text = Some(value);
2966                    }
2967                    // Consume remaining and return
2968                    xml.read_to_end_into(closing, &mut val_buf)?;
2969                    return Ok(plain_text.map(Data::String));
2970                }
2971            }
2972            Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
2973            Err(e) => return Err(XlsxError::Xml(e)),
2974            _ => (),
2975        }
2976    }
2977}
2978
2979fn check_for_password_protected<RS: Read + Seek>(reader: &mut RS) -> Result<(), XlsxError> {
2980    let offset_end = reader.seek(std::io::SeekFrom::End(0))? as usize;
2981    reader.seek(std::io::SeekFrom::Start(0))?;
2982
2983    if let Ok(cfb) = crate::cfb::Cfb::new(reader, offset_end) {
2984        if cfb.has_directory("EncryptedPackage") {
2985            return Err(XlsxError::Password);
2986        }
2987    }
2988
2989    Ok(())
2990}
2991
2992fn read_merge_cells<RS>(xml: &mut XlReader<'_, RS>) -> Result<Vec<Dimensions>, XlsxError>
2993where
2994    RS: Read + Seek,
2995{
2996    let mut merge_cells = Vec::new();
2997
2998    loop {
2999        let mut buffer = Vec::new();
3000
3001        match xml.read_event_into(&mut buffer) {
3002            Ok(Event::Start(event)) if event.local_name().as_ref() == b"mergeCell" => {
3003                for attribute in event.attributes() {
3004                    let attribute = attribute?;
3005
3006                    if attribute.key == QName(b"ref") {
3007                        let dimensions = get_dimension(&attribute.value)?;
3008                        merge_cells.push(dimensions);
3009
3010                        break;
3011                    }
3012                }
3013            }
3014            Ok(Event::End(event)) if event.local_name().as_ref() == b"mergeCells" => {
3015                break;
3016            }
3017            Ok(Event::Eof) => return Err(XlsxError::XmlEof("")),
3018            Err(e) => return Err(XlsxError::Xml(e)),
3019            _ => (),
3020        }
3021    }
3022
3023    Ok(merge_cells)
3024}
3025
3026/// Advance the cell name by the offset
3027///
3028/// This function advances the cell name by the offset.
3029///
3030/// # Parameters
3031///
3032/// - `name`: The cell name to advance.
3033/// - `offset`: The offset to advance the cell name by.
3034///
3035/// # Returns
3036///
3037/// A vector of bytes representing the advanced cell name.
3038///
3039pub fn offset_cell_name(name: &[u8], offset: (i64, i64)) -> Result<Vec<u8>, XlsxError> {
3040    let reference = Reference::parse(name)?;
3041    let offset_ref = reference.offset(offset)?;
3042    let mut buf = Vec::new();
3043    offset_ref.format(&mut buf)?;
3044    Ok(buf)
3045}
3046
3047#[derive(Debug, Copy, Clone)]
3048enum Reference {
3049    Cell {
3050        row: u32,
3051        col: u32,
3052        absolute_row: bool,
3053        absolute_col: bool,
3054    },
3055    Row {
3056        row: u32,
3057        absolute: bool,
3058    },
3059    Column {
3060        col: u32,
3061        absolute: bool,
3062    },
3063}
3064
3065impl Reference {
3066    // Create a cell reference with validation.
3067    fn cell(row: u32, col: u32, absolute_row: bool, absolute_col: bool) -> Result<Self, XlsxError> {
3068        let reference = Reference::Cell {
3069            row,
3070            col,
3071            absolute_row,
3072            absolute_col,
3073        };
3074        reference.validate()?;
3075        Ok(reference)
3076    }
3077
3078    // Create a column reference with validation.
3079    fn column(col: u32, absolute: bool) -> Result<Self, XlsxError> {
3080        let reference = Reference::Column { col, absolute };
3081        reference.validate()?;
3082        Ok(reference)
3083    }
3084
3085    // Create a row reference with validation.
3086    fn row(row: u32, absolute: bool) -> Result<Self, XlsxError> {
3087        let reference = Reference::Row { row, absolute };
3088        reference.validate()?;
3089        Ok(reference)
3090    }
3091
3092    // Parse a reference (e.g., "A1", "$A1", "A$1", "$A$1", "E", "$E", "5", "$5").
3093    fn parse(name: &[u8]) -> Result<Self, XlsxError> {
3094        let mut iter = name.iter().peekable();
3095        let mut col: u32 = 0;
3096        let mut row: u32 = 0;
3097        let mut absolute_col = false;
3098        let mut absolute_row = false;
3099
3100        while let Some(&c) = iter.next() {
3101            match (c, iter.peek()) {
3102                (b'$', Some(b'A'..=b'Z' | b'a'..=b'z')) => {
3103                    if row > 0 || col > 0 {
3104                        return Err(XlsxError::Alphanumeric(c));
3105                    }
3106                    absolute_col = true;
3107                }
3108                (b'$', Some(b'0'..=b'9')) => {
3109                    if row > 0 {
3110                        return Err(XlsxError::Alphanumeric(c));
3111                    }
3112                    absolute_row = true;
3113                }
3114                (b'$', _) => return Err(XlsxError::Alphanumeric(c)),
3115                (c @ (b'A'..=b'Z' | b'a'..=b'z'), _) => {
3116                    if row > 0 {
3117                        return Err(XlsxError::Alphanumeric(c));
3118                    }
3119                    col = col
3120                        .wrapping_mul(26)
3121                        .wrapping_add((c.to_ascii_uppercase() - b'A') as u32 + 1);
3122                }
3123                (c @ b'0'..=b'9', _) => {
3124                    row = row.wrapping_mul(10).wrapping_add((c - b'0') as u32);
3125                }
3126                _ => return Err(XlsxError::Alphanumeric(c)),
3127            }
3128        }
3129
3130        match (col.checked_sub(1), row.checked_sub(1)) {
3131            (Some(col), Some(row)) => Reference::cell(row, col, absolute_row, absolute_col),
3132            (Some(col), None) => Reference::column(col, absolute_col),
3133            (None, Some(row)) => Reference::row(row, absolute_row),
3134            (None, None) => Err(XlsxError::Unexpected("Empty reference")),
3135        }
3136    }
3137
3138    // Apply offset to create a new reference with validation.
3139    fn offset(self, offset: (i64, i64)) -> Result<Self, XlsxError> {
3140        let result = match self {
3141            Reference::Cell {
3142                row,
3143                col,
3144                absolute_row,
3145                absolute_col,
3146            } => {
3147                let new_col = if absolute_col {
3148                    col
3149                } else {
3150                    (col as i64 + offset.1) as u32
3151                };
3152                let new_row = if absolute_row {
3153                    row
3154                } else {
3155                    (row as i64 + offset.0) as u32
3156                };
3157
3158                Reference::Cell {
3159                    row: new_row,
3160                    col: new_col,
3161                    absolute_row,
3162                    absolute_col,
3163                }
3164            }
3165            Reference::Column { col, absolute } => {
3166                let new_col = if absolute {
3167                    col
3168                } else {
3169                    (col as i64 + offset.1) as u32
3170                };
3171
3172                Reference::Column {
3173                    col: new_col,
3174                    absolute,
3175                }
3176            }
3177            Reference::Row { row, absolute } => {
3178                let new_row = if absolute {
3179                    row
3180                } else {
3181                    (row as i64 + offset.0) as u32
3182                };
3183
3184                Reference::Row {
3185                    row: new_row,
3186                    absolute,
3187                }
3188            }
3189        };
3190
3191        result.validate()?;
3192        Ok(result)
3193    }
3194
3195    // Validate that row/column values are in bounds.
3196    fn validate(&self) -> Result<(), XlsxError> {
3197        match self {
3198            Reference::Cell { row, col, .. } => {
3199                if *col >= MAX_COLUMNS {
3200                    return Err(XlsxError::ColumnNumberOverflow);
3201                }
3202                if *row >= MAX_ROWS {
3203                    return Err(XlsxError::RowNumberOverflow);
3204                }
3205                Ok(())
3206            }
3207            Reference::Column { col, .. } => {
3208                if *col >= MAX_COLUMNS {
3209                    return Err(XlsxError::ColumnNumberOverflow);
3210                }
3211                Ok(())
3212            }
3213            Reference::Row { row, .. } => {
3214                if *row >= MAX_ROWS {
3215                    return Err(XlsxError::RowNumberOverflow);
3216                }
3217                Ok(())
3218            }
3219        }
3220    }
3221
3222    // Format a reference to bytes.
3223    fn format(&self, buf: &mut Vec<u8>) -> Result<(), XlsxError> {
3224        match self {
3225            Reference::Cell {
3226                row,
3227                col,
3228                absolute_row,
3229                absolute_col,
3230            } => {
3231                if *absolute_col {
3232                    buf.push(b'$');
3233                }
3234                column_number_to_name(*col, buf)?;
3235                if *absolute_row {
3236                    buf.push(b'$');
3237                }
3238                buf.extend((row + 1).to_string().into_bytes());
3239                Ok(())
3240            }
3241            Reference::Column { col, absolute } => {
3242                if *absolute {
3243                    buf.push(b'$');
3244                }
3245                column_number_to_name(*col, buf)
3246            }
3247            Reference::Row { row, absolute } => {
3248                if *absolute {
3249                    buf.push(b'$');
3250                }
3251                buf.extend((row + 1).to_string().into_bytes());
3252                Ok(())
3253            }
3254        }
3255    }
3256}
3257
3258// Advance a reference by the offset (e.g., "A1", "E:F", "5:6", "A1:B5").
3259fn offset_range(range: &[u8], offset: (i64, i64), buf: &mut Vec<u8>) -> Result<(), XlsxError> {
3260    let colon_pos = range.iter().position(|&b| b == b':');
3261
3262    match colon_pos {
3263        None => {
3264            let reference = Reference::parse(range)?;
3265            if !matches!(reference, Reference::Cell { .. }) {
3266                return Err(XlsxError::Unexpected("Single reference type must be cell"));
3267            }
3268            let offset_ref = reference.offset(offset)?;
3269            offset_ref.format(buf)
3270        }
3271        Some(idx) => {
3272            let start = &range[..idx];
3273            let end = &range[idx + 1..];
3274
3275            let start_ref = Reference::parse(start)?;
3276            let end_ref = Reference::parse(end)?;
3277
3278            if std::mem::discriminant(&start_ref) != std::mem::discriminant(&end_ref) {
3279                return Err(XlsxError::Unexpected("Range type mismatch"));
3280            }
3281
3282            let start_offset = start_ref.offset(offset)?;
3283            let end_offset = end_ref.offset(offset)?;
3284
3285            start_offset.format(buf)?;
3286            buf.push(b':');
3287            end_offset.format(buf)
3288        }
3289    }
3290}
3291
3292// Advance all valid cell names in the string by the offset.
3293fn replace_cell_names(s: &str, offset: (i64, i64)) -> Result<String, XlsxError> {
3294    let bytes = s.as_bytes();
3295    let mut res: Vec<u8> = Vec::new();
3296    let mut in_quote = false;
3297
3298    let mut token_start = 0;
3299    let mut token_end = 0;
3300
3301    for (i, &c) in bytes.iter().enumerate() {
3302        if !in_quote && (c.is_ascii_alphanumeric() || c == b'$' || c == b':') {
3303            token_end = i + 1;
3304        } else {
3305            if token_start < token_end
3306                && offset_range(&bytes[token_start..token_end], offset, &mut res).is_err()
3307            {
3308                res.extend(&bytes[token_start..token_end]);
3309            }
3310            res.push(c);
3311            token_start = i + 1;
3312            token_end = i + 1;
3313
3314            if c == b'"' {
3315                in_quote = !in_quote;
3316            }
3317        }
3318    }
3319
3320    if token_start < token_end
3321        && offset_range(&bytes[token_start..token_end], offset, &mut res).is_err()
3322    {
3323        res.extend(&bytes[token_start..token_end]);
3324    }
3325
3326    match String::from_utf8(res) {
3327        Ok(s) => Ok(s),
3328        Err(_) => Err(XlsxError::Unexpected("fail to convert cell name")),
3329    }
3330}
3331
3332/// Convert the integer to Excelsheet column title.
3333/// If the column number not in 1~16384, an Error is returned.
3334pub(crate) fn column_number_to_name(num: u32, buf: &mut Vec<u8>) -> Result<(), XlsxError> {
3335    if num >= MAX_COLUMNS {
3336        return Err(XlsxError::ColumnNumberOverflow);
3337    }
3338    let start = buf.len();
3339    let mut num = num + 1;
3340    while num > 0 {
3341        let integer = ((num - 1) % 26 + 65) as u8;
3342        buf.push(integer);
3343        num = (num - 1) / 26;
3344    }
3345    buf[start..].reverse();
3346    Ok(())
3347}
3348
3349// Convert an Excel Open Packaging "Part" path like "xl/sharedStrings.xml" to
3350// the equivalent path/filename in the zip file. The file name in the zip file
3351// may be a case-insensitive version of the target path and may use backslashes.
3352pub(crate) fn path_to_zip_path<RS: Read + Seek>(zip: &ZipArchive<RS>, path: &str) -> String {
3353    for zip_path in zip.file_names() {
3354        let normalized_path = zip_path.replace('\\', "/");
3355
3356        if path.eq_ignore_ascii_case(&normalized_path) {
3357            return zip_path.to_string();
3358        }
3359    }
3360
3361    path.to_string()
3362}
3363
3364// Data type of the record's value.
3365enum Tag {
3366    // String
3367    S,
3368    // Number (Float or Int)
3369    N,
3370    // Missing
3371    M,
3372    // Error
3373    E,
3374    // Bool
3375    B,
3376    // Date
3377    D,
3378}
3379
3380type Value = Option<Box<[u8]>>;
3381
3382/// Check if tag is an item within a PivotCache Record, which does not require a Definitions lookup.
3383fn item_tag(e: &BytesStart) -> Option<Tag> {
3384    match e.local_name().as_ref() {
3385        b"s" => Some(Tag::S),
3386        b"n" => Some(Tag::N),
3387        b"m" => Some(Tag::M),
3388        b"e" => Some(Tag::E),
3389        b"b" => Some(Tag::B),
3390        b"d" => Some(Tag::D),
3391        _ => None,
3392    }
3393}
3394fn item_value(e: &BytesStart) -> Result<Value, AttrError> {
3395    for a in e.attributes() {
3396        if let Attribute {
3397            key: QName(b"v"),
3398            value,
3399        } = a?
3400        {
3401            return Ok(Some(Box::from(value)));
3402        }
3403    }
3404    Ok(None)
3405}
3406
3407// Get the target location of the pivot table's pivot cache definitions.
3408fn find_pivot_cache_definitions_from_pivot<RS>(
3409    zip: &mut zip::ZipArchive<RS>,
3410    path: &str,
3411) -> Result<String, XlsxError>
3412where
3413    RS: Read + Seek,
3414{
3415    let (base_folder, file_name) = path.rsplit_once('/').expect("should be in a folder");
3416    let rel_path = format!("{base_folder}/_rels/{file_name}.rels");
3417    let mut xml = match xml_reader(zip, &rel_path) {
3418        None => return Err(XlsxError::FileNotFound(rel_path.to_owned())),
3419        Some(x) => x?,
3420    };
3421    let mut definitions_path = None;
3422    let mut buf = Vec::with_capacity(64);
3423    loop {
3424        buf.clear();
3425        match xml.read_event_into(&mut buf) {
3426            Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => {
3427                let mut target = String::new();
3428                let mut is_pivot_cache_definitions_type = false;
3429                for a in e.attributes() {
3430                    match a? {
3431                            Attribute {
3432                                key: QName(b"Target"),
3433                                value: v,
3434                            } => target = xml.decoder().decode(&v)?.into_owned(),
3435                            Attribute {
3436                                key: QName(b"Type"),
3437                                value: v,
3438                            } => is_pivot_cache_definitions_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition"[..],
3439                            _ => (),
3440                        }
3441                }
3442                match (is_pivot_cache_definitions_type, definitions_path.is_some()) {
3443                    (true, false) => {
3444                        if let Some(target) = target.strip_prefix("../") {
3445                            // this is an incomplete implementation, but should be good enough for excel
3446                            let (parent, _) = base_folder
3447                                .rsplit_once('/')
3448                                .expect("Must be a parent folder");
3449                            definitions_path.replace(format!("{parent}/{target}"));
3450                        } else if !target.is_empty() {
3451                            definitions_path.replace(target);
3452                        }
3453                    }
3454                    (true, true) => return Err(XlsxError::Unexpected(
3455                        "multiple pivot cache definition relationships found for one pivot table",
3456                    )),
3457                    _ => {}
3458                }
3459            }
3460            Ok(Event::End(e)) if e.local_name().as_ref() == b"Relationships" => break,
3461            Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
3462            Err(e) => return Err(XlsxError::Xml(e)),
3463            _ => (),
3464        }
3465    }
3466    match definitions_path {
3467        Some(path) => Ok(path),
3468        None => Err(XlsxError::Unexpected(
3469            "no pivot cache definition found for pivot table",
3470        )),
3471    }
3472}
3473
3474// Get the target location of the pivot cache record file.
3475fn find_pivot_cache_records_from_definitions<RS>(
3476    zip: &mut zip::ZipArchive<RS>,
3477    path: &str,
3478) -> Result<String, XlsxError>
3479where
3480    RS: Read + Seek,
3481{
3482    let (base_folder, file_name) = path.rsplit_once('/').expect("should be in a folder");
3483    let rel_path = format!("{base_folder}/_rels/{file_name}.rels");
3484    let mut xml = match xml_reader(zip, rel_path.as_ref()) {
3485        None => return Err(XlsxError::FileNotFound(rel_path.to_owned())),
3486        Some(x) => x?,
3487    };
3488    let mut record_path = None;
3489    let mut buf = Vec::with_capacity(64);
3490    loop {
3491        buf.clear();
3492        match xml.read_event_into(&mut buf) {
3493            Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => {
3494                let mut target = String::new();
3495                let mut is_pivot_cache_record_type = false;
3496                for a in e.attributes() {
3497                    match a? {
3498                            Attribute {
3499                                key: QName(b"Target"),
3500                                value: v,
3501                            } => target = xml.decoder().decode(&v)?.into_owned(),
3502                            Attribute {
3503                                key: QName(b"Type"),
3504                                value: v,
3505                            } => is_pivot_cache_record_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheRecords"[..],
3506                            _ => (),
3507                        }
3508                }
3509                match (is_pivot_cache_record_type, record_path.is_some()) {
3510                    (true, false) => {
3511                        if target.starts_with("xl/pivotCache") {
3512                            record_path.replace(target);
3513                        } else if !target.is_empty() {
3514                            record_path.replace(format!("xl/pivotCache/{target}"));
3515                        }
3516                    }
3517                    (true, true) => {
3518                        return Err(XlsxError::Unexpected(
3519                            "multiple pivot cache record relationships found for one pivot table",
3520                        ))
3521                    }
3522                    _ => {}
3523                }
3524            }
3525            Ok(Event::End(e)) if e.local_name().as_ref() == b"Relationships" => break,
3526            Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
3527            Err(e) => return Err(XlsxError::Xml(e)),
3528            _ => (),
3529        }
3530    }
3531    match record_path {
3532        Some(path) => Ok(path),
3533        None => Err(XlsxError::Unexpected(
3534            "no pivot cache records found for pivot table",
3535        )),
3536    }
3537}
3538
3539// Return a vec of pivot table paths (ie xl/pivotTables/pivot1.xml) for a given sheet name.
3540fn find_pivot_table_paths_from_sheet<RS>(
3541    zip: &mut zip::ZipArchive<RS>,
3542    sheet_path: &str,
3543) -> Result<Vec<String>, XlsxError>
3544where
3545    RS: Read + Seek,
3546{
3547    let mut pivots_on_sheet = vec![];
3548    let mut buf = Vec::with_capacity(64);
3549
3550    let last_folder_index = sheet_path.rfind('/').expect("should be in a folder");
3551    let (base_folder, file_name) = sheet_path.split_at(last_folder_index);
3552    let rel_path = format!("{base_folder}/_rels{file_name}.rels");
3553
3554    let mut xml = match xml_reader(zip, &rel_path) {
3555        // Some sheets may not have relationships - okay for path to not exist.
3556        None => return Ok(vec![]),
3557        Some(x) => x?,
3558    };
3559    loop {
3560        buf.clear();
3561        match xml.read_event_into(&mut buf) {
3562            Ok(Event::Start(e)) if e.local_name().as_ref() == b"Relationship" => {
3563                let mut target = String::new();
3564                let mut is_pivot_table_type = false;
3565                for a in e.attributes() {
3566                    match a? {
3567                            Attribute {
3568                                key: QName(b"Target"),
3569                                value: v,
3570                            } => target = xml.decoder().decode(&v)?.into_owned(),
3571                            Attribute {
3572                                key: QName(b"Type"),
3573                                value: v,
3574                            } => is_pivot_table_type = *v == b"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"[..],
3575                            _ => (),
3576                        }
3577                }
3578                if is_pivot_table_type {
3579                    if let Some(target) = target.strip_prefix("../") {
3580                        // this is an incomplete implementation, but should be good enough for excel
3581                        let (parent, _) = base_folder
3582                            .rsplit_once('/')
3583                            .expect("Must be a parent folder");
3584                        pivots_on_sheet.push(format!("{parent}/{target}"));
3585                    } else if !target.is_empty() {
3586                        pivots_on_sheet.push(target);
3587                    }
3588                }
3589            }
3590            Ok(Event::End(e)) if e.local_name().as_ref() == b"Relationships" => break,
3591            Ok(Event::Eof) => return Err(XlsxError::XmlEof("Relationships")),
3592            Err(e) => return Err(XlsxError::Xml(e)),
3593            _ => (),
3594        }
3595    }
3596
3597    Ok(pivots_on_sheet)
3598}
3599
3600// Takes a pivot table path (ie xl/pivotTables/pivot1.xml) and returns the name.
3601fn find_pivot_name_from_pivot_path<RS>(
3602    zip: &mut zip::ZipArchive<RS>,
3603    pivot_path: &str,
3604) -> Result<String, XlsxError>
3605where
3606    RS: Read + Seek,
3607{
3608    let mut xml = match xml_reader(zip, pivot_path) {
3609        None => return Err(XlsxError::FileNotFound(pivot_path.to_string())),
3610        Some(x) => x?,
3611    };
3612    let mut buf = Vec::with_capacity(64);
3613    let mut name = None;
3614    loop {
3615        buf.clear();
3616        match xml.read_event_into(&mut buf) {
3617            Ok(Event::Start(e)) if e.local_name().as_ref() == b"pivotTableDefinition" => {
3618                for a in e.attributes() {
3619                    if let Attribute {
3620                        key: QName(b"name"),
3621                        value: v,
3622                    } = a?
3623                    {
3624                        if name.is_some() {
3625                            return Err(XlsxError::Unexpected(
3626                                "multiple name entries for one pivot table path",
3627                            ));
3628                        } else {
3629                            name.replace(xml.decoder().decode(&v)?.into_owned());
3630                        }
3631                    }
3632                }
3633            }
3634            Ok(Event::End(e)) if e.local_name().as_ref() == b"pivotTableDefinition" => break,
3635            Ok(Event::Eof) => return Err(XlsxError::XmlEof("pivotTableDefinition")),
3636            Err(e) => return Err(XlsxError::Xml(e)),
3637            _ => (),
3638        }
3639    }
3640    match name {
3641        Some(name) => Ok(name),
3642        None => Err(XlsxError::Unexpected("no name for pivot table")),
3643    }
3644}
3645
3646/// Parse an item within a PivotCache Record into its appropriate [`Data`] type.
3647fn parse_item(item: &(Tag, Value), decoder: &Decoder) -> Data {
3648    let Some(val) = item.1.as_deref() else {
3649        return Data::Empty;
3650    };
3651    match item.0 {
3652        Tag::M => Data::Empty,
3653        Tag::S => {
3654            if let Ok(val) = decoder.decode(val.as_ref()) {
3655                Data::String(val.to_string())
3656            } else {
3657                Data::Error(CellErrorType::GettingData)
3658            }
3659        }
3660        Tag::N => {
3661            if val.contains(&b'.') {
3662                match bytes_to_f64(val, decoder) {
3663                    Some(val) => Data::Float(val),
3664                    None => Data::Error(CellErrorType::GettingData),
3665                }
3666            } else {
3667                match bytes_to_i64(val, decoder) {
3668                    Some(val) => Data::Int(val),
3669                    None => Data::Error(CellErrorType::GettingData),
3670                }
3671            }
3672        }
3673        Tag::D => {
3674            if let Ok(val) = decoder.decode(val) {
3675                Data::DateTimeIso(val.into())
3676            } else {
3677                Data::Error(CellErrorType::GettingData)
3678            }
3679        }
3680        Tag::B => {
3681            {
3682                // boolean tags only support W3C XML Schema
3683                match val {
3684                    b"0" | b"false" => Data::Bool(false),
3685                    b"1" | b"true" => Data::Bool(true),
3686                    _ => Data::Error(CellErrorType::GettingData),
3687                }
3688            }
3689        }
3690        Tag::E => Data::Error(CellErrorType::Ref),
3691    }
3692}
3693
3694// Parse failures are handled with None and left to `Self::parse_item` to address.
3695fn bytes_to_i64(val: &[u8], decoder: &Decoder) -> Option<i64> {
3696    if let Ok(val) = decoder.decode(val) {
3697        atoi_simd::parse::<i64>(val.as_bytes()).ok()
3698    } else {
3699        None
3700    }
3701}
3702
3703// Parse failures are handled with None and left to `parse_item` to address.
3704fn bytes_to_f64(val: &[u8], decoder: &Decoder) -> Option<f64> {
3705    if let Ok(val) = decoder.decode(val) {
3706        fast_float2::parse(val.as_bytes()).ok()
3707    } else {
3708        None
3709    }
3710}
3711
3712#[derive(Default)]
3713pub struct PivotTables(Vec<PivotTableRef>);
3714
3715impl PivotTables {
3716    fn new() -> Self {
3717        Self(vec![])
3718    }
3719
3720    fn push(&mut self, pivot_table: PivotTableRef) {
3721        self.0.push(pivot_table);
3722    }
3723
3724    /// Helper function to identify pivot tables by name and worksheet.
3725    ///
3726    /// # Returns
3727    ///
3728    /// ```text
3729    /// Vec<(&str, &str)>
3730    ///        │     │
3731    ///        │     └─── Pivot Table name
3732    ///        │
3733    ///        └──── Worksheet name
3734    /// ```
3735    ///
3736    /// # Note
3737    ///
3738    /// Pivot table names are unique per worksheet, not per workbook.
3739    ///
3740    /// # Examples
3741    ///
3742    /// An example of retrieving pivot cache data for a Pivot Table named "PivotTable1"
3743    /// on worksheet "PivotSheet1".
3744    ///
3745    /// ```
3746    /// use calamine::{open_workbook, Error, Xlsx};
3747    ///
3748    /// fn main() -> Result<(), Error> {
3749    ///
3750    ///     // Open the workbook.
3751    ///     let mut workbook: Xlsx<_> = open_workbook("tests/pivots.xlsx")?;
3752    ///     // Must retrieve necessary metadata before reading Pivot Table data.
3753    ///     let pivot_tables = workbook.pivot_tables()?;
3754    ///
3755    ///     // "PivotTable1" is found on both sheets: "PivotSheet1" & "PivotSheet3" so
3756    ///     // we must include the sheet name in our filter ~ see note on uniqueness.
3757    ///     let names_and_sheets = pivot_tables.get_pivot_tables_by_name_and_sheet()
3758    ///         .into_iter()
3759    ///         .filter_map(|pt| {
3760    ///             if pt.0.eq("PivotSheet1") && pt.1.eq("PivotTable1") {
3761    ///                 Some(pt)
3762    ///             } else {
3763    ///                 None
3764    ///             }
3765    ///         })
3766    ///         .collect::<Vec<_>>();
3767    ///
3768    ///     assert_eq!(names_and_sheets.len(), 1);
3769    ///
3770    ///     Ok(())
3771    ///
3772    /// }
3773    /// ```
3774    ///
3775    pub fn get_pivot_tables_by_name_and_sheet(&self) -> Vec<(&str, &str)> {
3776        self.0.iter().map(|pt| (pt.sheet(), pt.name())).collect()
3777    }
3778
3779    /// Get the names of all pivot tables for a given worksheet.
3780    ///
3781    /// Worksheets that do not contain any pivot tables will return None. Worksheet names
3782    ///
3783    /// # Examples
3784    ///
3785    /// An example of getting all pivot tables for a provided sheet.
3786    ///
3787    /// ```
3788    /// use calamine::{open_workbook, Error, Xlsx};
3789    ///
3790    /// fn main() -> Result<(), Error> {
3791    ///
3792    ///     let path = "tests/pivots.xlsx";
3793    ///
3794    ///     // Open the workbook.
3795    ///     let mut workbook: Xlsx<_> = open_workbook(path)?;
3796    ///
3797    ///     // Must retrieve necessary metadata before reading Pivot Table data.
3798    ///     let pivot_tables = workbook.pivot_tables()?;
3799    ///
3800    ///     // Get the pivot table names in the workbook for a given sheet.
3801    ///     let pivot_table_names = pivot_tables.pivot_tables_by_sheet("PivotSheet1");
3802    ///
3803    ///     // Check the pivot table names (ordering not guaranteed).
3804    ///     assert_eq!(pivot_table_names, vec!["PivotTable1"]);
3805    ///
3806    ///     Ok(())
3807    /// }
3808    /// ```
3809    ///
3810    pub fn pivot_tables_by_sheet(&self, sheet_name: &str) -> Vec<&str> {
3811        self.0
3812            .iter()
3813            .filter_map(|val| {
3814                if val.sheet() == sheet_name {
3815                    Some(val.name())
3816                } else {
3817                    None
3818                }
3819            })
3820            .collect::<Vec<&str>>()
3821    }
3822}
3823
3824struct PivotTableRef {
3825    name: String,
3826    sheet: String,
3827    records: String,
3828    definitions: String,
3829}
3830
3831impl PivotTableRef {
3832    fn new(name: String, sheet: String, records: String, definitions: String) -> Self {
3833        Self {
3834            name,
3835            sheet,
3836            records,
3837            definitions,
3838        }
3839    }
3840    fn name(&self) -> &str {
3841        self.name.as_ref()
3842    }
3843    fn sheet(&self) -> &str {
3844        self.sheet.as_ref()
3845    }
3846    fn records(&self) -> &str {
3847        self.records.as_ref()
3848    }
3849    fn definitions(&self) -> &str {
3850        self.definitions.as_ref()
3851    }
3852}
3853
3854pub struct PivotCacheIter<'a, RS: Read + Seek + 'a> {
3855    definitions: HashMap<String, Vec<(Tag, Value)>>,
3856    field_names: Vec<String>,
3857    reader: XlReader<'a, RS>,
3858}
3859fn get_pivot_cache_iter<'a, RS: Read + Seek + 'a>(
3860    xl: &'a mut crate::Xlsx<RS>,
3861    pivot_table: &PivotTableRef,
3862) -> Result<PivotCacheIter<'a, RS>, XlsxError> {
3863    let definitions = pivot_table.definitions();
3864    let records = pivot_table.records();
3865
3866    let mut fields: Vec<Vec<(Tag, Value)>> = vec![];
3867    let mut definition_map = std::collections::HashMap::new();
3868    let mut field_names = vec![];
3869
3870    // Converting into an iterator requires first reading a pivotCacheDefinitions.xml file
3871    // to get lookup values used in pivotCacheRecords.xml file.
3872    {
3873        let mut xml = match xml_reader(&mut xl.zip, definitions) {
3874            None => {
3875                return Err(XlsxError::FileNotFound(format!(
3876                    "File not found: {}",
3877                    definitions
3878                )))
3879            }
3880            Some(x) => x?,
3881        };
3882
3883        let mut buf = Vec::with_capacity(64);
3884        // building list of field names and definitions from some pivotCacheDefinitions.xml file
3885        loop {
3886            buf.clear();
3887
3888            match xml.read_event_into(&mut buf) {
3889                Ok(Event::Start(e)) if e.local_name().as_ref() == b"cacheField" => {
3890                    for a in e.attributes() {
3891                        match a? {
3892                            Attribute {
3893                                key: QName(b"name"),
3894                                value,
3895                            } => {
3896                                field_names.push(xml.decoder().decode(value.as_ref())?.to_string());
3897                                fields.push(vec![]);
3898                            }
3899                            Attribute {
3900                                key: QName(b"formula"),
3901                                value: _value,
3902                            } => {
3903                                field_names.pop();
3904                                fields.pop();
3905                            }
3906                            _ => {
3907                                // do nothing
3908                            }
3909                        }
3910                    }
3911                }
3912                // Exclude grouped fields from results.
3913                // This does not represent the underlying data and should be removed.
3914                Ok(Event::Start(e)) if e.local_name().as_ref() == b"groupItems" => {
3915                    field_names.pop();
3916                    fields.pop();
3917                }
3918                Ok(Event::Eof) => break,
3919                Err(e) => {
3920                    panic!("{e}")
3921                }
3922                Ok(Event::Start(e)) => {
3923                    if let Some(tag) = item_tag(&e) {
3924                        if let Some(field) = fields.last_mut() {
3925                            field.push((tag, item_value(&e)?));
3926                        }
3927                    }
3928                }
3929                Ok(_) => {}
3930            }
3931        }
3932
3933        // add the definitions to the definition map with a key on field name
3934        for (field, name) in fields.into_iter().zip(field_names.iter()) {
3935            definition_map.insert(name.to_string(), field);
3936        }
3937    }
3938
3939    xml_reader(&mut xl.zip, records).map_or_else(
3940        || {
3941            Err(XlsxError::FileNotFound(format!(
3942                "File not found: {records}"
3943            )))
3944        },
3945        |record_reader| {
3946            Ok(PivotCacheIter::new(
3947                definition_map,
3948                field_names,
3949                record_reader?,
3950            ))
3951        },
3952    )
3953}
3954
3955impl<'a, RS: Read + Seek + 'a> PivotCacheIter<'a, RS> {
3956    fn new(
3957        definitions: HashMap<String, Vec<(Tag, Value)>>,
3958        field_names: Vec<String>,
3959        reader: XlReader<'a, RS>,
3960    ) -> Self {
3961        Self {
3962            definitions,
3963            field_names,
3964            reader,
3965        }
3966    }
3967}
3968
3969// Iterates over <r>, the tag for a row, found in the PivotCacheRecords.xml file.
3970// PivotCacheIter must also hold some lookup values / metadata to support the content within <r>.
3971//
3972// https://learn.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.pivotcacherecord?view=openxml-3.0.1
3973impl<'a, RS: Read + Seek + 'a> Iterator for PivotCacheIter<'a, RS> {
3974    type Item = Result<Vec<Data>, XlsxError>;
3975
3976    fn next(&mut self) -> Option<Self::Item> {
3977        let mut row = vec![];
3978        let mut col_number = 0;
3979        let mut buf = Vec::with_capacity(64);
3980        loop {
3981            buf.clear();
3982            match self.reader.read_event_into(&mut buf) {
3983                Ok(Event::Start(e)) if e.local_name().as_ref() == b"x" => {
3984                    for a in e.attributes() {
3985                        if let Ok(Attribute {
3986                            key: QName(b"v"),
3987                            value,
3988                        }) = a
3989                        {
3990                            let value_position = match self.reader.decoder().decode(value.as_ref())
3991                            {
3992                                Ok(val) => match val.parse::<usize>() {
3993                                    Ok(val) => val,
3994                                    Err(e) => {
3995                                        return Some(Err(XlsxError::ParseInt(e)));
3996                                    }
3997                                },
3998                                Err(e) => return Some(Err(XlsxError::Encoding(e))),
3999                            };
4000
4001                            let column_name = &self.field_names[col_number];
4002                            row.push(parse_item(
4003                                &self.definitions[column_name][value_position],
4004                                &self.reader.decoder(),
4005                            ));
4006                            break;
4007                        }
4008                    }
4009
4010                    col_number += 1;
4011                }
4012                Ok(Event::End(e)) if e.local_name().as_ref() == b"r" => return Some(Ok(row)),
4013                Ok(Event::Start(e)) if e.local_name().as_ref() == b"pivotCacheRecords" => {
4014                    return Some(Ok(self
4015                        .field_names
4016                        .iter()
4017                        .map(|fields| Data::String(fields.to_string()))
4018                        .collect()))
4019                }
4020                Ok(Event::Eof) => return None,
4021                Err(e) => return Some(Err(XlsxError::Xml(e))),
4022                Ok(Event::Start(e)) => {
4023                    if let Some(tag) = item_tag(&e) {
4024                        if let Ok(value) = item_value(&e) {
4025                            row.push(parse_item(&(tag, value), &self.reader.decoder()));
4026                            col_number += 1;
4027                        }
4028                    }
4029                }
4030                Ok(_) => {}
4031            }
4032        }
4033    }
4034}
4035
4036// -----------------------------------------------------------------------
4037// Unit tests for Xlsx.
4038// -----------------------------------------------------------------------
4039#[cfg(test)]
4040mod tests {
4041    use super::*;
4042    use std::io::Write;
4043    use zip::write::SimpleFileOptions;
4044    use zip::ZipWriter;
4045
4046    #[test]
4047    fn test_dimensions() {
4048        assert_eq!(get_row_column(b"A1").unwrap(), (0, 0));
4049        assert_eq!(get_row_column(b"C107").unwrap(), (106, 2));
4050        assert_eq!(
4051            get_dimension(b"C2:D35").unwrap(),
4052            Dimensions {
4053                start: (1, 2),
4054                end: (34, 3)
4055            }
4056        );
4057        assert_eq!(
4058            get_dimension(b"A1:XFD1048576").unwrap(),
4059            Dimensions {
4060                start: (0, 0),
4061                end: (1_048_575, 16_383),
4062            }
4063        );
4064    }
4065
4066    #[test]
4067    fn test_dimension_length() {
4068        assert_eq!(get_dimension(b"A1:Z99").unwrap().len(), 2_574);
4069        assert_eq!(
4070            get_dimension(b"A1:XFD1048576").unwrap().len(),
4071            17_179_869_184
4072        );
4073    }
4074
4075    #[test]
4076    fn test_parse_error() {
4077        assert_eq!(
4078            CellErrorType::from_str("#DIV/0!").unwrap(),
4079            CellErrorType::Div0
4080        );
4081        assert_eq!(CellErrorType::from_str("#N/A").unwrap(), CellErrorType::NA);
4082        assert_eq!(
4083            CellErrorType::from_str("#NAME?").unwrap(),
4084            CellErrorType::Name
4085        );
4086        assert_eq!(
4087            CellErrorType::from_str("#NULL!").unwrap(),
4088            CellErrorType::Null
4089        );
4090        assert_eq!(
4091            CellErrorType::from_str("#NUM!").unwrap(),
4092            CellErrorType::Num
4093        );
4094        assert_eq!(
4095            CellErrorType::from_str("#REF!").unwrap(),
4096            CellErrorType::Ref
4097        );
4098        assert_eq!(
4099            CellErrorType::from_str("#VALUE!").unwrap(),
4100            CellErrorType::Value
4101        );
4102    }
4103
4104    #[test]
4105    fn test_column_number_to_name() {
4106        let check = |num, expected: &[u8]| {
4107            let mut buf = Vec::new();
4108            column_number_to_name(num, &mut buf).unwrap();
4109            assert_eq!(buf, expected);
4110        };
4111
4112        check(0, b"A");
4113        check(25, b"Z");
4114        check(26, b"AA");
4115        check(27, b"AB");
4116        check(MAX_COLUMNS - 1, b"XFD");
4117    }
4118
4119    #[test]
4120    fn test_parse_reference() {
4121        let check_cell =
4122            |input: &[u8], row, col, abs_row, abs_col| match Reference::parse(input).unwrap() {
4123                Reference::Cell {
4124                    row: r,
4125                    col: c,
4126                    absolute_row: ar,
4127                    absolute_col: ac,
4128                } => {
4129                    assert_eq!((r, c, ar, ac), (row, col, abs_row, abs_col));
4130                }
4131                _ => panic!("Expected Cell reference"),
4132            };
4133
4134        let check_column = |input: &[u8], col, abs| match Reference::parse(input).unwrap() {
4135            Reference::Column {
4136                col: c,
4137                absolute: a,
4138            } => {
4139                assert_eq!((c, a), (col, abs));
4140            }
4141            _ => panic!("Expected Column reference"),
4142        };
4143
4144        let check_row = |input: &[u8], row, abs| match Reference::parse(input).unwrap() {
4145            Reference::Row {
4146                row: r,
4147                absolute: a,
4148            } => {
4149                assert_eq!((r, a), (row, abs));
4150            }
4151            _ => panic!("Expected Row reference"),
4152        };
4153
4154        // Cell references
4155        check_cell(b"A1", 0, 0, false, false);
4156        check_cell(b"$A1", 0, 0, false, true);
4157        check_cell(b"A$1", 0, 0, true, false);
4158        check_cell(b"$A$1", 0, 0, true, true);
4159        check_cell(b"XFD1048576", MAX_ROWS - 1, MAX_COLUMNS - 1, false, false);
4160
4161        // Column references
4162        check_column(b"A", 0, false);
4163        check_column(b"$A", 0, true);
4164        check_column(b"XFD", MAX_COLUMNS - 1, false);
4165
4166        // Row references
4167        check_row(b"1", 0, false);
4168        check_row(b"$1", 0, true);
4169        check_row(b"1048576", MAX_ROWS - 1, false);
4170    }
4171
4172    #[test]
4173    fn test_format_reference() {
4174        let check_cell = |row, col, abs_row, abs_col, expected: &[u8]| {
4175            let mut buf = Vec::new();
4176            Reference::Cell {
4177                row,
4178                col,
4179                absolute_row: abs_row,
4180                absolute_col: abs_col,
4181            }
4182            .format(&mut buf)
4183            .unwrap();
4184            assert_eq!(buf, expected);
4185        };
4186
4187        let check_column = |col, absolute, expected: &[u8]| {
4188            let mut buf = Vec::new();
4189            Reference::Column { col, absolute }
4190                .format(&mut buf)
4191                .unwrap();
4192            assert_eq!(buf, expected);
4193        };
4194
4195        let check_row = |row, absolute, expected: &[u8]| {
4196            let mut buf = Vec::new();
4197            Reference::Row { row, absolute }.format(&mut buf).unwrap();
4198            assert_eq!(buf, expected);
4199        };
4200
4201        // Cell references
4202        check_cell(0, 0, false, false, b"A1");
4203        check_cell(0, 0, false, true, b"$A1");
4204        check_cell(0, 0, true, false, b"A$1");
4205        check_cell(0, 0, true, true, b"$A$1");
4206        check_cell(MAX_ROWS - 1, MAX_COLUMNS - 1, false, false, b"XFD1048576");
4207
4208        // Column references
4209        check_column(0, false, b"A");
4210        check_column(0, true, b"$A");
4211        check_column(MAX_COLUMNS - 1, false, b"XFD");
4212
4213        // Row references
4214        check_row(0, false, b"1");
4215        check_row(0, true, b"$1");
4216        check_row(MAX_ROWS - 1, false, b"1048576");
4217    }
4218
4219    #[test]
4220    fn test_format_reference_overflow() {
4221        let check_err = |reference: Reference, offset| {
4222            let result = reference.offset(offset);
4223            assert!(
4224                matches!(
4225                    result,
4226                    Err(XlsxError::ColumnNumberOverflow) | Err(XlsxError::RowNumberOverflow)
4227                ),
4228                "expected overflow error, got {:?}",
4229                result
4230            );
4231        };
4232
4233        // Cell reference offset pushes column out of bounds
4234        check_err(
4235            Reference::Cell {
4236                row: 0,
4237                col: MAX_COLUMNS - 1,
4238                absolute_row: false,
4239                absolute_col: false,
4240            },
4241            (0, 1),
4242        );
4243
4244        // Cell reference offset pushes row out of bounds
4245        check_err(
4246            Reference::Cell {
4247                row: MAX_ROWS - 1,
4248                col: 0,
4249                absolute_row: false,
4250                absolute_col: false,
4251            },
4252            (1, 0),
4253        );
4254
4255        // Column reference offset pushes out of bounds
4256        check_err(
4257            Reference::Column {
4258                col: MAX_COLUMNS - 1,
4259                absolute: false,
4260            },
4261            (0, 1),
4262        );
4263
4264        // Row reference offset pushes out of bounds
4265        check_err(
4266            Reference::Row {
4267                row: MAX_ROWS - 1,
4268                absolute: false,
4269            },
4270            (1, 0),
4271        );
4272    }
4273
4274    #[test]
4275    fn test_offset_range() {
4276        let check = |input: &[u8], offset, expected: &[u8]| {
4277            let mut buf = Vec::new();
4278            offset_range(input, offset, &mut buf).unwrap();
4279            assert_eq!(buf, expected);
4280        };
4281
4282        let check_err = |input: &[u8], offset| {
4283            let mut buf = Vec::new();
4284            let res = offset_range(input, offset, &mut buf);
4285            assert!(res.is_err());
4286            assert_eq!(buf.len(), 0)
4287        };
4288
4289        // Cell references
4290        check(b"A1", (1, 1), b"B2");
4291        check(b"$A1", (1, 1), b"$A2");
4292        check(b"A$1", (1, 1), b"B$1");
4293        check(b"$A$1", (1, 1), b"$A$1");
4294
4295        // Column references
4296        check_err(b"E", (0, 1));
4297        check_err(b"$E", (0, 1));
4298
4299        // Row references
4300        check_err(b"5", (1, 0));
4301        check_err(b"$5", (1, 0));
4302
4303        // Cell ranges
4304        check(b"A1:B2", (1, 1), b"B2:C3");
4305        check(b"$A$1:$B$2", (1, 1), b"$A$1:$B$2");
4306
4307        // Column ranges
4308        check(b"E:F", (0, 1), b"F:G");
4309        check(b"$E:$F", (0, 1), b"$E:$F");
4310        check(b"E:F", (1, 0), b"E:F");
4311
4312        // Row ranges
4313        check(b"5:6", (1, 0), b"6:7");
4314        check(b"$5:$6", (1, 0), b"$5:$6");
4315        check(b"5:6", (0, 1), b"5:6");
4316    }
4317
4318    #[test]
4319    fn test_parse_reference_overflow() {
4320        let check_col_err = |input: &[u8]| {
4321            assert!(matches!(
4322                Reference::parse(input),
4323                Err(XlsxError::ColumnNumberOverflow)
4324            ));
4325        };
4326        let check_row_err = |input: &[u8]| {
4327            assert!(matches!(
4328                Reference::parse(input),
4329                Err(XlsxError::RowNumberOverflow)
4330            ));
4331        };
4332        let check_syntax_err = |input: &[u8]| {
4333            assert!(matches!(
4334                Reference::parse(input),
4335                Err(XlsxError::Alphanumeric(_))
4336            ));
4337        };
4338
4339        // Invalid syntax
4340        check_syntax_err(b"A$A1");
4341        check_syntax_err(b"A1$2");
4342        check_syntax_err(b"$$A1");
4343        check_syntax_err(b"$A$$1");
4344        check_syntax_err(b"A$$1");
4345        check_syntax_err(b"1A");
4346        check_syntax_err(b"1A1");
4347        check_syntax_err(b"A1B2");
4348
4349        // Cell references
4350        check_col_err(b"XFE1");
4351        check_col_err(b"AAAA1");
4352        check_row_err(b"A1048577");
4353        check_row_err(b"A99999999999999999999");
4354        check_col_err(b"$XFE$1");
4355
4356        // Column references
4357        check_col_err(b"XFE");
4358        check_col_err(b"$XFE");
4359
4360        // Row references
4361        check_row_err(b"1048577");
4362        check_row_err(b"$1048577");
4363    }
4364
4365    #[test]
4366    fn test_offset_range_overflow() {
4367        let check_col_err = |input: &[u8], offset| {
4368            let mut buf = Vec::new();
4369            assert!(matches!(
4370                offset_range(input, offset, &mut buf),
4371                Err(XlsxError::ColumnNumberOverflow)
4372            ));
4373        };
4374        let check_row_err = |input: &[u8], offset| {
4375            let mut buf = Vec::new();
4376            assert!(matches!(
4377                offset_range(input, offset, &mut buf),
4378                Err(XlsxError::RowNumberOverflow)
4379            ));
4380        };
4381
4382        // Original reference is out of bounds
4383        check_col_err(b"XFE1", (0, 0));
4384        check_col_err(b"$XFE$1", (0, 0));
4385        check_row_err(b"A1048577", (0, 0));
4386        check_row_err(b"$A$1048577", (0, 0));
4387        check_col_err(b"XFE:XFE", (0, 0));
4388        check_row_err(b"1048577:1048577", (0, 0));
4389
4390        // Offset pushes valid cell out of bounds
4391        check_col_err(b"XFD1", (0, 1));
4392        check_row_err(b"A1048576", (1, 0));
4393        check_row_err(b"XFD1048576", (1, 0));
4394        check_col_err(b"XFD1048576", (0, 1));
4395
4396        // Offset pushes valid range out of bounds
4397        check_col_err(b"XFD:XFD", (0, 1));
4398        check_row_err(b"1048576:1048576", (1, 0));
4399    }
4400
4401    #[test]
4402    fn test_replace_cell_names() {
4403        assert_eq!(replace_cell_names("A1", (1, 0)).unwrap(), "A2".to_owned());
4404        assert_eq!(
4405            replace_cell_names("CONCATENATE(A1, \"a\")", (1, 0)).unwrap(),
4406            "CONCATENATE(A2, \"a\")".to_owned()
4407        );
4408        assert_eq!(
4409            replace_cell_names(
4410                "A1 is a cell, B1 is another, also C107, but XFE123 is not and \"A3\" in quote wont change.",
4411                (1, 0)
4412            )
4413            .unwrap(),
4414            "A2 is a cell, B2 is another, also C108, but XFE123 is not and \"A3\" in quote wont change.".to_owned()
4415        );
4416        assert_eq!(
4417            replace_cell_names("한글 A1 テスト", (0, 1)).unwrap(),
4418            "한글 B1 テスト".to_owned()
4419        );
4420
4421        assert_eq!(
4422            replace_cell_names("ABC\"asd\"123", (1, 0)).unwrap(),
4423            "ABC\"asd\"123".to_owned()
4424        );
4425
4426        // Column ranges
4427        assert_eq!(
4428            replace_cell_names("SUM(E:F)", (0, 1)).unwrap(),
4429            "SUM(F:G)".to_owned()
4430        );
4431        assert_eq!(
4432            replace_cell_names("SUM($E:$F)", (0, 1)).unwrap(),
4433            "SUM($E:$F)".to_owned()
4434        );
4435        assert_eq!(
4436            replace_cell_names("SUM($E:F)", (0, 1)).unwrap(),
4437            "SUM($E:G)".to_owned()
4438        );
4439
4440        // Row ranges
4441        assert_eq!(
4442            replace_cell_names("SUM(5:6)", (1, 0)).unwrap(),
4443            "SUM(6:7)".to_owned()
4444        );
4445        assert_eq!(
4446            replace_cell_names("SUM($5:$6)", (1, 0)).unwrap(),
4447            "SUM($5:$6)".to_owned()
4448        );
4449        assert_eq!(
4450            replace_cell_names("SUM($5:6)", (1, 0)).unwrap(),
4451            "SUM($5:7)".to_owned()
4452        );
4453
4454        // Mixed with cell references
4455        assert_eq!(
4456            replace_cell_names("SUM(A1:A5,E:F)", (0, 1)).unwrap(),
4457            "SUM(B1:B5,F:G)".to_owned()
4458        );
4459
4460        // Invalid syntax
4461        assert_eq!(
4462            replace_cell_names(
4463                "Valid: A1 Invalid: A1B1 A1$ $$A1 $A$$1 A$$1 A:1 1:A 1 A A1:1 A1:B A$A1 A1$2 $1 $A Valid: C1:D1",
4464                (1, 1)
4465            )
4466            .unwrap(),
4467            "Valid: B2 Invalid: A1B1 A1$ $$A1 $A$$1 A$$1 A:1 1:A 1 A A1:1 A1:B A$A1 A1$2 $1 $A Valid: D2:E2"
4468                .to_owned()
4469        );
4470    }
4471
4472    #[test]
4473    fn test_read_shared_strings_with_namespaced_si_name() {
4474        let shared_strings_data = br#"<?xml version="1.0" encoding="utf-8"?>
4475<x:sst count="1187" uniqueCount="1187" xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4476    <x:si>
4477        <x:t>String 1</x:t>
4478    </x:si>
4479    <x:si>
4480        <x:r>
4481            <x:rPr>
4482                <x:sz val="11"/>
4483            </x:rPr>
4484            <x:t>String 2</x:t>
4485        </x:r>
4486    </x:si>
4487    <x:si>
4488        <x:r>
4489            <x:t>String 3</x:t>
4490        </x:r>
4491    </x:si>
4492</x:sst>"#;
4493
4494        let mut buf = [0; 1000];
4495        let mut zip_writer = ZipWriter::new(std::io::Cursor::new(&mut buf[..]));
4496        let options =
4497            SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
4498        zip_writer
4499            .start_file("xl/sharedStrings.xml", options)
4500            .unwrap();
4501        zip_writer.write_all(shared_strings_data).unwrap();
4502        let zip_size = zip_writer.finish().unwrap().position() as usize;
4503
4504        let zip = ZipArchive::new(std::io::Cursor::new(&buf[..zip_size])).unwrap();
4505
4506        let mut xlsx = Xlsx {
4507            zip,
4508            strings: vec![],
4509            sheets: vec![],
4510            tables: None,
4511            formats: vec![],
4512            styles: Vec::new(),
4513            is_1904: false,
4514            metadata: Metadata::default(),
4515            #[cfg(feature = "picture")]
4516            pictures: None,
4517            merged_regions: None,
4518            options: XlsxOptions::default(),
4519        };
4520
4521        assert!(xlsx.read_shared_strings().is_ok());
4522        assert_eq!(3, xlsx.strings.len());
4523        assert_eq!(Data::String("String 1".to_string()), xlsx.strings[0]);
4524        assert_eq!(Data::String("String 2".to_string()), xlsx.strings[1]);
4525        assert_eq!(Data::String("String 3".to_string()), xlsx.strings[2]);
4526    }
4527}