Skip to main content

excel_rs/
sheet.rs

1use std::io::{Seek, Write};
2
3use crate::error::{ExcelError, Result};
4use zip::{write::SimpleFileOptions, ZipWriter};
5
6const MAX_ROWS_BUFFER: u32 = 100_000;
7
8/// A single worksheet within an XLSX workbook.
9///
10/// Obtained from [`WorkBook::new_worksheet`](crate::WorkBook::new_worksheet).
11/// Write rows with [`write_row`](Sheet::write_row), then call
12/// [`close`](Sheet::close) when finished.
13///
14/// # Example
15///
16/// ```no_run
17/// # use excel_rs::{WorkBook, sheet::CellType};
18/// # use std::io::Cursor;
19/// # let mut wb = WorkBook::new(Cursor::new(vec![]));
20/// let mut sheet = wb.new_worksheet("My Sheet".to_string()).unwrap();
21/// sheet.write_row([b"Col A".as_ref(), b"Col B"].into_iter(), None).unwrap();
22/// sheet.close().unwrap();
23/// ```
24pub struct Sheet<'a, W: Write + Seek> {
25    /// Name of this sheet as it appears in the Excel tab bar.
26    pub name: String,
27    current_row_num: u32,
28    sheet_buf: &'a mut ZipWriter<W>,
29    global_shared_vec: Vec<u8>,
30}
31
32/// The data type of a cell value.
33///
34/// Pass a slice of `CellType` as `type_hints` to [`Sheet::write_row`] to
35/// control how each column is encoded in the XLSX file.  When in doubt, use
36/// [`CellType::String`].
37#[derive(Clone, Debug)]
38pub enum CellType {
39    /// Plain text. Written as `<c t="str">`.
40    String,
41    /// Date/time stored as a number with a date format applied. Written as `<c t="n" s="1">`.
42    Date,
43    /// Boolean. Written as `<c t="b">`.
44    Boolean,
45    /// Numeric (integer or float). Written as `<c t="n">`.
46    Number,
47}
48
49impl CellType {
50    #[inline(always)]
51    fn as_static_bytes(&self) -> &'static [u8] {
52        match self {
53            CellType::String => b"str",
54            CellType::Date => b"n\" s=\"1",
55            CellType::Boolean => b"b",
56            CellType::Number => b"n",
57        }
58    }
59}
60
61fn write_escaped(out: &mut Vec<u8>, bytes: &[u8]) {
62    // just learnt of SIMD instructions and this resulted in ~5% perf boost
63    // i'm assuming that cells needing escapes are relatively rarer than cells containing normal text
64    if memchr::memchr3(b'<', b'>', b'&', bytes).is_none()
65    // && memchr::memchr2(b'&', b'"', bytes).is_none()
66    {
67        out.extend_from_slice(bytes);
68        return;
69    }
70
71    let mut start = 0;
72    for (i, &b) in bytes.iter().enumerate() {
73        let escape: &[u8] = match b {
74            b'<' => b"&lt;",
75            b'>' => b"&gt;",
76            // b'\'' => b"&apos;",
77            b'&' => b"&amp;",
78            // b'"' => b"&quot;",
79            _ => continue,
80        };
81        out.extend_from_slice(&bytes[start..i]);
82        out.extend_from_slice(escape);
83        start = i + 1;
84    }
85    out.extend_from_slice(&bytes[start..]);
86}
87
88impl<'a, W: Write + Seek> Sheet<'a, W> {
89    /// Returns the 1-based index of the last row written (0 before any rows are written).
90    #[inline]
91    pub fn current_row(&self) -> u32 {
92        self.current_row_num
93    }
94
95    pub(crate) fn new(name: String, id: u16, writer: &'a mut ZipWriter<W>) -> Result<Self> {
96        let options = SimpleFileOptions::default()
97            .compression_method(zip::CompressionMethod::Deflated)
98            .compression_level(Some(1))
99            .large_file(true);
100
101        writer.start_file(format!("xl/worksheets/sheet{}.xml", id), options)?;
102        writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"><sheetData>")?;
103
104        Ok(Sheet {
105            sheet_buf: writer,
106            name,
107            current_row_num: 0,
108            global_shared_vec: Vec::with_capacity(64 * 1024 * 1024),
109        })
110    }
111
112    pub fn write_row<'b>(
113        &mut self,
114        cells: impl Iterator<Item = &'b [u8]>,
115        type_hints: Option<&[CellType]>,
116    ) -> Result<()> {
117        self.current_row_num += 1;
118
119        if self.current_row_num > crate::MAX_ROWS {
120            return Err(ExcelError::RowLimitExceeded);
121        }
122
123        self.global_shared_vec.extend_from_slice(b"<row>");
124
125        for (col, cell) in cells.enumerate() {
126            if col >= crate::MAX_COLS {
127                return Err(ExcelError::ColumnLimitExceeded);
128            }
129
130            self.global_shared_vec.extend_from_slice(b"<c t=\"");
131            self.global_shared_vec.extend_from_slice(type_hints.map_or(
132                CellType::String.as_static_bytes(),
133                |x| {
134                    x.get(col)
135                        .map_or(CellType::String.as_static_bytes(), |x| x.as_static_bytes())
136                },
137            ));
138
139            self.global_shared_vec.extend_from_slice(b"\"><v>");
140            write_escaped(&mut self.global_shared_vec, cell);
141            self.global_shared_vec.extend_from_slice(b"</v></c>");
142        }
143
144        self.global_shared_vec.extend_from_slice(b"</row>");
145
146        if self.current_row_num.is_multiple_of(MAX_ROWS_BUFFER) {
147            self.flush()?;
148        }
149
150        Ok(())
151    }
152
153    fn flush(&mut self) -> Result<()> {
154        self.sheet_buf.write_all(&self.global_shared_vec)?;
155        self.global_shared_vec.clear();
156        Ok(())
157    }
158
159    pub fn close(&mut self) -> Result<()> {
160        self.flush()?;
161        self.sheet_buf.write_all(b"</sheetData></worksheet>")?;
162        Ok(())
163    }
164}