Skip to main content

excel_rs/
sheet.rs

1use std::io::{Seek, Write};
2
3use crate::error::{ExcelError, Result};
4use zip::{write::SimpleFileOptions, ZipWriter};
5
6const MAX_ROWS_BUFFER: u32 = 100_000;
7
8/// A single worksheet within an XLSX workbook.
9///
10/// Obtained from [`WorkBook::new_worksheet`](crate::WorkBook::new_worksheet).
11/// Write rows with [`write_row`](Sheet::write_row), then call
12/// [`close`](Sheet::close) when finished.
13///
14/// # Example
15///
16/// ```no_run
17/// # use excel_rs::{WorkBook, sheet::CellType};
18/// # use std::io::Cursor;
19/// # let mut wb = WorkBook::new(Cursor::new(vec![]));
20/// let mut sheet = wb.new_worksheet("My Sheet".to_string()).unwrap();
21/// sheet.write_row([b"Col A".as_ref(), b"Col B"].into_iter(), None).unwrap();
22/// sheet.close().unwrap();
23/// ```
24pub struct Sheet<'a, W: Write + Seek> {
25    /// Name of this sheet as it appears in the Excel tab bar.
26    pub name: String,
27    current_row_num: u32,
28    sheet_buf: &'a mut ZipWriter<W>,
29    global_shared_vec: Vec<u8>,
30}
31
32/// The data type of a cell value.
33///
34/// Pass a slice of `CellType` as `type_hints` to [`Sheet::write_row`] to
35/// control how each column is encoded in the XLSX file.  When in doubt, use
36/// [`CellType::String`].
37#[derive(Clone, Debug)]
38pub enum CellType {
39    /// Plain text. Written as `<c t="str">`.
40    String,
41    /// Date/time stored as a number with a date format applied. Written as `<c t="n" s="1">`.
42    Date,
43    /// Boolean. Written as `<c t="b">`.
44    Boolean,
45    /// Numeric (integer or float). Written as `<c t="n">`.
46    Number,
47}
48
49impl CellType {
50    #[inline(always)]
51    fn as_static_bytes(&self) -> &'static [u8] {
52        match self {
53            CellType::String => b"str",
54            CellType::Date => b"n\" s=\"1",
55            CellType::Boolean => b"b",
56            CellType::Number => b"n",
57        }
58    }
59}
60
61// max col count of 16384 means that the largest column is XFD, which fits in 3 bytes
62static COL_LOOKUP: [[u8; 3]; crate::MAX_COLS] = build_col_lookup();
63
64const fn build_col_lookup() -> [[u8; 3]; crate::MAX_COLS] {
65    let mut table = [[0u8; 3]; crate::MAX_COLS];
66    let mut col = 0usize;
67
68    while col < crate::MAX_COLS {
69        let mut col_buf = [0u8; 3];
70        let mut col_len = 0usize;
71        let mut c = col as i32;
72
73        // constructs the column ref backwards. so XFD would be [D, F, X]
74        loop {
75            col_buf[col_len] = b'A' + (c % 26) as u8;
76            col_len += 1;
77            c = c / 26 - 1;
78            if c < 0 {
79                break;
80            }
81        }
82
83        col_buf.reverse();
84        table[col] = col_buf;
85        col += 1;
86    }
87
88    table
89}
90
91// function is quite small so i figured best to inline it. i'm not sure if this makes much of a difference
92#[inline(always)]
93fn get_col_letters(col: usize) -> &'static [u8] {
94    let entry = &COL_LOOKUP[col];
95    let start = entry.iter().position(|&b| b != 0).unwrap_or(2);
96    &entry[start..]
97}
98
99fn get_ref_id(col: usize, row: &[u8], row_len: usize) -> ([u8; 10], usize) {
100    let mut final_arr = [0u8; 10];
101    let col_letter = get_col_letters(col);
102    let col_len = col_letter.len();
103
104    final_arr[..col_len].copy_from_slice(col_letter);
105    final_arr[col_len..col_len + row_len].copy_from_slice(row);
106
107    (final_arr, col_len + row_len)
108}
109
110// rows are fit into 7 bytes because the max number of rows in excel is only in the millions
111fn num_to_bytes(n: u32) -> ([u8; 7], usize) {
112    // convert from number to string manually
113    let mut buffer = [0; 7];
114    let mut pos = 7;
115    let mut row = n;
116    loop {
117        pos -= 1;
118        buffer[pos] = b'0' + (row % 10) as u8;
119        row /= 10;
120        if row == 0 {
121            break;
122        }
123    }
124    (buffer, pos)
125}
126
127fn write_escaped(out: &mut Vec<u8>, bytes: &[u8]) {
128    // just learnt of SIMD instructions and this resulted in ~5% perf boost
129    // i'm assuming that cells needing escapes are relatively rarer than cells containing normal text
130    if memchr::memchr3(b'<', b'>', b'&', bytes).is_none()
131    // && memchr::memchr2(b'&', b'"', bytes).is_none()
132    {
133        out.extend_from_slice(bytes);
134        return;
135    }
136
137    let mut start = 0;
138    for (i, &b) in bytes.iter().enumerate() {
139        let escape: &[u8] = match b {
140            b'<' => b"&lt;",
141            b'>' => b"&gt;",
142            // b'\'' => b"&apos;",
143            b'&' => b"&amp;",
144            // b'"' => b"&quot;",
145            _ => continue,
146        };
147        out.extend_from_slice(&bytes[start..i]);
148        out.extend_from_slice(escape);
149        start = i + 1;
150    }
151    out.extend_from_slice(&bytes[start..]);
152}
153
154impl<'a, W: Write + Seek> Sheet<'a, W> {
155    /// Returns the 1-based index of the last row written (0 before any rows are written).
156    #[inline]
157    pub fn current_row(&self) -> u32 {
158        self.current_row_num
159    }
160
161    pub(crate) fn new(name: String, id: u16, writer: &'a mut ZipWriter<W>) -> Result<Self> {
162        let options = SimpleFileOptions::default()
163            .compression_method(zip::CompressionMethod::Deflated)
164            .compression_level(Some(1))
165            .large_file(true);
166
167        writer.start_file(format!("xl/worksheets/sheet{}.xml", id), options)?;
168        writer.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"><sheetData>")?;
169
170        Ok(Sheet {
171            sheet_buf: writer,
172            name,
173            current_row_num: 0,
174            global_shared_vec: Vec::with_capacity(64 * 1024 * 1024),
175        })
176    }
177
178    pub fn write_row<'b>(
179        &mut self,
180        cells: impl Iterator<Item = &'b [u8]>,
181        type_hints: Option<&[CellType]>,
182    ) -> Result<()> {
183        self.current_row_num += 1;
184
185        if self.current_row_num > crate::MAX_ROWS {
186            return Err(ExcelError::RowLimitExceeded);
187        }
188
189        let (row, pos) = num_to_bytes(self.current_row_num);
190
191        self.global_shared_vec.extend_from_slice(b"<row r=\"");
192        self.global_shared_vec.extend_from_slice(&row[pos..]);
193        self.global_shared_vec.extend_from_slice(b"\">");
194
195        let row_len = 7 - pos;
196        for (col, cell) in cells.enumerate() {
197            if col >= crate::MAX_COLS {
198                return Err(ExcelError::ColumnLimitExceeded);
199            }
200
201            let (ref_id, ref_len) = get_ref_id(col, &row[pos..], row_len);
202
203            self.global_shared_vec.extend_from_slice(b"<c r=\"");
204            self.global_shared_vec.extend_from_slice(&ref_id[..ref_len]);
205            self.global_shared_vec.extend_from_slice(b"\" t=\"");
206            self.global_shared_vec.extend_from_slice(type_hints.map_or(
207                CellType::String.as_static_bytes(),
208                |x| {
209                    x.get(col)
210                        .map_or(CellType::String.as_static_bytes(), |x| x.as_static_bytes())
211                },
212            ));
213
214            self.global_shared_vec.extend_from_slice(b"\"><v>");
215            write_escaped(&mut self.global_shared_vec, cell);
216            self.global_shared_vec.extend_from_slice(b"</v></c>");
217        }
218
219        self.global_shared_vec.extend_from_slice(b"</row>");
220
221        if self.current_row_num.is_multiple_of(MAX_ROWS_BUFFER) {
222            self.flush()?;
223        }
224
225        Ok(())
226    }
227
228    fn flush(&mut self) -> Result<()> {
229        self.sheet_buf.write_all(&self.global_shared_vec)?;
230        self.global_shared_vec.clear();
231        Ok(())
232    }
233
234    pub fn close(&mut self) -> Result<()> {
235        self.flush()?;
236        self.sheet_buf.write_all(b"</sheetData></worksheet>")?;
237        Ok(())
238    }
239}