Skip to main content

sheetkit_core/
raw_transfer.rs

1//! Read-direction buffer serializer for bulk data transfer.
2//!
3//! Converts a `WorksheetXml` and `SharedStringTable` into a compact binary
4//! buffer that can be transferred to JavaScript as a single `Buffer` object,
5//! avoiding per-cell napi object creation overhead.
6//!
7//! Binary format (little-endian throughout):
8//!
9//! ```text
10//! HEADER (16 bytes)
11//!   magic:     u32  = 0x534B5244 ("SKRD")
12//!   version:   u16  = 1
13//!   row_count: u32  = number of rows
14//!   col_count: u16  = number of columns
15//!   flags:     u32  = bit 0: 1=sparse, 0=dense; bits 16-31: first column number (min_col & 0xFFFF)
16//!
17//! ROW INDEX (row_count * 8 bytes)
18//!   per row: row_number (u32) + offset (u32) into CELL DATA
19//!   offset = 0xFFFFFFFF for empty rows
20//!
21//! STRING TABLE
22//!   count:     u32
23//!   blob_size: u32
24//!   offsets:   u32[count] (byte offset within blob)
25//!   blob:      concatenated UTF-8 strings (blob_size bytes)
26//!
27//! CELL DATA
28//!   Dense:  row_count * col_count * 9 bytes
29//!     per cell: type (u8) + payload (8 bytes)
30//!   Sparse: variable length
31//!     per row: cell_count (u16) + cell_count * 11 bytes
32//!       per cell: col (u16) + type (u8) + payload (8 bytes)
33//! ```
34
35use std::collections::HashMap;
36
37use crate::error::Result;
38use crate::sst::SharedStringTable;
39use crate::utils::cell_ref::cell_name_to_coordinates;
40use sheetkit_xml::worksheet::{CellTypeTag, WorksheetXml};
41
/// Magic number written at the start of every buffer (`0x534B5244`); its
/// hex digits spell "SKRD" in ASCII.
pub const MAGIC: u32 = u32::from_be_bytes(*b"SKRD");
/// Version number written into the header.
pub const VERSION: u16 = 1;
/// Header size in bytes: magic (4) + version (2) + row_count (4) +
/// col_count (2) + flags (4).
pub const HEADER_SIZE: usize = 4 + 2 + 4 + 2 + 4;
/// Bytes per cell in the dense layout: type tag (1) + payload (8).
pub const CELL_STRIDE: usize = 1 + 8;

/// Cell type tag: no value.
pub const TYPE_EMPTY: u8 = 0;
/// Cell type tag: `f64` number stored in the payload.
pub const TYPE_NUMBER: u8 = 1;
/// Cell type tag: string-table index stored in the first 4 payload bytes.
pub const TYPE_STRING: u8 = 2;
/// Cell type tag: boolean stored in the first payload byte.
pub const TYPE_BOOL: u8 = 3;
/// Cell type tag: date, stored as an `f64` in the payload.
pub const TYPE_DATE: u8 = 4;
/// Cell type tag: error text, referenced by string-table index.
pub const TYPE_ERROR: u8 = 5;
/// Cell type tag: formula expression, referenced by string-table index.
pub const TYPE_FORMULA: u8 = 6;
/// Cell type tag: shared string that has rich-text runs, referenced by
/// string-table index.
pub const TYPE_RICH_STRING: u8 = 7;

/// Bit 0 of the header `flags` field: set when the cell data is sparse.
pub const FLAG_SPARSE: u32 = 1 << 0;

/// Sheets whose populated-cell ratio is below this value use the sparse layout.
const SPARSE_DENSITY_THRESHOLD: f64 = 0.3;
/// Row-index offset that marks a row with no cells.
const EMPTY_ROW_OFFSET: u32 = u32::MAX;
/// Bytes per cell in the sparse layout: col (2) + type tag (1) + payload (8).
const SPARSE_CELL_STRIDE: usize = 2 + 1 + 8;
61
62/// Serialize a worksheet's cell data into a compact binary buffer.
63///
64/// Reads cell data directly from `WorksheetXml` sheet data, resolving shared
65/// string references via `sst`. The resulting buffer uses either dense or
66/// sparse layout depending on cell density relative to the bounding rectangle.
67pub fn sheet_to_raw_buffer(ws: &WorksheetXml, sst: &SharedStringTable) -> Result<Vec<u8>> {
68    let rows = &ws.sheet_data.rows;
69
70    if rows.is_empty() {
71        return Ok(write_empty_buffer());
72    }
73
74    let (min_row, max_row, min_col, max_col, total_cells) = scan_dimensions(ws)?;
75
76    if total_cells == 0 {
77        return Ok(write_empty_buffer());
78    }
79
80    let row_count = (max_row - min_row + 1) as usize;
81    let col_count = (max_col - min_col + 1) as usize;
82
83    let total_grid = row_count * col_count;
84    let density = total_cells as f64 / total_grid as f64;
85    let sparse = density < SPARSE_DENSITY_THRESHOLD;
86
87    let mut string_table = StringTableBuilder::from_sst(sst);
88
89    let cell_entries = collect_cell_entries(ws, sst, min_col, &mut string_table)?;
90
91    let flags: u32 = (if sparse { FLAG_SPARSE } else { 0 }) | ((min_col & 0xFFFF) << 16);
92
93    let row_index_size = row_count * 8;
94    let string_section = string_table.encode();
95    let cell_data = if sparse {
96        encode_sparse_cells(&cell_entries, min_row, max_row)
97    } else {
98        encode_dense_cells(&cell_entries, min_row, row_count, col_count)
99    };
100    let row_index = build_row_index(&cell_entries, min_row, max_row, col_count, sparse);
101
102    let total_size = HEADER_SIZE + row_index_size + string_section.len() + cell_data.len();
103    let mut buf = Vec::with_capacity(total_size);
104
105    buf.extend_from_slice(&MAGIC.to_le_bytes());
106    buf.extend_from_slice(&VERSION.to_le_bytes());
107    buf.extend_from_slice(&(row_count as u32).to_le_bytes());
108    buf.extend_from_slice(&(col_count as u16).to_le_bytes());
109    buf.extend_from_slice(&flags.to_le_bytes());
110
111    buf.extend_from_slice(&row_index);
112    buf.extend_from_slice(&string_section);
113    buf.extend_from_slice(&cell_data);
114
115    Ok(buf)
116}
117
118fn write_empty_buffer() -> Vec<u8> {
119    let mut buf = Vec::with_capacity(HEADER_SIZE);
120    buf.extend_from_slice(&MAGIC.to_le_bytes());
121    buf.extend_from_slice(&VERSION.to_le_bytes());
122    buf.extend_from_slice(&0u32.to_le_bytes());
123    buf.extend_from_slice(&0u16.to_le_bytes());
124    buf.extend_from_slice(&0u32.to_le_bytes());
125    buf
126}
127
/// One encoded cell, ready to be written into the cell data section.
struct CellEntry {
    /// Column position relative to the first populated column of the sheet.
    col: u32,
    /// One of the `TYPE_*` tags.
    type_tag: u8,
    /// Eight payload bytes whose meaning depends on `type_tag`.
    payload: [u8; 8],
}

/// All encoded cells of a single worksheet row.
struct RowEntries {
    /// Row number as given by the worksheet row.
    row_num: u32,
    /// Encoded cells of this row, in worksheet order.
    cells: Vec<CellEntry>,
}
138
139fn scan_dimensions(ws: &WorksheetXml) -> Result<(u32, u32, u32, u32, usize)> {
140    let mut min_row = u32::MAX;
141    let mut max_row = 0u32;
142    let mut min_col = u32::MAX;
143    let mut max_col = 0u32;
144    let mut total_cells = 0usize;
145
146    for row in &ws.sheet_data.rows {
147        if row.cells.is_empty() {
148            continue;
149        }
150        min_row = min_row.min(row.r);
151        max_row = max_row.max(row.r);
152
153        for cell in &row.cells {
154            let col = resolve_col(cell)?;
155            min_col = min_col.min(col);
156            max_col = max_col.max(col);
157            total_cells += 1;
158        }
159    }
160
161    if total_cells == 0 {
162        return Ok((1, 1, 1, 1, 0));
163    }
164
165    Ok((min_row, max_row, min_col, max_col, total_cells))
166}
167
168fn resolve_col(cell: &sheetkit_xml::worksheet::Cell) -> Result<u32> {
169    if cell.col > 0 {
170        return Ok(cell.col);
171    }
172    let (col, _row) = cell_name_to_coordinates(cell.r.as_str())?;
173    Ok(col)
174}
175
176fn collect_cell_entries(
177    ws: &WorksheetXml,
178    sst: &SharedStringTable,
179    min_col: u32,
180    string_table: &mut StringTableBuilder,
181) -> Result<Vec<RowEntries>> {
182    let mut result = Vec::with_capacity(ws.sheet_data.rows.len());
183
184    for row in &ws.sheet_data.rows {
185        if row.cells.is_empty() {
186            continue;
187        }
188
189        let mut cells = Vec::with_capacity(row.cells.len());
190        for cell in &row.cells {
191            let col = resolve_col(cell)?;
192            let relative_col = col - min_col;
193            let (type_tag, payload) = encode_cell_value(cell, sst, string_table)?;
194            cells.push(CellEntry {
195                col: relative_col,
196                type_tag,
197                payload,
198            });
199        }
200
201        result.push(RowEntries {
202            row_num: row.r,
203            cells,
204        });
205    }
206
207    Ok(result)
208}
209
210fn encode_cell_value(
211    cell: &sheetkit_xml::worksheet::Cell,
212    sst: &SharedStringTable,
213    string_table: &mut StringTableBuilder,
214) -> Result<(u8, [u8; 8])> {
215    let mut payload = [0u8; 8];
216
217    if cell.f.is_some() {
218        let formula_expr = cell
219            .f
220            .as_ref()
221            .and_then(|f| f.value.as_deref())
222            .unwrap_or("");
223        let idx = string_table.intern(formula_expr);
224        payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
225        return Ok((TYPE_FORMULA, payload));
226    }
227
228    match cell.t {
229        CellTypeTag::SharedString => {
230            if let Some(ref v) = cell.v {
231                if let Ok(sst_idx) = v.parse::<usize>() {
232                    let text = sst.get(sst_idx).unwrap_or("");
233                    let idx = string_table.intern(text);
234                    payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
235                    if sst.get_rich_text(sst_idx).is_some() {
236                        return Ok((TYPE_RICH_STRING, payload));
237                    }
238                    return Ok((TYPE_STRING, payload));
239                }
240            }
241            Ok((TYPE_EMPTY, payload))
242        }
243        CellTypeTag::Boolean => {
244            if let Some(ref v) = cell.v {
245                payload[0] = if v == "1" || v.eq_ignore_ascii_case("true") {
246                    1
247                } else {
248                    0
249                };
250            }
251            Ok((TYPE_BOOL, payload))
252        }
253        CellTypeTag::Error => {
254            let error_text = cell.v.as_deref().unwrap_or("#VALUE!");
255            let idx = string_table.intern(error_text);
256            payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
257            Ok((TYPE_ERROR, payload))
258        }
259        CellTypeTag::InlineString => {
260            let text = cell
261                .is
262                .as_ref()
263                .and_then(|is| is.t.as_deref())
264                .or(cell.v.as_deref())
265                .unwrap_or("");
266            let idx = string_table.intern(text);
267            payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
268            Ok((TYPE_STRING, payload))
269        }
270        CellTypeTag::Date => {
271            if let Some(ref v) = cell.v {
272                if let Ok(n) = v.parse::<f64>() {
273                    payload.copy_from_slice(&n.to_le_bytes());
274                    return Ok((TYPE_DATE, payload));
275                }
276            }
277            Ok((TYPE_EMPTY, payload))
278        }
279        CellTypeTag::FormulaString => {
280            if let Some(ref v) = cell.v {
281                let idx = string_table.intern(v);
282                payload[..4].copy_from_slice(&(idx as u32).to_le_bytes());
283                return Ok((TYPE_STRING, payload));
284            }
285            Ok((TYPE_EMPTY, payload))
286        }
287        CellTypeTag::None | CellTypeTag::Number => {
288            if let Some(ref v) = cell.v {
289                if let Ok(n) = v.parse::<f64>() {
290                    payload.copy_from_slice(&n.to_le_bytes());
291                    return Ok((TYPE_NUMBER, payload));
292                }
293            }
294            Ok((TYPE_EMPTY, payload))
295        }
296    }
297}
298
299struct StringTableBuilder {
300    strings: Vec<String>,
301    index_map: HashMap<String, usize>,
302}
303
304impl StringTableBuilder {
305    fn from_sst(sst: &SharedStringTable) -> Self {
306        let count = sst.len();
307        let mut strings = Vec::with_capacity(count);
308        let mut index_map = HashMap::with_capacity(count);
309
310        for i in 0..count {
311            if let Some(s) = sst.get(i) {
312                let owned = s.to_string();
313                index_map.entry(owned.clone()).or_insert(i);
314                strings.push(owned);
315            }
316        }
317
318        Self { strings, index_map }
319    }
320
321    fn intern(&mut self, s: &str) -> usize {
322        if let Some(&idx) = self.index_map.get(s) {
323            return idx;
324        }
325        let idx = self.strings.len();
326        self.strings.push(s.to_string());
327        self.index_map.insert(s.to_string(), idx);
328        idx
329    }
330
331    /// Encode the string table section: count(u32) + blob_size(u32) + offsets(u32[count]) + blob.
332    fn encode(&self) -> Vec<u8> {
333        let count = self.strings.len() as u32;
334        if count == 0 {
335            let mut buf = Vec::with_capacity(8);
336            buf.extend_from_slice(&0u32.to_le_bytes());
337            buf.extend_from_slice(&0u32.to_le_bytes());
338            return buf;
339        }
340
341        let mut blob = Vec::new();
342        let mut offsets = Vec::with_capacity(self.strings.len());
343        for s in &self.strings {
344            offsets.push(blob.len() as u32);
345            blob.extend_from_slice(s.as_bytes());
346        }
347        let blob_size = blob.len() as u32;
348
349        let total = 4 + 4 + self.strings.len() * 4 + blob.len();
350        let mut buf = Vec::with_capacity(total);
351        buf.extend_from_slice(&count.to_le_bytes());
352        buf.extend_from_slice(&blob_size.to_le_bytes());
353        for off in &offsets {
354            buf.extend_from_slice(&off.to_le_bytes());
355        }
356        buf.extend_from_slice(&blob);
357        buf
358    }
359}
360
361fn encode_dense_cells(
362    row_entries: &[RowEntries],
363    min_row: u32,
364    row_count: usize,
365    col_count: usize,
366) -> Vec<u8> {
367    let total = row_count * col_count * CELL_STRIDE;
368    let mut buf = vec![0u8; total];
369
370    for re in row_entries {
371        let row_offset = (re.row_num - min_row) as usize;
372        for ce in &re.cells {
373            let cell_idx = row_offset * col_count + ce.col as usize;
374            let pos = cell_idx * CELL_STRIDE;
375            buf[pos] = ce.type_tag;
376            buf[pos + 1..pos + 9].copy_from_slice(&ce.payload);
377        }
378    }
379
380    buf
381}
382
383fn encode_sparse_cells(row_entries: &[RowEntries], min_row: u32, max_row: u32) -> Vec<u8> {
384    let total_rows = (max_row - min_row + 1) as usize;
385    let mut entries_by_row: Vec<Option<&RowEntries>> = vec![None; total_rows];
386    for re in row_entries {
387        let idx = (re.row_num - min_row) as usize;
388        entries_by_row[idx] = Some(re);
389    }
390
391    let mut buf = Vec::new();
392    for entry in &entries_by_row {
393        match entry {
394            Some(re) => {
395                let count = re.cells.len() as u16;
396                buf.extend_from_slice(&count.to_le_bytes());
397                for ce in &re.cells {
398                    buf.extend_from_slice(&(ce.col as u16).to_le_bytes());
399                    buf.push(ce.type_tag);
400                    buf.extend_from_slice(&ce.payload);
401                }
402            }
403            None => {
404                buf.extend_from_slice(&0u16.to_le_bytes());
405            }
406        }
407    }
408
409    buf
410}
411
412fn build_row_index(
413    row_entries: &[RowEntries],
414    min_row: u32,
415    max_row: u32,
416    col_count: usize,
417    sparse: bool,
418) -> Vec<u8> {
419    let total_rows = (max_row - min_row + 1) as usize;
420    let mut index = Vec::with_capacity(total_rows * 8);
421
422    let mut entries_map: HashMap<u32, &RowEntries> = HashMap::new();
423    for re in row_entries {
424        entries_map.insert(re.row_num, re);
425    }
426
427    if sparse {
428        let mut sparse_offset = 0u32;
429        for row_num in min_row..=max_row {
430            index.extend_from_slice(&row_num.to_le_bytes());
431            if let Some(re) = entries_map.get(&row_num) {
432                if re.cells.is_empty() {
433                    index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
434                    sparse_offset += 2;
435                } else {
436                    index.extend_from_slice(&sparse_offset.to_le_bytes());
437                    sparse_offset += 2 + (re.cells.len() as u32) * SPARSE_CELL_STRIDE as u32;
438                }
439            } else {
440                index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
441                sparse_offset += 2;
442            }
443        }
444    } else {
445        for row_num in min_row..=max_row {
446            index.extend_from_slice(&row_num.to_le_bytes());
447            if entries_map.contains_key(&row_num) {
448                let offset = ((row_num - min_row) as usize * col_count * CELL_STRIDE) as u32;
449                index.extend_from_slice(&offset.to_le_bytes());
450            } else {
451                index.extend_from_slice(&EMPTY_ROW_OFFSET.to_le_bytes());
452            }
453        }
454    }
455
456    index
457}
458
459#[cfg(test)]
460mod tests {
461    use super::*;
462    use sheetkit_xml::worksheet::{
463        Cell, CellFormula, CellTypeTag, CompactCellRef, InlineString, Row, SheetData, WorksheetXml,
464    };
465
466    fn make_cell(col_ref: &str, col_num: u32, t: CellTypeTag, v: Option<&str>) -> Cell {
467        Cell {
468            r: CompactCellRef::new(col_ref),
469            col: col_num,
470            s: None,
471            t,
472            v: v.map(|s| s.to_string()),
473            f: None,
474            is: None,
475        }
476    }
477
478    fn make_row(row_num: u32, cells: Vec<Cell>) -> Row {
479        Row {
480            r: row_num,
481            spans: None,
482            s: None,
483            custom_format: None,
484            ht: None,
485            hidden: None,
486            custom_height: None,
487            outline_level: None,
488            cells,
489        }
490    }
491
492    fn make_worksheet(rows: Vec<Row>) -> WorksheetXml {
493        let mut ws = WorksheetXml::default();
494        ws.sheet_data = SheetData { rows };
495        ws
496    }
497
498    fn make_sst(strings: &[&str]) -> SharedStringTable {
499        let mut sst = SharedStringTable::new();
500        for s in strings {
501            sst.add(s);
502        }
503        sst
504    }
505
/// Read a little-endian `u32` from `buf` at byte `offset`.
///
/// Panics if fewer than 4 bytes are available at `offset`.
fn read_u32_le(buf: &[u8], offset: usize) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&buf[offset..offset + 4]);
    u32::from_le_bytes(raw)
}
509
/// Read a little-endian `u16` from `buf` at byte `offset`.
///
/// Panics if fewer than 2 bytes are available at `offset`.
fn read_u16_le(buf: &[u8], offset: usize) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&buf[offset..offset + 2]);
    u16::from_le_bytes(raw)
}
513
514    /// Parse a buffer and return (row_index_end, string_section_end, cell_data_start, flags).
515    fn parse_sections(buf: &[u8]) -> (usize, usize, usize, u32) {
516        let row_count = read_u32_le(buf, 6) as usize;
517        let flags = read_u32_le(buf, 12);
518        let row_index_end = HEADER_SIZE + row_count * 8;
519        let string_count = read_u32_le(buf, row_index_end) as usize;
520        let blob_size = read_u32_le(buf, row_index_end + 4) as usize;
521        let string_section_end = row_index_end + 8 + string_count * 4 + blob_size;
522        (row_index_end, string_section_end, string_section_end, flags)
523    }
524
525    /// Read a string from the string table by index.
526    fn read_string(buf: &[u8], string_section_start: usize, idx: usize) -> String {
527        let count = read_u32_le(buf, string_section_start) as usize;
528        let blob_size = read_u32_le(buf, string_section_start + 4) as usize;
529        assert!(
530            idx < count,
531            "string index {idx} out of range (count={count})"
532        );
533        let offsets_start = string_section_start + 8;
534        let blob_start = offsets_start + count * 4;
535
536        let start = read_u32_le(buf, offsets_start + idx * 4) as usize;
537        let end = if idx + 1 < count {
538            read_u32_le(buf, offsets_start + (idx + 1) * 4) as usize
539        } else {
540            blob_size
541        };
542        String::from_utf8(buf[blob_start + start..blob_start + end].to_vec()).unwrap()
543    }
544
545    /// Read the cell type tag and payload from cell data at a given position.
546    fn read_cell_at(
547        buf: &[u8],
548        cell_data_start: usize,
549        is_sparse: bool,
550        cell_index: usize,
551    ) -> (u8, &[u8]) {
552        if is_sparse {
553            panic!("use read_sparse_row for sparse format");
554        }
555        let pos = cell_data_start + cell_index * CELL_STRIDE;
556        (buf[pos], &buf[pos + 1..pos + 9])
557    }
558
559    /// Read sparse row: returns vec of (col, type_tag, payload_slice).
560    fn read_sparse_row<'a>(buf: &'a [u8], row_offset: usize) -> Vec<(u16, u8, &'a [u8])> {
561        let cell_count = read_u16_le(buf, row_offset) as usize;
562        let mut result = Vec::with_capacity(cell_count);
563        let mut pos = row_offset + 2;
564        for _ in 0..cell_count {
565            let col = read_u16_le(buf, pos);
566            let type_tag = buf[pos + 2];
567            let payload = &buf[pos + 3..pos + 11];
568            result.push((col, type_tag, payload));
569            pos += SPARSE_CELL_STRIDE;
570        }
571        result
572    }
573
/// A worksheet without rows serializes to a bare header with zero counts.
#[test]
fn test_empty_sheet() {
    let worksheet = make_worksheet(Vec::new());
    let buf = sheet_to_raw_buffer(&worksheet, &SharedStringTable::new()).unwrap();

    assert_eq!(buf.len(), HEADER_SIZE);

    let magic = read_u32_le(&buf, 0);
    let version = read_u16_le(&buf, 4);
    let row_count = read_u32_le(&buf, 6);
    let col_count = read_u16_le(&buf, 10);
    let flags = read_u32_le(&buf, 12);

    assert_eq!(magic, MAGIC);
    assert_eq!(version, VERSION);
    assert_eq!(row_count, 0);
    assert_eq!(col_count, 0);
    assert_eq!(flags, 0);
}
587
/// A single untyped numeric cell yields a 1x1 sheet holding a `TYPE_NUMBER`.
#[test]
fn test_single_number_cell() {
    let cell = make_cell("A1", 1, CellTypeTag::None, Some("42.5"));
    let ws = make_worksheet(vec![make_row(1, vec![cell])]);
    let buf = sheet_to_raw_buffer(&ws, &SharedStringTable::new()).unwrap();

    assert_eq!(read_u32_le(&buf, 0), MAGIC);
    assert_eq!(read_u32_le(&buf, 6), 1);
    assert_eq!(read_u16_le(&buf, 10) as usize, 1);

    let (_, _, cd_start, flags) = parse_sections(&buf);

    // Pull the one cell out of whichever layout was chosen.
    let (tag, payload): (u8, &[u8]) = if flags & FLAG_SPARSE != 0 {
        let cells = read_sparse_row(&buf, cd_start);
        assert_eq!(cells.len(), 1);
        (cells[0].1, cells[0].2)
    } else {
        read_cell_at(&buf, cd_start, false, 0)
    };

    assert_eq!(tag, TYPE_NUMBER);
    let val = f64::from_le_bytes(payload.try_into().unwrap());
    assert!((val - 42.5).abs() < f64::EPSILON);
}
621
/// A shared-string cell references its text by index, and the shared strings
/// appear in the string table in their original order.
#[test]
fn test_string_cell_sst() {
    let sst = make_sst(&["Hello", "World"]);
    let cell = make_cell("A1", 1, CellTypeTag::SharedString, Some("1"));
    let ws = make_worksheet(vec![make_row(1, vec![cell])]);
    let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();

    let (st_start, _, cd_start, flags) = parse_sections(&buf);

    assert!(read_u32_le(&buf, st_start) >= 2);
    for (i, expected) in ["Hello", "World"].iter().enumerate() {
        assert_eq!(read_string(&buf, st_start, i), *expected);
    }

    let (tag, payload): (u8, &[u8]) = if flags & FLAG_SPARSE != 0 {
        let cells = read_sparse_row(&buf, cd_start);
        (cells[0].1, cells[0].2)
    } else {
        read_cell_at(&buf, cd_start, false, 0)
    };

    assert_eq!(tag, TYPE_STRING);
    let str_idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;
    assert_eq!(str_idx, 1, "should reference SST index 1 = 'World'");
}
653
/// Boolean cells keep their truth value in the first payload byte.
#[test]
fn test_bool_cell() {
    let cells = vec![
        make_cell("A1", 1, CellTypeTag::Boolean, Some("1")),
        make_cell("B1", 2, CellTypeTag::Boolean, Some("0")),
    ];
    let ws = make_worksheet(vec![make_row(1, cells)]);
    let buf = sheet_to_raw_buffer(&ws, &SharedStringTable::new()).unwrap();

    assert_eq!(read_u16_le(&buf, 10) as usize, 2);

    let (_, _, cd_start, flags) = parse_sections(&buf);

    // Reduce each cell to (type tag, first payload byte).
    let observed: Vec<(u8, u8)> = if flags & FLAG_SPARSE != 0 {
        let row = read_sparse_row(&buf, cd_start);
        assert_eq!(row.len(), 2);
        row.iter().map(|&(_, tag, payload)| (tag, payload[0])).collect()
    } else {
        (0..2)
            .map(|i| {
                let (tag, payload) = read_cell_at(&buf, cd_start, false, i);
                (tag, payload[0])
            })
            .collect()
    };

    assert_eq!(observed, vec![(TYPE_BOOL, 1), (TYPE_BOOL, 0)]);
}
688
/// An error cell is tagged `TYPE_ERROR` and its payload references the error
/// text in the string table.
#[test]
fn test_error_cell() {
    let sst = SharedStringTable::new();
    let ws = make_worksheet(vec![make_row(
        1,
        vec![make_cell("A1", 1, CellTypeTag::Error, Some("#DIV/0!"))],
    )]);
    let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();

    let (st_start, _, cd_start, flags) = parse_sections(&buf);
    let is_sparse = flags & FLAG_SPARSE != 0;

    let string_count = read_u32_le(&buf, st_start);
    assert!(string_count >= 1);

    let (type_tag, payload): (u8, &[u8]) = if is_sparse {
        let cells = read_sparse_row(&buf, cd_start);
        (cells[0].1, cells[0].2)
    } else {
        read_cell_at(&buf, cd_start, false, 0)
    };
    assert_eq!(type_tag, TYPE_ERROR);

    // Follow the index stored in the payload rather than assuming the error
    // text sits at position 0, so a wrong payload index is caught.
    let str_idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;
    let error_str = read_string(&buf, st_start, str_idx);
    assert_eq!(error_str, "#DIV/0!");
}
716
/// A cell carrying a formula is tagged `TYPE_FORMULA` and references the
/// formula text in the string table.
#[test]
fn test_formula_cell() {
    let formula = CellFormula {
        t: None,
        reference: None,
        si: None,
        value: Some("A2+B2".to_string()),
    };
    let mut cell = make_cell("A1", 1, CellTypeTag::None, Some("84"));
    cell.f = Some(Box::new(formula));

    let ws = make_worksheet(vec![make_row(1, vec![cell])]);
    let buf = sheet_to_raw_buffer(&ws, &SharedStringTable::new()).unwrap();

    let (st_start, _, cd_start, flags) = parse_sections(&buf);

    let (type_tag, payload): (u8, &[u8]) = if flags & FLAG_SPARSE != 0 {
        let cells = read_sparse_row(&buf, cd_start);
        (cells[0].1, cells[0].2)
    } else {
        read_cell_at(&buf, cd_start, false, 0)
    };
    let str_idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;

    assert_eq!(type_tag, TYPE_FORMULA);
    assert_eq!(read_string(&buf, st_start, str_idx), "A2+B2");
}
747
/// An inline-string cell is tagged `TYPE_STRING` and its text is added to the
/// string table.
#[test]
fn test_inline_string_cell() {
    let inline = InlineString {
        t: Some("Inline Text".to_string()),
    };
    let mut cell = make_cell("A1", 1, CellTypeTag::InlineString, None);
    cell.is = Some(Box::new(inline));

    let ws = make_worksheet(vec![make_row(1, vec![cell])]);
    let buf = sheet_to_raw_buffer(&ws, &SharedStringTable::new()).unwrap();

    let (st_start, _, cd_start, flags) = parse_sections(&buf);

    let (type_tag, payload): (u8, &[u8]) = if flags & FLAG_SPARSE != 0 {
        let cells = read_sparse_row(&buf, cd_start);
        (cells[0].1, cells[0].2)
    } else {
        read_cell_at(&buf, cd_start, false, 0)
    };
    let str_idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;

    assert_eq!(type_tag, TYPE_STRING);
    assert_eq!(read_string(&buf, st_start, str_idx), "Inline Text");
}
775
/// A date-typed cell is tagged `TYPE_DATE` and stores its value as an `f64`.
#[test]
fn test_date_cell() {
    let cell = make_cell("A1", 1, CellTypeTag::Date, Some("44927.0"));
    let ws = make_worksheet(vec![make_row(1, vec![cell])]);
    let buf = sheet_to_raw_buffer(&ws, &SharedStringTable::new()).unwrap();

    let (_, _, cd_start, flags) = parse_sections(&buf);

    let (tag, payload): (u8, &[u8]) = if flags & FLAG_SPARSE != 0 {
        let cells = read_sparse_row(&buf, cd_start);
        (cells[0].1, cells[0].2)
    } else {
        read_cell_at(&buf, cd_start, false, 0)
    };

    assert_eq!(tag, TYPE_DATE);
    let val = f64::from_le_bytes(payload.try_into().unwrap());
    assert!((val - 44927.0).abs() < f64::EPSILON);
}
800
/// A row mixing a number, a shared string and a boolean keeps each cell's
/// type tag, in column order.
#[test]
fn test_mixed_types_row() {
    let sst = make_sst(&["Hello"]);
    let ws = make_worksheet(vec![make_row(
        1,
        vec![
            // Deliberately not 3.14: that literal trips Clippy's
            // deny-by-default `approx_constant` lint.
            make_cell("A1", 1, CellTypeTag::None, Some("2.75")),
            make_cell("B1", 2, CellTypeTag::SharedString, Some("0")),
            make_cell("C1", 3, CellTypeTag::Boolean, Some("1")),
        ],
    )]);
    let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();

    let col_count = read_u16_le(&buf, 10) as usize;
    assert_eq!(col_count, 3);

    let (_, _, cd_start, flags) = parse_sections(&buf);
    let is_sparse = flags & FLAG_SPARSE != 0;

    if is_sparse {
        let cells = read_sparse_row(&buf, cd_start);
        assert_eq!(cells.len(), 3);
        assert_eq!(cells[0].1, TYPE_NUMBER);
        assert_eq!(cells[1].1, TYPE_STRING);
        assert_eq!(cells[2].1, TYPE_BOOL);
    } else {
        let (t0, p0) = read_cell_at(&buf, cd_start, false, 0);
        assert_eq!(t0, TYPE_NUMBER);
        let val = f64::from_le_bytes(p0.try_into().unwrap());
        assert!((val - 2.75).abs() < f64::EPSILON);

        let (t1, _) = read_cell_at(&buf, cd_start, false, 1);
        assert_eq!(t1, TYPE_STRING);

        let (t2, p2) = read_cell_at(&buf, cd_start, false, 2);
        assert_eq!(t2, TYPE_BOOL);
        assert_eq!(p2[0], 1);
    }
}
840
/// A fully populated 5x5 sheet uses the dense layout, and every cell lands at
/// its row-major grid position.
#[test]
fn test_dense_format() {
    let sst = SharedStringTable::new();

    // Cell (r, c) holds the number r * 10 + c, with 1-based r and c.
    let rows: Vec<Row> = (1..=5u32)
        .map(|r| {
            let cells = ["A", "B", "C", "D", "E"]
                .iter()
                .zip(1u32..)
                .map(|(col_letter, c)| {
                    let cell_ref = format!("{col_letter}{r}");
                    let value = (r * 10 + c).to_string();
                    make_cell(&cell_ref, c, CellTypeTag::None, Some(value.as_str()))
                })
                .collect();
            make_row(r, cells)
        })
        .collect();

    let ws = make_worksheet(rows);
    let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();

    let flags = read_u32_le(&buf, 12);
    assert_eq!(
        flags & FLAG_SPARSE,
        0,
        "5x5 fully populated should be dense"
    );

    let row_count = read_u32_le(&buf, 6) as usize;
    let col_count = read_u16_le(&buf, 10) as usize;
    assert_eq!(row_count, 5);
    assert_eq!(col_count, 5);

    let (_, _, cd_start, _) = parse_sections(&buf);
    assert_eq!(
        buf.len() - cd_start,
        row_count * col_count * CELL_STRIDE,
        "dense cell data should be exactly row_count * col_count * CELL_STRIDE"
    );

    for position in 0..25usize {
        let (r, c) = (position / 5, position % 5);
        let (tag, payload) = read_cell_at(&buf, cd_start, false, r * col_count + c);
        assert_eq!(tag, TYPE_NUMBER);

        let val = f64::from_le_bytes(payload.try_into().unwrap());
        let expected = ((r + 1) * 10 + (c + 1)) as f64;
        assert!(
            (val - expected).abs() < f64::EPSILON,
            "cell ({r},{c}) expected {expected}, got {val}"
        );
    }
}
903
904    #[test]
905    fn test_sparse_format() {
906        let sst = SharedStringTable::new();
907        let rows = vec![
908            make_row(1, vec![make_cell("A1", 1, CellTypeTag::None, Some("1"))]),
909            make_row(
910                100,
911                vec![make_cell("T100", 20, CellTypeTag::None, Some("2"))],
912            ),
913        ];
914        let ws = make_worksheet(rows);
915        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
916
917        let flags = read_u32_le(&buf, 12);
918        assert_ne!(
919            flags & FLAG_SPARSE,
920            0,
921            "2 cells in 100x20 grid should be sparse"
922        );
923
924        let row_count = read_u32_le(&buf, 6) as usize;
925        assert_eq!(row_count, 100);
926        let col_count = read_u16_le(&buf, 10) as usize;
927        assert_eq!(col_count, 20);
928    }
929
930    #[test]
931    fn test_header_format() {
932        let sst = SharedStringTable::new();
933        let ws = make_worksheet(vec![
934            make_row(
935                2,
936                vec![
937                    make_cell("B2", 2, CellTypeTag::None, Some("10")),
938                    make_cell("D2", 4, CellTypeTag::None, Some("20")),
939                ],
940            ),
941            make_row(5, vec![make_cell("C5", 3, CellTypeTag::None, Some("30"))]),
942        ]);
943        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
944
945        assert_eq!(read_u32_le(&buf, 0), MAGIC);
946        assert_eq!(read_u16_le(&buf, 4), VERSION);
947        let row_count = read_u32_le(&buf, 6);
948        assert_eq!(row_count, 4, "rows 2-5 = 4 rows");
949        let col_count = read_u16_le(&buf, 10);
950        assert_eq!(col_count, 3, "cols B-D = 3 columns");
951    }
952
953    #[test]
954    fn test_string_table_format() {
955        let sst = make_sst(&["Alpha", "Beta", "Gamma"]);
956        let ws = make_worksheet(vec![make_row(
957            1,
958            vec![
959                make_cell("A1", 1, CellTypeTag::SharedString, Some("0")),
960                make_cell("B1", 2, CellTypeTag::SharedString, Some("2")),
961            ],
962        )]);
963        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
964
965        let (st_start, _, _, _) = parse_sections(&buf);
966        let string_count = read_u32_le(&buf, st_start) as usize;
967        assert_eq!(string_count, 3, "all SST strings should be in table");
968
969        let s0 = read_string(&buf, st_start, 0);
970        let s1 = read_string(&buf, st_start, 1);
971        let s2 = read_string(&buf, st_start, 2);
972        assert_eq!(s0, "Alpha");
973        assert_eq!(s1, "Beta");
974        assert_eq!(s2, "Gamma");
975    }
976
977    #[test]
978    fn test_large_sheet_dimensions() {
979        let sst = SharedStringTable::new();
980        let mut rows = Vec::new();
981        for r in [1u32, 500, 1000] {
982            let mut cells = Vec::new();
983            for c in [1u32, 10, 50] {
984                let col_name = crate::utils::cell_ref::column_number_to_name(c).unwrap();
985                let cell_ref = format!("{col_name}{r}");
986                cells.push(make_cell(
987                    &cell_ref,
988                    c,
989                    CellTypeTag::None,
990                    Some(&format!("{}", r + c)),
991                ));
992            }
993            rows.push(make_row(r, cells));
994        }
995        let ws = make_worksheet(rows);
996        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
997
998        let row_count = read_u32_le(&buf, 6);
999        assert_eq!(row_count, 1000, "rows 1-1000 = 1000 rows");
1000        let col_count = read_u16_le(&buf, 10);
1001        assert_eq!(col_count, 50, "cols A-AX = 50 columns");
1002
1003        let flags = read_u32_le(&buf, 12);
1004        assert_ne!(
1005            flags & FLAG_SPARSE,
1006            0,
1007            "9 cells in 1000x50 should be sparse"
1008        );
1009    }
1010
1011    #[test]
1012    fn test_row_index_entries() {
1013        let sst = SharedStringTable::new();
1014        let ws = make_worksheet(vec![
1015            make_row(1, vec![make_cell("A1", 1, CellTypeTag::None, Some("1"))]),
1016            make_row(3, vec![make_cell("A3", 1, CellTypeTag::None, Some("3"))]),
1017        ]);
1018        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1019
1020        let row_count = read_u32_le(&buf, 6) as usize;
1021        assert_eq!(row_count, 3);
1022
1023        let row_index_start = HEADER_SIZE;
1024        let row1_num = read_u32_le(&buf, row_index_start);
1025        let row1_offset = read_u32_le(&buf, row_index_start + 4);
1026        assert_eq!(row1_num, 1);
1027        assert_ne!(row1_offset, EMPTY_ROW_OFFSET);
1028
1029        let row2_num = read_u32_le(&buf, row_index_start + 8);
1030        let row2_offset = read_u32_le(&buf, row_index_start + 12);
1031        assert_eq!(row2_num, 2);
1032        assert_eq!(row2_offset, EMPTY_ROW_OFFSET, "row 2 has no data");
1033
1034        let row3_num = read_u32_le(&buf, row_index_start + 16);
1035        let row3_offset = read_u32_le(&buf, row_index_start + 20);
1036        assert_eq!(row3_num, 3);
1037        assert_ne!(row3_offset, EMPTY_ROW_OFFSET);
1038    }
1039
1040    #[test]
1041    fn test_string_deduplication() {
1042        let sst = SharedStringTable::new();
1043        let ws = make_worksheet(vec![make_row(
1044            1,
1045            vec![
1046                make_cell("A1", 1, CellTypeTag::Error, Some("#N/A")),
1047                make_cell("B1", 2, CellTypeTag::Error, Some("#N/A")),
1048            ],
1049        )]);
1050        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1051
1052        let (st_start, _, _, _) = parse_sections(&buf);
1053        let string_count = read_u32_le(&buf, st_start) as usize;
1054        assert_eq!(
1055            string_count, 1,
1056            "duplicate error strings should be deduplicated"
1057        );
1058        let s = read_string(&buf, st_start, 0);
1059        assert_eq!(s, "#N/A");
1060    }
1061
1062    #[test]
1063    fn test_formula_string_type() {
1064        let sst = SharedStringTable::new();
1065        let ws = make_worksheet(vec![make_row(
1066            1,
1067            vec![make_cell(
1068                "A1",
1069                1,
1070                CellTypeTag::FormulaString,
1071                Some("computed"),
1072            )],
1073        )]);
1074        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1075
1076        let (_, _, cd_start, flags) = parse_sections(&buf);
1077        let is_sparse = flags & FLAG_SPARSE != 0;
1078
1079        if is_sparse {
1080            let cells = read_sparse_row(&buf, cd_start);
1081            assert_eq!(cells[0].1, TYPE_STRING);
1082        } else {
1083            let (tag, _) = read_cell_at(&buf, cd_start, false, 0);
1084            assert_eq!(tag, TYPE_STRING);
1085        }
1086    }
1087
1088    #[test]
1089    fn test_number_with_explicit_type() {
1090        let sst = SharedStringTable::new();
1091        let ws = make_worksheet(vec![make_row(
1092            1,
1093            vec![make_cell("A1", 1, CellTypeTag::Number, Some("99.9"))],
1094        )]);
1095        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1096
1097        let (_, _, cd_start, flags) = parse_sections(&buf);
1098        let is_sparse = flags & FLAG_SPARSE != 0;
1099
1100        if is_sparse {
1101            let cells = read_sparse_row(&buf, cd_start);
1102            assert_eq!(cells[0].1, TYPE_NUMBER);
1103            let val = f64::from_le_bytes(cells[0].2.try_into().unwrap());
1104            assert!((val - 99.9).abs() < f64::EPSILON);
1105        } else {
1106            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
1107            assert_eq!(tag, TYPE_NUMBER);
1108            let val = f64::from_le_bytes(payload.try_into().unwrap());
1109            assert!((val - 99.9).abs() < f64::EPSILON);
1110        }
1111    }
1112
1113    #[test]
1114    fn test_cell_without_col_uses_ref_parsing() {
1115        let sst = SharedStringTable::new();
1116        let mut cell = make_cell("C5", 0, CellTypeTag::None, Some("42"));
1117        cell.col = 0;
1118        let ws = make_worksheet(vec![make_row(5, vec![cell])]);
1119        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1120
1121        let row_count = read_u32_le(&buf, 6);
1122        assert_eq!(row_count, 1);
1123        let col_count = read_u16_le(&buf, 10);
1124        assert_eq!(col_count, 1);
1125    }
1126
1127    #[test]
1128    fn test_rich_string_from_sst() {
1129        use crate::rich_text::RichTextRun;
1130
1131        let mut sst = SharedStringTable::new();
1132        sst.add("plain");
1133        sst.add_rich_text(&[
1134            RichTextRun {
1135                text: "Bold".to_string(),
1136                font: None,
1137                size: None,
1138                bold: true,
1139                italic: false,
1140                color: None,
1141            },
1142            RichTextRun {
1143                text: " Normal".to_string(),
1144                font: None,
1145                size: None,
1146                bold: false,
1147                italic: false,
1148                color: None,
1149            },
1150        ]);
1151
1152        let ws = make_worksheet(vec![make_row(
1153            1,
1154            vec![make_cell("A1", 1, CellTypeTag::SharedString, Some("1"))],
1155        )]);
1156        let buf = sheet_to_raw_buffer(&ws, &sst).unwrap();
1157
1158        let (st_start, _, cd_start, flags) = parse_sections(&buf);
1159        let is_sparse = flags & FLAG_SPARSE != 0;
1160
1161        let (type_tag, str_idx) = if is_sparse {
1162            let cells = read_sparse_row(&buf, cd_start);
1163            let idx = u32::from_le_bytes(cells[0].2[..4].try_into().unwrap()) as usize;
1164            (cells[0].1, idx)
1165        } else {
1166            let (tag, payload) = read_cell_at(&buf, cd_start, false, 0);
1167            let idx = u32::from_le_bytes(payload[..4].try_into().unwrap()) as usize;
1168            (tag, idx)
1169        };
1170
1171        assert_eq!(type_tag, TYPE_RICH_STRING);
1172        let text = read_string(&buf, st_start, str_idx);
1173        assert_eq!(text, "Bold Normal");
1174    }
1175}