Skip to main content

sheetkit_core/
raw_transfer_write.rs

1//! Write-direction buffer serialization and deserialization for raw FFI transfer.
2//!
3//! Provides [`cells_to_raw_buffer`] to encode structured cell data into a
4//! compact binary buffer, and [`raw_buffer_to_cells`] to decode a buffer
5//! back into cell values. These are used for the JS-to-Rust write path
6//! (e.g., `setSheetData`) and for round-trip testing.
7//!
8//! The binary format matches the specification in `raw_transfer.rs` so that
9//! buffers produced by either module can be consumed by the other.
10
11use std::collections::HashMap;
12
13use crate::cell::CellValue;
14use crate::error::{Error, Result};
15use crate::rich_text;
16
/// Magic number written little-endian into header bytes 0..4; read most-significant
/// byte first, the value's bytes are ASCII "SKRD".
const MAGIC: u32 = 0x534B5244;
/// Format version written into header bytes 4..6.
const VERSION: u16 = 1;
/// Header length in bytes: magic (4) + version (2) + row count (4) + column count (2) + flags (4).
const HEADER_SIZE: usize = 16;
/// Size of one row-index entry: row number (u32) + cell-data offset (u32).
const ROW_INDEX_ENTRY_SIZE: usize = 8;
/// Size of one dense cell slot: type tag (1 byte) + payload (8 bytes).
const CELL_STRIDE: usize = 9;
/// Size of one sparse cell entry: 0-based column (u16) + type tag (1 byte) + payload (8 bytes).
const SPARSE_ENTRY_SIZE: usize = 11;
/// Row-index offset value marking a row without cell data; readers skip such rows.
const EMPTY_ROW_SENTINEL: u32 = 0xFFFF_FFFF;
/// Header flag bit: set when the cell data uses the sparse layout.
const FLAG_SPARSE: u32 = 1;
/// The encoder picks the sparse layout when (cells / (rows * columns)) is below this value.
const DENSITY_THRESHOLD: f64 = 0.3;

// Cell type tags stored in the first byte of every dense slot / third byte of every sparse entry.
/// Empty cell; dense readers skip slots carrying this tag.
const TYPE_EMPTY: u8 = 0x00;
/// Payload is an `f64` (little-endian).
const TYPE_NUMBER: u8 = 0x01;
/// Payload bytes 0..4 are a string-table index (u32, little-endian).
const TYPE_STRING: u8 = 0x02;
/// Payload byte 0 is non-zero for `true`.
const TYPE_BOOL: u8 = 0x03;
/// Payload is an `f64` (little-endian) holding the date value.
const TYPE_DATE: u8 = 0x04;
/// Payload bytes 0..4 are the string-table index of the error text.
const TYPE_ERROR: u8 = 0x05;
/// Payload bytes 0..4 are the string-table index of the formula expression.
const TYPE_FORMULA: u8 = 0x06;
/// Payload bytes 0..4 are the string-table index of the rich text flattened to plain text.
const TYPE_RICH_STRING: u8 = 0x07;
35
/// A row of cell data: (1-based row number, cells in that row), where each cell
/// is (1-based column number, value).
type CellRow = (u32, Vec<(u32, CellValue)>);

/// Intermediate encoded cell: (0-based column index, type tag, 8-byte payload).
type EncodedCell = (u16, u8, [u8; 8]);

/// Intermediate encoded row: (1-based row number, encoded cells).
type EncodedRow = (u32, Vec<EncodedCell>);
44
/// Decoded fixed-size header found at the start of every raw buffer.
struct BufferHeader {
    /// Format version from header bytes 4..6; parsed but not validated by `read_header`.
    _version: u16,
    /// Number of row-index entries that follow the header (header bytes 6..10).
    row_count: u32,
    /// Number of cell slots per row in the dense layout (header bytes 10..12).
    col_count: u16,
    /// Flag bits from header bytes 12..16; see `FLAG_SPARSE`.
    flags: u32,
}
51
52fn read_header(buf: &[u8]) -> Result<BufferHeader> {
53    if buf.len() < HEADER_SIZE {
54        return Err(Error::Internal(format!(
55            "buffer too short for header: {} bytes (need {})",
56            buf.len(),
57            HEADER_SIZE
58        )));
59    }
60    let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap());
61    if magic != MAGIC {
62        return Err(Error::Internal(format!(
63            "invalid buffer magic: expected 0x{MAGIC:08X}, got 0x{magic:08X}"
64        )));
65    }
66    let version = u16::from_le_bytes(buf[4..6].try_into().unwrap());
67    let row_count = u32::from_le_bytes(buf[6..10].try_into().unwrap());
68    let col_count = u16::from_le_bytes(buf[10..12].try_into().unwrap());
69    let flags = u32::from_le_bytes(buf[12..16].try_into().unwrap());
70    Ok(BufferHeader {
71        _version: version,
72        row_count,
73        col_count,
74        flags,
75    })
76}
77
78fn read_row_index(buf: &[u8], row_count: u32) -> Result<Vec<(u32, u32)>> {
79    let start = HEADER_SIZE;
80    let end = start + row_count as usize * ROW_INDEX_ENTRY_SIZE;
81    if buf.len() < end {
82        return Err(Error::Internal(format!(
83            "buffer too short for row index: {} bytes (need {})",
84            buf.len(),
85            end
86        )));
87    }
88    let mut entries = Vec::with_capacity(row_count as usize);
89    for i in 0..row_count as usize {
90        let offset = start + i * ROW_INDEX_ENTRY_SIZE;
91        let row_num = u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap());
92        let row_off = u32::from_le_bytes(buf[offset + 4..offset + 8].try_into().unwrap());
93        entries.push((row_num, row_off));
94    }
95    Ok(entries)
96}
97
98/// Read the string table. Returns (strings, byte position after string table).
99fn read_string_table(buf: &[u8], offset: usize) -> Result<(Vec<String>, usize)> {
100    if buf.len() < offset + 8 {
101        return Err(Error::Internal(
102            "buffer too short for string table header".to_string(),
103        ));
104    }
105    let count = u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap()) as usize;
106    let blob_size = u32::from_le_bytes(buf[offset + 4..offset + 8].try_into().unwrap()) as usize;
107
108    let offsets_start = offset + 8;
109    let offsets_end = offsets_start + count * 4;
110    let blob_start = offsets_end;
111    let blob_end = blob_start + blob_size;
112
113    if buf.len() < blob_end {
114        return Err(Error::Internal(format!(
115            "buffer too short for string table: {} bytes (need {})",
116            buf.len(),
117            blob_end
118        )));
119    }
120
121    let mut string_offsets = Vec::with_capacity(count);
122    for i in 0..count {
123        let pos = offsets_start + i * 4;
124        let off = u32::from_le_bytes(buf[pos..pos + 4].try_into().unwrap()) as usize;
125        string_offsets.push(off);
126    }
127
128    let mut strings = Vec::with_capacity(count);
129    for i in 0..count {
130        let start = blob_start + string_offsets[i];
131        let end = if i + 1 < count {
132            blob_start + string_offsets[i + 1]
133        } else {
134            blob_end
135        };
136        let s = std::str::from_utf8(&buf[start..end])
137            .map_err(|e| Error::Internal(format!("invalid UTF-8 in string table: {e}")))?;
138        strings.push(s.to_string());
139    }
140
141    Ok((strings, blob_end))
142}
143
144fn decode_cell_payload(type_tag: u8, payload: &[u8], strings: &[String]) -> Result<CellValue> {
145    match type_tag {
146        TYPE_EMPTY => Ok(CellValue::Empty),
147        TYPE_NUMBER => {
148            let n = f64::from_le_bytes(payload[0..8].try_into().unwrap());
149            Ok(CellValue::Number(n))
150        }
151        TYPE_STRING => {
152            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
153            let s = strings
154                .get(idx)
155                .cloned()
156                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
157            Ok(CellValue::String(s))
158        }
159        TYPE_BOOL => Ok(CellValue::Bool(payload[0] != 0)),
160        TYPE_DATE => {
161            let n = f64::from_le_bytes(payload[0..8].try_into().unwrap());
162            Ok(CellValue::Date(n))
163        }
164        TYPE_ERROR => {
165            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
166            let s = strings
167                .get(idx)
168                .cloned()
169                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
170            Ok(CellValue::Error(s))
171        }
172        TYPE_FORMULA => {
173            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
174            let expr = strings
175                .get(idx)
176                .cloned()
177                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
178            Ok(CellValue::Formula { expr, result: None })
179        }
180        TYPE_RICH_STRING => {
181            let idx = u32::from_le_bytes(payload[0..4].try_into().unwrap()) as usize;
182            let s = strings
183                .get(idx)
184                .cloned()
185                .ok_or_else(|| Error::Internal(format!("string index {idx} out of range")))?;
186            Ok(CellValue::String(s))
187        }
188        _ => Ok(CellValue::Empty),
189    }
190}
191
192fn read_dense_cells(
193    buf: &[u8],
194    cell_data_start: usize,
195    row_index: &[(u32, u32)],
196    col_count: u16,
197    strings: &[String],
198) -> Result<Vec<CellRow>> {
199    let mut result = Vec::new();
200    for &(row_num, offset) in row_index {
201        if offset == EMPTY_ROW_SENTINEL {
202            continue;
203        }
204        let row_start = cell_data_start + offset as usize;
205        let row_end = row_start + col_count as usize * CELL_STRIDE;
206        if buf.len() < row_end {
207            return Err(Error::Internal(format!(
208                "buffer too short for dense row data at offset {}",
209                row_start
210            )));
211        }
212        let mut cells = Vec::new();
213        for c in 0..col_count as usize {
214            let cell_offset = row_start + c * CELL_STRIDE;
215            let type_tag = buf[cell_offset];
216            if type_tag == TYPE_EMPTY {
217                continue;
218            }
219            let payload = &buf[cell_offset + 1..cell_offset + 9];
220            let value = decode_cell_payload(type_tag, payload, strings)?;
221            cells.push((c as u32 + 1, value));
222        }
223        if !cells.is_empty() {
224            result.push((row_num, cells));
225        }
226    }
227    Ok(result)
228}
229
230fn read_sparse_cells(
231    buf: &[u8],
232    cell_data_start: usize,
233    row_index: &[(u32, u32)],
234    strings: &[String],
235) -> Result<Vec<CellRow>> {
236    let mut result = Vec::new();
237    for &(row_num, offset) in row_index {
238        if offset == EMPTY_ROW_SENTINEL {
239            continue;
240        }
241        let pos = cell_data_start + offset as usize;
242        if buf.len() < pos + 2 {
243            return Err(Error::Internal(
244                "buffer too short for sparse row cell count".to_string(),
245            ));
246        }
247        let cell_count = u16::from_le_bytes(buf[pos..pos + 2].try_into().unwrap()) as usize;
248        let entries_start = pos + 2;
249        let entries_end = entries_start + cell_count * SPARSE_ENTRY_SIZE;
250        if buf.len() < entries_end {
251            return Err(Error::Internal(format!(
252                "buffer too short for sparse row entries at offset {}",
253                entries_start
254            )));
255        }
256        let mut cells = Vec::with_capacity(cell_count);
257        for i in 0..cell_count {
258            let entry_off = entries_start + i * SPARSE_ENTRY_SIZE;
259            let col = u16::from_le_bytes(buf[entry_off..entry_off + 2].try_into().unwrap());
260            let type_tag = buf[entry_off + 2];
261            let payload = &buf[entry_off + 3..entry_off + 11];
262            let value = decode_cell_payload(type_tag, payload, strings)?;
263            cells.push((col as u32 + 1, value));
264        }
265        if !cells.is_empty() {
266            result.push((row_num, cells));
267        }
268    }
269    Ok(result)
270}
271
272/// Decode a raw buffer into cell values for applying to a worksheet.
273///
274/// Returns rows as `(row_number, cells)` where each cell is
275/// `(col_number, CellValue)`. Both row and column numbers are 1-based.
276pub fn raw_buffer_to_cells(buf: &[u8]) -> Result<Vec<CellRow>> {
277    let header = read_header(buf)?;
278    if header.row_count == 0 {
279        return Ok(Vec::new());
280    }
281
282    let row_index = read_row_index(buf, header.row_count)?;
283    let string_table_offset = HEADER_SIZE + header.row_count as usize * ROW_INDEX_ENTRY_SIZE;
284    let (strings, cell_data_start) = read_string_table(buf, string_table_offset)?;
285
286    let is_sparse = header.flags & FLAG_SPARSE != 0;
287    if is_sparse {
288        read_sparse_cells(buf, cell_data_start, &row_index, &strings)
289    } else {
290        read_dense_cells(buf, cell_data_start, &row_index, header.col_count, &strings)
291    }
292}
293
/// Deduplicating string pool used while encoding a buffer.
struct StringTable {
    /// Unique strings in first-seen order; a string's position is its index.
    strings: Vec<String>,
    /// Reverse lookup from string content to its position in `strings`.
    index_map: HashMap<String, u32>,
}

impl StringTable {
    /// Create an empty table.
    fn new() -> Self {
        StringTable {
            strings: Vec::new(),
            index_map: HashMap::new(),
        }
    }

    /// Return the index of `s`, adding it to the table on first sight.
    fn intern(&mut self, s: &str) -> u32 {
        match self.index_map.get(s).copied() {
            Some(existing) => existing,
            None => {
                let fresh = self.strings.len() as u32;
                let owned = s.to_owned();
                self.index_map.insert(owned.clone(), fresh);
                self.strings.push(owned);
                fresh
            }
        }
    }
}
317
318fn cell_type_tag(value: &CellValue) -> u8 {
319    match value {
320        CellValue::Empty => TYPE_EMPTY,
321        CellValue::Number(_) => TYPE_NUMBER,
322        CellValue::String(_) => TYPE_STRING,
323        CellValue::Bool(_) => TYPE_BOOL,
324        CellValue::Date(_) => TYPE_DATE,
325        CellValue::Error(_) => TYPE_ERROR,
326        CellValue::Formula { .. } => TYPE_FORMULA,
327        CellValue::RichString(_) => TYPE_RICH_STRING,
328    }
329}
330
331fn encode_cell_payload(value: &CellValue, st: &mut StringTable) -> [u8; 8] {
332    let mut payload = [0u8; 8];
333    match value {
334        CellValue::Empty => {}
335        CellValue::Number(n) => {
336            payload[0..8].copy_from_slice(&n.to_le_bytes());
337        }
338        CellValue::String(s) => {
339            let idx = st.intern(s);
340            payload[0..4].copy_from_slice(&idx.to_le_bytes());
341        }
342        CellValue::Bool(b) => {
343            payload[0] = u8::from(*b);
344        }
345        CellValue::Date(n) => {
346            payload[0..8].copy_from_slice(&n.to_le_bytes());
347        }
348        CellValue::Error(s) => {
349            let idx = st.intern(s);
350            payload[0..4].copy_from_slice(&idx.to_le_bytes());
351        }
352        CellValue::Formula { expr, .. } => {
353            let idx = st.intern(expr);
354            payload[0..4].copy_from_slice(&idx.to_le_bytes());
355        }
356        CellValue::RichString(runs) => {
357            let plain = rich_text::rich_text_to_plain(runs);
358            let idx = st.intern(&plain);
359            payload[0..4].copy_from_slice(&idx.to_le_bytes());
360        }
361    }
362    payload
363}
364
365/// Encode cell values into a raw buffer for transfer.
366///
367/// Takes rows as `(row_number, cells)` where each cell is
368/// `(col_number, CellValue)`. Both row and column numbers are 1-based.
369/// Returns the encoded binary buffer.
370pub fn cells_to_raw_buffer(rows: &[(u32, Vec<(u32, CellValue)>)]) -> Result<Vec<u8>> {
371    if rows.is_empty() {
372        return write_empty_buffer();
373    }
374
375    let mut max_col: u32 = 0;
376    let mut total_cells: usize = 0;
377    for (_, cells) in rows {
378        for &(col, _) in cells {
379            if col > max_col {
380                max_col = col;
381            }
382        }
383        total_cells += cells.len();
384    }
385
386    let row_count = rows.len() as u32;
387    let col_count = max_col as u16;
388
389    let grid_size = row_count as usize * col_count as usize;
390    let density = if grid_size > 0 {
391        total_cells as f64 / grid_size as f64
392    } else {
393        0.0
394    };
395    let is_sparse = density < DENSITY_THRESHOLD;
396
397    let mut st = StringTable::new();
398    let mut row_payloads: Vec<EncodedRow> = Vec::with_capacity(rows.len());
399    for &(row_num, ref cells) in rows {
400        let mut encoded_cells = Vec::with_capacity(cells.len());
401        for &(col, ref value) in cells {
402            let tag = cell_type_tag(value);
403            let payload = encode_cell_payload(value, &mut st);
404            encoded_cells.push((col as u16 - 1, tag, payload));
405        }
406        row_payloads.push((row_num, encoded_cells));
407    }
408
409    let row_index_size = row_count as usize * ROW_INDEX_ENTRY_SIZE;
410    let string_table_size = compute_string_table_size(&st);
411    let cell_data_size = if is_sparse {
412        compute_sparse_size(&row_payloads)
413    } else {
414        compute_dense_size(row_count, col_count)
415    };
416
417    let total_size = HEADER_SIZE + row_index_size + string_table_size + cell_data_size;
418    let mut buf = vec![0u8; total_size];
419
420    write_header(
421        &mut buf,
422        row_count,
423        col_count,
424        if is_sparse { FLAG_SPARSE } else { 0 },
425    );
426
427    let cell_data_start = HEADER_SIZE + row_index_size + string_table_size;
428
429    if is_sparse {
430        write_sparse_data(&mut buf, &row_payloads, cell_data_start);
431    } else {
432        write_dense_data(&mut buf, &row_payloads, col_count, cell_data_start);
433    }
434
435    write_row_index(&mut buf, &row_payloads, is_sparse, col_count);
436    write_string_table(&mut buf, HEADER_SIZE + row_index_size, &st);
437
438    Ok(buf)
439}
440
441fn write_empty_buffer() -> Result<Vec<u8>> {
442    let st_size = 8; // count(4) + blob_size(4), both zero
443    let total = HEADER_SIZE + st_size;
444    let mut buf = vec![0u8; total];
445    write_header(&mut buf, 0, 0, 0);
446    // String table: count=0, blob_size=0
447    buf[HEADER_SIZE..HEADER_SIZE + 4].copy_from_slice(&0u32.to_le_bytes());
448    buf[HEADER_SIZE + 4..HEADER_SIZE + 8].copy_from_slice(&0u32.to_le_bytes());
449    Ok(buf)
450}
451
452fn write_header(buf: &mut [u8], row_count: u32, col_count: u16, flags: u32) {
453    buf[0..4].copy_from_slice(&MAGIC.to_le_bytes());
454    buf[4..6].copy_from_slice(&VERSION.to_le_bytes());
455    buf[6..10].copy_from_slice(&row_count.to_le_bytes());
456    buf[10..12].copy_from_slice(&col_count.to_le_bytes());
457    buf[12..16].copy_from_slice(&flags.to_le_bytes());
458}
459
460fn compute_string_table_size(st: &StringTable) -> usize {
461    let blob_size: usize = st.strings.iter().map(|s| s.len()).sum();
462    8 + st.strings.len() * 4 + blob_size // count(4) + blob_size(4) + offsets + blob
463}
464
465fn write_string_table(buf: &mut [u8], offset: usize, st: &StringTable) {
466    let count = st.strings.len() as u32;
467    let blob_size: usize = st.strings.iter().map(|s| s.len()).sum();
468
469    buf[offset..offset + 4].copy_from_slice(&count.to_le_bytes());
470    buf[offset + 4..offset + 8].copy_from_slice(&(blob_size as u32).to_le_bytes());
471
472    let offsets_start = offset + 8;
473    let blob_start = offsets_start + st.strings.len() * 4;
474
475    let mut blob_offset: u32 = 0;
476    for (i, s) in st.strings.iter().enumerate() {
477        let pos = offsets_start + i * 4;
478        buf[pos..pos + 4].copy_from_slice(&blob_offset.to_le_bytes());
479        let dst = blob_start + blob_offset as usize;
480        buf[dst..dst + s.len()].copy_from_slice(s.as_bytes());
481        blob_offset += s.len() as u32;
482    }
483}
484
485fn compute_dense_size(row_count: u32, col_count: u16) -> usize {
486    row_count as usize * col_count as usize * CELL_STRIDE
487}
488
489fn compute_sparse_size(row_payloads: &[EncodedRow]) -> usize {
490    let mut size = 0;
491    for (_, cells) in row_payloads {
492        size += 2 + cells.len() * SPARSE_ENTRY_SIZE; // cell_count(u16) + entries
493    }
494    size
495}
496
497fn write_row_index(buf: &mut [u8], row_payloads: &[EncodedRow], is_sparse: bool, col_count: u16) {
498    let base = HEADER_SIZE;
499    if is_sparse {
500        let mut data_offset: u32 = 0;
501        for (i, (row_num, cells)) in row_payloads.iter().enumerate() {
502            let pos = base + i * ROW_INDEX_ENTRY_SIZE;
503            buf[pos..pos + 4].copy_from_slice(&row_num.to_le_bytes());
504            if cells.is_empty() {
505                buf[pos + 4..pos + 8].copy_from_slice(&EMPTY_ROW_SENTINEL.to_le_bytes());
506            } else {
507                buf[pos + 4..pos + 8].copy_from_slice(&data_offset.to_le_bytes());
508            }
509            let row_size = 2 + cells.len() * SPARSE_ENTRY_SIZE;
510            data_offset += row_size as u32;
511        }
512    } else {
513        for (i, (row_num, _)) in row_payloads.iter().enumerate() {
514            let pos = base + i * ROW_INDEX_ENTRY_SIZE;
515            buf[pos..pos + 4].copy_from_slice(&row_num.to_le_bytes());
516            let offset = i as u32 * col_count as u32 * CELL_STRIDE as u32;
517            buf[pos + 4..pos + 8].copy_from_slice(&offset.to_le_bytes());
518        }
519    }
520}
521
522fn write_dense_data(
523    buf: &mut [u8],
524    row_payloads: &[EncodedRow],
525    col_count: u16,
526    cell_data_start: usize,
527) {
528    for (i, (_, cells)) in row_payloads.iter().enumerate() {
529        let row_start = cell_data_start + i * col_count as usize * CELL_STRIDE;
530        for &(col_idx, tag, ref payload) in cells {
531            let cell_off = row_start + col_idx as usize * CELL_STRIDE;
532            buf[cell_off] = tag;
533            buf[cell_off + 1..cell_off + 9].copy_from_slice(payload);
534        }
535    }
536}
537
538fn write_sparse_data(buf: &mut [u8], row_payloads: &[EncodedRow], cell_data_start: usize) {
539    let mut offset = cell_data_start;
540    for (_, cells) in row_payloads {
541        let cell_count = cells.len() as u16;
542        buf[offset..offset + 2].copy_from_slice(&cell_count.to_le_bytes());
543        offset += 2;
544        for &(col_idx, tag, ref payload) in cells {
545            buf[offset..offset + 2].copy_from_slice(&col_idx.to_le_bytes());
546            buf[offset + 2] = tag;
547            buf[offset + 3..offset + 11].copy_from_slice(payload);
548            offset += SPARSE_ENTRY_SIZE;
549        }
550    }
551}
552
553#[cfg(test)]
554mod tests {
555    use super::*;
556    use crate::rich_text::RichTextRun;
557
558    #[test]
559    fn test_decode_empty_buffer() {
560        let buf = cells_to_raw_buffer(&[]).unwrap();
561        let result = raw_buffer_to_cells(&buf).unwrap();
562        assert!(result.is_empty());
563    }
564
565    #[test]
566    fn test_decode_single_number() {
567        let rows = vec![(1, vec![(1, CellValue::Number(42.5))])];
568        let buf = cells_to_raw_buffer(&rows).unwrap();
569        let result = raw_buffer_to_cells(&buf).unwrap();
570        assert_eq!(result.len(), 1);
571        assert_eq!(result[0].0, 1);
572        assert_eq!(result[0].1.len(), 1);
573        assert_eq!(result[0].1[0].0, 1);
574        assert_eq!(result[0].1[0].1, CellValue::Number(42.5));
575    }
576
577    #[test]
578    fn test_decode_string_with_table() {
579        let rows = vec![(1, vec![(1, CellValue::String("hello world".to_string()))])];
580        let buf = cells_to_raw_buffer(&rows).unwrap();
581        let result = raw_buffer_to_cells(&buf).unwrap();
582        assert_eq!(result.len(), 1);
583        assert_eq!(
584            result[0].1[0].1,
585            CellValue::String("hello world".to_string())
586        );
587    }
588
589    #[test]
590    fn test_decode_bool_true_false() {
591        let rows = vec![(
592            1,
593            vec![(1, CellValue::Bool(true)), (2, CellValue::Bool(false))],
594        )];
595        let buf = cells_to_raw_buffer(&rows).unwrap();
596        let result = raw_buffer_to_cells(&buf).unwrap();
597        assert_eq!(result[0].1[0].1, CellValue::Bool(true));
598        assert_eq!(result[0].1[1].1, CellValue::Bool(false));
599    }
600
601    #[test]
602    fn test_decode_error() {
603        let rows = vec![(1, vec![(1, CellValue::Error("#DIV/0!".to_string()))])];
604        let buf = cells_to_raw_buffer(&rows).unwrap();
605        let result = raw_buffer_to_cells(&buf).unwrap();
606        assert_eq!(result[0].1[0].1, CellValue::Error("#DIV/0!".to_string()));
607    }
608
609    #[test]
610    fn test_decode_formula() {
611        let rows = vec![(
612            1,
613            vec![(
614                1,
615                CellValue::Formula {
616                    expr: "SUM(A1:A10)".to_string(),
617                    result: None,
618                },
619            )],
620        )];
621        let buf = cells_to_raw_buffer(&rows).unwrap();
622        let result = raw_buffer_to_cells(&buf).unwrap();
623        assert_eq!(
624            result[0].1[0].1,
625            CellValue::Formula {
626                expr: "SUM(A1:A10)".to_string(),
627                result: None,
628            }
629        );
630    }
631
632    #[test]
633    fn test_decode_date() {
634        let serial = 44927.0; // 2023-01-01
635        let rows = vec![(1, vec![(1, CellValue::Date(serial))])];
636        let buf = cells_to_raw_buffer(&rows).unwrap();
637        let result = raw_buffer_to_cells(&buf).unwrap();
638        assert_eq!(result[0].1[0].1, CellValue::Date(serial));
639    }
640
641    #[test]
642    fn test_decode_mixed_row() {
643        let rows = vec![(
644            3,
645            vec![
646                (1, CellValue::Number(1.0)),
647                (2, CellValue::String("text".to_string())),
648                (3, CellValue::Bool(true)),
649                (4, CellValue::Date(44927.0)),
650                (5, CellValue::Error("#N/A".to_string())),
651                (
652                    6,
653                    CellValue::Formula {
654                        expr: "A3+B3".to_string(),
655                        result: None,
656                    },
657                ),
658            ],
659        )];
660        let buf = cells_to_raw_buffer(&rows).unwrap();
661        let result = raw_buffer_to_cells(&buf).unwrap();
662
663        assert_eq!(result.len(), 1);
664        assert_eq!(result[0].0, 3);
665        let cells = &result[0].1;
666        assert_eq!(cells.len(), 6);
667        assert_eq!(cells[0], (1, CellValue::Number(1.0)));
668        assert_eq!(cells[1], (2, CellValue::String("text".to_string())));
669        assert_eq!(cells[2], (3, CellValue::Bool(true)));
670        assert_eq!(cells[3], (4, CellValue::Date(44927.0)));
671        assert_eq!(cells[4], (5, CellValue::Error("#N/A".to_string())));
672        assert_eq!(
673            cells[5],
674            (
675                6,
676                CellValue::Formula {
677                    expr: "A3+B3".to_string(),
678                    result: None,
679                }
680            )
681        );
682    }
683
684    #[test]
685    fn test_round_trip_cells_to_buffer() {
686        let rows = vec![
687            (
688                1,
689                vec![
690                    (1, CellValue::String("Name".to_string())),
691                    (2, CellValue::String("Age".to_string())),
692                    (3, CellValue::String("Active".to_string())),
693                ],
694            ),
695            (
696                2,
697                vec![
698                    (1, CellValue::String("Alice".to_string())),
699                    (2, CellValue::Number(30.0)),
700                    (3, CellValue::Bool(true)),
701                ],
702            ),
703            (
704                3,
705                vec![
706                    (1, CellValue::String("Bob".to_string())),
707                    (2, CellValue::Number(25.0)),
708                    (3, CellValue::Bool(false)),
709                ],
710            ),
711        ];
712        let buf = cells_to_raw_buffer(&rows).unwrap();
713        let result = raw_buffer_to_cells(&buf).unwrap();
714        assert_eq!(result, rows);
715    }
716
717    #[test]
718    fn test_invalid_magic() {
719        let mut buf = vec![0u8; 24];
720        buf[0..4].copy_from_slice(&0xDEADBEEFu32.to_le_bytes());
721        let err = raw_buffer_to_cells(&buf).unwrap_err();
722        assert!(err.to_string().contains("invalid buffer magic"));
723    }
724
725    #[test]
726    fn test_buffer_too_short() {
727        let buf = vec![0u8; 4];
728        let err = raw_buffer_to_cells(&buf).unwrap_err();
729        assert!(err.to_string().contains("buffer too short"));
730    }
731
732    #[test]
733    fn test_rich_string_degrades_to_string() {
734        let runs = vec![
735            RichTextRun {
736                text: "bold ".to_string(),
737                font: None,
738                size: None,
739                bold: true,
740                italic: false,
741                color: None,
742            },
743            RichTextRun {
744                text: "text".to_string(),
745                font: None,
746                size: None,
747                bold: false,
748                italic: false,
749                color: None,
750            },
751        ];
752        let rows = vec![(1, vec![(1, CellValue::RichString(runs))])];
753        let buf = cells_to_raw_buffer(&rows).unwrap();
754        let result = raw_buffer_to_cells(&buf).unwrap();
755        assert_eq!(result[0].1[0].1, CellValue::String("bold text".to_string()));
756    }
757
758    #[test]
759    fn test_multiple_rows_and_columns() {
760        let rows = vec![
761            (
762                1,
763                vec![(1, CellValue::Number(1.0)), (5, CellValue::Number(5.0))],
764            ),
765            (10, vec![(3, CellValue::String("mid".to_string()))]),
766            (
767                100,
768                vec![(1, CellValue::Bool(true)), (5, CellValue::Date(45000.0))],
769            ),
770        ];
771        let buf = cells_to_raw_buffer(&rows).unwrap();
772        let result = raw_buffer_to_cells(&buf).unwrap();
773        assert_eq!(result.len(), 3);
774        assert_eq!(result[0].0, 1);
775        assert_eq!(result[1].0, 10);
776        assert_eq!(result[2].0, 100);
777        assert_eq!(result[0].1.len(), 2);
778        assert_eq!(result[0].1[0], (1, CellValue::Number(1.0)));
779        assert_eq!(result[0].1[1], (5, CellValue::Number(5.0)));
780        assert_eq!(result[1].1[0], (3, CellValue::String("mid".to_string())));
781        assert_eq!(result[2].1[0], (1, CellValue::Bool(true)));
782        assert_eq!(result[2].1[1], (5, CellValue::Date(45000.0)));
783    }
784
785    #[test]
786    fn test_sparse_format_selected_for_sparse_data() {
787        // 10 rows with 1 cell each, but col ranges up to 100 -> density = 10/(10*100) = 1%
788        let mut rows = Vec::new();
789        for i in 1..=10 {
790            rows.push((i, vec![(100, CellValue::Number(i as f64))]));
791        }
792        let buf = cells_to_raw_buffer(&rows).unwrap();
793        let header = read_header(&buf).unwrap();
794        assert_ne!(header.flags & FLAG_SPARSE, 0, "sparse flag should be set");
795
796        let result = raw_buffer_to_cells(&buf).unwrap();
797        assert_eq!(result.len(), 10);
798        for (i, (row_num, cells)) in result.iter().enumerate() {
799            assert_eq!(*row_num, i as u32 + 1);
800            assert_eq!(cells[0], (100, CellValue::Number((i + 1) as f64)));
801        }
802    }
803
804    #[test]
805    fn test_dense_format_selected_for_dense_data() {
806        let mut rows = Vec::new();
807        for r in 1..=5 {
808            let cells: Vec<(u32, CellValue)> = (1..=5)
809                .map(|c| (c, CellValue::Number((r * 10 + c) as f64)))
810                .collect();
811            rows.push((r, cells));
812        }
813        let buf = cells_to_raw_buffer(&rows).unwrap();
814        let header = read_header(&buf).unwrap();
815        assert_eq!(
816            header.flags & FLAG_SPARSE,
817            0,
818            "sparse flag should not be set"
819        );
820
821        let result = raw_buffer_to_cells(&buf).unwrap();
822        assert_eq!(result.len(), 5);
823        for r in 0..5 {
824            assert_eq!(result[r].0, (r + 1) as u32);
825            assert_eq!(result[r].1.len(), 5);
826            for c in 0..5 {
827                let expected = ((r + 1) * 10 + (c + 1)) as f64;
828                assert_eq!(
829                    result[r].1[c],
830                    ((c + 1) as u32, CellValue::Number(expected))
831                );
832            }
833        }
834    }
835
#[test]
fn test_string_deduplication() {
    // One row in which the first two cells carry the same text.
    let first_row = vec![
        (1, CellValue::String("repeated".to_string())),
        (2, CellValue::String("repeated".to_string())),
        (3, CellValue::String("unique".to_string())),
    ];
    let rows = vec![(1, first_row)];
    let encoded = cells_to_raw_buffer(&rows).unwrap();

    // The string table is read at the offset just past the header and the
    // row index; its first four bytes are a little-endian entry count.
    let header = read_header(&encoded).unwrap();
    let table_start = HEADER_SIZE + header.row_count as usize * ROW_INDEX_ENTRY_SIZE;
    let mut count_bytes = [0u8; 4];
    count_bytes.copy_from_slice(&encoded[table_start..table_start + 4]);
    let unique_strings = u32::from_le_bytes(count_bytes);
    assert_eq!(
        unique_strings, 2,
        "string table should have 2 unique strings, not 3"
    );

    // Deduplication must not change what the cells decode to.
    let decoded = raw_buffer_to_cells(&encoded).unwrap();
    let cells = &decoded[0].1;
    for (idx, text) in ["repeated", "repeated", "unique"].iter().copied().enumerate() {
        assert_eq!(cells[idx].1, CellValue::String(text.to_string()));
    }
}
857
#[test]
fn test_header_fields() {
    // Two rows; the highest column used anywhere is column 3.
    let first = (
        1,
        vec![(1, CellValue::Number(1.0)), (3, CellValue::Number(3.0))],
    );
    let second = (2, vec![(2, CellValue::Number(2.0))]);
    let rows = vec![first, second];

    let encoded = cells_to_raw_buffer(&rows).unwrap();
    let parsed = read_header(&encoded).unwrap();

    assert_eq!(parsed._version, VERSION);
    assert_eq!(parsed.row_count, 2);
    assert_eq!(parsed.col_count, 3);
}
873
#[test]
fn test_formula_result_not_preserved() {
    // A formula cell that carries a cached result going in.
    let expr = "1+1".to_string();
    let with_result = CellValue::Formula {
        expr: expr.clone(),
        result: Some(Box::new(CellValue::Number(2.0))),
    };
    let rows = vec![(1, vec![(1, with_result)])];

    let encoded = cells_to_raw_buffer(&rows).unwrap();
    let decoded = raw_buffer_to_cells(&encoded).unwrap();

    // Only the expression survives the round trip; the result comes back as `None`.
    let expected = CellValue::Formula { expr, result: None };
    assert_eq!(decoded[0].1[0].1, expected);
}
896
#[test]
fn test_hand_constructed_dense_buffer() {
    // Copies `bytes` into `buf` starting at `offset`.
    fn put(buf: &mut [u8], offset: usize, bytes: &[u8]) {
        buf[offset..offset + bytes.len()].copy_from_slice(bytes);
    }

    // Hand-built dense buffer: 1 row, 2 columns, holding a number and a bool.
    let row_count: u32 = 1;
    let col_count: u16 = 2;

    // Section sizes and offsets, in the order the sections are laid out.
    let string_table_len = 8; // count(4) + blob_size(4), no offsets, no blob
    let cell_data_len = 2 * CELL_STRIDE; // 2 columns, CELL_STRIDE bytes each
    let row_index_at = HEADER_SIZE;
    let string_table_at = row_index_at + ROW_INDEX_ENTRY_SIZE;
    let cells_at = string_table_at + string_table_len;

    let mut raw = vec![0u8; cells_at + cell_data_len];

    // Header: magic, version, row count, column count, flags (0 = dense).
    put(&mut raw, 0, &MAGIC.to_le_bytes());
    put(&mut raw, 4, &1u16.to_le_bytes());
    put(&mut raw, 6, &row_count.to_le_bytes());
    put(&mut raw, 10, &col_count.to_le_bytes());
    put(&mut raw, 12, &0u32.to_le_bytes());

    // Row index: row number 1, cell data at offset 0.
    put(&mut raw, row_index_at, &1u32.to_le_bytes());
    put(&mut raw, row_index_at + 4, &0u32.to_le_bytes());

    // String table: zero entries, zero-length blob.
    put(&mut raw, string_table_at, &0u32.to_le_bytes());
    put(&mut raw, string_table_at + 4, &0u32.to_le_bytes());

    // First cell (column index 0): the number 99.0.
    raw[cells_at] = TYPE_NUMBER;
    put(&mut raw, cells_at + 1, &99.0f64.to_le_bytes());

    // Second cell (column index 1): the bool `true`.
    let second_cell_at = cells_at + CELL_STRIDE;
    raw[second_cell_at] = TYPE_BOOL;
    raw[second_cell_at + 1] = 1;

    let decoded = raw_buffer_to_cells(&raw).unwrap();
    assert_eq!(decoded.len(), 1);
    let (row_num, cells) = &decoded[0];
    assert_eq!(*row_num, 1);
    assert_eq!(cells[0], (1, CellValue::Number(99.0)));
    assert_eq!(cells[1], (2, CellValue::Bool(true)));
}
940
#[test]
fn test_hand_constructed_sparse_buffer() {
    // Copies `bytes` into `buf` starting at `offset`.
    fn put(buf: &mut [u8], offset: usize, bytes: &[u8]) {
        buf[offset..offset + bytes.len()].copy_from_slice(bytes);
    }

    // Hand-built sparse buffer: one row (row number 5), col_count = 100,
    // and a single cell at 1-based column 50.
    let row_count: u32 = 1;
    let col_count: u16 = 100;

    // Section sizes and offsets, in the order the sections are laid out.
    let string_table_len = 8; // count = 0, blob_size = 0
    let cell_data_len = 2 + SPARSE_ENTRY_SIZE; // cell_count(2) + one entry
    let row_index_at = HEADER_SIZE;
    let string_table_at = row_index_at + ROW_INDEX_ENTRY_SIZE;
    let cells_at = string_table_at + string_table_len;

    let mut raw = vec![0u8; cells_at + cell_data_len];

    // Header: magic, version, row count, column count, flags (sparse).
    put(&mut raw, 0, &MAGIC.to_le_bytes());
    put(&mut raw, 4, &1u16.to_le_bytes());
    put(&mut raw, 6, &row_count.to_le_bytes());
    put(&mut raw, 10, &col_count.to_le_bytes());
    put(&mut raw, 12, &FLAG_SPARSE.to_le_bytes());

    // Row index: row number 5, cell data at offset 0.
    put(&mut raw, row_index_at, &5u32.to_le_bytes());
    put(&mut raw, row_index_at + 4, &0u32.to_le_bytes());

    // String table: zero entries, zero-length blob.
    put(&mut raw, string_table_at, &0u32.to_le_bytes());
    put(&mut raw, string_table_at + 4, &0u32.to_le_bytes());

    // Sparse row: a u16 cell count, then (u16 column index, type tag, 8-byte payload).
    put(&mut raw, cells_at, &1u16.to_le_bytes());
    let entry_at = cells_at + 2;
    put(&mut raw, entry_at, &49u16.to_le_bytes()); // 0-based column index 49
    raw[entry_at + 2] = TYPE_NUMBER;
    put(&mut raw, entry_at + 3, &7.77f64.to_le_bytes());

    let decoded = raw_buffer_to_cells(&raw).unwrap();
    assert_eq!(decoded.len(), 1);
    let (row_num, cells) = &decoded[0];
    assert_eq!(*row_num, 5);
    assert_eq!(cells[0], (50, CellValue::Number(7.77))); // reported as 1-based column 50
}
982}