1use std::io::BufRead;
13
14use quick_xml::events::Event;
15use quick_xml::name::QName;
16
17use crate::cell::CellValue;
18use crate::error::{Error, Result};
19use crate::sst::SharedStringTable;
20use crate::utils::cell_ref::cell_name_to_coordinates;
21use crate::workbook::open_options::DateInterpretation;
22
23#[derive(Debug, Clone)]
25pub struct StreamRow {
26 pub row_number: u32,
28 pub cells: Vec<(u32, CellValue)>,
30}
31
32pub struct SheetStreamReader<'a, R: BufRead> {
38 reader: quick_xml::Reader<R>,
39 sst: &'a SharedStringTable,
40 done: bool,
41 row_limit: Option<u32>,
42 rows_emitted: u32,
43 date_interpretation: DateInterpretation,
44 style_is_date: Vec<bool>,
45}
46
47impl<'a, R: BufRead> SheetStreamReader<'a, R> {
48 pub fn new(source: R, sst: &'a SharedStringTable) -> Self {
54 let mut reader = quick_xml::Reader::from_reader(source);
55 reader.config_mut().trim_text(false);
56 Self {
57 reader,
58 sst,
59 done: false,
60 row_limit: None,
61 rows_emitted: 0,
62 date_interpretation: DateInterpretation::default(),
63 style_is_date: Vec::new(),
64 }
65 }
66
67 pub fn row_limit(mut self, limit: Option<u32>) -> Self {
70 self.row_limit = limit;
71 self
72 }
73
74 pub fn date_promotion(
81 mut self,
82 interpretation: DateInterpretation,
83 style_is_date: Vec<bool>,
84 ) -> Self {
85 self.date_interpretation = interpretation;
86 self.style_is_date = style_is_date;
87 self
88 }
89
90 fn should_promote_to_date(&self, style_idx: Option<u32>) -> bool {
94 if !matches!(self.date_interpretation, DateInterpretation::NumFmt) {
95 return false;
96 }
97 let idx = match style_idx {
98 Some(i) => i as usize,
99 None => return false,
100 };
101 self.style_is_date.get(idx).copied().unwrap_or(false)
102 }
103
104 pub fn next_batch(&mut self, batch_size: usize) -> Result<Vec<StreamRow>> {
107 if self.done {
108 return Ok(Vec::new());
109 }
110
111 let mut rows = Vec::with_capacity(batch_size);
112 let mut buf = Vec::with_capacity(4096);
113
114 loop {
115 if rows.len() >= batch_size {
116 break;
117 }
118 if let Some(limit) = self.row_limit {
119 if self.rows_emitted >= limit {
120 self.done = true;
121 break;
122 }
123 }
124
125 buf.clear();
126 match self
127 .reader
128 .read_event_into(&mut buf)
129 .map_err(|e| Error::XmlParse(e.to_string()))?
130 {
131 Event::Start(ref e) if e.name() == QName(b"row") => {
132 let row_number = extract_row_number(e)?;
133 let row = self.parse_row_body(row_number)?;
134 self.rows_emitted += 1;
135 if !row.cells.is_empty() {
136 rows.push(row);
137 }
138 }
139 Event::Eof => {
140 self.done = true;
141 break;
142 }
143 _ => {}
144 }
145 }
146
147 Ok(rows)
148 }
149
150 pub fn has_more(&self) -> bool {
152 !self.done
153 }
154
155 pub fn close(self) {
157 drop(self);
158 }
159
160 fn parse_row_body(&mut self, row_number: u32) -> Result<StreamRow> {
163 let mut cells = Vec::new();
164 let mut buf = Vec::with_capacity(1024);
165
166 loop {
167 buf.clear();
168 match self
169 .reader
170 .read_event_into(&mut buf)
171 .map_err(|e| Error::XmlParse(e.to_string()))?
172 {
173 Event::Start(ref e) if e.name() == QName(b"c") => {
174 let (col, cell_type, style_idx) = extract_cell_attrs(e)?;
175 if let Some(col) = col {
176 let promote = self.should_promote_to_date(style_idx);
177 let cv = self.parse_cell_body(cell_type.as_deref(), promote)?;
178 cells.push((col, cv));
179 } else {
180 self.skip_to_end_of(b"c")?;
181 }
182 }
183 Event::Empty(ref e) if e.name() == QName(b"c") => {
184 let (col, cell_type, style_idx) = extract_cell_attrs(e)?;
185 if let Some(col) = col {
186 let promote = self.should_promote_to_date(style_idx);
187 let cv = resolve_cell_value(
188 self.sst,
189 cell_type.as_deref(),
190 None,
191 None,
192 None,
193 promote,
194 )?;
195 cells.push((col, cv));
196 }
197 }
198 Event::End(ref e) if e.name() == QName(b"row") => break,
199 Event::Eof => {
200 self.done = true;
201 break;
202 }
203 _ => {}
204 }
205 }
206
207 Ok(StreamRow { row_number, cells })
208 }
209
210 fn parse_cell_body(
213 &mut self,
214 cell_type: Option<&str>,
215 promote_to_date: bool,
216 ) -> Result<CellValue> {
217 let mut value_text: Option<String> = None;
218 let mut formula_text: Option<String> = None;
219 let mut inline_string: Option<String> = None;
220 let mut buf = Vec::with_capacity(512);
221 let mut in_is = false;
222
223 loop {
224 buf.clear();
225 match self
226 .reader
227 .read_event_into(&mut buf)
228 .map_err(|e| Error::XmlParse(e.to_string()))?
229 {
230 Event::Start(ref e) => {
231 let local = e.local_name();
232 if local.as_ref() == b"v" {
233 value_text = Some(self.read_text_content(b"v")?);
234 } else if local.as_ref() == b"f" {
235 formula_text = Some(self.read_text_content(b"f")?);
236 } else if local.as_ref() == b"is" {
237 in_is = true;
238 inline_string = Some(String::new());
239 } else if local.as_ref() == b"t" && in_is {
240 let t = self.read_text_content(b"t")?;
241 if let Some(ref mut is) = inline_string {
242 is.push_str(&t);
243 }
244 }
245 }
246 Event::End(ref e) => {
247 let local = e.local_name();
248 if local.as_ref() == b"c" {
249 break;
250 }
251 if local.as_ref() == b"is" {
252 in_is = false;
253 }
254 }
255 Event::Eof => {
256 self.done = true;
257 break;
258 }
259 _ => {}
260 }
261 }
262
263 resolve_cell_value(
264 self.sst,
265 cell_type,
266 value_text.as_deref(),
267 formula_text,
268 inline_string,
269 promote_to_date,
270 )
271 }
272
273 fn read_text_content(&mut self, end_tag: &[u8]) -> Result<String> {
275 let mut text = String::new();
276 let mut buf = Vec::with_capacity(256);
277 loop {
278 buf.clear();
279 match self
280 .reader
281 .read_event_into(&mut buf)
282 .map_err(|e| Error::XmlParse(e.to_string()))?
283 {
284 Event::Text(ref e) => {
285 let decoded = e.unescape().map_err(|e| Error::XmlParse(e.to_string()))?;
286 text.push_str(&decoded);
287 }
288 Event::End(ref e) if e.local_name().as_ref() == end_tag => break,
289 Event::Eof => {
290 self.done = true;
291 break;
292 }
293 _ => {}
294 }
295 }
296 Ok(text)
297 }
298
299 fn skip_to_end_of(&mut self, tag: &[u8]) -> Result<()> {
301 let mut buf = Vec::with_capacity(256);
302 let mut depth: u32 = 1;
303 loop {
304 buf.clear();
305 match self
306 .reader
307 .read_event_into(&mut buf)
308 .map_err(|e| Error::XmlParse(e.to_string()))?
309 {
310 Event::Start(ref e) if e.local_name().as_ref() == tag => {
311 depth += 1;
312 }
313 Event::End(ref e) if e.local_name().as_ref() == tag => {
314 depth -= 1;
315 if depth == 0 {
316 break;
317 }
318 }
319 Event::Eof => {
320 self.done = true;
321 break;
322 }
323 _ => {}
324 }
325 }
326 Ok(())
327 }
328}
329
330pub struct OwnedSheetStreamReader {
337 reader: quick_xml::Reader<std::io::BufReader<std::io::Cursor<Vec<u8>>>>,
338 sst: SharedStringTable,
339 done: bool,
340 row_limit: Option<u32>,
341 rows_emitted: u32,
342 date_interpretation: DateInterpretation,
343 style_is_date: Vec<bool>,
344}
345
346impl OwnedSheetStreamReader {
347 pub fn new(xml_bytes: Vec<u8>, sst: SharedStringTable) -> Self {
353 let cursor = std::io::Cursor::new(xml_bytes);
354 let buf_reader = std::io::BufReader::new(cursor);
355 let mut reader = quick_xml::Reader::from_reader(buf_reader);
356 reader.config_mut().trim_text(false);
357 Self {
358 reader,
359 sst,
360 done: false,
361 row_limit: None,
362 rows_emitted: 0,
363 date_interpretation: DateInterpretation::default(),
364 style_is_date: Vec::new(),
365 }
366 }
367
368 pub fn row_limit(mut self, limit: Option<u32>) -> Self {
371 self.row_limit = limit;
372 self
373 }
374
375 pub fn date_promotion(
378 mut self,
379 interpretation: DateInterpretation,
380 style_is_date: Vec<bool>,
381 ) -> Self {
382 self.date_interpretation = interpretation;
383 self.style_is_date = style_is_date;
384 self
385 }
386
387 fn should_promote_to_date(&self, style_idx: Option<u32>) -> bool {
388 if !matches!(self.date_interpretation, DateInterpretation::NumFmt) {
389 return false;
390 }
391 let idx = match style_idx {
392 Some(i) => i as usize,
393 None => return false,
394 };
395 self.style_is_date.get(idx).copied().unwrap_or(false)
396 }
397
398 pub fn next_batch(&mut self, batch_size: usize) -> Result<Vec<StreamRow>> {
401 if self.done {
402 return Ok(Vec::new());
403 }
404
405 let mut rows = Vec::with_capacity(batch_size);
406 let mut buf = Vec::with_capacity(4096);
407
408 loop {
409 if rows.len() >= batch_size {
410 break;
411 }
412 if let Some(limit) = self.row_limit {
413 if self.rows_emitted >= limit {
414 self.done = true;
415 break;
416 }
417 }
418
419 buf.clear();
420 match self
421 .reader
422 .read_event_into(&mut buf)
423 .map_err(|e| Error::XmlParse(e.to_string()))?
424 {
425 Event::Start(ref e) if e.name() == QName(b"row") => {
426 let row_number = extract_row_number(e)?;
427 let row = self.parse_row_body(row_number)?;
428 self.rows_emitted += 1;
429 if !row.cells.is_empty() {
430 rows.push(row);
431 }
432 }
433 Event::Eof => {
434 self.done = true;
435 break;
436 }
437 _ => {}
438 }
439 }
440
441 Ok(rows)
442 }
443
444 pub fn has_more(&self) -> bool {
446 !self.done
447 }
448
449 pub fn close(self) {
451 drop(self);
452 }
453
454 fn parse_row_body(&mut self, row_number: u32) -> Result<StreamRow> {
455 let mut cells = Vec::new();
456 let mut buf = Vec::with_capacity(1024);
457
458 loop {
459 buf.clear();
460 match self
461 .reader
462 .read_event_into(&mut buf)
463 .map_err(|e| Error::XmlParse(e.to_string()))?
464 {
465 Event::Start(ref e) if e.name() == QName(b"c") => {
466 let (col, cell_type, style_idx) = extract_cell_attrs(e)?;
467 if let Some(col) = col {
468 let promote = self.should_promote_to_date(style_idx);
469 let cv = self.parse_cell_body(cell_type.as_deref(), promote)?;
470 cells.push((col, cv));
471 } else {
472 self.skip_to_end_of(b"c")?;
473 }
474 }
475 Event::Empty(ref e) if e.name() == QName(b"c") => {
476 let (col, cell_type, style_idx) = extract_cell_attrs(e)?;
477 if let Some(col) = col {
478 let promote = self.should_promote_to_date(style_idx);
479 let cv = resolve_cell_value(
480 &self.sst,
481 cell_type.as_deref(),
482 None,
483 None,
484 None,
485 promote,
486 )?;
487 cells.push((col, cv));
488 }
489 }
490 Event::End(ref e) if e.name() == QName(b"row") => break,
491 Event::Eof => {
492 self.done = true;
493 break;
494 }
495 _ => {}
496 }
497 }
498
499 Ok(StreamRow { row_number, cells })
500 }
501
502 fn parse_cell_body(
503 &mut self,
504 cell_type: Option<&str>,
505 promote_to_date: bool,
506 ) -> Result<CellValue> {
507 let mut value_text: Option<String> = None;
508 let mut formula_text: Option<String> = None;
509 let mut inline_string: Option<String> = None;
510 let mut buf = Vec::with_capacity(512);
511 let mut in_is = false;
512
513 loop {
514 buf.clear();
515 match self
516 .reader
517 .read_event_into(&mut buf)
518 .map_err(|e| Error::XmlParse(e.to_string()))?
519 {
520 Event::Start(ref e) => {
521 let local = e.local_name();
522 if local.as_ref() == b"v" {
523 value_text = Some(self.read_text_content(b"v")?);
524 } else if local.as_ref() == b"f" {
525 formula_text = Some(self.read_text_content(b"f")?);
526 } else if local.as_ref() == b"is" {
527 in_is = true;
528 inline_string = Some(String::new());
529 } else if local.as_ref() == b"t" && in_is {
530 let t = self.read_text_content(b"t")?;
531 if let Some(ref mut is) = inline_string {
532 is.push_str(&t);
533 }
534 }
535 }
536 Event::End(ref e) => {
537 let local = e.local_name();
538 if local.as_ref() == b"c" {
539 break;
540 }
541 if local.as_ref() == b"is" {
542 in_is = false;
543 }
544 }
545 Event::Eof => {
546 self.done = true;
547 break;
548 }
549 _ => {}
550 }
551 }
552
553 resolve_cell_value(
554 &self.sst,
555 cell_type,
556 value_text.as_deref(),
557 formula_text,
558 inline_string,
559 promote_to_date,
560 )
561 }
562
563 fn read_text_content(&mut self, end_tag: &[u8]) -> Result<String> {
564 let mut text = String::new();
565 let mut buf = Vec::with_capacity(256);
566 loop {
567 buf.clear();
568 match self
569 .reader
570 .read_event_into(&mut buf)
571 .map_err(|e| Error::XmlParse(e.to_string()))?
572 {
573 Event::Text(ref e) => {
574 let decoded = e.unescape().map_err(|e| Error::XmlParse(e.to_string()))?;
575 text.push_str(&decoded);
576 }
577 Event::End(ref e) if e.local_name().as_ref() == end_tag => break,
578 Event::Eof => {
579 self.done = true;
580 break;
581 }
582 _ => {}
583 }
584 }
585 Ok(text)
586 }
587
588 fn skip_to_end_of(&mut self, tag: &[u8]) -> Result<()> {
589 let mut buf = Vec::with_capacity(256);
590 let mut depth: u32 = 1;
591 loop {
592 buf.clear();
593 match self
594 .reader
595 .read_event_into(&mut buf)
596 .map_err(|e| Error::XmlParse(e.to_string()))?
597 {
598 Event::Start(ref e) if e.local_name().as_ref() == tag => {
599 depth += 1;
600 }
601 Event::End(ref e) if e.local_name().as_ref() == tag => {
602 depth -= 1;
603 if depth == 0 {
604 break;
605 }
606 }
607 Event::Eof => {
608 self.done = true;
609 break;
610 }
611 _ => {}
612 }
613 }
614 Ok(())
615 }
616}
617
618fn extract_row_number(start: &quick_xml::events::BytesStart<'_>) -> Result<u32> {
620 for attr in start.attributes().flatten() {
621 if attr.key == QName(b"r") {
622 let val =
623 std::str::from_utf8(&attr.value).map_err(|e| Error::XmlParse(e.to_string()))?;
624 return val
625 .parse::<u32>()
626 .map_err(|e| Error::XmlParse(format!("invalid row number: {e}")));
627 }
628 }
629 Err(Error::XmlParse(
630 "row element missing r attribute".to_string(),
631 ))
632}
633
634fn extract_cell_attrs(
637 start: &quick_xml::events::BytesStart<'_>,
638) -> Result<(Option<u32>, Option<String>, Option<u32>)> {
639 let mut cell_ref: Option<String> = None;
640 let mut cell_type: Option<String> = None;
641 let mut style_idx: Option<u32> = None;
642
643 for attr in start.attributes().flatten() {
644 match attr.key {
645 QName(b"r") => {
646 cell_ref = Some(
647 std::str::from_utf8(&attr.value)
648 .map_err(|e| Error::XmlParse(e.to_string()))?
649 .to_string(),
650 );
651 }
652 QName(b"t") => {
653 cell_type = Some(
654 std::str::from_utf8(&attr.value)
655 .map_err(|e| Error::XmlParse(e.to_string()))?
656 .to_string(),
657 );
658 }
659 QName(b"s") => {
660 let raw =
661 std::str::from_utf8(&attr.value).map_err(|e| Error::XmlParse(e.to_string()))?;
662 style_idx = raw.parse::<u32>().ok();
663 }
664 _ => {}
665 }
666 }
667
668 let col = match &cell_ref {
669 Some(r) => Some(cell_name_to_coordinates(r)?.0),
670 None => None,
671 };
672
673 Ok((col, cell_type, style_idx))
674}
675
676fn resolve_cell_value(
683 sst: &SharedStringTable,
684 cell_type: Option<&str>,
685 value_text: Option<&str>,
686 formula_text: Option<String>,
687 inline_string: Option<String>,
688 promote_to_date: bool,
689) -> Result<CellValue> {
690 if let Some(formula) = formula_text {
691 let cached = match (cell_type, value_text) {
692 (Some("b"), Some(v)) => Some(Box::new(CellValue::Bool(v == "1"))),
693 (Some("e"), Some(v)) => Some(Box::new(CellValue::Error(v.to_string()))),
694 (Some("str"), Some(v)) => Some(Box::new(CellValue::String(v.to_string()))),
695 (_, Some(v)) => v
696 .parse::<f64>()
697 .ok()
698 .map(|n| Box::new(CellValue::Number(n))),
699 _ => None,
700 };
701 return Ok(CellValue::Formula {
702 expr: formula,
703 result: cached,
704 });
705 }
706
707 match (cell_type, value_text) {
708 (Some("s"), Some(v)) => {
709 let idx: usize = v
710 .parse()
711 .map_err(|_| Error::Internal(format!("invalid SST index: {v}")))?;
712 let s = sst
713 .get(idx)
714 .ok_or_else(|| Error::Internal(format!("SST index {idx} out of bounds")))?;
715 Ok(CellValue::String(s.to_string()))
716 }
717 (Some("b"), Some(v)) => Ok(CellValue::Bool(v == "1")),
718 (Some("e"), Some(v)) => Ok(CellValue::Error(v.to_string())),
719 (Some("inlineStr"), _) => Ok(CellValue::String(inline_string.unwrap_or_default())),
720 (Some("str"), Some(v)) => Ok(CellValue::String(v.to_string())),
721 (Some("d"), Some(v)) => {
722 let n: f64 = v
723 .parse()
724 .map_err(|_| Error::Internal(format!("invalid date value: {v}")))?;
725 Ok(CellValue::Date(n))
726 }
727 (Some("n") | None, Some(v)) => {
728 let n: f64 = v
729 .parse()
730 .map_err(|_| Error::Internal(format!("invalid number: {v}")))?;
731 if promote_to_date {
732 Ok(CellValue::Date(n))
733 } else {
734 Ok(CellValue::Number(n))
735 }
736 }
737 _ => Ok(CellValue::Empty),
738 }
739}
740
#[cfg(test)]
mod tests {
    use super::*;
    use crate::workbook::{OpenOptions, ReadMode, Workbook};
    use std::io::Cursor;

    /// Builds a shared string table holding `strings` in order.
    fn make_sst(strings: &[&str]) -> SharedStringTable {
        let mut table = SharedStringTable::new();
        for entry in strings.iter() {
            table.add(entry);
        }
        table
    }

    /// Wraps `sheet_data` in a minimal worksheet document.
    fn worksheet_xml(sheet_data: &str) -> String {
        format!(
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<sheetData>
{sheet_data}
</sheetData>
</worksheet>"#
        )
    }

    /// Opens a borrowed stream reader over an in-memory copy of `xml`.
    fn open_reader<'a>(
        xml: &str,
        sst: &'a SharedStringTable,
    ) -> SheetStreamReader<'a, Cursor<Vec<u8>>> {
        SheetStreamReader::new(Cursor::new(xml.as_bytes().to_vec()), sst)
    }

    /// Drains the reader in batches of 100 rows until an empty batch arrives.
    fn read_all(xml: &str, sst: &SharedStringTable, row_limit: Option<u32>) -> Vec<StreamRow> {
        let mut reader = open_reader(xml, sst).row_limit(row_limit);
        let mut collected = Vec::new();
        loop {
            let mut batch = reader.next_batch(100).unwrap();
            if batch.is_empty() {
                return collected;
            }
            collected.append(&mut batch);
        }
    }

    /// Unpacks a formula cell into its expression and cached result,
    /// panicking for any other variant.
    fn expect_formula(value: &CellValue) -> (&str, Option<&CellValue>) {
        match value {
            CellValue::Formula { expr, result } => (expr.as_str(), result.as_deref()),
            other => panic!("expected Formula, got {:?}", other),
        }
    }

    /// Creates a temporary directory and a file path inside it. The directory
    /// handle must be kept alive for as long as the path is used.
    fn temp_xlsx(file_name: &str) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::TempDir::new().unwrap();
        let path = dir.path().join(file_name);
        (dir, path)
    }

    #[test]
    fn test_basic_batch_reading() {
        let sst = make_sst(&["Name", "Age"]);
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1" t="s"><v>0</v></c><c r="B1" t="s"><v>1</v></c></row>
<row r="2"><c r="A2" t="s"><v>0</v></c><c r="B2"><v>30</v></c></row>
<row r="3"><c r="A3" t="s"><v>0</v></c><c r="B3"><v>25</v></c></row>
"#,
        );

        let mut reader = open_reader(&xml, &sst);

        let first = reader.next_batch(2).unwrap();
        assert_eq!(first.len(), 2);
        assert!(reader.has_more());

        let second = reader.next_batch(2).unwrap();
        assert_eq!(second.len(), 1);

        let third = reader.next_batch(2).unwrap();
        assert!(third.is_empty());
        assert!(!reader.has_more());
    }

    #[test]
    fn test_sparse_rows() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="5"><c r="C5"><v>5</v></c></row>
<row r="100"><c r="A100"><v>100</v></c></row>
"#,
        );

        let rows = read_all(&xml, &sst, None);
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].row_number, 1);
        assert_eq!(rows[1].row_number, 5);
        assert_eq!(rows[1].cells[0].0, 3);
        assert_eq!(rows[2].row_number, 100);
    }

    #[test]
    fn test_all_cell_types() {
        let sst = make_sst(&["Hello"]);
        let xml = worksheet_xml(
            r#"
<row r="1">
  <c r="A1" t="s"><v>0</v></c>
  <c r="B1"><v>42.5</v></c>
  <c r="C1" t="b"><v>1</v></c>
  <c r="D1" t="e"><v>#DIV/0!</v></c>
  <c r="E1" t="inlineStr"><is><t>Inline</t></is></c>
  <c r="F1" t="n"><v>99</v></c>
  <c r="G1" t="d"><v>45000</v></c>
</row>
"#,
        );

        let rows = read_all(&xml, &sst, None);
        assert_eq!(rows.len(), 1);
        let cells = &rows[0].cells;

        let expected: Vec<(u32, CellValue)> = vec![
            (1, CellValue::String("Hello".to_string())),
            (2, CellValue::Number(42.5)),
            (3, CellValue::Bool(true)),
            (4, CellValue::Error("#DIV/0!".to_string())),
            (5, CellValue::String("Inline".to_string())),
            (6, CellValue::Number(99.0)),
            (7, CellValue::Date(45000.0)),
        ];
        for (position, wanted) in expected.iter().enumerate() {
            assert_eq!(&cells[position], wanted);
        }
    }

    #[test]
    fn test_boolean_false() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="b"><v>0</v></c></row>"#);
        let rows = read_all(&xml, &sst, None);
        assert_eq!(rows[0].cells[0].1, CellValue::Bool(false));
    }

    #[test]
    fn test_shared_string_resolution() {
        let sst = make_sst(&["First", "Second", "Third"]);
        let xml = worksheet_xml(
            r#"
<row r="1">
  <c r="A1" t="s"><v>0</v></c>
  <c r="B1" t="s"><v>1</v></c>
  <c r="C1" t="s"><v>2</v></c>
</row>
"#,
        );

        let rows = read_all(&xml, &sst, None);
        let cells = &rows[0].cells;
        assert_eq!(cells[0].1, CellValue::String("First".to_string()));
        assert_eq!(cells[1].1, CellValue::String("Second".to_string()));
        assert_eq!(cells[2].1, CellValue::String("Third".to_string()));
    }

    #[test]
    fn test_shared_string_out_of_bounds() {
        let sst = make_sst(&["Only"]);
        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="s"><v>999</v></c></row>"#);

        let mut reader = open_reader(&xml, &sst);
        assert!(reader.next_batch(10).is_err());
    }

    #[test]
    fn test_row_limit() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="2"><c r="A2"><v>2</v></c></row>
<row r="3"><c r="A3"><v>3</v></c></row>
<row r="4"><c r="A4"><v>4</v></c></row>
<row r="5"><c r="A5"><v>5</v></c></row>
"#,
        );

        let rows = read_all(&xml, &sst, Some(3));
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].row_number, 1);
        assert_eq!(rows[2].row_number, 3);
    }

    #[test]
    fn test_row_limit_zero() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);

        assert!(read_all(&xml, &sst, Some(0)).is_empty());
    }

    #[test]
    fn test_empty_sheet() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml("");

        assert!(read_all(&xml, &sst, None).is_empty());
    }

    #[test]
    fn test_empty_rows_are_skipped() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"></row>
<row r="2"><c r="A2"><v>42</v></c></row>
<row r="3"></row>
"#,
        );

        let rows = read_all(&xml, &sst, None);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].row_number, 2);
    }

    #[test]
    fn test_empty_rows_count_against_limit() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"></row>
<row r="2"></row>
<row r="3"><c r="A3"><v>3</v></c></row>
<row r="4"><c r="A4"><v>4</v></c></row>
"#,
        );

        let limited_to_two = read_all(&xml, &sst, Some(2));
        assert!(
            limited_to_two.is_empty(),
            "with limit=2 and 2 empty rows, no data rows should be returned"
        );

        let limited_to_three = read_all(&xml, &sst, Some(3));
        assert_eq!(limited_to_three.len(), 1);
        assert_eq!(limited_to_three[0].row_number, 3);
    }

    #[test]
    fn test_formula_with_cached_number() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><f>SUM(B1:B10)</f><v>42</v></c></row>"#);

        let rows = read_all(&xml, &sst, None);
        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "SUM(B1:B10)");
        assert_eq!(cached, Some(&CellValue::Number(42.0)));
    }

    #[test]
    fn test_formula_with_cached_string() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"<row r="1"><c r="A1" t="str"><f>CONCAT("a","b")</f><v>ab</v></c></row>"#,
        );

        let rows = read_all(&xml, &sst, None);
        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, r#"CONCAT("a","b")"#);
        assert_eq!(cached, Some(&CellValue::String("ab".to_string())));
    }

    #[test]
    fn test_formula_with_cached_boolean() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="b"><f>TRUE()</f><v>1</v></c></row>"#);

        let rows = read_all(&xml, &sst, None);
        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "TRUE()");
        assert_eq!(cached, Some(&CellValue::Bool(true)));
    }

    #[test]
    fn test_formula_with_cached_error() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1" t="e"><f>1/0</f><v>#DIV/0!</v></c></row>"#);

        let rows = read_all(&xml, &sst, None);
        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "1/0");
        assert_eq!(cached, Some(&CellValue::Error("#DIV/0!".to_string())));
    }

    #[test]
    fn test_formula_without_cached_value() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><f>A2+A3</f></c></row>"#);

        let rows = read_all(&xml, &sst, None);
        let (expr, cached) = expect_formula(&rows[0].cells[0].1);
        assert_eq!(expr, "A2+A3");
        assert!(cached.is_none());
    }

    #[test]
    fn test_inline_string_with_rich_text_runs() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"<row r="1"><c r="A1" t="inlineStr"><is><r><t>Bold</t></r><r><t> Normal</t></r></is></c></row>"#,
        );

        let rows = read_all(&xml, &sst, None);
        assert_eq!(
            rows[0].cells[0].1,
            CellValue::String("Bold Normal".to_string())
        );
    }

    #[test]
    fn test_reader_close() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);
        open_reader(&xml, &sst).close();
    }

    #[test]
    fn test_reader_drop_without_reading_all() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="2"><c r="A2"><v>2</v></c></row>
"#,
        );
        let mut reader = open_reader(&xml, &sst);
        let only_batch = reader.next_batch(1).unwrap();
        assert_eq!(only_batch.len(), 1);
        drop(reader);
    }

    #[test]
    fn test_has_more_transitions() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"><v>1</v></c></row>"#);

        let mut reader = open_reader(&xml, &sst);
        assert!(reader.has_more());

        let first = reader.next_batch(100).unwrap();
        assert_eq!(first.len(), 1);

        let second = reader.next_batch(100).unwrap();
        assert!(second.is_empty());
        assert!(!reader.has_more());
    }

    #[test]
    fn test_batch_size_one() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="2"><c r="A2"><v>2</v></c></row>
<row r="3"><c r="A3"><v>3</v></c></row>
"#,
        );

        let mut reader = open_reader(&xml, &sst);

        for expected_row in 1..=3 {
            let single = reader.next_batch(1).unwrap();
            assert_eq!(single.len(), 1);
            assert_eq!(single[0].row_number, expected_row);
        }

        assert!(reader.next_batch(1).unwrap().is_empty());
    }

    #[test]
    fn test_cell_with_no_value() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(r#"<row r="1"><c r="A1"></c><c r="B1"><v>42</v></c></row>"#);

        let rows = read_all(&xml, &sst, None);
        let cells = &rows[0].cells;
        assert_eq!(cells.len(), 2);
        assert_eq!(cells[0].1, CellValue::Empty);
        assert_eq!(cells[1].1, CellValue::Number(42.0));
    }

    #[test]
    fn test_self_closing_cell_element() {
        let sst = SharedStringTable::new();
        let xml = worksheet_xml(
            r#"<row r="1"><c r="A1"/><c r="B1"><v>42</v></c><c r="C1" t="b"/></row>"#,
        );

        let rows = read_all(&xml, &sst, None);
        let cells = &rows[0].cells;
        assert_eq!(cells.len(), 3);
        assert_eq!(cells[0], (1, CellValue::Empty));
        assert_eq!(cells[1], (2, CellValue::Number(42.0)));
        assert_eq!(cells[2], (3, CellValue::Empty));
    }

    #[test]
    fn test_integration_with_saved_workbook() {
        let mut source = Workbook::new();
        source.set_cell_value("Sheet1", "A1", "Name").unwrap();
        source.set_cell_value("Sheet1", "B1", "Score").unwrap();
        source.set_cell_value("Sheet1", "A2", "Alice").unwrap();
        source.set_cell_value("Sheet1", "B2", 95.5f64).unwrap();
        source.set_cell_value("Sheet1", "A3", "Bob").unwrap();
        source.set_cell_value("Sheet1", "B3", 87.0f64).unwrap();

        let (_dir, path) = temp_xlsx("stream_reader_test.xlsx");
        source.save(&path).unwrap();

        let reopened =
            Workbook::open_with_options(&path, &OpenOptions::new().read_mode(ReadMode::Lazy))
                .unwrap();

        let mut reader = reopened.open_sheet_reader("Sheet1").unwrap();
        let rows = reader.next_batch(100).unwrap();

        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].row_number, 1);
        assert_eq!(rows[0].cells[0].1, CellValue::String("Name".to_string()));
        assert_eq!(rows[0].cells[1].1, CellValue::String("Score".to_string()));
        assert_eq!(rows[1].cells[0].1, CellValue::String("Alice".to_string()));
        assert_eq!(rows[1].cells[1].1, CellValue::Number(95.5));
        assert_eq!(rows[2].cells[0].1, CellValue::String("Bob".to_string()));
        assert_eq!(rows[2].cells[1].1, CellValue::Number(87.0));
    }

    #[test]
    fn test_integration_with_row_limit() {
        let mut source = Workbook::new();
        for i in 1..=10 {
            let cell = format!("A{i}");
            source.set_cell_value("Sheet1", &cell, i as f64).unwrap();
        }

        let (_dir, path) = temp_xlsx("stream_limit_test.xlsx");
        source.save(&path).unwrap();

        let reopened = Workbook::open_with_options(
            &path,
            &OpenOptions::new().read_mode(ReadMode::Lazy).sheet_rows(5),
        )
        .unwrap();

        let mut reader = reopened.open_sheet_reader("Sheet1").unwrap();
        let mut all_rows = Vec::new();
        loop {
            let batch = reader.next_batch(3).unwrap();
            if batch.is_empty() {
                break;
            }
            all_rows.extend(batch);
        }

        assert_eq!(all_rows.len(), 5);
        assert_eq!(all_rows[4].row_number, 5);
    }

    #[test]
    fn test_integration_sheet_not_found() {
        let wb = Workbook::new();
        assert!(wb.open_sheet_reader("NonExistent").is_err());
    }

    /// Saves a one-row workbook to `path`: A1 with a built-in date style, B1
    /// with a custom date-time style, C1 with a two-decimal style and D1
    /// without any style.
    fn write_mixed_style_workbook(path: &std::path::Path) {
        use crate::style::{builtin_num_fmts, NumFmtStyle, Style};
        let mut wb = Workbook::new();
        let builtin_date_style = wb
            .add_style(&Style {
                num_fmt: Some(NumFmtStyle::Builtin(builtin_num_fmts::DATE_MDY)),
                ..Style::default()
            })
            .unwrap();
        let custom_date_style = wb
            .add_style(&Style {
                num_fmt: Some(NumFmtStyle::Custom("yyyy-mm-dd hh:mm".to_string())),
                ..Style::default()
            })
            .unwrap();
        let decimal_style = wb
            .add_style(&Style {
                num_fmt: Some(NumFmtStyle::Builtin(builtin_num_fmts::DECIMAL_2)),
                ..Style::default()
            })
            .unwrap();

        wb.set_cell_value("Sheet1", "A1", 46127.0_f64).unwrap();
        wb.set_cell_style("Sheet1", "A1", builtin_date_style)
            .unwrap();
        wb.set_cell_value("Sheet1", "B1", 46127.9993_f64).unwrap();
        wb.set_cell_style("Sheet1", "B1", custom_date_style)
            .unwrap();
        wb.set_cell_value("Sheet1", "C1", 2.5_f64).unwrap();
        wb.set_cell_style("Sheet1", "C1", decimal_style).unwrap();
        wb.set_cell_value("Sheet1", "D1", 42.0_f64).unwrap();

        wb.save(path).unwrap();
    }

    #[test]
    fn test_integration_date_interpretation_cell_type_opt_in() {
        let (_dir, path) = temp_xlsx("dates_cell_type.xlsx");
        write_mixed_style_workbook(&path);

        let wb = Workbook::open_with_options(
            &path,
            &OpenOptions::new()
                .read_mode(ReadMode::Lazy)
                .date_interpretation(crate::workbook::DateInterpretation::CellType),
        )
        .unwrap();
        let mut reader = wb.open_sheet_reader("Sheet1").unwrap();
        let rows = reader.next_batch(10).unwrap();
        let cells = &rows[0].cells;

        // With the cell-type interpretation, styled cells stay plain numbers.
        assert_eq!(cells[0].1, CellValue::Number(46127.0));
        match &cells[1].1 {
            CellValue::Number(v) => assert!((*v - 46127.9993).abs() < 1e-9),
            other => panic!("expected Number, got {:?}", other),
        }
        assert_eq!(cells[2].1, CellValue::Number(2.5));
        assert_eq!(cells[3].1, CellValue::Number(42.0));
    }

    #[test]
    fn test_integration_date_interpretation_num_fmt_promotes_date_styles() {
        let (_dir, path) = temp_xlsx("dates_num_fmt.xlsx");
        write_mixed_style_workbook(&path);

        let wb = Workbook::open_with_options(
            &path,
            &OpenOptions::new()
                .read_mode(ReadMode::Lazy)
                .date_interpretation(crate::workbook::DateInterpretation::NumFmt),
        )
        .unwrap();
        let mut reader = wb.open_sheet_reader("Sheet1").unwrap();
        let rows = reader.next_batch(10).unwrap();
        let cells = &rows[0].cells;

        // Date-styled cells are promoted; the others remain numbers.
        assert_eq!(cells[0].1, CellValue::Date(46127.0));
        match &cells[1].1 {
            CellValue::Date(v) => assert!((*v - 46127.9993).abs() < 1e-9),
            other => panic!("expected Date, got {:?}", other),
        }
        assert_eq!(cells[2].1, CellValue::Number(2.5));
        assert_eq!(cells[3].1, CellValue::Number(42.0));
    }
}