1use crate::arrow_store;
2use crate::arrow_store::IngestBuilder;
3use crate::stripes::NumericChunk;
4use arrow_array::Array;
5use arrow_schema::DataType;
6use formualizer_common::{CoercionPolicy, DateSystem, ExcelError, LiteralValue};
7use std::sync::Arc;
8use std::sync::atomic::{AtomicBool, Ordering};
9
/// Storage handle behind a [`RangeView`]: either a borrow of an existing
/// sheet or shared ownership of one.
#[derive(Clone)]
pub enum RangeBacking<'a> {
    /// Sheet borrowed for the lifetime `'a`.
    Borrowed(&'a arrow_store::ArrowSheet),
    /// Reference-counted sheet kept alive by the view itself
    /// (this is what `RangeView::from_owned_rows` produces).
    Owned(Arc<arrow_store::ArrowSheet>),
}
15
/// Rectangular window onto an `ArrowSheet`.
///
/// `sr`/`sc`/`er`/`ec` are absolute, inclusive row/column bounds into the
/// backing sheet; `rows`/`cols` are the logical dimensions reported by
/// [`RangeView::dims`]. Cell accessors take coordinates relative to
/// (`sr`, `sc`).
#[derive(Clone)]
pub struct RangeView<'a> {
    /// Sheet the view reads from.
    backing: RangeBacking<'a>,
    /// First absolute row (inclusive).
    sr: usize,
    /// First absolute column (inclusive).
    sc: usize,
    /// Last absolute row (inclusive).
    er: usize,
    /// Last absolute column (inclusive).
    ec: usize,
    /// Logical row count of the view.
    rows: usize,
    /// Logical column count of the view.
    cols: usize,
    /// Optional flag polled by the row-chunk iterator; when it reads `true`
    /// iteration yields a `Cancelled` error.
    cancel_token: Option<Arc<AtomicBool>>,
}
29
30impl<'a> core::fmt::Debug for RangeView<'a> {
31 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
32 f.debug_struct("RangeView")
33 .field("rows", &self.rows)
34 .field("cols", &self.cols)
35 .field("kind", &self.kind_probe())
36 .finish()
37 }
38}
39
/// Coarse content classification of a range, as produced by
/// `RangeView::kind_probe`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum RangeKind {
    /// The view has zero rows or columns, or every cell is empty.
    Empty,
    /// Only numeric cells (`Number`/`Int`), possibly alongside empty cells.
    NumericOnly,
    /// Only text cells, possibly alongside empty cells.
    TextOnly,
    /// Both numbers and text, or any other cell type (booleans, errors, ...).
    Mixed,
}
47
/// One column of a [`ChunkSlice`]: the per-type value lanes plus the type-tag
/// lane, each sliced to the same row segment.
pub struct ChunkCol {
    /// Numeric lane (`Float64`).
    pub numbers: Option<arrow_array::ArrayRef>,
    /// Boolean lane.
    pub booleans: Option<arrow_array::ArrayRef>,
    /// Text lane (`Utf8`).
    pub text: Option<arrow_array::ArrayRef>,
    /// Error-code lane (`UInt8`).
    pub errors: Option<arrow_array::ArrayRef>,
    /// Per-row `arrow_store::TypeTag` values stored as `u8`.
    pub type_tag: arrow_array::ArrayRef,
}
55
/// The part of one storage chunk that overlaps a [`RangeView`].
pub struct ChunkSlice {
    /// Row offset of this segment relative to the view's first row.
    pub row_start: usize,
    /// Number of rows in this segment.
    pub row_len: usize,
    /// One entry per view column, in column order.
    pub cols: Vec<ChunkCol>,
}
61
/// Iterator over the storage chunks that intersect a [`RangeView`], yielding
/// one [`ChunkSlice`] per overlapping chunk.
pub struct RowChunkIterator<'a> {
    /// View being walked.
    view: &'a RangeView<'a>,
    /// Index into the sheet's `chunk_starts` of the next chunk to inspect.
    current_chunk_idx: usize,
}
66
67impl<'a> Iterator for RowChunkIterator<'a> {
68 type Item = Result<ChunkSlice, ExcelError>;
69
70 fn next(&mut self) -> Option<Self::Item> {
71 if let Some(token) = &self.view.cancel_token
72 && token.load(Ordering::Relaxed)
73 {
74 return Some(Err(ExcelError::new(
75 formualizer_common::ExcelErrorKind::Cancelled,
76 )));
77 }
78
79 let sheet = self.view.sheet();
80 let chunk_starts = &sheet.chunk_starts;
81 let sheet_rows = sheet.nrows as usize;
82 let row_end = self.view.er.min(sheet_rows.saturating_sub(1));
83
84 while self.current_chunk_idx < chunk_starts.len() {
85 let ci = self.current_chunk_idx;
86 let start = chunk_starts[ci];
87 self.current_chunk_idx += 1;
88
89 let end = if ci + 1 < chunk_starts.len() {
90 chunk_starts[ci + 1]
91 } else {
92 sheet_rows
93 };
94 let len = end.saturating_sub(start);
95 if len == 0 {
96 continue;
97 }
98 let chunk_end_abs = start + len - 1;
99 let is = start.max(self.view.sr);
100 let ie = chunk_end_abs.min(row_end);
101 if is > ie {
102 continue;
103 }
104 let seg_len = ie - is + 1;
105 let rel_off = is - start;
106
107 let mut cols = Vec::with_capacity(self.view.cols);
108 for col_idx in self.view.sc..=self.view.ec {
109 if col_idx >= sheet.columns.len() {
110 let numbers = Some(arrow_array::new_null_array(&DataType::Float64, seg_len));
111 let booleans = Some(arrow_array::new_null_array(&DataType::Boolean, seg_len));
112 let text = Some(arrow_array::new_null_array(&DataType::Utf8, seg_len));
113 let errors = Some(arrow_array::new_null_array(&DataType::UInt8, seg_len));
114 let type_tag: arrow_array::ArrayRef =
115 Arc::new(arrow_array::UInt8Array::from(vec![
116 arrow_store::TypeTag::Empty
117 as u8;
118 seg_len
119 ]));
120 cols.push(ChunkCol {
121 numbers,
122 booleans,
123 text,
124 errors,
125 type_tag,
126 });
127 } else {
128 let col = &sheet.columns[col_idx];
129 let Some(ch) = col.chunk(ci) else {
130 let numbers =
131 Some(arrow_array::new_null_array(&DataType::Float64, seg_len));
132 let booleans =
133 Some(arrow_array::new_null_array(&DataType::Boolean, seg_len));
134 let text = Some(arrow_array::new_null_array(&DataType::Utf8, seg_len));
135 let errors = Some(arrow_array::new_null_array(&DataType::UInt8, seg_len));
136 let type_tag: arrow_array::ArrayRef =
137 Arc::new(arrow_array::UInt8Array::from(vec![
138 arrow_store::TypeTag::Empty
139 as u8;
140 seg_len
141 ]));
142 cols.push(ChunkCol {
143 numbers,
144 booleans,
145 text,
146 errors,
147 type_tag,
148 });
149 continue;
150 };
151
152 let numbers_base: arrow_array::ArrayRef = ch.numbers_or_null();
153 let booleans_base: arrow_array::ArrayRef = ch.booleans_or_null();
154 let text_base: arrow_array::ArrayRef = ch.text_or_null();
155 let errors_base: arrow_array::ArrayRef = ch.errors_or_null();
156
157 let numbers = Some(numbers_base.slice(rel_off, seg_len));
158 let booleans = Some(booleans_base.slice(rel_off, seg_len));
159 let text = Some(text_base.slice(rel_off, seg_len));
160 let errors = Some(errors_base.slice(rel_off, seg_len));
161 let type_tag: arrow_array::ArrayRef =
162 Arc::new(ch.type_tag.slice(rel_off, seg_len));
163 cols.push(ChunkCol {
164 numbers,
165 booleans,
166 text,
167 errors,
168 type_tag,
169 });
170 }
171 }
172 return Some(Ok(ChunkSlice {
173 row_start: is - self.view.sr,
174 row_len: seg_len,
175 cols,
176 }));
177 }
178 None
179 }
180}
181
182impl<'a> RangeView<'a> {
    /// Creates a view over `backing` with absolute inclusive bounds
    /// (`sr`, `sc`)..=(`er`, `ec`) and logical dimensions `rows` x `cols`.
    ///
    /// The arguments are stored as given; no consistency check between the
    /// bounds and the dimensions is performed here. The cancel token starts
    /// out unset.
    pub(crate) fn new(
        backing: RangeBacking<'a>,
        sr: usize,
        sc: usize,
        er: usize,
        ec: usize,
        rows: usize,
        cols: usize,
    ) -> Self {
        Self {
            backing,
            sr,
            sc,
            er,
            ec,
            rows,
            cols,
            cancel_token: None,
        }
    }
203
    /// Returns the view with its cancel token replaced by `token`
    /// (`None` removes any existing token).
    #[must_use]
    pub fn with_cancel_token(mut self, token: Option<Arc<AtomicBool>>) -> Self {
        self.cancel_token = token;
        self
    }
209
    /// Borrows the backing sheet, whether it is borrowed or owned.
    #[inline]
    pub fn sheet(&self) -> &arrow_store::ArrowSheet {
        match &self.backing {
            RangeBacking::Borrowed(s) => s,
            RangeBacking::Owned(s) => s,
        }
    }
217
218 pub fn from_owned_rows(
219 rows: Vec<Vec<LiteralValue>>,
220 date_system: DateSystem,
221 ) -> RangeView<'static> {
222 let nrows = rows.len();
223 let ncols = rows.iter().map(|r| r.len()).max().unwrap_or(0);
224
225 let chunk_rows = 32 * 1024;
226 let mut ib = IngestBuilder::new("__tmp", ncols, chunk_rows, date_system);
227
228 for mut r in rows {
229 r.resize(ncols, LiteralValue::Empty);
230 ib.append_row(&r).expect("append_row for RangeView");
231 }
232
233 let sheet = Arc::new(ib.finish());
234
235 if nrows == 0 || ncols == 0 {
236 return RangeView {
237 backing: RangeBacking::Owned(sheet),
238 sr: 1,
239 sc: 1,
240 er: 0,
241 ec: 0,
242 rows: 0,
243 cols: 0,
244 cancel_token: None,
245 };
246 }
247
248 RangeView {
249 backing: RangeBacking::Owned(sheet),
250 sr: 0,
251 sc: 0,
252 er: nrows - 1,
253 ec: ncols - 1,
254 rows: nrows,
255 cols: ncols,
256 cancel_token: None,
257 }
258 }
259
    /// Returns the logical dimensions as `(rows, cols)`.
    pub fn dims(&self) -> (usize, usize) {
        (self.rows, self.cols)
    }
263
264 pub fn expand_to(&self, rows: usize, cols: usize) -> RangeView<'a> {
265 let er = self.sr + rows.saturating_sub(1);
266 let ec = self.sc + cols.saturating_sub(1);
267 RangeView {
268 backing: match &self.backing {
269 RangeBacking::Borrowed(s) => RangeBacking::Borrowed(s),
270 RangeBacking::Owned(s) => RangeBacking::Owned(s.clone()),
271 },
272 sr: self.sr,
273 sc: self.sc,
274 er,
275 ec,
276 rows,
277 cols,
278 cancel_token: self.cancel_token.clone(),
279 }
280 }
281
282 pub fn sub_view(&self, rs: usize, cs: usize, rows: usize, cols: usize) -> RangeView<'a> {
283 let abs_sr = self.sr + rs;
284 let abs_sc = self.sc + cs;
285 let er = abs_sr + rows.saturating_sub(1);
286 let ec = abs_sc + cols.saturating_sub(1);
287 RangeView {
288 backing: match &self.backing {
289 RangeBacking::Borrowed(s) => RangeBacking::Borrowed(s),
290 RangeBacking::Owned(s) => RangeBacking::Owned(s.clone()),
291 },
292 sr: abs_sr,
293 sc: abs_sc,
294 er,
295 ec,
296 rows,
297 cols,
298 cancel_token: self.cancel_token.clone(),
299 }
300 }
301
    /// Returns `true` when the view has zero rows or zero columns.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.rows == 0 || self.cols == 0
    }
306
    /// Absolute index of the view's first row (inclusive).
    pub fn start_row(&self) -> usize {
        self.sr
    }
    /// Absolute index of the view's last row (inclusive).
    pub fn end_row(&self) -> usize {
        self.er
    }
    /// Absolute index of the view's first column (inclusive).
    pub fn start_col(&self) -> usize {
        self.sc
    }
    /// Absolute index of the view's last column (inclusive).
    pub fn end_col(&self) -> usize {
        self.ec
    }
    /// Name of the backing sheet.
    pub fn sheet_name(&self) -> &str {
        &self.sheet().name
    }
327
328 pub fn kind_probe(&self) -> RangeKind {
329 if self.is_empty() {
330 return RangeKind::Empty;
331 }
332
333 let mut has_num = false;
334 let mut has_text = false;
335
336 for r in 0..self.rows {
337 for c in 0..self.cols {
338 match self.get_cell(r, c) {
339 LiteralValue::Empty => {}
340 LiteralValue::Number(_) | LiteralValue::Int(_) => has_num = true,
341 LiteralValue::Text(_) => has_text = true,
342 _ => return RangeKind::Mixed,
343 }
344 if has_num && has_text {
345 return RangeKind::Mixed;
346 }
347 }
348 }
349
350 match (has_num, has_text) {
351 (false, false) => RangeKind::Empty,
352 (true, false) => RangeKind::NumericOnly,
353 (false, true) => RangeKind::TextOnly,
354 (true, true) => RangeKind::Mixed,
355 }
356 }
357
358 pub fn as_1x1(&self) -> Option<LiteralValue> {
359 if self.rows == 1 && self.cols == 1 {
360 Some(self.get_cell(0, 0))
361 } else {
362 None
363 }
364 }
365
366 pub fn get_cell(&self, row: usize, col: usize) -> LiteralValue {
369 if row >= self.rows || col >= self.cols {
370 return LiteralValue::Empty;
371 }
372 let abs_row = self.sr + row;
373 let abs_col = self.sc + col;
374 let sheet = self.sheet();
375 let sheet_rows = sheet.nrows as usize;
376 if abs_row >= sheet_rows {
377 return LiteralValue::Empty;
378 }
379 if abs_col >= sheet.columns.len() {
380 return LiteralValue::Empty;
381 }
382 let col_ref = &sheet.columns[abs_col];
383 let chunk_starts = &sheet.chunk_starts;
385 let ch_idx = match chunk_starts.binary_search(&abs_row) {
386 Ok(i) => i,
387 Err(0) => 0,
388 Err(i) => i - 1,
389 };
390 let Some(ch) = col_ref.chunk(ch_idx) else {
391 return LiteralValue::Empty;
392 };
393 let row_start = chunk_starts[ch_idx];
394 let in_off = abs_row - row_start;
395 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
397 if let Some(ov) = cascade.get_scalar(in_off) {
398 return ov.to_literal();
399 }
400 let tag_u8 = ch.type_tag.value(in_off);
402 match arrow_store::TypeTag::from_u8(tag_u8) {
403 arrow_store::TypeTag::Empty => LiteralValue::Empty,
404 arrow_store::TypeTag::Number => {
405 if let Some(arr) = &ch.numbers {
406 if arr.is_null(in_off) {
407 return LiteralValue::Empty;
408 }
409 LiteralValue::Number(arr.value(in_off))
410 } else {
411 LiteralValue::Empty
412 }
413 }
414 arrow_store::TypeTag::DateTime => {
415 if let Some(arr) = &ch.numbers {
416 if arr.is_null(in_off) {
417 return LiteralValue::Empty;
418 }
419 LiteralValue::from_serial_number(arr.value(in_off))
420 } else {
421 LiteralValue::Empty
422 }
423 }
424 arrow_store::TypeTag::Duration => {
425 if let Some(arr) = &ch.numbers {
426 if arr.is_null(in_off) {
427 return LiteralValue::Empty;
428 }
429 let serial = arr.value(in_off);
430 let nanos_f = serial * 86_400.0 * 1_000_000_000.0;
431 let nanos = nanos_f.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64;
432 LiteralValue::Duration(chrono::Duration::nanoseconds(nanos))
433 } else {
434 LiteralValue::Empty
435 }
436 }
437 arrow_store::TypeTag::Boolean => {
438 if let Some(arr) = &ch.booleans {
439 if arr.is_null(in_off) {
440 return LiteralValue::Empty;
441 }
442 LiteralValue::Boolean(arr.value(in_off))
443 } else {
444 LiteralValue::Empty
445 }
446 }
447 arrow_store::TypeTag::Text => {
448 if let Some(arr) = &ch.text {
449 if arr.is_null(in_off) {
450 return LiteralValue::Empty;
451 }
452 let sa = arr
453 .as_any()
454 .downcast_ref::<arrow_array::StringArray>()
455 .unwrap();
456 LiteralValue::Text(sa.value(in_off).to_string())
457 } else {
458 LiteralValue::Empty
459 }
460 }
461 arrow_store::TypeTag::Error => {
462 if let Some(arr) = &ch.errors {
463 if arr.is_null(in_off) {
464 return LiteralValue::Empty;
465 }
466 let kind = arrow_store::unmap_error_code(arr.value(in_off));
467 LiteralValue::Error(ExcelError::new(kind))
468 } else {
469 LiteralValue::Empty
470 }
471 }
472 arrow_store::TypeTag::Pending => LiteralValue::Pending,
473 }
474 }
475
    /// Returns an iterator over the storage chunks that overlap this view,
    /// starting from the sheet's first chunk.
    pub fn iter_row_chunks(&self) -> RowChunkIterator<'_> {
        RowChunkIterator {
            view: self,
            current_chunk_idx: 0,
        }
    }
483
484 pub fn for_each_cell(
486 &self,
487 f: &mut dyn FnMut(&LiteralValue) -> Result<(), ExcelError>,
488 ) -> Result<(), ExcelError> {
489 for res in self.iter_row_chunks() {
490 let cs = res?;
491 for r in 0..cs.row_len {
492 for c in 0..self.cols {
493 let tmp = self.get_cell(cs.row_start + r, c);
494 f(&tmp)?;
495 }
496 }
497 }
498 Ok(())
499 }
500
501 pub fn for_each_row(
503 &self,
504 f: &mut dyn FnMut(&[LiteralValue]) -> Result<(), ExcelError>,
505 ) -> Result<(), ExcelError> {
506 let mut buf: Vec<LiteralValue> = Vec::with_capacity(self.cols);
507 for r in 0..self.rows {
508 buf.clear();
509 for c in 0..self.cols {
510 buf.push(self.get_cell(r, c));
511 }
512 f(&buf[..])?;
513 }
514 Ok(())
515 }
516
517 pub fn for_each_col(
519 &self,
520 f: &mut dyn FnMut(&[LiteralValue]) -> Result<(), ExcelError>,
521 ) -> Result<(), ExcelError> {
522 let mut col_buf: Vec<LiteralValue> = Vec::with_capacity(self.rows);
523 for c in 0..self.cols {
524 col_buf.clear();
525 for r in 0..self.rows {
526 col_buf.push(self.get_cell(r, c));
527 }
528 f(&col_buf[..])?;
529 }
530 Ok(())
531 }
532
533 pub fn get_cell_numeric(&self, row: usize, col: usize, policy: CoercionPolicy) -> Option<f64> {
536 if row >= self.rows || col >= self.cols {
537 return None;
538 }
539
540 let val = self.get_cell(row, col);
541 pack_numeric(&val, policy).ok().flatten()
542 }
543
544 pub fn numbers_chunked(
546 &self,
547 policy: CoercionPolicy,
548 min_chunk: usize,
549 f: &mut dyn FnMut(NumericChunk) -> Result<(), ExcelError>,
550 ) -> Result<(), ExcelError> {
551 if matches!(policy, CoercionPolicy::NumberStrict) {
553 for res in self.numbers_slices() {
554 let (_, _, cols) = res?;
555 for col in cols {
556 if col.null_count() < col.len() {
557 let data = col.values();
558 let validity = if col.null_count() > 0 {
564 None } else {
568 None
569 };
570
571 if col.null_count() == 0 {
572 f(NumericChunk { data, validity })?;
573 } else {
574 let mut buf = Vec::with_capacity(col.len());
576 for i in 0..col.len() {
577 if !col.is_null(i) {
578 buf.push(col.value(i));
579 }
580 }
581 if !buf.is_empty() {
582 f(NumericChunk {
583 data: &buf,
584 validity: None,
585 })?;
586 }
587 }
588 }
589 }
590 }
591 return Ok(());
592 }
593
594 let min_chunk = min_chunk.max(1);
595 let mut buf: Vec<f64> = Vec::with_capacity(min_chunk);
596 let mut flush = |buf: &mut Vec<f64>| -> Result<(), ExcelError> {
597 if buf.is_empty() {
598 return Ok(());
599 }
600 let ptr = buf.as_ptr();
602 let len = buf.len();
603 let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
604 let chunk = NumericChunk {
605 data: slice,
606 validity: None,
607 };
608 f(chunk)?;
609 buf.clear();
610 Ok(())
611 };
612
613 self.for_each_cell(&mut |v| {
614 if let Some(n) = pack_numeric(v, policy)? {
615 buf.push(n);
616 if buf.len() >= min_chunk {
617 flush(&mut buf)?;
618 }
619 }
620 Ok(())
621 })?;
622 flush(&mut buf)?;
623
624 Ok(())
625 }
626
627 pub fn numbers_slices(
629 &self,
630 ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::Float64Array>>), ExcelError>> + '_
631 {
632 self.iter_row_chunks().map(move |res| {
633 let cs = res?;
634 let mut out_cols: Vec<Arc<arrow_array::Float64Array>> =
635 Vec::with_capacity(cs.cols.len());
636 let sheet = self.sheet();
637 let chunk_starts = &sheet.chunk_starts;
638
639 for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
640 let base = cs.cols[local_c]
641 .numbers
642 .as_ref()
643 .expect("numbers lane exists")
644 .clone();
645 let base_fa = base
646 .as_any()
647 .downcast_ref::<arrow_array::Float64Array>()
648 .unwrap()
649 .clone();
650 let base_arc = Arc::new(base_fa);
651
652 let abs_seg_start = self.sr + cs.row_start;
654 let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
655 Ok(i) => i,
656 Err(0) => 0,
657 Err(i) => i - 1,
658 };
659 if col_idx >= sheet.columns.len() {
660 out_cols.push(base_arc);
661 continue;
662 }
663 let col = &sheet.columns[col_idx];
664 let Some(ch) = col.chunk(ch_idx) else {
665 out_cols.push(base_arc);
666 continue;
667 };
668 let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
669 let seg_range = rel_off..(rel_off + cs.row_len);
670 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
671 if cascade.has_any_in_range(seg_range.clone()) {
672 let base_fa = base
673 .as_any()
674 .downcast_ref::<arrow_array::Float64Array>()
675 .unwrap();
676 out_cols.push(cascade.select_numbers(seg_range, base_fa));
677 } else {
678 out_cols.push(base_arc);
679 }
680 }
681 Ok((cs.row_start, cs.row_len, out_cols))
682 })
683 }
684
685 pub fn booleans_slices(
687 &self,
688 ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::BooleanArray>>), ExcelError>> + '_
689 {
690 self.iter_row_chunks().map(move |res| {
691 let cs = res?;
692 let mut out_cols: Vec<Arc<arrow_array::BooleanArray>> =
693 Vec::with_capacity(cs.cols.len());
694 let sheet = self.sheet();
695 let chunk_starts = &sheet.chunk_starts;
696
697 for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
698 let base = cs.cols[local_c]
699 .booleans
700 .as_ref()
701 .expect("booleans lane exists")
702 .clone();
703 let base_ba = base
704 .as_any()
705 .downcast_ref::<arrow_array::BooleanArray>()
706 .unwrap()
707 .clone();
708 let base_arc = Arc::new(base_ba);
709
710 let abs_seg_start = self.sr + cs.row_start;
712 let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
713 Ok(i) => i,
714 Err(0) => 0,
715 Err(i) => i - 1,
716 };
717 if col_idx >= sheet.columns.len() {
718 out_cols.push(base_arc);
719 continue;
720 }
721 let col = &sheet.columns[col_idx];
722 let Some(ch) = col.chunk(ch_idx) else {
723 out_cols.push(base_arc);
724 continue;
725 };
726 let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
727 let seg_range = rel_off..(rel_off + cs.row_len);
728 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
729 if cascade.has_any_in_range(seg_range.clone()) {
730 let base_ba = base
731 .as_any()
732 .downcast_ref::<arrow_array::BooleanArray>()
733 .unwrap();
734 out_cols.push(cascade.select_booleans(seg_range, base_ba));
735 } else {
736 out_cols.push(base_arc);
737 }
738 }
739 Ok((cs.row_start, cs.row_len, out_cols))
740 })
741 }
742
743 pub fn text_slices(
745 &self,
746 ) -> impl Iterator<Item = Result<(usize, usize, Vec<arrow_array::ArrayRef>), ExcelError>> + '_
747 {
748 self.iter_row_chunks().map(move |res| {
749 let cs = res?;
750 let mut out_cols: Vec<arrow_array::ArrayRef> = Vec::with_capacity(cs.cols.len());
751 let sheet = self.sheet();
752 let chunk_starts = &sheet.chunk_starts;
753
754 for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
755 let base = cs.cols[local_c]
756 .text
757 .as_ref()
758 .expect("text lane exists")
759 .clone();
760 let abs_seg_start = self.sr + cs.row_start;
761 let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
762 Ok(i) => i,
763 Err(0) => 0,
764 Err(i) => i - 1,
765 };
766 if col_idx >= sheet.columns.len() {
767 out_cols.push(base.clone());
768 continue;
769 }
770 let col = &sheet.columns[col_idx];
771 let Some(ch) = col.chunk(ch_idx) else {
772 out_cols.push(base.clone());
773 continue;
774 };
775 let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
776 let seg_range = rel_off..(rel_off + cs.row_len);
777 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
778 if cascade.has_any_in_range(seg_range.clone()) {
779 let base_sa = base
780 .as_any()
781 .downcast_ref::<arrow_array::StringArray>()
782 .unwrap();
783 out_cols.push(cascade.select_text(seg_range, base_sa));
784 } else {
785 out_cols.push(base.clone());
786 }
787 }
788 Ok((cs.row_start, cs.row_len, out_cols))
789 })
790 }
791
792 pub fn lowered_text_slices(
794 &self,
795 ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::StringArray>>), ExcelError>> + '_
796 {
797 self.iter_row_chunks().map(move |res| {
798 let cs = res?;
799 let mut out_cols: Vec<Arc<arrow_array::StringArray>> =
800 Vec::with_capacity(cs.cols.len());
801 let sheet = self.sheet();
802 let chunk_starts = &sheet.chunk_starts;
803
804 for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
805 let abs_seg_start = self.sr + cs.row_start;
807 let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
808 Ok(i) => i,
809 Err(0) => 0,
810 Err(i) => i - 1,
811 };
812 if col_idx >= sheet.columns.len() {
813 out_cols.push(Arc::new(arrow_array::StringArray::new_null(cs.row_len)));
814 continue;
815 }
816 let col = &sheet.columns[col_idx];
817 let Some(ch) = col.chunk(ch_idx) else {
818 out_cols.push(Arc::new(arrow_array::StringArray::new_null(cs.row_len)));
819 continue;
820 };
821 let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
822 let seg_range = rel_off..(rel_off + cs.row_len);
823
824 let base_lowered = ch.text_lower_or_null();
825 let base_seg = base_lowered.slice(rel_off, cs.row_len);
826 let base_sa = base_seg
827 .as_any()
828 .downcast_ref::<arrow_array::StringArray>()
829 .expect("lowered slice downcast");
830
831 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
832 if cascade.has_any_in_range(seg_range.clone()) {
833 out_cols.push(cascade.select_lowered_text(seg_range, base_sa));
834 } else {
835 out_cols.push(Arc::new(base_sa.clone()));
836 }
837 }
838 Ok((cs.row_start, cs.row_len, out_cols))
839 })
840 }
841
842 pub fn errors_slices(
844 &self,
845 ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::UInt8Array>>), ExcelError>> + '_
846 {
847 self.iter_row_chunks().map(move |res| {
848 let cs = res?;
849 let mut out_cols: Vec<Arc<arrow_array::UInt8Array>> = Vec::with_capacity(cs.cols.len());
850 let sheet = self.sheet();
851 let chunk_starts = &sheet.chunk_starts;
852
853 for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
854 let base = cs.cols[local_c]
855 .errors
856 .as_ref()
857 .expect("errors lane exists")
858 .clone();
859 let base_e = base
860 .as_any()
861 .downcast_ref::<arrow_array::UInt8Array>()
862 .unwrap()
863 .clone();
864 let base_arc: Arc<arrow_array::UInt8Array> = Arc::new(base_e);
865 let abs_seg_start = self.sr + cs.row_start;
866 let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
867 Ok(i) => i,
868 Err(0) => 0,
869 Err(i) => i - 1,
870 };
871 if col_idx >= sheet.columns.len() {
872 out_cols.push(base_arc);
873 continue;
874 }
875 let col = &sheet.columns[col_idx];
876 let Some(ch) = col.chunk(ch_idx) else {
877 out_cols.push(base_arc);
878 continue;
879 };
880 let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
881 let seg_range = rel_off..(rel_off + cs.row_len);
882 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
883 if cascade.has_any_in_range(seg_range.clone()) {
884 let base_ea = base
885 .as_any()
886 .downcast_ref::<arrow_array::UInt8Array>()
887 .unwrap();
888 out_cols.push(cascade.select_errors(seg_range, base_ea));
889 } else {
890 out_cols.push(base_arc);
891 }
892 }
893 Ok((cs.row_start, cs.row_len, out_cols))
894 })
895 }
896
897 pub fn type_tags_slices(
899 &self,
900 ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::UInt8Array>>), ExcelError>> + '_
901 {
902 self.iter_row_chunks().map(move |res| {
903 let cs = res?;
904 let mut out_cols: Vec<Arc<arrow_array::UInt8Array>> = Vec::with_capacity(cs.cols.len());
905 let sheet = self.sheet();
906 let chunk_starts = &sheet.chunk_starts;
907
908 for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
909 let base = cs.cols[local_c].type_tag.clone();
910 let base_ta = base
911 .as_any()
912 .downcast_ref::<arrow_array::UInt8Array>()
913 .unwrap()
914 .clone();
915 let base_arc = Arc::new(base_ta);
916
917 let abs_seg_start = self.sr + cs.row_start;
918 let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
919 Ok(i) => i,
920 Err(0) => 0,
921 Err(i) => i - 1,
922 };
923 if col_idx >= sheet.columns.len() {
924 out_cols.push(base_arc);
925 continue;
926 }
927 let col = &sheet.columns[col_idx];
928 let Some(ch) = col.chunk(ch_idx) else {
929 out_cols.push(base_arc);
930 continue;
931 };
932 let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
933 let seg_range = rel_off..(rel_off + cs.row_len);
934 let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
935 if cascade.has_any_in_range(seg_range.clone()) {
936 let base_ta = base
937 .as_any()
938 .downcast_ref::<arrow_array::UInt8Array>()
939 .unwrap();
940 out_cols.push(cascade.select_type_tags(seg_range, base_ta));
941 } else {
942 out_cols.push(base_arc);
943 }
944 }
945 Ok((cs.row_start, cs.row_len, out_cols))
946 })
947 }
948
949 pub fn lowered_text_columns(&self) -> Vec<arrow_array::ArrayRef> {
952 use crate::compute_prelude::concat_arrays;
953
954 let mut out: Vec<arrow_array::ArrayRef> = Vec::with_capacity(self.cols);
955 if self.rows == 0 || self.cols == 0 {
956 return out;
957 }
958 let sheet = self.sheet();
959 let chunk_starts = &sheet.chunk_starts;
960 let sheet_rows = sheet.nrows as usize;
962 if sheet_rows == 0 || self.sr >= sheet_rows {
963 for _ in 0..self.cols {
964 out.push(arrow_array::new_null_array(&DataType::Utf8, 0));
965 }
966 return out;
967 }
968 let row_end = self.er.min(sheet_rows.saturating_sub(1));
969 let physical_len = row_end.saturating_sub(self.sr) + 1;
970 for col_idx in self.sc..=self.ec {
971 let mut segs: Vec<arrow_array::ArrayRef> = Vec::new();
972 if col_idx >= sheet.columns.len() {
973 segs.push(arrow_array::new_null_array(&DataType::Utf8, physical_len));
975 } else {
976 let col_ref = &sheet.columns[col_idx];
977 for (ci, &start) in chunk_starts.iter().enumerate() {
978 let chunk_end = chunk_starts
979 .get(ci + 1)
980 .copied()
981 .unwrap_or(sheet.nrows as usize);
982 let len = chunk_end.saturating_sub(start);
983 if len == 0 {
984 continue;
985 }
986 let end = start + len - 1;
987 let is = start.max(self.sr);
988 let ie = end.min(row_end);
989 if is > ie {
990 continue;
991 }
992 let seg_len = ie - is + 1;
993 let rel_off = is - start;
994 if let Some(ch) = col_ref.chunk(ci) {
995 let seg_range = rel_off..(rel_off + seg_len);
997 let cascade =
998 arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
999 if cascade.has_any_in_range(seg_range.clone()) {
1000 let base_lowered = ch.text_lower_or_null();
1001 let base_seg = base_lowered.slice(rel_off, seg_len);
1002 let base_sa = base_seg
1003 .as_any()
1004 .downcast_ref::<arrow_array::StringArray>()
1005 .expect("lowered slice downcast");
1006 segs.push(cascade.select_lowered_text(seg_range, base_sa));
1007 } else {
1008 let lowered = ch.text_lower_or_null();
1010 segs.push(lowered.slice(rel_off, seg_len));
1011 }
1012 } else {
1013 segs.push(arrow_array::new_null_array(&DataType::Utf8, seg_len));
1014 }
1015 }
1016 }
1017 if segs.is_empty() {
1019 segs.push(arrow_array::new_null_array(&DataType::Utf8, physical_len));
1020 }
1021 let anys: Vec<&dyn arrow_array::Array> = segs
1023 .iter()
1024 .map(|a| a.as_ref() as &dyn arrow_array::Array)
1025 .collect();
1026 let conc = concat_arrays(&anys).expect("concat lowered segments");
1027 out.push(conc);
1028 }
1029 out
1030 }
1031
1032 pub fn slice_numbers(
1034 &self,
1035 rel_start: usize,
1036 len: usize,
1037 ) -> Vec<Option<Arc<arrow_array::Float64Array>>> {
1038 let abs_start = self.sr + rel_start;
1039 let abs_end = abs_start + len;
1040 let sheet = self.sheet();
1041 let chunk_starts = &sheet.chunk_starts;
1042
1043 let mut out_cols = Vec::with_capacity(self.cols);
1044 for col_idx in self.sc..=self.ec {
1045 if col_idx >= sheet.columns.len() {
1046 out_cols.push(None);
1047 continue;
1048 }
1049 let col = &sheet.columns[col_idx];
1050
1051 let start_ch_idx = match chunk_starts.binary_search(&abs_start) {
1052 Ok(i) => i,
1053 Err(0) => 0,
1054 Err(i) => i - 1,
1055 };
1056
1057 let mut segments: Vec<Arc<arrow_array::Float64Array>> = Vec::new();
1058 let mut null_only = true;
1059
1060 let mut curr = abs_start;
1061 let mut remaining = len;
1062 let mut ch_idx = start_ch_idx;
1063
1064 while remaining > 0 && ch_idx < chunk_starts.len() {
1065 let ch_start = chunk_starts[ch_idx];
1066 let ch_end = chunk_starts
1067 .get(ch_idx + 1)
1068 .copied()
1069 .unwrap_or(sheet.nrows as usize);
1070 let ch_len = ch_end.saturating_sub(ch_start);
1071 if ch_len == 0 {
1072 ch_idx += 1;
1073 continue;
1074 }
1075
1076 let overlap_start = curr.max(ch_start);
1077 let overlap_end = ch_end.min(abs_end);
1078
1079 if overlap_start < overlap_end {
1080 let seg_len = overlap_end - overlap_start;
1081 let rel_off_in_chunk = overlap_start - ch_start;
1082
1083 if let Some(ch) = col.chunk(ch_idx) {
1084 let base_nums_arc = ch.numbers_or_null();
1085 let base_nums = base_nums_arc.as_ref();
1086
1087 let seg_range = rel_off_in_chunk..(rel_off_in_chunk + seg_len);
1088 let cascade =
1089 arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
1090
1091 let final_arr = if cascade.has_any_in_range(seg_range.clone()) {
1092 let base_slice = base_nums.slice(rel_off_in_chunk, seg_len);
1093 let base_fa = base_slice
1094 .as_any()
1095 .downcast_ref::<arrow_array::Float64Array>()
1096 .unwrap();
1097 cascade.select_numbers(seg_range, base_fa).as_ref().clone()
1098 } else {
1099 let sl = base_nums.slice(rel_off_in_chunk, seg_len);
1100 sl.as_any()
1101 .downcast_ref::<arrow_array::Float64Array>()
1102 .unwrap()
1103 .clone()
1104 };
1105
1106 if final_arr.null_count() < final_arr.len() {
1107 null_only = false;
1108 }
1109 segments.push(Arc::new(final_arr));
1110 } else {
1111 segments.push(Arc::new(arrow_array::Float64Array::new_null(seg_len)));
1112 }
1113 curr += seg_len;
1114 remaining -= seg_len;
1115 }
1116 ch_idx += 1;
1117 }
1118
1119 if remaining > 0 {
1120 segments.push(Arc::new(arrow_array::Float64Array::new_null(remaining)));
1121 }
1122
1123 if segments.len() == 1 {
1124 if null_only && segments[0].null_count() == segments[0].len() {
1125 out_cols.push(None);
1126 } else {
1127 out_cols.push(Some(segments.pop().unwrap()));
1128 }
1129 } else {
1130 let refs: Vec<&dyn Array> =
1131 segments.iter().map(|a| a.as_ref() as &dyn Array).collect();
1132 let c = crate::compute_prelude::concat_arrays(&refs).expect("concat slice");
1133 let fa = c
1134 .as_any()
1135 .downcast_ref::<arrow_array::Float64Array>()
1136 .unwrap()
1137 .clone();
1138 out_cols.push(Some(Arc::new(fa)));
1139 }
1140 }
1141 out_cols
1142 }
1143
1144 pub fn slice_lowered_text(
1146 &self,
1147 rel_start: usize,
1148 len: usize,
1149 ) -> Vec<Option<Arc<arrow_array::StringArray>>> {
1150 let abs_start = self.sr + rel_start;
1151 let abs_end = abs_start + len;
1152 let sheet = self.sheet();
1153 let chunk_starts = &sheet.chunk_starts;
1154
1155 let mut out_cols = Vec::with_capacity(self.cols);
1156 for col_idx in self.sc..=self.ec {
1157 if col_idx >= sheet.columns.len() {
1158 out_cols.push(None);
1159 continue;
1160 }
1161 let col = &sheet.columns[col_idx];
1162 let start_ch_idx = match chunk_starts.binary_search(&abs_start) {
1163 Ok(i) => i,
1164 Err(0) => 0,
1165 Err(i) => i - 1,
1166 };
1167
1168 let mut segments: Vec<Arc<arrow_array::StringArray>> = Vec::new();
1169 let mut null_only = true;
1170
1171 let mut curr = abs_start;
1172 let mut remaining = len;
1173 let mut ch_idx = start_ch_idx;
1174
1175 while remaining > 0 && ch_idx < chunk_starts.len() {
1176 let ch_start = chunk_starts[ch_idx];
1177 let ch_end = chunk_starts
1178 .get(ch_idx + 1)
1179 .copied()
1180 .unwrap_or(sheet.nrows as usize);
1181 let ch_len = ch_end.saturating_sub(ch_start);
1182 if ch_len == 0 {
1183 ch_idx += 1;
1184 continue;
1185 }
1186
1187 let overlap_start = curr.max(ch_start);
1188 let overlap_end = ch_end.min(abs_end);
1189
1190 if overlap_start < overlap_end {
1191 let seg_len = overlap_end - overlap_start;
1192 let rel_off_in_chunk = overlap_start - ch_start;
1193
1194 if let Some(ch) = col.chunk(ch_idx) {
1195 let base_lowered = ch.text_lower_or_null();
1196 let seg_range = rel_off_in_chunk..(rel_off_in_chunk + seg_len);
1197 let cascade =
1198 arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
1199
1200 let final_arr = if cascade.has_any_in_range(seg_range.clone()) {
1201 let base_slice = base_lowered.slice(rel_off_in_chunk, seg_len);
1202 let base_sa = base_slice
1203 .as_any()
1204 .downcast_ref::<arrow_array::StringArray>()
1205 .unwrap();
1206 cascade
1207 .select_lowered_text(seg_range, base_sa)
1208 .as_ref()
1209 .clone()
1210 } else {
1211 let sl = base_lowered.slice(rel_off_in_chunk, seg_len);
1212 sl.as_any()
1213 .downcast_ref::<arrow_array::StringArray>()
1214 .unwrap()
1215 .clone()
1216 };
1217
1218 if final_arr.null_count() < final_arr.len() {
1219 null_only = false;
1220 }
1221 segments.push(Arc::new(final_arr));
1222 } else {
1223 segments.push(Arc::new(arrow_array::StringArray::new_null(seg_len)));
1224 }
1225 curr += seg_len;
1226 remaining -= seg_len;
1227 }
1228 ch_idx += 1;
1229 }
1230
1231 if remaining > 0 {
1232 segments.push(Arc::new(arrow_array::StringArray::new_null(remaining)));
1233 }
1234
1235 if segments.len() == 1 {
1236 if null_only && segments[0].null_count() == segments[0].len() {
1237 out_cols.push(None);
1238 } else {
1239 out_cols.push(Some(segments.pop().unwrap()));
1240 }
1241 } else {
1242 let refs: Vec<&dyn Array> =
1243 segments.iter().map(|a| a.as_ref() as &dyn Array).collect();
1244 let c = crate::compute_prelude::concat_arrays(&refs).expect("concat text");
1245 let sa = c
1246 .as_any()
1247 .downcast_ref::<arrow_array::StringArray>()
1248 .unwrap()
1249 .clone();
1250 out_cols.push(Some(Arc::new(sa)));
1251 }
1252 }
1253 out_cols
1254 }
1255}
1256
1257#[inline]
1258fn pack_numeric(v: &LiteralValue, policy: CoercionPolicy) -> Result<Option<f64>, ExcelError> {
1259 match policy {
1260 CoercionPolicy::NumberLenientText => match v {
1261 LiteralValue::Error(e) => Err(e.clone()),
1262 LiteralValue::Empty => Ok(None),
1263 other => Ok(crate::coercion::to_number_lenient(other).ok()),
1264 },
1265 CoercionPolicy::NumberStrict => match v {
1266 LiteralValue::Error(e) => Err(e.clone()),
1267 LiteralValue::Empty => Ok(None),
1268 other => Ok(crate::coercion::to_number_strict(other).ok()),
1269 },
1270 _ => match v {
1271 LiteralValue::Error(e) => Err(e.clone()),
1272 _ => Ok(None),
1273 },
1274 }
1275}
1276
#[cfg(test)]
mod tests {
    use super::*;

    /// Sums every value delivered by `numbers_chunked` under the lenient
    /// policy over a 2x3 owned view with a chunk threshold of 2.
    ///
    /// The literal numbers alone add up to 6.5; the expected total of 7.5
    /// implies the boolean contributes 1 and the text `"x"` and the empty
    /// cell contribute nothing.
    #[test]
    fn owned_rows_numeric_chunking() {
        let data: Vec<Vec<LiteralValue>> = vec![
            vec![
                LiteralValue::Number(1.0),
                LiteralValue::Text("x".into()),
                LiteralValue::Number(3.0),
            ],
            vec![
                LiteralValue::Boolean(true),
                LiteralValue::Empty,
                LiteralValue::Number(2.5),
            ],
        ];
        let view = RangeView::from_owned_rows(data, DateSystem::Excel1900);
        let mut sum = 0.0f64;
        view.numbers_chunked(CoercionPolicy::NumberLenientText, 2, &mut |chunk| {
            for &n in chunk.data {
                sum += n;
            }
            Ok(())
        })
        .unwrap();
        assert!((sum - 7.5).abs() < 1e-9);
    }

    /// A 1x1 view exposes its only cell through `as_1x1`.
    #[test]
    fn as_1x1_works() {
        let view = RangeView::from_owned_rows(
            vec![vec![LiteralValue::Number(7.0)]],
            DateSystem::Excel1900,
        );
        assert_eq!(view.as_1x1(), Some(LiteralValue::Number(7.0)));
    }
}