Skip to main content

formualizer_eval/engine/
range_view.rs

1use crate::arrow_store;
2use crate::arrow_store::IngestBuilder;
3use crate::stripes::NumericChunk;
4use arrow_array::Array;
5use arrow_schema::DataType;
6use formualizer_common::{CoercionPolicy, DateSystem, ExcelError, LiteralValue};
7use std::sync::Arc;
8use std::sync::atomic::{AtomicBool, Ordering};
9
10#[derive(Clone)]
11pub enum RangeBacking<'a> {
12    Borrowed(&'a arrow_store::ArrowSheet),
13    Owned(Arc<arrow_store::ArrowSheet>),
14}
15
16/// Unified view over a 2D range with efficient traversal utilities.
17/// Phase 4: Arrow-only backing.
18#[derive(Clone)]
19pub struct RangeView<'a> {
20    backing: RangeBacking<'a>,
21    sr: usize,
22    sc: usize,
23    er: usize,
24    ec: usize,
25    rows: usize,
26    cols: usize,
27    cancel_token: Option<Arc<AtomicBool>>,
28}
29
30impl<'a> core::fmt::Debug for RangeView<'a> {
31    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
32        f.debug_struct("RangeView")
33            .field("rows", &self.rows)
34            .field("cols", &self.cols)
35            .field("kind", &self.kind_probe())
36            .finish()
37    }
38}
39
/// Coarse content classification of a range, as produced by
/// `RangeView::kind_probe`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum RangeKind {
    /// The range has no cells, or every cell is empty.
    Empty,
    /// Every non-empty cell is a number.
    NumericOnly,
    /// Every non-empty cell is text.
    TextOnly,
    /// Numbers and text both occur, or some cell holds another kind of value.
    Mixed,
}
47
48pub struct ChunkCol {
49    pub numbers: Option<arrow_array::ArrayRef>,
50    pub booleans: Option<arrow_array::ArrayRef>,
51    pub text: Option<arrow_array::ArrayRef>,
52    pub errors: Option<arrow_array::ArrayRef>,
53    pub type_tag: arrow_array::ArrayRef,
54}
55
56pub struct ChunkSlice {
57    pub row_start: usize, // relative to view top
58    pub row_len: usize,
59    pub cols: Vec<ChunkCol>,
60}
61
62pub struct RowChunkIterator<'a> {
63    view: &'a RangeView<'a>,
64    current_chunk_idx: usize,
65}
66
67impl<'a> Iterator for RowChunkIterator<'a> {
68    type Item = Result<ChunkSlice, ExcelError>;
69
70    fn next(&mut self) -> Option<Self::Item> {
71        if let Some(token) = &self.view.cancel_token
72            && token.load(Ordering::Relaxed)
73        {
74            return Some(Err(ExcelError::new(
75                formualizer_common::ExcelErrorKind::Cancelled,
76            )));
77        }
78
79        let sheet = self.view.sheet();
80        let chunk_starts = &sheet.chunk_starts;
81        let sheet_rows = sheet.nrows as usize;
82        let row_end = self.view.er.min(sheet_rows.saturating_sub(1));
83
84        while self.current_chunk_idx < chunk_starts.len() {
85            let ci = self.current_chunk_idx;
86            let start = chunk_starts[ci];
87            self.current_chunk_idx += 1;
88
89            let end = if ci + 1 < chunk_starts.len() {
90                chunk_starts[ci + 1]
91            } else {
92                sheet_rows
93            };
94            let len = end.saturating_sub(start);
95            if len == 0 {
96                continue;
97            }
98            let chunk_end_abs = start + len - 1;
99            let is = start.max(self.view.sr);
100            let ie = chunk_end_abs.min(row_end);
101            if is > ie {
102                continue;
103            }
104            let seg_len = ie - is + 1;
105            let rel_off = is - start;
106
107            let mut cols = Vec::with_capacity(self.view.cols);
108            for col_idx in self.view.sc..=self.view.ec {
109                if col_idx >= sheet.columns.len() {
110                    let numbers = Some(arrow_array::new_null_array(&DataType::Float64, seg_len));
111                    let booleans = Some(arrow_array::new_null_array(&DataType::Boolean, seg_len));
112                    let text = Some(arrow_array::new_null_array(&DataType::Utf8, seg_len));
113                    let errors = Some(arrow_array::new_null_array(&DataType::UInt8, seg_len));
114                    let type_tag: arrow_array::ArrayRef =
115                        Arc::new(arrow_array::UInt8Array::from(vec![
116                            arrow_store::TypeTag::Empty
117                                as u8;
118                            seg_len
119                        ]));
120                    cols.push(ChunkCol {
121                        numbers,
122                        booleans,
123                        text,
124                        errors,
125                        type_tag,
126                    });
127                } else {
128                    let col = &sheet.columns[col_idx];
129                    let Some(ch) = col.chunk(ci) else {
130                        let numbers =
131                            Some(arrow_array::new_null_array(&DataType::Float64, seg_len));
132                        let booleans =
133                            Some(arrow_array::new_null_array(&DataType::Boolean, seg_len));
134                        let text = Some(arrow_array::new_null_array(&DataType::Utf8, seg_len));
135                        let errors = Some(arrow_array::new_null_array(&DataType::UInt8, seg_len));
136                        let type_tag: arrow_array::ArrayRef =
137                            Arc::new(arrow_array::UInt8Array::from(vec![
138                                arrow_store::TypeTag::Empty
139                                    as u8;
140                                seg_len
141                            ]));
142                        cols.push(ChunkCol {
143                            numbers,
144                            booleans,
145                            text,
146                            errors,
147                            type_tag,
148                        });
149                        continue;
150                    };
151
152                    let numbers_base: arrow_array::ArrayRef = ch.numbers_or_null();
153                    let booleans_base: arrow_array::ArrayRef = ch.booleans_or_null();
154                    let text_base: arrow_array::ArrayRef = ch.text_or_null();
155                    let errors_base: arrow_array::ArrayRef = ch.errors_or_null();
156
157                    let numbers = Some(numbers_base.slice(rel_off, seg_len));
158                    let booleans = Some(booleans_base.slice(rel_off, seg_len));
159                    let text = Some(text_base.slice(rel_off, seg_len));
160                    let errors = Some(errors_base.slice(rel_off, seg_len));
161                    let type_tag: arrow_array::ArrayRef =
162                        Arc::new(ch.type_tag.slice(rel_off, seg_len));
163                    cols.push(ChunkCol {
164                        numbers,
165                        booleans,
166                        text,
167                        errors,
168                        type_tag,
169                    });
170                }
171            }
172            return Some(Ok(ChunkSlice {
173                row_start: is - self.view.sr,
174                row_len: seg_len,
175                cols,
176            }));
177        }
178        None
179    }
180}
181
182impl<'a> RangeView<'a> {
183    pub(crate) fn new(
184        backing: RangeBacking<'a>,
185        sr: usize,
186        sc: usize,
187        er: usize,
188        ec: usize,
189        rows: usize,
190        cols: usize,
191    ) -> Self {
192        Self {
193            backing,
194            sr,
195            sc,
196            er,
197            ec,
198            rows,
199            cols,
200            cancel_token: None,
201        }
202    }
203
204    #[must_use]
205    pub fn with_cancel_token(mut self, token: Option<Arc<AtomicBool>>) -> Self {
206        self.cancel_token = token;
207        self
208    }
209
210    #[inline]
211    pub fn sheet(&self) -> &arrow_store::ArrowSheet {
212        match &self.backing {
213            RangeBacking::Borrowed(s) => s,
214            RangeBacking::Owned(s) => s,
215        }
216    }
217
218    pub fn from_owned_rows(
219        rows: Vec<Vec<LiteralValue>>,
220        date_system: DateSystem,
221    ) -> RangeView<'static> {
222        let nrows = rows.len();
223        let ncols = rows.iter().map(|r| r.len()).max().unwrap_or(0);
224
225        let chunk_rows = 32 * 1024;
226        let mut ib = IngestBuilder::new("__tmp", ncols, chunk_rows, date_system);
227
228        for mut r in rows {
229            r.resize(ncols, LiteralValue::Empty);
230            ib.append_row(&r).expect("append_row for RangeView");
231        }
232
233        let sheet = Arc::new(ib.finish());
234
235        if nrows == 0 || ncols == 0 {
236            return RangeView {
237                backing: RangeBacking::Owned(sheet),
238                sr: 1,
239                sc: 1,
240                er: 0,
241                ec: 0,
242                rows: 0,
243                cols: 0,
244                cancel_token: None,
245            };
246        }
247
248        RangeView {
249            backing: RangeBacking::Owned(sheet),
250            sr: 0,
251            sc: 0,
252            er: nrows - 1,
253            ec: ncols - 1,
254            rows: nrows,
255            cols: ncols,
256            cancel_token: None,
257        }
258    }
259
260    pub fn dims(&self) -> (usize, usize) {
261        (self.rows, self.cols)
262    }
263
264    pub fn expand_to(&self, rows: usize, cols: usize) -> RangeView<'a> {
265        let er = self.sr + rows.saturating_sub(1);
266        let ec = self.sc + cols.saturating_sub(1);
267        RangeView {
268            backing: match &self.backing {
269                RangeBacking::Borrowed(s) => RangeBacking::Borrowed(s),
270                RangeBacking::Owned(s) => RangeBacking::Owned(s.clone()),
271            },
272            sr: self.sr,
273            sc: self.sc,
274            er,
275            ec,
276            rows,
277            cols,
278            cancel_token: self.cancel_token.clone(),
279        }
280    }
281
282    pub fn sub_view(&self, rs: usize, cs: usize, rows: usize, cols: usize) -> RangeView<'a> {
283        let abs_sr = self.sr + rs;
284        let abs_sc = self.sc + cs;
285        let er = abs_sr + rows.saturating_sub(1);
286        let ec = abs_sc + cols.saturating_sub(1);
287        RangeView {
288            backing: match &self.backing {
289                RangeBacking::Borrowed(s) => RangeBacking::Borrowed(s),
290                RangeBacking::Owned(s) => RangeBacking::Owned(s.clone()),
291            },
292            sr: abs_sr,
293            sc: abs_sc,
294            er,
295            ec,
296            rows,
297            cols,
298            cancel_token: self.cancel_token.clone(),
299        }
300    }
301
302    #[inline]
303    pub fn is_empty(&self) -> bool {
304        self.rows == 0 || self.cols == 0
305    }
306
307    /// Absolute 0-based start row of this view.
308    pub fn start_row(&self) -> usize {
309        self.sr
310    }
311    /// Absolute 0-based end row of this view (inclusive).
312    pub fn end_row(&self) -> usize {
313        self.er
314    }
315    /// Absolute 0-based start column of this view.
316    pub fn start_col(&self) -> usize {
317        self.sc
318    }
319    /// Absolute 0-based end column of this view (inclusive).
320    pub fn end_col(&self) -> usize {
321        self.ec
322    }
323    /// Owning sheet name.
324    pub fn sheet_name(&self) -> &str {
325        &self.sheet().name
326    }
327
328    pub fn kind_probe(&self) -> RangeKind {
329        if self.is_empty() {
330            return RangeKind::Empty;
331        }
332
333        let mut has_num = false;
334        let mut has_text = false;
335
336        for r in 0..self.rows {
337            for c in 0..self.cols {
338                match self.get_cell(r, c) {
339                    LiteralValue::Empty => {}
340                    LiteralValue::Number(_) | LiteralValue::Int(_) => has_num = true,
341                    LiteralValue::Text(_) => has_text = true,
342                    _ => return RangeKind::Mixed,
343                }
344                if has_num && has_text {
345                    return RangeKind::Mixed;
346                }
347            }
348        }
349
350        match (has_num, has_text) {
351            (false, false) => RangeKind::Empty,
352            (true, false) => RangeKind::NumericOnly,
353            (false, true) => RangeKind::TextOnly,
354            (true, true) => RangeKind::Mixed,
355        }
356    }
357
358    pub fn as_1x1(&self) -> Option<LiteralValue> {
359        if self.rows == 1 && self.cols == 1 {
360            Some(self.get_cell(0, 0))
361        } else {
362            None
363        }
364    }
365
366    /// Get a specific cell by row and column index (0-based).
367    /// Returns Empty for out-of-bounds access.
368    pub fn get_cell(&self, row: usize, col: usize) -> LiteralValue {
369        if row >= self.rows || col >= self.cols {
370            return LiteralValue::Empty;
371        }
372        let abs_row = self.sr + row;
373        let abs_col = self.sc + col;
374        let sheet = self.sheet();
375        let sheet_rows = sheet.nrows as usize;
376        if abs_row >= sheet_rows {
377            return LiteralValue::Empty;
378        }
379        if abs_col >= sheet.columns.len() {
380            return LiteralValue::Empty;
381        }
382        let col_ref = &sheet.columns[abs_col];
383        // Locate chunk by binary searching start offsets
384        let chunk_starts = &sheet.chunk_starts;
385        let ch_idx = match chunk_starts.binary_search(&abs_row) {
386            Ok(i) => i,
387            Err(0) => 0,
388            Err(i) => i - 1,
389        };
390        let Some(ch) = col_ref.chunk(ch_idx) else {
391            return LiteralValue::Empty;
392        };
393        let row_start = chunk_starts[ch_idx];
394        let in_off = abs_row - row_start;
395        // Overlay takes precedence: user edits over computed over base.
396        let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
397        if let Some(ov) = cascade.get_scalar(in_off) {
398            return ov.to_literal();
399        }
400        // Read tag and route to lane
401        let tag_u8 = ch.type_tag.value(in_off);
402        match arrow_store::TypeTag::from_u8(tag_u8) {
403            arrow_store::TypeTag::Empty => LiteralValue::Empty,
404            arrow_store::TypeTag::Number => {
405                if let Some(arr) = &ch.numbers {
406                    if arr.is_null(in_off) {
407                        return LiteralValue::Empty;
408                    }
409                    LiteralValue::Number(arr.value(in_off))
410                } else {
411                    LiteralValue::Empty
412                }
413            }
414            arrow_store::TypeTag::DateTime => {
415                if let Some(arr) = &ch.numbers {
416                    if arr.is_null(in_off) {
417                        return LiteralValue::Empty;
418                    }
419                    LiteralValue::from_serial_number(arr.value(in_off))
420                } else {
421                    LiteralValue::Empty
422                }
423            }
424            arrow_store::TypeTag::Duration => {
425                if let Some(arr) = &ch.numbers {
426                    if arr.is_null(in_off) {
427                        return LiteralValue::Empty;
428                    }
429                    let serial = arr.value(in_off);
430                    let nanos_f = serial * 86_400.0 * 1_000_000_000.0;
431                    let nanos = nanos_f.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64;
432                    LiteralValue::Duration(chrono::Duration::nanoseconds(nanos))
433                } else {
434                    LiteralValue::Empty
435                }
436            }
437            arrow_store::TypeTag::Boolean => {
438                if let Some(arr) = &ch.booleans {
439                    if arr.is_null(in_off) {
440                        return LiteralValue::Empty;
441                    }
442                    LiteralValue::Boolean(arr.value(in_off))
443                } else {
444                    LiteralValue::Empty
445                }
446            }
447            arrow_store::TypeTag::Text => {
448                if let Some(arr) = &ch.text {
449                    if arr.is_null(in_off) {
450                        return LiteralValue::Empty;
451                    }
452                    let sa = arr
453                        .as_any()
454                        .downcast_ref::<arrow_array::StringArray>()
455                        .unwrap();
456                    LiteralValue::Text(sa.value(in_off).to_string())
457                } else {
458                    LiteralValue::Empty
459                }
460            }
461            arrow_store::TypeTag::Error => {
462                if let Some(arr) = &ch.errors {
463                    if arr.is_null(in_off) {
464                        return LiteralValue::Empty;
465                    }
466                    let kind = arrow_store::unmap_error_code(arr.value(in_off));
467                    LiteralValue::Error(ExcelError::new(kind))
468                } else {
469                    LiteralValue::Empty
470                }
471            }
472            arrow_store::TypeTag::Pending => LiteralValue::Pending,
473        }
474    }
475
476    /// Iterate overlapping chunks by row segment.
477    pub fn iter_row_chunks(&self) -> RowChunkIterator<'_> {
478        RowChunkIterator {
479            view: self,
480            current_chunk_idx: 0,
481        }
482    }
483
484    /// Row-major cell traversal.
485    pub fn for_each_cell(
486        &self,
487        f: &mut dyn FnMut(&LiteralValue) -> Result<(), ExcelError>,
488    ) -> Result<(), ExcelError> {
489        for res in self.iter_row_chunks() {
490            let cs = res?;
491            for r in 0..cs.row_len {
492                for c in 0..self.cols {
493                    let tmp = self.get_cell(cs.row_start + r, c);
494                    f(&tmp)?;
495                }
496            }
497        }
498        Ok(())
499    }
500
501    /// Visit each row as a borrowed slice (buffered).
502    pub fn for_each_row(
503        &self,
504        f: &mut dyn FnMut(&[LiteralValue]) -> Result<(), ExcelError>,
505    ) -> Result<(), ExcelError> {
506        let mut buf: Vec<LiteralValue> = Vec::with_capacity(self.cols);
507        for r in 0..self.rows {
508            buf.clear();
509            for c in 0..self.cols {
510                buf.push(self.get_cell(r, c));
511            }
512            f(&buf[..])?;
513        }
514        Ok(())
515    }
516
517    /// Visit each column as a contiguous slice (buffered).
518    pub fn for_each_col(
519        &self,
520        f: &mut dyn FnMut(&[LiteralValue]) -> Result<(), ExcelError>,
521    ) -> Result<(), ExcelError> {
522        let mut col_buf: Vec<LiteralValue> = Vec::with_capacity(self.rows);
523        for c in 0..self.cols {
524            col_buf.clear();
525            for r in 0..self.rows {
526                col_buf.push(self.get_cell(r, c));
527            }
528            f(&col_buf[..])?;
529        }
530        Ok(())
531    }
532
533    /// Get a numeric value at a specific cell, with coercion.
534    /// Returns None for empty cells or non-coercible values.
535    pub fn get_cell_numeric(&self, row: usize, col: usize, policy: CoercionPolicy) -> Option<f64> {
536        if row >= self.rows || col >= self.cols {
537            return None;
538        }
539
540        let val = self.get_cell(row, col);
541        pack_numeric(&val, policy).ok().flatten()
542    }
543
544    /// Numeric chunk iteration with coercion policy.
545    pub fn numbers_chunked(
546        &self,
547        policy: CoercionPolicy,
548        min_chunk: usize,
549        f: &mut dyn FnMut(NumericChunk) -> Result<(), ExcelError>,
550    ) -> Result<(), ExcelError> {
551        // Fast path for Arrow numbers lane when policy allows ignoring non-numeric cells in ranges (standard Excel behavior for SUM/AVERAGE/etc over ranges)
552        if matches!(policy, CoercionPolicy::NumberStrict) {
553            for res in self.numbers_slices() {
554                let (_, _, cols) = res?;
555                for col in cols {
556                    if col.null_count() < col.len() {
557                        let data = col.values();
558                        // If there are nulls, we need to handle them.
559                        // Currently NumericChunk doesn't have a perfect way to represent sparse Arrow slices
560                        // without copying if we want a contiguous f64 slice.
561                        // For now, we can just provide the raw data and the validity mask if it exists.
562
563                        let validity = if col.null_count() > 0 {
564                            // Extract validity mask.
565                            // Note: This is still slightly awkward with the current NumericChunk design.
566                            None // TODO: Implement validity mask propagation
567                        } else {
568                            None
569                        };
570
571                        if col.null_count() == 0 {
572                            f(NumericChunk { data, validity })?;
573                        } else {
574                            // Fallback for nulls: iterate and push to a small buffer
575                            let mut buf = Vec::with_capacity(col.len());
576                            for i in 0..col.len() {
577                                if !col.is_null(i) {
578                                    buf.push(col.value(i));
579                                }
580                            }
581                            if !buf.is_empty() {
582                                f(NumericChunk {
583                                    data: &buf,
584                                    validity: None,
585                                })?;
586                            }
587                        }
588                    }
589                }
590            }
591            return Ok(());
592        }
593
594        let min_chunk = min_chunk.max(1);
595        let mut buf: Vec<f64> = Vec::with_capacity(min_chunk);
596        let mut flush = |buf: &mut Vec<f64>| -> Result<(), ExcelError> {
597            if buf.is_empty() {
598                return Ok(());
599            }
600            // SAFETY: read-only borrow for callback duration
601            let ptr = buf.as_ptr();
602            let len = buf.len();
603            let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
604            let chunk = NumericChunk {
605                data: slice,
606                validity: None,
607            };
608            f(chunk)?;
609            buf.clear();
610            Ok(())
611        };
612
613        self.for_each_cell(&mut |v| {
614            if let Some(n) = pack_numeric(v, policy)? {
615                buf.push(n);
616                if buf.len() >= min_chunk {
617                    flush(&mut buf)?;
618                }
619            }
620            Ok(())
621        })?;
622        flush(&mut buf)?;
623
624        Ok(())
625    }
626
627    /// Typed numeric slices per row-segment: (row_start, row_len, per-column Float64 arrays)
628    pub fn numbers_slices(
629        &self,
630    ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::Float64Array>>), ExcelError>> + '_
631    {
632        self.iter_row_chunks().map(move |res| {
633            let cs = res?;
634            let mut out_cols: Vec<Arc<arrow_array::Float64Array>> =
635                Vec::with_capacity(cs.cols.len());
636            let sheet = self.sheet();
637            let chunk_starts = &sheet.chunk_starts;
638
639            for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
640                let base = cs.cols[local_c]
641                    .numbers
642                    .as_ref()
643                    .expect("numbers lane exists")
644                    .clone();
645                let base_fa = base
646                    .as_any()
647                    .downcast_ref::<arrow_array::Float64Array>()
648                    .unwrap()
649                    .clone();
650                let base_arc = Arc::new(base_fa);
651
652                // Identify chunk and overlay segment
653                let abs_seg_start = self.sr + cs.row_start;
654                let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
655                    Ok(i) => i,
656                    Err(0) => 0,
657                    Err(i) => i - 1,
658                };
659                if col_idx >= sheet.columns.len() {
660                    out_cols.push(base_arc);
661                    continue;
662                }
663                let col = &sheet.columns[col_idx];
664                let Some(ch) = col.chunk(ch_idx) else {
665                    out_cols.push(base_arc);
666                    continue;
667                };
668                let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
669                let seg_range = rel_off..(rel_off + cs.row_len);
670                let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
671                if cascade.has_any_in_range(seg_range.clone()) {
672                    let base_fa = base
673                        .as_any()
674                        .downcast_ref::<arrow_array::Float64Array>()
675                        .unwrap();
676                    out_cols.push(cascade.select_numbers(seg_range, base_fa));
677                } else {
678                    out_cols.push(base_arc);
679                }
680            }
681            Ok((cs.row_start, cs.row_len, out_cols))
682        })
683    }
684
685    /// Typed boolean slices per row-segment, overlay-aware via zip.
686    pub fn booleans_slices(
687        &self,
688    ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::BooleanArray>>), ExcelError>> + '_
689    {
690        self.iter_row_chunks().map(move |res| {
691            let cs = res?;
692            let mut out_cols: Vec<Arc<arrow_array::BooleanArray>> =
693                Vec::with_capacity(cs.cols.len());
694            let sheet = self.sheet();
695            let chunk_starts = &sheet.chunk_starts;
696
697            for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
698                let base = cs.cols[local_c]
699                    .booleans
700                    .as_ref()
701                    .expect("booleans lane exists")
702                    .clone();
703                let base_ba = base
704                    .as_any()
705                    .downcast_ref::<arrow_array::BooleanArray>()
706                    .unwrap()
707                    .clone();
708                let base_arc = Arc::new(base_ba);
709
710                // Identify chunk and overlay segment
711                let abs_seg_start = self.sr + cs.row_start;
712                let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
713                    Ok(i) => i,
714                    Err(0) => 0,
715                    Err(i) => i - 1,
716                };
717                if col_idx >= sheet.columns.len() {
718                    out_cols.push(base_arc);
719                    continue;
720                }
721                let col = &sheet.columns[col_idx];
722                let Some(ch) = col.chunk(ch_idx) else {
723                    out_cols.push(base_arc);
724                    continue;
725                };
726                let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
727                let seg_range = rel_off..(rel_off + cs.row_len);
728                let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
729                if cascade.has_any_in_range(seg_range.clone()) {
730                    let base_ba = base
731                        .as_any()
732                        .downcast_ref::<arrow_array::BooleanArray>()
733                        .unwrap();
734                    out_cols.push(cascade.select_booleans(seg_range, base_ba));
735                } else {
736                    out_cols.push(base_arc);
737                }
738            }
739            Ok((cs.row_start, cs.row_len, out_cols))
740        })
741    }
742
743    /// Text slices per row-segment (erased as ArrayRef for Utf8 today; future Dict/View support).
744    pub fn text_slices(
745        &self,
746    ) -> impl Iterator<Item = Result<(usize, usize, Vec<arrow_array::ArrayRef>), ExcelError>> + '_
747    {
748        self.iter_row_chunks().map(move |res| {
749            let cs = res?;
750            let mut out_cols: Vec<arrow_array::ArrayRef> = Vec::with_capacity(cs.cols.len());
751            let sheet = self.sheet();
752            let chunk_starts = &sheet.chunk_starts;
753
754            for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
755                let base = cs.cols[local_c]
756                    .text
757                    .as_ref()
758                    .expect("text lane exists")
759                    .clone();
760                let abs_seg_start = self.sr + cs.row_start;
761                let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
762                    Ok(i) => i,
763                    Err(0) => 0,
764                    Err(i) => i - 1,
765                };
766                if col_idx >= sheet.columns.len() {
767                    out_cols.push(base.clone());
768                    continue;
769                }
770                let col = &sheet.columns[col_idx];
771                let Some(ch) = col.chunk(ch_idx) else {
772                    out_cols.push(base.clone());
773                    continue;
774                };
775                let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
776                let seg_range = rel_off..(rel_off + cs.row_len);
777                let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
778                if cascade.has_any_in_range(seg_range.clone()) {
779                    let base_sa = base
780                        .as_any()
781                        .downcast_ref::<arrow_array::StringArray>()
782                        .unwrap();
783                    out_cols.push(cascade.select_text(seg_range, base_sa));
784                } else {
785                    out_cols.push(base.clone());
786                }
787            }
788            Ok((cs.row_start, cs.row_len, out_cols))
789        })
790    }
791
792    /// Typed lowered text slices per row-segment, overlay-aware via zip.
793    pub fn lowered_text_slices(
794        &self,
795    ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::StringArray>>), ExcelError>> + '_
796    {
797        self.iter_row_chunks().map(move |res| {
798            let cs = res?;
799            let mut out_cols: Vec<Arc<arrow_array::StringArray>> =
800                Vec::with_capacity(cs.cols.len());
801            let sheet = self.sheet();
802            let chunk_starts = &sheet.chunk_starts;
803
804            for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
805                // Identify chunk
806                let abs_seg_start = self.sr + cs.row_start;
807                let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
808                    Ok(i) => i,
809                    Err(0) => 0,
810                    Err(i) => i - 1,
811                };
812                if col_idx >= sheet.columns.len() {
813                    out_cols.push(Arc::new(arrow_array::StringArray::new_null(cs.row_len)));
814                    continue;
815                }
816                let col = &sheet.columns[col_idx];
817                let Some(ch) = col.chunk(ch_idx) else {
818                    out_cols.push(Arc::new(arrow_array::StringArray::new_null(cs.row_len)));
819                    continue;
820                };
821                let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
822                let seg_range = rel_off..(rel_off + cs.row_len);
823
824                let base_lowered = ch.text_lower_or_null();
825                let base_seg = base_lowered.slice(rel_off, cs.row_len);
826                let base_sa = base_seg
827                    .as_any()
828                    .downcast_ref::<arrow_array::StringArray>()
829                    .expect("lowered slice downcast");
830
831                let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
832                if cascade.has_any_in_range(seg_range.clone()) {
833                    out_cols.push(cascade.select_lowered_text(seg_range, base_sa));
834                } else {
835                    out_cols.push(Arc::new(base_sa.clone()));
836                }
837            }
838            Ok((cs.row_start, cs.row_len, out_cols))
839        })
840    }
841
842    /// Typed error-code slices per row-segment.
843    pub fn errors_slices(
844        &self,
845    ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::UInt8Array>>), ExcelError>> + '_
846    {
847        self.iter_row_chunks().map(move |res| {
848            let cs = res?;
849            let mut out_cols: Vec<Arc<arrow_array::UInt8Array>> = Vec::with_capacity(cs.cols.len());
850            let sheet = self.sheet();
851            let chunk_starts = &sheet.chunk_starts;
852
853            for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
854                let base = cs.cols[local_c]
855                    .errors
856                    .as_ref()
857                    .expect("errors lane exists")
858                    .clone();
859                let base_e = base
860                    .as_any()
861                    .downcast_ref::<arrow_array::UInt8Array>()
862                    .unwrap()
863                    .clone();
864                let base_arc: Arc<arrow_array::UInt8Array> = Arc::new(base_e);
865                let abs_seg_start = self.sr + cs.row_start;
866                let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
867                    Ok(i) => i,
868                    Err(0) => 0,
869                    Err(i) => i - 1,
870                };
871                if col_idx >= sheet.columns.len() {
872                    out_cols.push(base_arc);
873                    continue;
874                }
875                let col = &sheet.columns[col_idx];
876                let Some(ch) = col.chunk(ch_idx) else {
877                    out_cols.push(base_arc);
878                    continue;
879                };
880                let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
881                let seg_range = rel_off..(rel_off + cs.row_len);
882                let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
883                if cascade.has_any_in_range(seg_range.clone()) {
884                    let base_ea = base
885                        .as_any()
886                        .downcast_ref::<arrow_array::UInt8Array>()
887                        .unwrap();
888                    out_cols.push(cascade.select_errors(seg_range, base_ea));
889                } else {
890                    out_cols.push(base_arc);
891                }
892            }
893            Ok((cs.row_start, cs.row_len, out_cols))
894        })
895    }
896
897    /// Typed type-tag slices per row-segment.
898    pub fn type_tags_slices(
899        &self,
900    ) -> impl Iterator<Item = Result<(usize, usize, Vec<Arc<arrow_array::UInt8Array>>), ExcelError>> + '_
901    {
902        self.iter_row_chunks().map(move |res| {
903            let cs = res?;
904            let mut out_cols: Vec<Arc<arrow_array::UInt8Array>> = Vec::with_capacity(cs.cols.len());
905            let sheet = self.sheet();
906            let chunk_starts = &sheet.chunk_starts;
907
908            for (local_c, col_idx) in (self.sc..=self.ec).enumerate() {
909                let base = cs.cols[local_c].type_tag.clone();
910                let base_ta = base
911                    .as_any()
912                    .downcast_ref::<arrow_array::UInt8Array>()
913                    .unwrap()
914                    .clone();
915                let base_arc = Arc::new(base_ta);
916
917                let abs_seg_start = self.sr + cs.row_start;
918                let ch_idx = match chunk_starts.binary_search(&abs_seg_start) {
919                    Ok(i) => i,
920                    Err(0) => 0,
921                    Err(i) => i - 1,
922                };
923                if col_idx >= sheet.columns.len() {
924                    out_cols.push(base_arc);
925                    continue;
926                }
927                let col = &sheet.columns[col_idx];
928                let Some(ch) = col.chunk(ch_idx) else {
929                    out_cols.push(base_arc);
930                    continue;
931                };
932                let rel_off = (self.sr + cs.row_start) - chunk_starts[ch_idx];
933                let seg_range = rel_off..(rel_off + cs.row_len);
934                let cascade = arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
935                if cascade.has_any_in_range(seg_range.clone()) {
936                    let base_ta = base
937                        .as_any()
938                        .downcast_ref::<arrow_array::UInt8Array>()
939                        .unwrap();
940                    out_cols.push(cascade.select_type_tags(seg_range, base_ta));
941                } else {
942                    out_cols.push(base_arc);
943                }
944            }
945            Ok((cs.row_start, cs.row_len, out_cols))
946        })
947    }
948
949    /// Build per-column concatenated lowered text arrays for this view.
950    /// Uses per-chunk lowered cache for base text and merges overlays via zip_select.
951    pub fn lowered_text_columns(&self) -> Vec<arrow_array::ArrayRef> {
952        use crate::compute_prelude::concat_arrays;
953
954        let mut out: Vec<arrow_array::ArrayRef> = Vec::with_capacity(self.cols);
955        if self.rows == 0 || self.cols == 0 {
956            return out;
957        }
958        let sheet = self.sheet();
959        let chunk_starts = &sheet.chunk_starts;
960        // Clamp to physically materialized sheet rows; this view may be logically larger (e.g. A:A).
961        let sheet_rows = sheet.nrows as usize;
962        if sheet_rows == 0 || self.sr >= sheet_rows {
963            for _ in 0..self.cols {
964                out.push(arrow_array::new_null_array(&DataType::Utf8, 0));
965            }
966            return out;
967        }
968        let row_end = self.er.min(sheet_rows.saturating_sub(1));
969        let physical_len = row_end.saturating_sub(self.sr) + 1;
970        for col_idx in self.sc..=self.ec {
971            let mut segs: Vec<arrow_array::ArrayRef> = Vec::new();
972            if col_idx >= sheet.columns.len() {
973                // OOB: nulls across rows
974                segs.push(arrow_array::new_null_array(&DataType::Utf8, physical_len));
975            } else {
976                let col_ref = &sheet.columns[col_idx];
977                for (ci, &start) in chunk_starts.iter().enumerate() {
978                    let chunk_end = chunk_starts
979                        .get(ci + 1)
980                        .copied()
981                        .unwrap_or(sheet.nrows as usize);
982                    let len = chunk_end.saturating_sub(start);
983                    if len == 0 {
984                        continue;
985                    }
986                    let end = start + len - 1;
987                    let is = start.max(self.sr);
988                    let ie = end.min(row_end);
989                    if is > ie {
990                        continue;
991                    }
992                    let seg_len = ie - is + 1;
993                    let rel_off = is - start;
994                    if let Some(ch) = col_ref.chunk(ci) {
995                        // Overlay-aware lowered segment
996                        let seg_range = rel_off..(rel_off + seg_len);
997                        let cascade =
998                            arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
999                        if cascade.has_any_in_range(seg_range.clone()) {
1000                            let base_lowered = ch.text_lower_or_null();
1001                            let base_seg = base_lowered.slice(rel_off, seg_len);
1002                            let base_sa = base_seg
1003                                .as_any()
1004                                .downcast_ref::<arrow_array::StringArray>()
1005                                .expect("lowered slice downcast");
1006                            segs.push(cascade.select_lowered_text(seg_range, base_sa));
1007                        } else {
1008                            // No overlay: slice from lowered base
1009                            let lowered = ch.text_lower_or_null();
1010                            segs.push(lowered.slice(rel_off, seg_len));
1011                        }
1012                    } else {
1013                        segs.push(arrow_array::new_null_array(&DataType::Utf8, seg_len));
1014                    }
1015                }
1016            }
1017            // Ensure concat has at least one segment (can happen on sparse/empty sheets).
1018            if segs.is_empty() {
1019                segs.push(arrow_array::new_null_array(&DataType::Utf8, physical_len));
1020            }
1021            // Concat segments for this column
1022            let anys: Vec<&dyn arrow_array::Array> = segs
1023                .iter()
1024                .map(|a| a.as_ref() as &dyn arrow_array::Array)
1025                .collect();
1026            let conc = concat_arrays(&anys).expect("concat lowered segments");
1027            out.push(conc);
1028        }
1029        out
1030    }
1031
1032    /// Slice typed float arrays for a specific row interval (relative to view).
1033    pub fn slice_numbers(
1034        &self,
1035        rel_start: usize,
1036        len: usize,
1037    ) -> Vec<Option<Arc<arrow_array::Float64Array>>> {
1038        let abs_start = self.sr + rel_start;
1039        let abs_end = abs_start + len;
1040        let sheet = self.sheet();
1041        let chunk_starts = &sheet.chunk_starts;
1042
1043        let mut out_cols = Vec::with_capacity(self.cols);
1044        for col_idx in self.sc..=self.ec {
1045            if col_idx >= sheet.columns.len() {
1046                out_cols.push(None);
1047                continue;
1048            }
1049            let col = &sheet.columns[col_idx];
1050
1051            let start_ch_idx = match chunk_starts.binary_search(&abs_start) {
1052                Ok(i) => i,
1053                Err(0) => 0,
1054                Err(i) => i - 1,
1055            };
1056
1057            let mut segments: Vec<Arc<arrow_array::Float64Array>> = Vec::new();
1058            let mut null_only = true;
1059
1060            let mut curr = abs_start;
1061            let mut remaining = len;
1062            let mut ch_idx = start_ch_idx;
1063
1064            while remaining > 0 && ch_idx < chunk_starts.len() {
1065                let ch_start = chunk_starts[ch_idx];
1066                let ch_end = chunk_starts
1067                    .get(ch_idx + 1)
1068                    .copied()
1069                    .unwrap_or(sheet.nrows as usize);
1070                let ch_len = ch_end.saturating_sub(ch_start);
1071                if ch_len == 0 {
1072                    ch_idx += 1;
1073                    continue;
1074                }
1075
1076                let overlap_start = curr.max(ch_start);
1077                let overlap_end = ch_end.min(abs_end);
1078
1079                if overlap_start < overlap_end {
1080                    let seg_len = overlap_end - overlap_start;
1081                    let rel_off_in_chunk = overlap_start - ch_start;
1082
1083                    if let Some(ch) = col.chunk(ch_idx) {
1084                        let base_nums_arc = ch.numbers_or_null();
1085                        let base_nums = base_nums_arc.as_ref();
1086
1087                        let seg_range = rel_off_in_chunk..(rel_off_in_chunk + seg_len);
1088                        let cascade =
1089                            arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
1090
1091                        let final_arr = if cascade.has_any_in_range(seg_range.clone()) {
1092                            let base_slice = base_nums.slice(rel_off_in_chunk, seg_len);
1093                            let base_fa = base_slice
1094                                .as_any()
1095                                .downcast_ref::<arrow_array::Float64Array>()
1096                                .unwrap();
1097                            cascade.select_numbers(seg_range, base_fa).as_ref().clone()
1098                        } else {
1099                            let sl = base_nums.slice(rel_off_in_chunk, seg_len);
1100                            sl.as_any()
1101                                .downcast_ref::<arrow_array::Float64Array>()
1102                                .unwrap()
1103                                .clone()
1104                        };
1105
1106                        if final_arr.null_count() < final_arr.len() {
1107                            null_only = false;
1108                        }
1109                        segments.push(Arc::new(final_arr));
1110                    } else {
1111                        segments.push(Arc::new(arrow_array::Float64Array::new_null(seg_len)));
1112                    }
1113                    curr += seg_len;
1114                    remaining -= seg_len;
1115                }
1116                ch_idx += 1;
1117            }
1118
1119            if remaining > 0 {
1120                segments.push(Arc::new(arrow_array::Float64Array::new_null(remaining)));
1121            }
1122
1123            if segments.len() == 1 {
1124                if null_only && segments[0].null_count() == segments[0].len() {
1125                    out_cols.push(None);
1126                } else {
1127                    out_cols.push(Some(segments.pop().unwrap()));
1128                }
1129            } else {
1130                let refs: Vec<&dyn Array> =
1131                    segments.iter().map(|a| a.as_ref() as &dyn Array).collect();
1132                let c = crate::compute_prelude::concat_arrays(&refs).expect("concat slice");
1133                let fa = c
1134                    .as_any()
1135                    .downcast_ref::<arrow_array::Float64Array>()
1136                    .unwrap()
1137                    .clone();
1138                out_cols.push(Some(Arc::new(fa)));
1139            }
1140        }
1141        out_cols
1142    }
1143
1144    /// Slice typed lowered text arrays for a specific row interval (relative to view).
1145    pub fn slice_lowered_text(
1146        &self,
1147        rel_start: usize,
1148        len: usize,
1149    ) -> Vec<Option<Arc<arrow_array::StringArray>>> {
1150        let abs_start = self.sr + rel_start;
1151        let abs_end = abs_start + len;
1152        let sheet = self.sheet();
1153        let chunk_starts = &sheet.chunk_starts;
1154
1155        let mut out_cols = Vec::with_capacity(self.cols);
1156        for col_idx in self.sc..=self.ec {
1157            if col_idx >= sheet.columns.len() {
1158                out_cols.push(None);
1159                continue;
1160            }
1161            let col = &sheet.columns[col_idx];
1162            let start_ch_idx = match chunk_starts.binary_search(&abs_start) {
1163                Ok(i) => i,
1164                Err(0) => 0,
1165                Err(i) => i - 1,
1166            };
1167
1168            let mut segments: Vec<Arc<arrow_array::StringArray>> = Vec::new();
1169            let mut null_only = true;
1170
1171            let mut curr = abs_start;
1172            let mut remaining = len;
1173            let mut ch_idx = start_ch_idx;
1174
1175            while remaining > 0 && ch_idx < chunk_starts.len() {
1176                let ch_start = chunk_starts[ch_idx];
1177                let ch_end = chunk_starts
1178                    .get(ch_idx + 1)
1179                    .copied()
1180                    .unwrap_or(sheet.nrows as usize);
1181                let ch_len = ch_end.saturating_sub(ch_start);
1182                if ch_len == 0 {
1183                    ch_idx += 1;
1184                    continue;
1185                }
1186
1187                let overlap_start = curr.max(ch_start);
1188                let overlap_end = ch_end.min(abs_end);
1189
1190                if overlap_start < overlap_end {
1191                    let seg_len = overlap_end - overlap_start;
1192                    let rel_off_in_chunk = overlap_start - ch_start;
1193
1194                    if let Some(ch) = col.chunk(ch_idx) {
1195                        let base_lowered = ch.text_lower_or_null();
1196                        let seg_range = rel_off_in_chunk..(rel_off_in_chunk + seg_len);
1197                        let cascade =
1198                            arrow_store::OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
1199
1200                        let final_arr = if cascade.has_any_in_range(seg_range.clone()) {
1201                            let base_slice = base_lowered.slice(rel_off_in_chunk, seg_len);
1202                            let base_sa = base_slice
1203                                .as_any()
1204                                .downcast_ref::<arrow_array::StringArray>()
1205                                .unwrap();
1206                            cascade
1207                                .select_lowered_text(seg_range, base_sa)
1208                                .as_ref()
1209                                .clone()
1210                        } else {
1211                            let sl = base_lowered.slice(rel_off_in_chunk, seg_len);
1212                            sl.as_any()
1213                                .downcast_ref::<arrow_array::StringArray>()
1214                                .unwrap()
1215                                .clone()
1216                        };
1217
1218                        if final_arr.null_count() < final_arr.len() {
1219                            null_only = false;
1220                        }
1221                        segments.push(Arc::new(final_arr));
1222                    } else {
1223                        segments.push(Arc::new(arrow_array::StringArray::new_null(seg_len)));
1224                    }
1225                    curr += seg_len;
1226                    remaining -= seg_len;
1227                }
1228                ch_idx += 1;
1229            }
1230
1231            if remaining > 0 {
1232                segments.push(Arc::new(arrow_array::StringArray::new_null(remaining)));
1233            }
1234
1235            if segments.len() == 1 {
1236                if null_only && segments[0].null_count() == segments[0].len() {
1237                    out_cols.push(None);
1238                } else {
1239                    out_cols.push(Some(segments.pop().unwrap()));
1240                }
1241            } else {
1242                let refs: Vec<&dyn Array> =
1243                    segments.iter().map(|a| a.as_ref() as &dyn Array).collect();
1244                let c = crate::compute_prelude::concat_arrays(&refs).expect("concat text");
1245                let sa = c
1246                    .as_any()
1247                    .downcast_ref::<arrow_array::StringArray>()
1248                    .unwrap()
1249                    .clone();
1250                out_cols.push(Some(Arc::new(sa)));
1251            }
1252        }
1253        out_cols
1254    }
1255}
1256
1257#[inline]
1258fn pack_numeric(v: &LiteralValue, policy: CoercionPolicy) -> Result<Option<f64>, ExcelError> {
1259    match policy {
1260        CoercionPolicy::NumberLenientText => match v {
1261            LiteralValue::Error(e) => Err(e.clone()),
1262            LiteralValue::Empty => Ok(None),
1263            other => Ok(crate::coercion::to_number_lenient(other).ok()),
1264        },
1265        CoercionPolicy::NumberStrict => match v {
1266            LiteralValue::Error(e) => Err(e.clone()),
1267            LiteralValue::Empty => Ok(None),
1268            other => Ok(crate::coercion::to_number_strict(other).ok()),
1269        },
1270        _ => match v {
1271            LiteralValue::Error(e) => Err(e.clone()),
1272            _ => Ok(None),
1273        },
1274    }
1275}
1276
#[cfg(test)]
mod tests {
    use super::*;

    /// Chunked lenient numeric traversal over an owned 2x3 grid; the values
    /// delivered to the callback are expected to add up to 7.5.
    #[test]
    fn owned_rows_numeric_chunking() {
        let top_row = vec![
            LiteralValue::Number(1.0),
            LiteralValue::Text("x".into()),
            LiteralValue::Number(3.0),
        ];
        let bottom_row = vec![
            LiteralValue::Boolean(true),
            LiteralValue::Empty,
            LiteralValue::Number(2.5),
        ];
        let grid: Vec<Vec<LiteralValue>> = vec![top_row, bottom_row];
        let view = RangeView::from_owned_rows(grid, DateSystem::Excel1900);

        let mut total = 0.0f64;
        view.numbers_chunked(CoercionPolicy::NumberLenientText, 2, &mut |chunk| {
            for &value in chunk.data {
                total += value;
            }
            Ok(())
        })
        .unwrap();

        let drift = (total - 7.5).abs();
        assert!(drift < 1e-9);
    }

    /// A single-cell owned view exposes its only value through `as_1x1`.
    #[test]
    fn as_1x1_works() {
        let single_cell = vec![vec![LiteralValue::Number(7.0)]];
        let view = RangeView::from_owned_rows(single_cell, DateSystem::Excel1900);
        assert_eq!(view.as_1x1(), Some(LiteralValue::Number(7.0)));
    }
}