use arrow_array::Array;
use arrow_array::new_null_array;
use arrow_schema::DataType;
use chrono::Timelike;
use std::sync::Arc;
use arrow_array::builder::{BooleanBuilder, Float64Builder, StringBuilder, UInt8Builder};
use arrow_array::{ArrayRef, BooleanArray, Float64Array, StringArray, UInt8Array, UInt32Array};
use once_cell::sync::OnceCell;
use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
use rustc_hash::FxHashMap;
use std::collections::{BTreeMap, HashMap};
/// Per-cell type discriminant stored in a chunk's `type_tag` lane.
///
/// The enum is `#[repr(u8)]`, so a variant is written to a `UInt8Array` with a
/// plain `as u8` cast; the explicit discriminants below are therefore part of
/// the stored representation.
#[repr(u8)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum TypeTag {
    /// Cell holds no value.
    Empty = 0,
    /// Cell holds a number.
    Number = 1,
    /// Cell holds a boolean.
    Boolean = 2,
    /// Cell holds text.
    Text = 3,
    /// Cell holds an error code.
    Error = 4,
    /// Cell holds a date/time value.
    DateTime = 5,
    /// Cell holds a duration.
    Duration = 6,
    /// Cell value is pending.
    Pending = 7,
}
impl TypeTag {
fn from_value(v: &LiteralValue) -> Self {
match v {
LiteralValue::Empty => TypeTag::Empty,
LiteralValue::Int(_) | LiteralValue::Number(_) => TypeTag::Number,
LiteralValue::Boolean(_) => TypeTag::Boolean,
LiteralValue::Text(_) => TypeTag::Text,
LiteralValue::Error(_) => TypeTag::Error,
LiteralValue::Date(_) | LiteralValue::DateTime(_) | LiteralValue::Time(_) => {
TypeTag::DateTime
}
LiteralValue::Duration(_) => TypeTag::Duration,
LiteralValue::Pending => TypeTag::Pending,
LiteralValue::Array(_) => TypeTag::Error, }
}
}
impl TypeTag {
#[inline]
pub fn from_u8(b: u8) -> Self {
match b {
x if x == TypeTag::Empty as u8 => TypeTag::Empty,
x if x == TypeTag::Number as u8 => TypeTag::Number,
x if x == TypeTag::Boolean as u8 => TypeTag::Boolean,
x if x == TypeTag::Text as u8 => TypeTag::Text,
x if x == TypeTag::Error as u8 => TypeTag::Error,
x if x == TypeTag::DateTime as u8 => TypeTag::DateTime,
x if x == TypeTag::Duration as u8 => TypeTag::Duration,
x if x == TypeTag::Pending as u8 => TypeTag::Pending,
_ => TypeTag::Empty,
}
}
}
/// Summary counters kept alongside a [`ColumnChunk`].
///
/// The ingest builder fills these from its per-column lane counts when a chunk
/// is finished.
#[derive(Debug, Clone, Copy, Default)]
pub struct ColumnChunkMeta {
    /// Number of rows in the chunk.
    pub len: usize,
    /// Count of values written to the numeric lane.
    pub non_null_num: usize,
    /// Count of values written to the boolean lane.
    pub non_null_bool: usize,
    /// Count of values written to the text lane.
    pub non_null_text: usize,
    /// Count of values written to the error lane.
    pub non_null_err: usize,
}
/// One contiguous run of rows of a single column, stored as parallel typed
/// "lanes" plus a per-row type tag.
///
/// A lane is `None` when the chunk holds no value of that type; the
/// `*_or_null` accessors then hand out a lazily built all-null array of the
/// chunk's length instead.
#[derive(Debug, Clone)]
pub struct ColumnChunk {
    /// Numeric lane (also carries date/time and duration serials).
    pub numbers: Option<Arc<Float64Array>>,
    /// Boolean lane.
    pub booleans: Option<Arc<BooleanArray>>,
    /// Text lane; the accessors downcast it to `StringArray`.
    pub text: Option<ArrayRef>,
    /// Error-code lane (codes as produced by `map_error_code`).
    pub errors: Option<Arc<UInt8Array>>,
    /// Per-row `TypeTag` stored as `u8`; its length defines the chunk length.
    pub type_tag: Arc<UInt8Array>,
    /// Optional per-row formula ids. The ingest builder always sets `None`.
    pub formula_id: Option<Arc<UInt32Array>>,
    /// Row count and per-lane value counts.
    pub meta: ColumnChunkMeta,
    // Caches for the all-null stand-ins returned by the `*_or_null` accessors.
    lazy_null_numbers: OnceCell<Arc<Float64Array>>,
    lazy_null_booleans: OnceCell<Arc<BooleanArray>>,
    lazy_null_text: OnceCell<ArrayRef>,
    lazy_null_errors: OnceCell<Arc<UInt8Array>>,
    // Cache for the lowercased copy of the text lane.
    lowered_text: OnceCell<ArrayRef>,
    /// Overlay attached to this chunk.
    /// NOTE(review): `Overlay` is defined outside this view; presumably user edits — confirm.
    pub overlay: Overlay,
    /// Second overlay attached to this chunk.
    /// NOTE(review): presumably computed (formula) results — confirm against `Overlay` users.
    pub computed_overlay: Overlay,
}
impl ColumnChunk {
#[inline]
pub fn len(&self) -> usize {
self.type_tag.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn numbers_or_null(&self) -> Arc<Float64Array> {
if let Some(a) = &self.numbers {
return a.clone();
}
self.lazy_null_numbers
.get_or_init(|| {
let arr = new_null_array(&DataType::Float64, self.len());
Arc::new(arr.as_any().downcast_ref::<Float64Array>().unwrap().clone())
})
.clone()
}
#[inline]
pub fn booleans_or_null(&self) -> Arc<BooleanArray> {
if let Some(a) = &self.booleans {
return a.clone();
}
self.lazy_null_booleans
.get_or_init(|| {
let arr = new_null_array(&DataType::Boolean, self.len());
Arc::new(arr.as_any().downcast_ref::<BooleanArray>().unwrap().clone())
})
.clone()
}
#[inline]
pub fn errors_or_null(&self) -> Arc<UInt8Array> {
if let Some(a) = &self.errors {
return a.clone();
}
self.lazy_null_errors
.get_or_init(|| {
let arr = new_null_array(&DataType::UInt8, self.len());
Arc::new(arr.as_any().downcast_ref::<UInt8Array>().unwrap().clone())
})
.clone()
}
#[inline]
pub fn text_or_null(&self) -> ArrayRef {
if let Some(a) = &self.text {
return a.clone();
}
self.lazy_null_text
.get_or_init(|| new_null_array(&DataType::Utf8, self.len()))
.clone()
}
pub fn text_lower_or_null(&self) -> ArrayRef {
if let Some(a) = self.lowered_text.get() {
return a.clone();
}
let out: ArrayRef = if let Some(txt) = &self.text {
let sa = txt.as_any().downcast_ref::<StringArray>().unwrap();
let mut b = arrow_array::builder::StringBuilder::with_capacity(sa.len(), sa.len() * 8);
for i in 0..sa.len() {
if sa.is_null(i) {
b.append_null();
} else {
b.append_value(sa.value(i).to_lowercase());
}
}
let lowered = b.finish();
Arc::new(lowered)
} else {
new_null_array(&DataType::Utf8, self.len())
};
self.lowered_text.get_or_init(|| out.clone());
out
}
pub fn grow_len_to(&mut self, new_len: usize) {
let old_len = self.len();
if new_len <= old_len {
return;
}
let mut tags: Vec<u8> = self.type_tag.values().to_vec();
tags.resize(new_len, TypeTag::Empty as u8);
self.type_tag = Arc::new(UInt8Array::from(tags));
if let Some(a) = &self.numbers {
use arrow_array::builder::Float64Builder;
let mut b = Float64Builder::with_capacity(new_len);
for i in 0..old_len {
if a.is_null(i) {
b.append_null();
} else {
b.append_value(a.value(i));
}
}
for _ in old_len..new_len {
b.append_null();
}
self.numbers = Some(Arc::new(b.finish()));
}
if let Some(a) = &self.booleans {
use arrow_array::builder::BooleanBuilder;
let mut b = BooleanBuilder::with_capacity(new_len);
for i in 0..old_len {
if a.is_null(i) {
b.append_null();
} else {
b.append_value(a.value(i));
}
}
for _ in old_len..new_len {
b.append_null();
}
self.booleans = Some(Arc::new(b.finish()));
}
if let Some(a) = &self.errors {
use arrow_array::builder::UInt8Builder;
let mut b = UInt8Builder::with_capacity(new_len);
for i in 0..old_len {
if a.is_null(i) {
b.append_null();
} else {
b.append_value(a.value(i));
}
}
for _ in old_len..new_len {
b.append_null();
}
self.errors = Some(Arc::new(b.finish()));
}
if let Some(a) = &self.text {
use arrow_array::builder::StringBuilder;
let sa = a.as_any().downcast_ref::<StringArray>().unwrap();
let mut b = StringBuilder::with_capacity(new_len, 0);
for i in 0..old_len {
if sa.is_null(i) {
b.append_null();
} else {
b.append_value(sa.value(i));
}
}
for _ in old_len..new_len {
b.append_null();
}
self.text = Some(Arc::new(b.finish()) as ArrayRef);
}
self.lazy_null_numbers = OnceCell::new();
self.lazy_null_booleans = OnceCell::new();
self.lazy_null_text = OnceCell::new();
self.lazy_null_errors = OnceCell::new();
self.lowered_text = OnceCell::new();
self.meta.len = new_len;
}
}
/// A single sheet column, stored as a dense prefix of chunks plus optional
/// sparsely populated chunks keyed by chunk index.
#[derive(Debug, Clone)]
pub struct ArrowColumn {
    /// Dense chunks, addressed by position (chunk index `0..chunks.len()`).
    pub chunks: Vec<ColumnChunk>,
    /// Chunks at indices beyond the dense prefix, keyed by chunk index.
    pub sparse_chunks: FxHashMap<usize, ColumnChunk>,
    /// Position of this column within its sheet.
    pub index: u32,
}
impl ArrowColumn {
    /// Looks up the chunk at `idx`: the dense vector is consulted first, then
    /// the sparse map. Returns `None` when neither holds the index.
    #[inline]
    pub fn chunk(&self, idx: usize) -> Option<&ColumnChunk> {
        self.chunks
            .get(idx)
            .or_else(|| self.sparse_chunks.get(&idx))
    }

    /// Mutable counterpart of [`ArrowColumn::chunk`], with the same lookup order.
    #[inline]
    pub fn chunk_mut(&mut self, idx: usize) -> Option<&mut ColumnChunk> {
        match self.chunks.get_mut(idx) {
            Some(dense) => Some(dense),
            None => self.sparse_chunks.get_mut(&idx),
        }
    }

    /// `true` when at least one chunk lives in the sparse map.
    #[inline]
    pub fn has_sparse_chunks(&self) -> bool {
        let no_sparse = self.sparse_chunks.is_empty();
        !no_sparse
    }

    /// Total number of chunks, dense and sparse combined.
    #[inline]
    pub fn total_chunk_count(&self) -> usize {
        let dense = self.chunks.len();
        let sparse = self.sparse_chunks.len();
        dense + sparse
    }
}
/// A worksheet stored column-wise in chunked Arrow arrays.
#[derive(Debug, Clone)]
pub struct ArrowSheet {
    /// Sheet name.
    pub name: Arc<str>,
    /// Columns in sheet order.
    pub columns: Vec<ArrowColumn>,
    /// Total number of rows ingested.
    pub nrows: u32,
    /// Starting row of each chunk (running sum of the chunk lengths).
    pub chunk_starts: Vec<usize>,
    /// Row capacity the ingest builder used per chunk.
    pub chunk_rows: usize,
}
/// Collection of [`ArrowSheet`]s, looked up by name.
#[derive(Debug, Default, Clone)]
pub struct SheetStore {
    /// All sheets held by the store.
    pub sheets: Vec<ArrowSheet>,
}
impl SheetStore {
    /// Returns the first sheet whose name equals `name` exactly, if any.
    pub fn sheet(&self, name: &str) -> Option<&ArrowSheet> {
        for candidate in &self.sheets {
            if &*candidate.name == name {
                return Some(candidate);
            }
        }
        None
    }

    /// Mutable counterpart of [`SheetStore::sheet`].
    pub fn sheet_mut(&mut self, name: &str) -> Option<&mut ArrowSheet> {
        for candidate in self.sheets.iter_mut() {
            if &*candidate.name == name {
                return Some(candidate);
            }
        }
        None
    }
}
/// Row-by-row builder that turns incoming cells into an [`ArrowSheet`].
///
/// Every column owns one builder per lane; once `chunk_rows` rows have been
/// appended the builders are flushed into a [`ColumnChunk`] per column.
pub struct IngestBuilder {
    /// Name given to the finished sheet.
    name: Arc<str>,
    /// Number of columns every appended row must have.
    ncols: usize,
    /// Rows per chunk (at least 1).
    chunk_rows: usize,
    /// Date system used to convert dates to serial numbers.
    date_system: crate::engine::DateSystem,
    // Per-column lane builders for the chunk currently being filled.
    num_builders: Vec<Float64Builder>,
    bool_builders: Vec<BooleanBuilder>,
    text_builders: Vec<StringBuilder>,
    err_builders: Vec<UInt8Builder>,
    tag_builders: Vec<UInt8Builder>,
    /// Per-column value counts for the chunk currently being filled.
    lane_counts: Vec<LaneCounts>,
    /// Finished chunks, indexed by column then by chunk.
    chunks: Vec<Vec<ColumnChunk>>,
    /// Rows appended to the current (unfinished) chunk.
    row_in_chunk: usize,
    /// Rows appended overall.
    total_rows: u32,
}
/// Per-column count of values appended to each lane of the chunk in progress.
#[derive(Debug, Clone, Copy, Default)]
struct LaneCounts {
    /// Values appended to the numeric lane.
    n_num: usize,
    /// Values appended to the boolean lane.
    n_bool: usize,
    /// Values appended to the text lane.
    n_text: usize,
    /// Values appended to the error lane.
    n_err: usize,
}
impl IngestBuilder {
pub fn new(
sheet_name: &str,
ncols: usize,
chunk_rows: usize,
date_system: crate::engine::DateSystem,
) -> Self {
let mut chunks = Vec::with_capacity(ncols);
chunks.resize_with(ncols, Vec::new);
Self {
name: Arc::from(sheet_name.to_string()),
ncols,
chunk_rows: chunk_rows.max(1),
date_system,
num_builders: (0..ncols)
.map(|_| Float64Builder::with_capacity(chunk_rows))
.collect(),
bool_builders: (0..ncols)
.map(|_| BooleanBuilder::with_capacity(chunk_rows))
.collect(),
text_builders: (0..ncols)
.map(|_| StringBuilder::with_capacity(chunk_rows, chunk_rows * 12))
.collect(),
err_builders: (0..ncols)
.map(|_| UInt8Builder::with_capacity(chunk_rows))
.collect(),
tag_builders: (0..ncols)
.map(|_| UInt8Builder::with_capacity(chunk_rows))
.collect(),
lane_counts: vec![LaneCounts::default(); ncols],
chunks,
row_in_chunk: 0,
total_rows: 0,
}
}
pub fn append_row_cells<'a>(&mut self, row: &[CellIngest<'a>]) -> Result<(), ExcelError> {
assert_eq!(row.len(), self.ncols, "row width mismatch");
for (c, cell) in row.iter().enumerate() {
match cell {
CellIngest::Empty => {
self.tag_builders[c].append_value(TypeTag::Empty as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Number(n) => {
self.tag_builders[c].append_value(TypeTag::Number as u8);
self.num_builders[c].append_value(*n);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Boolean(b) => {
self.tag_builders[c].append_value(TypeTag::Boolean as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_value(*b);
self.lane_counts[c].n_bool += 1;
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Text(s) => {
self.tag_builders[c].append_value(TypeTag::Text as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_value(s);
self.lane_counts[c].n_text += 1;
self.err_builders[c].append_null();
}
CellIngest::ErrorCode(code) => {
self.tag_builders[c].append_value(TypeTag::Error as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_value(*code);
self.lane_counts[c].n_err += 1;
}
CellIngest::DateSerial(serial) => {
self.tag_builders[c].append_value(TypeTag::DateTime as u8);
self.num_builders[c].append_value(*serial);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Pending => {
self.tag_builders[c].append_value(TypeTag::Pending as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
}
}
self.row_in_chunk += 1;
self.total_rows += 1;
if self.row_in_chunk >= self.chunk_rows {
self.finish_chunk();
}
Ok(())
}
pub fn append_row_cells_iter<'a, I>(&mut self, iter: I) -> Result<(), ExcelError>
where
I: ExactSizeIterator<Item = CellIngest<'a>>,
{
assert_eq!(iter.len(), self.ncols, "row width mismatch");
for (c, cell) in iter.enumerate() {
match cell {
CellIngest::Empty => {
self.tag_builders[c].append_value(TypeTag::Empty as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Number(n) => {
self.tag_builders[c].append_value(TypeTag::Number as u8);
self.num_builders[c].append_value(n);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Boolean(b) => {
self.tag_builders[c].append_value(TypeTag::Boolean as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_value(b);
self.lane_counts[c].n_bool += 1;
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Text(s) => {
self.tag_builders[c].append_value(TypeTag::Text as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_value(s);
self.lane_counts[c].n_text += 1;
self.err_builders[c].append_null();
}
CellIngest::ErrorCode(code) => {
self.tag_builders[c].append_value(TypeTag::Error as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_value(code);
self.lane_counts[c].n_err += 1;
}
CellIngest::DateSerial(serial) => {
self.tag_builders[c].append_value(TypeTag::DateTime as u8);
self.num_builders[c].append_value(serial);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
CellIngest::Pending => {
self.tag_builders[c].append_value(TypeTag::Pending as u8);
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
}
}
self.row_in_chunk += 1;
self.total_rows += 1;
if self.row_in_chunk >= self.chunk_rows {
self.finish_chunk();
}
Ok(())
}
pub fn append_row(&mut self, row: &[LiteralValue]) -> Result<(), ExcelError> {
assert_eq!(row.len(), self.ncols, "row width mismatch");
for (c, v) in row.iter().enumerate() {
let tag = TypeTag::from_value(v) as u8;
self.tag_builders[c].append_value(tag);
match v {
LiteralValue::Empty => {
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Int(i) => {
self.num_builders[c].append_value(*i as f64);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Number(n) => {
self.num_builders[c].append_value(*n);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Boolean(b) => {
self.num_builders[c].append_null();
self.bool_builders[c].append_value(*b);
self.lane_counts[c].n_bool += 1;
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Text(s) => {
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_value(s);
self.lane_counts[c].n_text += 1;
self.err_builders[c].append_null();
}
LiteralValue::Error(e) => {
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_value(map_error_code(e.kind));
self.lane_counts[c].n_err += 1;
}
LiteralValue::Date(d) => {
let dt = d.and_hms_opt(0, 0, 0).unwrap();
let serial =
crate::builtins::datetime::datetime_to_serial_for(self.date_system, &dt);
self.num_builders[c].append_value(serial);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::DateTime(dt) => {
let serial =
crate::builtins::datetime::datetime_to_serial_for(self.date_system, dt);
self.num_builders[c].append_value(serial);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Time(t) => {
let serial = t.num_seconds_from_midnight() as f64 / 86_400.0;
self.num_builders[c].append_value(serial);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Duration(dur) => {
let serial = dur.num_seconds() as f64 / 86_400.0;
self.num_builders[c].append_value(serial);
self.lane_counts[c].n_num += 1;
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
LiteralValue::Array(_) => {
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_value(map_error_code(ExcelErrorKind::Value));
self.lane_counts[c].n_err += 1;
}
LiteralValue::Pending => {
self.num_builders[c].append_null();
self.bool_builders[c].append_null();
self.text_builders[c].append_null();
self.err_builders[c].append_null();
}
}
}
self.row_in_chunk += 1;
self.total_rows += 1;
if self.row_in_chunk >= self.chunk_rows {
self.finish_chunk();
}
Ok(())
}
fn finish_chunk(&mut self) {
if self.row_in_chunk == 0 {
return;
}
for c in 0..self.ncols {
let len = self.row_in_chunk;
let numbers_arc: Option<Arc<Float64Array>> = if self.lane_counts[c].n_num == 0 {
None
} else {
Some(Arc::new(self.num_builders[c].finish()))
};
let booleans_arc: Option<Arc<BooleanArray>> = if self.lane_counts[c].n_bool == 0 {
None
} else {
Some(Arc::new(self.bool_builders[c].finish()))
};
let text_ref: Option<ArrayRef> = if self.lane_counts[c].n_text == 0 {
None
} else {
Some(Arc::new(self.text_builders[c].finish()))
};
let errors_arc: Option<Arc<UInt8Array>> = if self.lane_counts[c].n_err == 0 {
None
} else {
Some(Arc::new(self.err_builders[c].finish()))
};
let tags: UInt8Array = self.tag_builders[c].finish();
let chunk = ColumnChunk {
numbers: numbers_arc,
booleans: booleans_arc,
text: text_ref,
errors: errors_arc,
type_tag: Arc::new(tags),
formula_id: None,
meta: ColumnChunkMeta {
len,
non_null_num: self.lane_counts[c].n_num,
non_null_bool: self.lane_counts[c].n_bool,
non_null_text: self.lane_counts[c].n_text,
non_null_err: self.lane_counts[c].n_err,
},
lazy_null_numbers: OnceCell::new(),
lazy_null_booleans: OnceCell::new(),
lazy_null_text: OnceCell::new(),
lazy_null_errors: OnceCell::new(),
lowered_text: OnceCell::new(),
overlay: Overlay::new(),
computed_overlay: Overlay::new(),
};
self.chunks[c].push(chunk);
self.num_builders[c] = Float64Builder::with_capacity(self.chunk_rows);
self.bool_builders[c] = BooleanBuilder::with_capacity(self.chunk_rows);
self.text_builders[c] =
StringBuilder::with_capacity(self.chunk_rows, self.chunk_rows * 12);
self.err_builders[c] = UInt8Builder::with_capacity(self.chunk_rows);
self.tag_builders[c] = UInt8Builder::with_capacity(self.chunk_rows);
self.lane_counts[c] = LaneCounts::default();
}
self.row_in_chunk = 0;
}
pub fn finish(mut self) -> ArrowSheet {
if self.row_in_chunk > 0 {
self.finish_chunk();
}
let mut columns = Vec::with_capacity(self.ncols);
for (idx, chunks) in self.chunks.into_iter().enumerate() {
columns.push(ArrowColumn {
chunks,
sparse_chunks: FxHashMap::default(),
index: idx as u32,
});
}
let mut chunk_starts: Vec<usize> = Vec::new();
if let Some(col0) = columns.first() {
let chunks_len0 = col0.chunks.len();
for (ci, col) in columns.iter().enumerate() {
if col.chunks.len() != chunks_len0 {
panic!(
"ArrowSheet chunk misalignment: column {} chunks={} != {}",
ci,
col.chunks.len(),
chunks_len0
);
}
}
let mut cur = 0usize;
for i in 0..chunks_len0 {
let len_i = col0.chunks[i].type_tag.len();
for (ci, col) in columns.iter().enumerate() {
let got = col.chunks[i].type_tag.len();
if got != len_i {
panic!(
"ArrowSheet chunk row-length misalignment at chunk {i}: col {ci} len={got} != {len_i}"
);
}
}
chunk_starts.push(cur);
cur += len_i;
}
}
ArrowSheet {
name: self.name,
columns,
nrows: self.total_rows,
chunk_starts,
chunk_rows: self.chunk_rows,
}
}
}
pub fn map_error_code(kind: ExcelErrorKind) -> u8 {
match kind {
ExcelErrorKind::Null => 1,
ExcelErrorKind::Ref => 2,
ExcelErrorKind::Name => 3,
ExcelErrorKind::Value => 4,
ExcelErrorKind::Div => 5,
ExcelErrorKind::Na => 6,
ExcelErrorKind::Num => 7,
ExcelErrorKind::Error => 8,
ExcelErrorKind::NImpl => 9,
ExcelErrorKind::Spill => 10,
ExcelErrorKind::Calc => 11,
ExcelErrorKind::Circ => 12,
ExcelErrorKind::Cancelled => 13,
}
}
pub fn unmap_error_code(code: u8) -> ExcelErrorKind {
match code {
1 => ExcelErrorKind::Null,
2 => ExcelErrorKind::Ref,
3 => ExcelErrorKind::Name,
4 => ExcelErrorKind::Value,
5 => ExcelErrorKind::Div,
6 => ExcelErrorKind::Na,
7 => ExcelErrorKind::Num,
8 => ExcelErrorKind::Error,
9 => ExcelErrorKind::NImpl,
10 => ExcelErrorKind::Spill,
11 => ExcelErrorKind::Calc,
12 => ExcelErrorKind::Circ,
13 => ExcelErrorKind::Cancelled,
_ => ExcelErrorKind::Error,
}
}
/// A pre-classified cell handed to `IngestBuilder::append_row_cells` and
/// `IngestBuilder::append_row_cells_iter`.
///
/// Text is borrowed for `'a`, so no string has to be allocated per cell before
/// it is copied into the text lane.
pub enum CellIngest<'a> {
    /// No value.
    Empty,
    /// Plain number.
    Number(f64),
    /// Boolean.
    Boolean(bool),
    /// Borrowed text.
    Text(&'a str),
    /// Error, already encoded as an error-lane code.
    ErrorCode(u8),
    /// Date/time given as a serial number; stored in the numeric lane.
    DateSerial(f64),
    /// Value not available yet.
    Pending,
}
/// A single cell value as held by an overlay.
///
/// Numeric, date/time and duration values all carry an `f64`; the variant
/// keeps the type information that the shared numeric lane cannot.
#[derive(Debug, Clone, PartialEq)]
pub enum OverlayValue {
    /// No value.
    Empty,
    /// Plain number.
    Number(f64),
    /// Date/time as a serial number.
    DateTime(f64),
    /// Duration as a fraction of a day.
    Duration(f64),
    /// Boolean.
    Boolean(bool),
    /// Shared, immutable text.
    Text(Arc<str>),
    /// Error, encoded as an error-lane code.
    Error(u8),
    /// Value not available yet.
    Pending,
}
impl OverlayValue {
pub fn from_literal_value(
value: &LiteralValue,
date_system: crate::engine::DateSystem,
) -> Self {
match value {
LiteralValue::Empty => OverlayValue::Empty,
LiteralValue::Int(i) => OverlayValue::Number(*i as f64),
LiteralValue::Number(n) => OverlayValue::Number(*n),
LiteralValue::Boolean(b) => OverlayValue::Boolean(*b),
LiteralValue::Text(s) => OverlayValue::Text(Arc::from(s.clone())),
LiteralValue::Error(e) => OverlayValue::Error(map_error_code(e.kind)),
LiteralValue::Date(d) => {
let dt = d.and_hms_opt(0, 0, 0).unwrap();
OverlayValue::DateTime(crate::builtins::datetime::datetime_to_serial_for(
date_system,
&dt,
))
}
LiteralValue::DateTime(dt) => OverlayValue::DateTime(
crate::builtins::datetime::datetime_to_serial_for(date_system, dt),
),
LiteralValue::Time(t) => {
OverlayValue::DateTime(t.num_seconds_from_midnight() as f64 / 86_400.0)
}
LiteralValue::Duration(d) => OverlayValue::Duration(d.num_seconds() as f64 / 86_400.0),
LiteralValue::Pending => OverlayValue::Pending,
LiteralValue::Array(_) => OverlayValue::Error(map_error_code(ExcelErrorKind::Value)),
}
}
#[inline]
pub(crate) fn estimated_payload_bytes(&self) -> usize {
match self {
OverlayValue::Empty | OverlayValue::Pending => 0,
OverlayValue::Number(_) | OverlayValue::DateTime(_) | OverlayValue::Duration(_) => {
core::mem::size_of::<f64>()
}
OverlayValue::Boolean(_) => core::mem::size_of::<bool>(),
OverlayValue::Error(_) => core::mem::size_of::<u8>(),
OverlayValue::Text(s) => s.len(),
}
}
#[inline]
pub(crate) fn type_tag(&self) -> TypeTag {
match self {
OverlayValue::Empty => TypeTag::Empty,
OverlayValue::Number(_) => TypeTag::Number,
OverlayValue::DateTime(_) => TypeTag::DateTime,
OverlayValue::Duration(_) => TypeTag::Duration,
OverlayValue::Boolean(_) => TypeTag::Boolean,
OverlayValue::Text(_) => TypeTag::Text,
OverlayValue::Error(_) => TypeTag::Error,
OverlayValue::Pending => TypeTag::Pending,
}
}
#[inline]
pub(crate) fn numeric_lane_value(&self) -> Option<f64> {
match self {
OverlayValue::Number(n) | OverlayValue::DateTime(n) | OverlayValue::Duration(n) => {
Some(*n)
}
_ => None,
}
}
#[inline]
pub(crate) fn boolean_lane_value(&self) -> Option<bool> {
match self {
OverlayValue::Boolean(b) => Some(*b),
_ => None,
}
}
#[inline]
pub(crate) fn text_lane_value(&self) -> Option<&str> {
match self {
OverlayValue::Text(s) => Some(s.as_ref()),
_ => None,
}
}
#[inline]
pub(crate) fn error_lane_value(&self) -> Option<u8> {
match self {
OverlayValue::Error(code) => Some(*code),
_ => None,
}
}
pub(crate) fn lowered_text_value(&self) -> Option<String> {
match self {
OverlayValue::Text(s) => Some(s.to_lowercase()),
OverlayValue::Number(n) | OverlayValue::DateTime(n) | OverlayValue::Duration(n) => {
Some(n.to_string())
}
OverlayValue::Boolean(b) => Some(if *b { "true" } else { "false" }.to_string()),
OverlayValue::Empty | OverlayValue::Error(_) | OverlayValue::Pending => None,
}
}
pub(crate) fn to_literal(&self) -> LiteralValue {
match self {
OverlayValue::Empty => LiteralValue::Empty,
OverlayValue::Number(n) => LiteralValue::Number(*n),
OverlayValue::DateTime(serial) => LiteralValue::from_serial_number(*serial),
OverlayValue::Duration(serial) => {
let nanos_f = *serial * 86_400.0 * 1_000_000_000.0;
let nanos = nanos_f.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64;
LiteralValue::Duration(chrono::Duration::nanoseconds(nanos))
}
OverlayValue::Boolean(b) => LiteralValue::Boolean(*b),
OverlayValue::Text(s) => LiteralValue::Text((**s).to_string()),
OverlayValue::Error(code) => {
LiteralValue::Error(ExcelError::new(unmap_error_code(*code)))
}
OverlayValue::Pending => LiteralValue::Pending,
}
}
}
/// An [`OverlayValue`] that is either borrowed from its owner or owned
/// outright, so lookups can return either without an extra clone.
#[derive(Debug, Clone)]
pub(crate) enum OverlayScalar<'a> {
    /// Reference to a value stored elsewhere.
    Borrowed(&'a OverlayValue),
    /// Value owned by the scalar itself.
    Owned(OverlayValue),
}
impl<'a> OverlayScalar<'a> {
#[inline]
fn as_value(&self) -> &OverlayValue {
match self {
OverlayScalar::Borrowed(value) => value,
OverlayScalar::Owned(value) => value,
}
}
#[inline]
pub(crate) fn to_overlay_value(&self) -> OverlayValue {
self.as_value().clone()
}
#[inline]
pub(crate) fn type_tag(&self) -> TypeTag {
self.as_value().type_tag()
}
#[inline]
pub(crate) fn numeric_lane_value(&self) -> Option<f64> {
self.as_value().numeric_lane_value()
}
#[inline]
pub(crate) fn boolean_lane_value(&self) -> Option<bool> {
self.as_value().boolean_lane_value()
}
#[inline]
pub(crate) fn text_lane_value(&self) -> Option<&str> {
self.as_value().text_lane_value()
}
#[inline]
pub(crate) fn error_lane_value(&self) -> Option<u8> {
self.as_value().error_lane_value()
}
pub(crate) fn lowered_text_value(&self) -> Option<String> {
self.as_value().lowered_text_value()
}
pub(crate) fn to_literal(&self) -> LiteralValue {
self.as_value().to_literal()
}
}
// NOTE(review): presumably the fixed per-entry overhead used when estimating
// overlay memory; no use of this constant is visible in this part of the file — confirm.
const OVERLAY_ENTRY_BASE_BYTES: usize = 32;
// Fixed per-fragment overhead; `OverlayFragment::estimated_bytes` adds it to
// every fragment's size estimate.
const OVERLAY_FRAGMENT_BASE_BYTES: usize = 48;
/// Columnar storage for the values of one overlay fragment.
///
/// Mirrors the lane layout of a column chunk: a type tag per value plus one
/// optional array per value type. A lane is `None` when the fragment holds no
/// value of that type.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub(crate) struct OverlayFragmentPayload {
    /// `TypeTag` of every stored value, as `u8`.
    type_tags: Arc<UInt8Array>,
    /// Numeric lane.
    numbers: Option<Arc<Float64Array>>,
    /// Boolean lane.
    booleans: Option<Arc<BooleanArray>>,
    /// Text lane; read back by downcasting to `StringArray`.
    text: Option<ArrayRef>,
    /// Error-code lane.
    errors: Option<Arc<UInt8Array>>,
    /// Sum of the Arrow-reported memory sizes of the lanes that are present,
    /// computed once at construction.
    estimated_bytes: usize,
}
impl OverlayFragmentPayload {
fn from_values(values: Vec<OverlayValue>) -> Self {
let len = values.len();
let mut tag_b = UInt8Builder::with_capacity(len);
let mut nb = Float64Builder::with_capacity(len);
let mut bb = BooleanBuilder::with_capacity(len);
let mut sb = StringBuilder::with_capacity(len, len.saturating_mul(8));
let mut eb = UInt8Builder::with_capacity(len);
let mut non_num = 0usize;
let mut non_bool = 0usize;
let mut non_text = 0usize;
let mut non_err = 0usize;
for value in &values {
append_overlay_value_to_lane_builders(
value,
&mut tag_b,
&mut nb,
&mut bb,
&mut sb,
&mut eb,
&mut non_num,
&mut non_bool,
&mut non_text,
&mut non_err,
);
}
let type_tags = Arc::new(tag_b.finish());
let numbers = {
let a = nb.finish();
(non_num > 0).then(|| Arc::new(a))
};
let booleans = {
let a = bb.finish();
(non_bool > 0).then(|| Arc::new(a))
};
let text = {
let a = sb.finish();
(non_text > 0).then(|| Arc::new(a) as ArrayRef)
};
let errors = {
let a = eb.finish();
(non_err > 0).then(|| Arc::new(a))
};
let estimated_bytes = type_tags
.get_array_memory_size()
.saturating_add(
numbers
.as_ref()
.map(|a| a.get_array_memory_size())
.unwrap_or(0),
)
.saturating_add(
booleans
.as_ref()
.map(|a| a.get_array_memory_size())
.unwrap_or(0),
)
.saturating_add(
text.as_ref()
.map(|a| a.get_array_memory_size())
.unwrap_or(0),
)
.saturating_add(
errors
.as_ref()
.map(|a| a.get_array_memory_size())
.unwrap_or(0),
);
Self {
type_tags,
numbers,
booleans,
text,
errors,
estimated_bytes,
}
}
fn overlay_value(&self, idx: usize) -> Option<OverlayValue> {
if idx >= self.type_tags.len() || self.type_tags.is_null(idx) {
return None;
}
match TypeTag::from_u8(self.type_tags.value(idx)) {
TypeTag::Empty => Some(OverlayValue::Empty),
TypeTag::Number => Some(OverlayValue::Number(self.number_at(idx)?)),
TypeTag::DateTime => Some(OverlayValue::DateTime(self.number_at(idx)?)),
TypeTag::Duration => Some(OverlayValue::Duration(self.number_at(idx)?)),
TypeTag::Boolean => Some(OverlayValue::Boolean(self.boolean_at(idx)?)),
TypeTag::Text => Some(OverlayValue::Text(Arc::from(self.text_at(idx)?))),
TypeTag::Error => Some(OverlayValue::Error(self.error_at(idx)?)),
TypeTag::Pending => Some(OverlayValue::Pending),
}
}
#[inline]
fn get_scalar(&self, idx: usize) -> Option<OverlayScalar<'_>> {
self.overlay_value(idx).map(OverlayScalar::Owned)
}
#[inline]
fn number_at(&self, idx: usize) -> Option<f64> {
let arr = self.numbers.as_ref()?;
(!arr.is_null(idx)).then(|| arr.value(idx))
}
#[inline]
fn boolean_at(&self, idx: usize) -> Option<bool> {
let arr = self.booleans.as_ref()?;
(!arr.is_null(idx)).then(|| arr.value(idx))
}
#[inline]
fn text_at(&self, idx: usize) -> Option<&str> {
let arr = self.text.as_ref()?;
let arr = arr.as_any().downcast_ref::<StringArray>()?;
(!arr.is_null(idx)).then(|| arr.value(idx))
}
#[inline]
fn error_at(&self, idx: usize) -> Option<u8> {
let arr = self.errors.as_ref()?;
(!arr.is_null(idx)).then(|| arr.value(idx))
}
#[inline]
fn values_slice(&self, start: usize, len: usize) -> Vec<OverlayValue> {
(start..start.saturating_add(len))
.filter_map(|idx| self.overlay_value(idx))
.collect()
}
#[inline]
fn estimated_bytes(&self) -> usize {
self.estimated_bytes
}
}
/// A batch of overlay values stored in one of three layouts.
#[derive(Debug, Clone)]
pub(crate) enum OverlayFragment {
    /// Scattered cells: `offsets[i]` is the position of the payload's `i`-th
    /// value. The constructor emits offsets in ascending order without
    /// duplicates.
    SparseOffsets {
        offsets: Vec<u32>,
        payload: OverlayFragmentPayload,
    },
    /// A contiguous range of `len` cells starting at `start`, one payload
    /// value per cell.
    DenseRange {
        start: u32,
        len: u32,
        payload: OverlayFragmentPayload,
    },
    /// A contiguous range of `len` cells starting at `start`, run-length
    /// encoded: `run_ends[i]` is the exclusive end (relative to `start`) of
    /// the run holding the payload's `i`-th value.
    RunRange {
        start: u32,
        len: u32,
        run_ends: Vec<u32>,
        payload: OverlayFragmentPayload,
    },
}
impl OverlayFragment {
const MAX_SPLIT_SEGMENTS_BEFORE_SPARSE_FALLBACK: usize = 128;
/// Builds a `SparseOffsets` fragment from `(offset, value)` pairs.
///
/// Pairs are ordered by offset; when an offset occurs more than once the
/// last value given for it wins. Returns `None` for an empty input.
///
/// # Panics
/// Panics if an offset does not fit in `u32`.
pub(crate) fn sparse_offsets(items: Vec<(usize, OverlayValue)>) -> Option<Self> {
    // The map both sorts by offset and lets later duplicates replace earlier ones.
    let mut ordered: BTreeMap<usize, OverlayValue> = BTreeMap::new();
    items.into_iter().for_each(|(offset, value)| {
        ordered.insert(offset, value);
    });
    if ordered.is_empty() {
        return None;
    }
    let (offsets, values): (Vec<u32>, Vec<OverlayValue>) = ordered
        .into_iter()
        .map(|(offset, value)| {
            let narrowed = u32::try_from(offset).expect("overlay offset fits in u32");
            (narrowed, value)
        })
        .unzip();
    let payload = OverlayFragmentPayload::from_values(values);
    Some(Self::SparseOffsets { offsets, payload })
}
/// Builds a sparse fragment from `items` and keeps it only if its estimated
/// size is strictly below `point_estimate`.
///
/// Returns `None` when `items` is empty, `Some(Ok(fragment))` when the
/// fragment is smaller, and otherwise `Some(Err(cells))` where `cells` is
/// whatever the fragment's `cells()` returns.
pub(crate) fn sparse_offsets_if_estimated_smaller_than_points(
    items: Vec<(usize, OverlayValue)>,
    point_estimate: usize,
) -> Option<Result<Self, Vec<(usize, OverlayValue)>>> {
    Self::sparse_offsets(items).map(|fragment| {
        let is_smaller = fragment.estimated_bytes() < point_estimate;
        if is_smaller {
            Ok(fragment)
        } else {
            Err(fragment.cells())
        }
    })
}
/// Builds a `DenseRange` fragment covering `values.len()` cells from `start`.
/// Returns `None` when `values` is empty.
///
/// # Panics
/// Panics if `start` or the number of values does not fit in `u32`.
pub(crate) fn dense_range(start: usize, values: Vec<OverlayValue>) -> Option<Self> {
    if values.is_empty() {
        return None;
    }
    let start = u32::try_from(start).expect("overlay start fits in u32");
    let len = u32::try_from(values.len()).expect("overlay length fits in u32");
    let payload = OverlayFragmentPayload::from_values(values);
    Some(Self::DenseRange {
        start,
        len,
        payload,
    })
}
/// Run-length encodes `values` and builds a `RunRange` fragment starting at
/// `start`. Returns `None` when `values` is empty.
///
/// Consecutive values that compare equal form one run; each run is recorded
/// by its exclusive end index and one representative value.
pub(crate) fn run_range(start: usize, values: Vec<OverlayValue>) -> Option<Self> {
    let (first, rest) = values.split_first()?;
    let mut run_ends = Vec::new();
    let mut run_values = Vec::new();
    let mut current = first.clone();
    for (pos, value) in rest.iter().enumerate() {
        if *value != current {
            // `rest` starts at index 1 of `values`, so the closed run ends at pos + 1.
            run_ends.push(pos + 1);
            run_values.push(core::mem::replace(&mut current, value.clone()));
        }
    }
    // Close the final run.
    run_ends.push(values.len());
    run_values.push(current);
    Self::run_range_from_parts(start, values.len(), run_ends, run_values)
}
/// Validates run-length parts and builds a `RunRange` fragment.
///
/// `run_ends` must be strictly increasing, each within `1..=len`, the last
/// equal to `len`, and paired one-to-one with `values`. Adjacent runs whose
/// values compare equal are merged. Returns `None` when any of these
/// requirements is violated or `len` is zero.
///
/// # Panics
/// Panics if `start`, `len` or a run end does not fit in `u32`.
fn run_range_from_parts(
    start: usize,
    len: usize,
    run_ends: Vec<usize>,
    values: Vec<OverlayValue>,
) -> Option<Self> {
    let shape_ok = len != 0 && !run_ends.is_empty() && run_ends.len() == values.len();
    if !shape_ok {
        return None;
    }
    let mut merged_ends: Vec<u32> = Vec::with_capacity(run_ends.len());
    let mut merged_values: Vec<OverlayValue> = Vec::with_capacity(values.len());
    let mut prev_end = 0usize;
    for (end, value) in run_ends.into_iter().zip(values) {
        if end <= prev_end || end > len {
            return None;
        }
        let end_u32 = u32::try_from(end).expect("run end fits in u32");
        if merged_values.last() == Some(&value) {
            // Same value as the previous run: extend it instead of adding a new one.
            if let Some(slot) = merged_ends.last_mut() {
                *slot = end_u32;
            }
        } else {
            merged_ends.push(end_u32);
            merged_values.push(value);
        }
        prev_end = end;
    }
    // The runs must cover the range exactly.
    if prev_end != len || merged_ends.last().copied() != Some(len as u32) {
        return None;
    }
    let start = u32::try_from(start).expect("overlay start fits in u32");
    let len = u32::try_from(len).expect("overlay length fits in u32");
    let payload = OverlayFragmentPayload::from_values(merged_values);
    Some(Self::RunRange {
        start,
        len,
        run_ends: merged_ends,
        payload,
    })
}
/// Estimated memory footprint of this fragment: the fixed base cost, plus four bytes per
/// index entry (`offsets` or `run_ends`; dense ranges have none), plus the payload estimate.
/// All additions saturate.
#[inline]
fn estimated_bytes(&self) -> usize {
    let (index_entries, payload) = match self {
        OverlayFragment::SparseOffsets { offsets, payload } => (offsets.len(), payload),
        OverlayFragment::DenseRange { payload, .. } => (0usize, payload),
        OverlayFragment::RunRange {
            run_ends, payload, ..
        } => (run_ends.len(), payload),
    };
    let index_bytes = index_entries.saturating_mul(core::mem::size_of::<u32>());
    OVERLAY_FRAGMENT_BASE_BYTES
        .saturating_add(index_bytes)
        .saturating_add(payload.estimated_bytes())
}
/// Number of offsets this fragment covers: the offset count for sparse fragments,
/// the stored `len` for dense and run fragments.
#[inline]
fn coverage_len(&self) -> usize {
    match self {
        OverlayFragment::SparseOffsets { offsets, .. } => offsets.len(),
        OverlayFragment::DenseRange { len, .. } => *len as usize,
        OverlayFragment::RunRange { len, .. } => *len as usize,
    }
}
/// Largest offset covered by this fragment.
///
/// For sparse fragments this is the maximum stored offset (0 when there are none);
/// for dense and run fragments it is `start + len - 1`, computed with saturating arithmetic.
pub(crate) fn max_covered_offset(&self) -> usize {
    match self {
        OverlayFragment::SparseOffsets { offsets, .. } => {
            offsets.iter().max().map_or(0, |off| *off as usize)
        }
        OverlayFragment::DenseRange { start, len, .. }
        | OverlayFragment::RunRange { start, len, .. } => {
            let end_exclusive = (*start as usize).saturating_add(*len as usize);
            end_exclusive.saturating_sub(1)
        }
    }
}
/// Half-open offset interval covered by a dense or run fragment.
///
/// Sparse fragments have no contiguous interval and yield `None`.
fn interval_coverage(&self) -> Option<core::ops::Range<usize>> {
    let (first, count) = match self {
        OverlayFragment::DenseRange { start, len, .. }
        | OverlayFragment::RunRange { start, len, .. } => (*start as usize, *len as usize),
        OverlayFragment::SparseOffsets { .. } => return None,
    };
    Some(first..first.saturating_add(count))
}
/// Borrows the offset list of a sparse fragment; `None` for every other layout.
fn sparse_offsets_slice(&self) -> Option<&[u32]> {
    if let OverlayFragment::SparseOffsets { offsets, .. } = self {
        Some(offsets.as_slice())
    } else {
        None
    }
}
/// Reports whether this fragment covers at least one offset inside `range`.
///
/// An empty `range` never matches. Sparse fragments are probed with a binary partition
/// (this assumes `offsets` is sorted ascending, as the other lookups in this type do);
/// dense and run fragments use a half-open interval overlap test.
fn has_any_in_range(&self, range: core::ops::Range<usize>) -> bool {
    if range.is_empty() {
        return false;
    }
    match self {
        OverlayFragment::SparseOffsets { offsets, .. } => {
            // Offsets are stored as `u32`, so a range that starts beyond `u32::MAX` cannot
            // contain any of them. Clamping the start to `u32::MAX` instead would wrongly
            // report a stored offset equal to `u32::MAX` as being inside the range.
            let Ok(start) = u32::try_from(range.start) else {
                return false;
            };
            let idx = offsets.partition_point(|off| *off < start);
            offsets
                .get(idx)
                .is_some_and(|off| (*off as usize) < range.end)
        }
        OverlayFragment::DenseRange { .. } | OverlayFragment::RunRange { .. } => self
            .interval_coverage()
            .is_some_and(|own| own.start < range.end && range.start < own.end),
    }
}
/// Reports whether this fragment shares at least one covered offset with `replacement`.
///
/// A sparse `replacement` is compared by its offset list; a dense or run `replacement`
/// by its covered interval.
fn intersects_fragment_exact(&self, replacement: &OverlayFragment) -> bool {
    match (
        replacement.sparse_offsets_slice(),
        replacement.interval_coverage(),
    ) {
        (Some(offsets), _) => self.intersects_sparse_offsets(offsets),
        (None, Some(range)) => self.intersects_interval(range),
        (None, None) => false,
    }
}
/// Reports whether this fragment covers at least one offset inside the half-open `range`.
///
/// An empty `range` never intersects. Sparse fragments are probed with a binary partition
/// (this assumes `offsets` is sorted ascending); dense and run fragments use an interval
/// overlap test.
fn intersects_interval(&self, range: core::ops::Range<usize>) -> bool {
    if range.is_empty() {
        return false;
    }
    match self {
        OverlayFragment::SparseOffsets { offsets, .. } => {
            // Offsets are stored as `u32`, so an interval starting beyond `u32::MAX` cannot
            // contain any of them. Clamping the start to `u32::MAX` would falsely match a
            // stored offset equal to `u32::MAX`.
            let Ok(start) = u32::try_from(range.start) else {
                return false;
            };
            let idx = offsets.partition_point(|off| *off < start);
            offsets
                .get(idx)
                .is_some_and(|off| (*off as usize) < range.end)
        }
        OverlayFragment::DenseRange { .. } | OverlayFragment::RunRange { .. } => self
            .interval_coverage()
            .is_some_and(|own| own.start < range.end && range.start < own.end),
    }
}
/// Reports whether any offset in `replacement_offsets` is covered by this fragment.
///
/// `replacement_offsets` is searched with a binary partition, so it is assumed to be
/// sorted ascending. An empty list never intersects.
fn intersects_sparse_offsets(&self, replacement_offsets: &[u32]) -> bool {
    if replacement_offsets.is_empty() {
        return false;
    }
    if let OverlayFragment::SparseOffsets { offsets, .. } = self {
        return Self::sorted_offsets_intersect(offsets, replacement_offsets);
    }
    // Dense and run fragments: find the first replacement offset at or after our start
    // and check that it still falls before our exclusive end.
    let Some(own) = self.interval_coverage() else {
        return false;
    };
    let lower = u32::try_from(own.start).unwrap_or(u32::MAX);
    let first = replacement_offsets.partition_point(|off| *off < lower);
    match replacement_offsets.get(first) {
        Some(off) => (*off as usize) < own.end,
        None => false,
    }
}
/// Two-pointer walk over two ascending offset lists; returns `true` as soon as a common
/// element is found. Both slices are expected to be sorted ascending.
fn sorted_offsets_intersect(a: &[u32], b: &[u32]) -> bool {
    let (mut left, mut right) = (a, b);
    while let (Some((x, left_rest)), Some((y, right_rest))) =
        (left.split_first(), right.split_first())
    {
        if x == y {
            return true;
        }
        // Advance whichever side currently holds the smaller head.
        if x < y {
            left = left_rest;
        } else {
            right = right_rest;
        }
    }
    false
}
/// Reports whether a scalar lookup at `off` finds a value in this fragment.
fn covers_offset(&self, off: usize) -> bool {
    let hit = self.get_scalar(off);
    hit.is_some()
}
/// Looks up the scalar stored for absolute offset `off`, if this fragment covers it.
///
/// * Sparse: binary-searches the offset list and reads the payload at that position.
/// * Dense: reads the payload at `off - start` when that index lies inside `len`.
/// * Run: finds the run whose exclusive end is the first one greater than `off - start`
///   and reads that run's payload entry.
fn get_scalar(&self, off: usize) -> Option<OverlayScalar<'_>> {
    match self {
        OverlayFragment::SparseOffsets { offsets, payload } => {
            let key = u32::try_from(off).ok()?;
            let slot = offsets.binary_search(&key).ok()?;
            payload.get_scalar(slot)
        }
        OverlayFragment::DenseRange {
            start,
            len,
            payload,
        } => {
            let rel = off
                .checked_sub(*start as usize)
                .filter(|rel| *rel < *len as usize)?;
            payload.get_scalar(rel)
        }
        OverlayFragment::RunRange {
            start,
            len,
            run_ends,
            payload,
        } => {
            let rel = off
                .checked_sub(*start as usize)
                .filter(|rel| *rel < *len as usize)?;
            let rel = u32::try_from(rel).ok()?;
            // Run ends are exclusive, so the owning run is the first with `end > rel`.
            let run = run_ends.partition_point(|end| *end <= rel);
            payload.get_scalar(run)
        }
    }
}
/// Returns what remains of this fragment after removing every offset covered by
/// `replacement`. Falls back to an unchanged clone if `replacement` exposes neither a
/// sparse offset list nor an interval.
fn subtract_fragment(&self, replacement: &OverlayFragment) -> Vec<OverlayFragment> {
    match (
        replacement.sparse_offsets_slice(),
        replacement.interval_coverage(),
    ) {
        (Some(offsets), _) => self.subtract_sparse_offsets(offsets),
        (None, Some(range)) => self.subtract_interval(range),
        (None, None) => vec![self.clone()],
    }
}
/// Returns what remains of this fragment after removing the single offset `off`.
///
/// A sparse fragment is returned unchanged when `off` cannot be represented as `u32`,
/// since it cannot hold such an offset.
fn subtract_offset(&self, off: usize) -> Vec<OverlayFragment> {
    match self {
        OverlayFragment::SparseOffsets { .. } => match u32::try_from(off) {
            Ok(key) => self.subtract_sparse_offsets(core::slice::from_ref(&key)),
            Err(_) => vec![self.clone()],
        },
        OverlayFragment::DenseRange { .. } | OverlayFragment::RunRange { .. } => {
            let single = off..off.saturating_add(1);
            self.subtract_interval(single)
        }
    }
}
/// Returns what remains of this fragment after removing every offset in the half-open
/// interval `replacement`.
///
/// * An empty or non-overlapping interval yields an unchanged clone.
/// * Sparse fragments keep only the cells whose offsets fall outside the interval.
/// * Dense and run fragments are cut into at most two pieces: the part to the left of
///   the interval and the part to the right.
fn subtract_interval(&self, replacement: core::ops::Range<usize>) -> Vec<OverlayFragment> {
    if replacement.is_empty() {
        return vec![self.clone()];
    }
    if let OverlayFragment::SparseOffsets { offsets, payload } = self {
        let mut kept = Vec::new();
        for (idx, off) in offsets.iter().enumerate() {
            let off = *off as usize;
            if replacement.contains(&off) {
                continue;
            }
            if let Some(value) = payload.overlay_value(idx) {
                kept.push((off, value));
            }
        }
        return OverlayFragment::sparse_offsets(kept).into_iter().collect();
    }

    let Some(own) = self.interval_coverage() else {
        return vec![self.clone()];
    };
    let disjoint = own.end <= replacement.start || replacement.end <= own.start;
    if disjoint {
        return vec![self.clone()];
    }
    let cut_start = replacement.start.max(own.start);
    let cut_end = replacement.end.min(own.end);
    // Pieces keep their absolute position, so the new start equals the piece's own start.
    let piece = |from: usize, to: usize| match self {
        OverlayFragment::DenseRange { .. } => self.dense_segment_with_start(from, from, to),
        OverlayFragment::RunRange { .. } => self.run_segment_with_start(from, from, to),
        OverlayFragment::SparseOffsets { .. } => None,
    };
    let mut out = Vec::with_capacity(2);
    if own.start < cut_start {
        out.extend(piece(own.start, cut_start));
    }
    if cut_end < own.end {
        out.extend(piece(cut_end, own.end));
    }
    out
}
/// Returns what remains of this fragment after removing every offset listed in
/// `replacement_offsets` (binary-searched, so assumed sorted ascending).
///
/// An empty list yields an unchanged clone. Dense and run fragments delegate to their
/// dedicated hole-punching helpers.
fn subtract_sparse_offsets(&self, replacement_offsets: &[u32]) -> Vec<OverlayFragment> {
    if replacement_offsets.is_empty() {
        return vec![self.clone()];
    }
    match self {
        OverlayFragment::DenseRange { .. } => {
            self.subtract_sparse_offsets_from_dense(replacement_offsets)
        }
        OverlayFragment::RunRange { .. } => {
            self.subtract_sparse_offsets_from_run(replacement_offsets)
        }
        OverlayFragment::SparseOffsets { offsets, payload } => {
            let mut survivors = Vec::new();
            for (idx, off) in offsets.iter().enumerate() {
                if replacement_offsets.binary_search(off).is_ok() {
                    continue;
                }
                if let Some(value) = payload.overlay_value(idx) {
                    survivors.push((*off as usize, value));
                }
            }
            OverlayFragment::sparse_offsets(survivors)
                .into_iter()
                .collect()
        }
    }
}
/// Collects the distinct entries of `offsets` that fall inside the half-open `range`,
/// in ascending order.
///
/// `offsets` is expected to be sorted ascending; consecutive duplicates are reported once.
/// Returns an empty vector for an empty `range`.
fn sparse_holes_in_interval(offsets: &[u32], range: core::ops::Range<usize>) -> Vec<usize> {
    if range.is_empty() {
        return Vec::new();
    }
    // Offsets are `u32`, so a range starting beyond `u32::MAX` cannot contain any of them.
    // Clamping the start to `u32::MAX` would wrongly report an offset equal to `u32::MAX`.
    let Ok(start) = u32::try_from(range.start) else {
        return Vec::new();
    };
    let first = offsets.partition_point(|off| *off < start);
    let mut holes: Vec<usize> = offsets[first..]
        .iter()
        .map(|off| *off as usize)
        .take_while(|off| *off < range.end)
        .collect();
    // Sorted input keeps duplicates adjacent, so this drops every repeat.
    holes.dedup();
    holes
}
/// Punches the offsets in `replacement_offsets` out of a dense fragment.
///
/// The fragment is split into dense pieces around each hole. When the split would create
/// more than `MAX_SPLIT_SEGMENTS_BEFORE_SPARSE_FALLBACK` pieces, the remainder is emitted
/// as a sparse fragment instead. Returns an unchanged clone when no hole falls inside the
/// fragment.
fn subtract_sparse_offsets_from_dense(
    &self,
    replacement_offsets: &[u32],
) -> Vec<OverlayFragment> {
    let Some(own) = self.interval_coverage() else {
        return vec![self.clone()];
    };
    let holes = Self::sparse_holes_in_interval(replacement_offsets, own.clone());
    if holes.is_empty() {
        return vec![self.clone()];
    }
    let piece_budget = holes.len().saturating_add(1);
    if piece_budget > Self::MAX_SPLIT_SEGMENTS_BEFORE_SPARSE_FALLBACK {
        return self.sparse_remainder_excluding_offsets(&holes);
    }
    let mut pieces = Vec::with_capacity(piece_budget);
    let mut cursor = own.start;
    // Treat the fragment's exclusive end as a final boundary so the trailing piece is
    // produced by the same loop as the pieces between holes.
    for boundary in holes.into_iter().chain(core::iter::once(own.end)) {
        if cursor < boundary {
            pieces.extend(self.dense_segment_with_start(cursor, cursor, boundary));
        }
        cursor = boundary.saturating_add(1);
    }
    pieces
}
/// Punches the offsets in `replacement_offsets` out of a run fragment.
///
/// The fragment is split into run pieces around each hole. When the split would create
/// more than `MAX_SPLIT_SEGMENTS_BEFORE_SPARSE_FALLBACK` pieces, the remainder is emitted
/// as a sparse fragment instead. Returns an unchanged clone when no hole falls inside the
/// fragment.
fn subtract_sparse_offsets_from_run(
    &self,
    replacement_offsets: &[u32],
) -> Vec<OverlayFragment> {
    let Some(own) = self.interval_coverage() else {
        return vec![self.clone()];
    };
    let holes = Self::sparse_holes_in_interval(replacement_offsets, own.clone());
    if holes.is_empty() {
        return vec![self.clone()];
    }
    let piece_budget = holes.len().saturating_add(1);
    if piece_budget > Self::MAX_SPLIT_SEGMENTS_BEFORE_SPARSE_FALLBACK {
        return self.sparse_remainder_excluding_offsets(&holes);
    }
    let mut pieces = Vec::with_capacity(piece_budget);
    let mut cursor = own.start;
    // Treat the fragment's exclusive end as a final boundary so the trailing piece is
    // produced by the same loop as the pieces between holes.
    for boundary in holes.into_iter().chain(core::iter::once(own.end)) {
        if cursor < boundary {
            pieces.extend(self.run_segment_with_start(cursor, cursor, boundary));
        }
        cursor = boundary.saturating_add(1);
    }
    pieces
}
/// Materializes every cell of this fragment except those at `sorted_holes` and repacks
/// the survivors as a sparse fragment. `sorted_holes` is binary-searched, so it must be
/// sorted ascending.
fn sparse_remainder_excluding_offsets(&self, sorted_holes: &[usize]) -> Vec<OverlayFragment> {
    let mut survivors = self.cells();
    survivors.retain(|(off, _)| sorted_holes.binary_search(off).is_err());
    OverlayFragment::sparse_offsets(survivors)
        .into_iter()
        .collect()
}
/// Copies the absolute span `abs_start..abs_end` out of a dense fragment into a new dense
/// fragment that begins at `new_start`.
///
/// Returns `None` when `self` is not a dense fragment, the span is empty, `abs_start`
/// precedes the fragment start, or `dense_range` rejects the sliced values.
fn dense_segment_with_start(
    &self,
    new_start: usize,
    abs_start: usize,
    abs_end: usize,
) -> Option<OverlayFragment> {
    let OverlayFragment::DenseRange { start, payload, .. } = self else {
        return None;
    };
    if abs_start >= abs_end {
        return None;
    }
    let rel_start = abs_start.checked_sub(*start as usize)?;
    let count = abs_end.saturating_sub(abs_start);
    OverlayFragment::dense_range(new_start, payload.values_slice(rel_start, count))
}
/// Copies the absolute span `abs_start..abs_end` out of a run fragment into a new run
/// fragment that begins at `new_start`.
///
/// Each stored run is clipped to the requested span; runs that do not intersect it are
/// dropped. Returns `None` when `self` is not a run fragment, the span is empty or not
/// fully inside the fragment, or `run_range_from_parts` rejects the clipped runs.
fn run_segment_with_start(
    &self,
    new_start: usize,
    abs_start: usize,
    abs_end: usize,
) -> Option<OverlayFragment> {
    let OverlayFragment::RunRange {
        start,
        len,
        run_ends,
        payload,
    } = self
    else {
        return None;
    };
    if abs_start >= abs_end {
        return None;
    }
    let base = *start as usize;
    let own = base..base.saturating_add(*len as usize);
    if abs_start < own.start || abs_end > own.end {
        return None;
    }
    // Requested span expressed relative to the fragment start.
    let window = (abs_start - base)..(abs_end - base);
    let mut clipped_ends = Vec::new();
    let mut clipped_values = Vec::new();
    let mut run_start = 0usize;
    for (run_idx, end) in run_ends.iter().enumerate() {
        let run_end = *end as usize;
        let lo = run_start.max(window.start);
        let hi = run_end.min(window.end);
        if lo < hi {
            // New run ends are relative to the start of the extracted segment.
            clipped_ends.push(hi - window.start);
            clipped_values.extend(payload.overlay_value(run_idx));
        }
        if run_end >= window.end {
            break;
        }
        run_start = run_end;
    }
    OverlayFragment::run_range_from_parts(
        new_start,
        abs_end.saturating_sub(abs_start),
        clipped_ends,
        clipped_values,
    )
}
/// Materializes every covered cell as `(absolute offset, value)` pairs, in storage order.
///
/// Entries for which the payload yields no value are skipped.
fn cells(&self) -> Vec<(usize, OverlayValue)> {
    let mut out = Vec::new();
    match self {
        OverlayFragment::SparseOffsets { offsets, payload } => {
            for (idx, off) in offsets.iter().enumerate() {
                if let Some(value) = payload.overlay_value(idx) {
                    out.push((*off as usize, value));
                }
            }
        }
        OverlayFragment::DenseRange {
            start,
            len,
            payload,
        } => {
            let base = *start as usize;
            for idx in 0..*len as usize {
                if let Some(value) = payload.overlay_value(idx) {
                    out.push((base.saturating_add(idx), value));
                }
            }
        }
        OverlayFragment::RunRange { start, len, .. } => {
            let base = *start as usize;
            for idx in 0..*len as usize {
                // Runs are expanded through the scalar lookup, one offset at a time.
                let abs = base.saturating_add(idx);
                if let Some(scalar) = self.get_scalar(abs) {
                    out.push((abs, scalar.to_overlay_value()));
                }
            }
        }
    }
    out
}
/// Extracts the part of this fragment that lies inside `off..off + len`, rebased so that
/// `off` becomes offset 0 in the result.
///
/// Returns `None` when `len` is 0 or when the fragment covers nothing inside the window.
fn slice(&self, off: usize, len: usize) -> Option<OverlayFragment> {
    if len == 0 {
        return None;
    }
    let end = off.saturating_add(len);
    match self {
        OverlayFragment::SparseOffsets { offsets, payload } => {
            // Offsets are `u32`; a window starting beyond `u32::MAX` cannot contain any.
            // Clamping `off` to `u32::MAX` would pick up an offset equal to `u32::MAX`
            // and rebase it to 0 through the saturating subtraction below.
            let start = u32::try_from(off).ok()?;
            let lo = offsets.partition_point(|candidate| *candidate < start);
            let hi = offsets.partition_point(|candidate| (*candidate as usize) < end);
            let cells: Vec<_> = (lo..hi)
                .filter_map(|idx| {
                    let rebased = (offsets[idx] as usize).saturating_sub(off);
                    payload.overlay_value(idx).map(|value| (rebased, value))
                })
                .collect();
            OverlayFragment::sparse_offsets(cells)
        }
        OverlayFragment::DenseRange { .. } => {
            let own = self.interval_coverage()?;
            let seg_start = own.start.max(off);
            let seg_end = own.end.min(end);
            if seg_start >= seg_end {
                return None;
            }
            self.dense_segment_with_start(seg_start - off, seg_start, seg_end)
        }
        OverlayFragment::RunRange { .. } => {
            let own = self.interval_coverage()?;
            let seg_start = own.start.max(off);
            let seg_end = own.end.min(end);
            if seg_start >= seg_end {
                return None;
            }
            self.run_segment_with_start(seg_start - off, seg_start, seg_end)
        }
    }
}
}
/// Overlay of cell values keyed by offset, stored as individually set points plus
/// bulk fragments, with a running estimate of the memory they occupy.
///
/// Scalar lookups consult `points` first and then `fragments` from last to first.
#[derive(Debug, Default, Clone)]
pub struct Overlay {
/// Individually set cells, keyed by offset.
points: HashMap<usize, OverlayValue>,
/// Bulk fragments (sparse, dense, or run-encoded), in insertion order.
fragments: Vec<OverlayFragment>,
/// Running byte estimate for all points and fragments; maintained incrementally.
estimated_bytes: usize,
}
impl Overlay {
/// Fixed per-point cost added to a value's payload estimate in `point_estimate`.
const ENTRY_BASE_BYTES: usize = OVERLAY_ENTRY_BASE_BYTES;
pub fn new() -> Self {
Self {
points: HashMap::new(),
fragments: Vec::new(),
estimated_bytes: 0,
}
}
/// Estimated bytes for a single point entry: the fixed entry cost plus the value's
/// own payload estimate.
#[inline]
fn point_estimate(v: &OverlayValue) -> usize {
    let payload_bytes = v.estimated_payload_bytes();
    Self::ENTRY_BASE_BYTES + payload_bytes
}
/// Applies a signed change to the running byte estimate, saturating at 0 and at
/// `usize::MAX`.
///
/// The magnitude is taken with `unsigned_abs`, so `isize::MIN` (which callers can reach
/// through saturating arithmetic) is handled without the overflow that negating it
/// would cause.
#[inline]
fn adjust_estimated_bytes(&mut self, delta: isize) {
    let magnitude = delta.unsigned_abs();
    self.estimated_bytes = if delta >= 0 {
        self.estimated_bytes.saturating_add(magnitude)
    } else {
        self.estimated_bytes.saturating_sub(magnitude)
    };
}
/// Looks up the value at `off` without cloning where possible.
///
/// A point entry wins; otherwise fragments are searched from the most recently pushed
/// to the oldest and the first hit is returned.
#[inline]
pub(crate) fn get_scalar(&self, off: usize) -> Option<OverlayScalar<'_>> {
    if let Some(point) = self.points.get(&off) {
        return Some(OverlayScalar::Borrowed(point));
    }
    self.fragments
        .iter()
        .rev()
        .find_map(|fragment| fragment.get_scalar(off))
}
/// Returns an owned copy of the value at `off`, if any.
#[inline]
pub fn get(&self, off: usize) -> Option<OverlayValue> {
    let scalar = self.get_scalar(off)?;
    Some(scalar.to_overlay_value())
}
/// Stores `v` as a point at `off`, first removing whatever previously covered that offset
/// (point or fragment cell).
///
/// Returns the net change in estimated bytes: the removal delta plus the new point's
/// estimate.
#[inline]
pub(crate) fn set_scalar(&mut self, off: usize, v: OverlayValue) -> isize {
    let removal_delta = self.remove_scalar(off);
    let added = Self::point_estimate(&v) as isize;
    self.points.insert(off, v);
    self.adjust_estimated_bytes(added);
    removal_delta.saturating_add(added)
}
#[inline]
pub fn set(&mut self, off: usize, v: OverlayValue) -> isize {
self.set_scalar(off, v)
}
/// Installs `fragment`, evicting any points and any parts of existing fragments that it
/// covers, and returns the net change in estimated bytes.
pub(crate) fn apply_fragment(&mut self, fragment: OverlayFragment) -> isize {
    let points_delta = self.remove_points_covered_by_fragment(&fragment);
    let fragments_delta = self.remove_fragments_covered_by_fragment(&fragment);
    let added = fragment.estimated_bytes() as isize;
    self.fragments.push(fragment);
    self.adjust_estimated_bytes(added);
    points_delta
        .saturating_add(fragments_delta)
        .saturating_add(added)
}
/// Removes every point whose offset is covered by `fragment`, lowers the running byte
/// estimate by the freed amount, and returns that amount as a non-positive delta.
fn remove_points_covered_by_fragment(&mut self, fragment: &OverlayFragment) -> isize {
    let mut freed = 0usize;
    match fragment {
        OverlayFragment::SparseOffsets { offsets, .. } => {
            for off in offsets {
                if let Some(old) = self.points.remove(&(*off as usize)) {
                    freed = freed.saturating_add(Self::point_estimate(&old));
                }
            }
        }
        OverlayFragment::DenseRange { .. } | OverlayFragment::RunRange { .. } => {
            if let Some(range) = fragment.interval_coverage() {
                // Drop covered points in place, tallying their estimates as they go.
                self.points.retain(|off, old| {
                    if range.contains(off) {
                        freed = freed.saturating_add(Self::point_estimate(old));
                        false
                    } else {
                        true
                    }
                });
            }
        }
    }
    self.estimated_bytes = self.estimated_bytes.saturating_sub(freed);
    -(freed as isize)
}
fn remove_fragments_covered_by_fragment(&mut self, replacement: &OverlayFragment) -> isize {
if self.fragments.is_empty() {
return 0;
}
let mut delta: isize = 0;
let mut fragments = Vec::with_capacity(self.fragments.len());
for fragment in self.fragments.drain(..) {
if !fragment.intersects_fragment_exact(replacement) {
fragments.push(fragment);
continue;
}
let old_est = fragment.estimated_bytes();
let replacements = fragment.subtract_fragment(replacement);
let new_est = replacements
.iter()
.map(OverlayFragment::estimated_bytes)
.fold(0usize, usize::saturating_add);
fragments.extend(replacements);
delta = delta.saturating_add(new_est as isize - old_est as isize);
}
self.fragments = fragments;
self.adjust_estimated_bytes(delta);
delta
}
/// Removes whatever covers `off` (a point and/or a cell inside any fragment) and returns
/// the net change in estimated bytes.
///
/// Fragments that cover `off` are replaced by the pieces left after cutting that single
/// offset out of them.
#[inline]
pub(crate) fn remove_scalar(&mut self, off: usize) -> isize {
    let mut delta = 0isize;
    if let Some(old) = self.points.remove(&off) {
        let old_est = Self::point_estimate(&old);
        // The point's bytes are released from the running estimate right here ...
        self.estimated_bytes = self.estimated_bytes.saturating_sub(old_est);
        delta = delta.saturating_sub(old_est as isize);
    }
    if !self.fragments.is_empty() {
        // ... so the fragment pass tracks its own delta. Feeding the combined `delta`
        // into `adjust_estimated_bytes` would subtract the point's estimate twice.
        let mut fragment_delta = 0isize;
        let mut fragments = Vec::with_capacity(self.fragments.len());
        for fragment in self.fragments.drain(..) {
            if fragment.get_scalar(off).is_none() {
                fragments.push(fragment);
                continue;
            }
            let old_est = fragment.estimated_bytes();
            let replacements = fragment.subtract_offset(off);
            let new_est = replacements
                .iter()
                .map(OverlayFragment::estimated_bytes)
                .fold(0usize, usize::saturating_add);
            fragments.extend(replacements);
            fragment_delta = fragment_delta.saturating_add(new_est as isize - old_est as isize);
        }
        self.fragments = fragments;
        self.adjust_estimated_bytes(fragment_delta);
        delta = delta.saturating_add(fragment_delta);
    }
    delta
}
#[inline]
pub fn remove(&mut self, off: usize) -> isize {
self.remove_scalar(off)
}
/// Removes every point and fragment cell inside the half-open `range` and returns the
/// net change in estimated bytes. An empty range is a no-op returning 0.
pub(crate) fn remove_range(&mut self, range: core::ops::Range<usize>) -> isize {
    if range.is_empty() {
        return 0;
    }

    // Points: collect the doomed keys first, then remove them one by one.
    let doomed: Vec<usize> = self
        .points
        .keys()
        .filter(|off| range.contains(*off))
        .copied()
        .collect();
    let mut delta = 0isize;
    for off in doomed {
        let Some(old) = self.points.remove(&off) else {
            continue;
        };
        let freed = Self::point_estimate(&old);
        self.estimated_bytes = self.estimated_bytes.saturating_sub(freed);
        delta = delta.saturating_sub(freed as isize);
    }

    if self.fragments.is_empty() {
        return delta;
    }

    // Fragments: replace each one by what is left after cutting `range` out of it.
    let mut fragment_delta = 0isize;
    let mut rebuilt = Vec::with_capacity(self.fragments.len());
    for fragment in self.fragments.drain(..) {
        let before = fragment.estimated_bytes();
        let remainder = fragment.subtract_interval(range.clone());
        let after = remainder
            .iter()
            .fold(0usize, |acc, piece| acc.saturating_add(piece.estimated_bytes()));
        rebuilt.extend(remainder);
        fragment_delta = fragment_delta.saturating_add(after as isize - before as isize);
    }
    self.fragments = rebuilt;
    self.adjust_estimated_bytes(fragment_delta);
    delta.saturating_add(fragment_delta)
}
#[inline]
pub(crate) fn clear_all(&mut self) -> usize {
let freed = self.estimated_bytes;
self.points.clear();
self.fragments.clear();
self.estimated_bytes = 0;
freed
}
#[inline]
pub fn clear(&mut self) -> usize {
self.clear_all()
}
/// Number of covered cells: the point count plus the coverage length of every fragment.
#[inline]
pub fn len(&self) -> usize {
    let fragment_cells: usize = self
        .fragments
        .iter()
        .map(OverlayFragment::coverage_len)
        .sum();
    self.points.len().saturating_add(fragment_cells)
}
#[inline]
pub fn estimated_bytes(&self) -> usize {
self.estimated_bytes
}
/// `true` when the overlay holds neither points nor fragments.
#[inline]
pub fn is_empty(&self) -> bool {
    let holds_anything = !self.points.is_empty() || !self.fragments.is_empty();
    !holds_anything
}
/// Reports whether any point or fragment covers an offset inside `range`.
/// Points are checked first; fragments are only consulted when no point matches.
#[inline]
pub(crate) fn has_any_in_range(&self, range: core::ops::Range<usize>) -> bool {
    let point_hit = self.points.keys().any(|off| range.contains(off));
    point_hit
        || self
            .fragments
            .iter()
            .any(|fragment| fragment.has_any_in_range(range.clone()))
}
#[inline]
pub fn any_in_range(&self, range: core::ops::Range<usize>) -> bool {
self.has_any_in_range(range)
}
/// Builds a new overlay containing only the cells inside `off..off + len`, rebased so
/// that `off` becomes offset 0.
///
/// Fragments are applied first in their stored order, then points are set on top.
pub(crate) fn slice(&self, off: usize, len: usize) -> Overlay {
    let window = off..off.saturating_add(len);
    let mut out = Overlay::new();
    for sliced in self
        .fragments
        .iter()
        .filter_map(|fragment| fragment.slice(off, len))
    {
        let _ = out.apply_fragment(sliced);
    }
    for (point_off, value) in &self.points {
        if window.contains(point_off) {
            let _ = out.set_scalar(*point_off - off, value.clone());
        }
    }
    out
}
/// Iterates over every covered cell as `(offset, value)` in ascending offset order.
///
/// Cells are materialized into an ordered map: fragments are inserted in stored order
/// and points last, so for a shared offset the later insertion wins.
pub fn iter(&self) -> impl Iterator<Item = (usize, OverlayValue)> {
    let mut ordered = BTreeMap::new();
    for fragment in &self.fragments {
        ordered.extend(fragment.cells());
    }
    ordered.extend(self.points.iter().map(|(off, value)| (*off, value.clone())));
    ordered.into_iter()
}
pub(crate) fn iter_points(&self) -> impl Iterator<Item = (&usize, &OverlayValue)> {
self.points.iter()
}
}
/// Test-only snapshot of an overlay's internal composition, produced by
/// `Overlay::debug_stats`.
#[cfg(test)]
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq)]
pub(crate) struct OverlayDebugStats {
/// Number of individually set points.
pub(crate) points: usize,
/// Number of `SparseOffsets` fragments.
pub(crate) sparse_fragments: usize,
/// Number of `DenseRange` fragments.
pub(crate) dense_fragments: usize,
/// Number of `RunRange` fragments.
pub(crate) run_fragments: usize,
/// Total covered cells as reported by `Overlay::len`.
pub(crate) covered_len: usize,
}
#[cfg(test)]
impl Overlay {
    /// Counts points and fragments by layout and records the total covered length.
    pub(crate) fn debug_stats(&self) -> OverlayDebugStats {
        let mut stats = OverlayDebugStats {
            points: self.points.len(),
            covered_len: self.len(),
            ..OverlayDebugStats::default()
        };
        for fragment in &self.fragments {
            let counter = match fragment {
                OverlayFragment::SparseOffsets { .. } => &mut stats.sparse_fragments,
                OverlayFragment::DenseRange { .. } => &mut stats.dense_fragments,
                OverlayFragment::RunRange { .. } => &mut stats.run_fragments,
            };
            *counter += 1;
        }
        stats
    }

    /// Checks that no offset is covered twice (by a point and a fragment, or by two
    /// fragments) and that the number of distinct covered offsets equals `len()`.
    pub(crate) fn debug_is_normalized(&self) -> bool {
        let mut seen = std::collections::HashSet::new();
        let point_offsets = self.points.keys().copied();
        let fragment_offsets = self
            .fragments
            .iter()
            .flat_map(|fragment| fragment.cells().into_iter().map(|(off, _)| off));
        for off in point_offsets.chain(fragment_offsets) {
            if !seen.insert(off) {
                return false;
            }
        }
        seen.len() == self.len()
    }

    /// Recomputes the byte estimate from scratch, for comparison against the
    /// incrementally maintained `estimated_bytes`.
    pub(crate) fn debug_recomputed_estimated_bytes(&self) -> usize {
        let mut point_bytes = 0usize;
        for value in self.points.values() {
            point_bytes = point_bytes.saturating_add(Self::point_estimate(value));
        }
        let mut fragment_bytes = 0usize;
        for fragment in &self.fragments {
            fragment_bytes = fragment_bytes.saturating_add(fragment.estimated_bytes());
        }
        point_bytes.saturating_add(fragment_bytes)
    }
}
/// Counters describing which code paths the overlay `select_*` routines took.
///
/// In test builds they are accumulated in a thread-local via
/// `record_overlay_select_stats`; in non-test builds recording is a no-op.
/// Fields without a comment are incremented outside this part of the file.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(test, derive(serde::Serialize))]
pub(crate) struct OverlaySelectStats {
/// Times a mask/values pair was merged with the base array through `zip_select`.
pub(crate) zip_select_calls: usize,
/// Times a full-cover dense fragment answered a select directly.
pub(crate) direct_dense_slices: usize,
/// Times a full-cover run fragment answered a select directly.
pub(crate) direct_run_materializations: usize,
pub(crate) partial_sparse_intersections: usize,
pub(crate) partial_dense_intersections: usize,
pub(crate) partial_run_intersections: usize,
/// Times a select fell through to building per-slot overlay values.
pub(crate) partial_overlay_builds: usize,
pub(crate) row_scalar_fallbacks: usize,
pub(crate) point_entries_applied: usize,
pub(crate) fragment_intersections: usize,
}
// Test-only, per-thread accumulator for `OverlaySelectStats`; each test thread starts
// from the default (all-zero) counters.
#[cfg(test)]
thread_local! {
static OVERLAY_SELECT_STATS: std::cell::RefCell<OverlaySelectStats> =
std::cell::RefCell::new(OverlaySelectStats::default());
}
/// Resets the current thread's select counters to their defaults.
#[cfg(test)]
pub(crate) fn reset_overlay_select_stats() {
    OVERLAY_SELECT_STATS.with(|stats| {
        let mut guard = stats.borrow_mut();
        *guard = OverlaySelectStats::default();
    });
}
/// Returns a copy of the current thread's select counters.
#[cfg(test)]
pub(crate) fn snapshot_overlay_select_stats() -> OverlaySelectStats {
    OVERLAY_SELECT_STATS.with(|stats| {
        let current = stats.borrow();
        *current
    })
}
/// Test build: runs `f` against the current thread's select counters.
#[cfg(test)]
fn record_overlay_select_stats(f: impl FnOnce(&mut OverlaySelectStats)) {
    OVERLAY_SELECT_STATS.with(|stats| {
        let mut guard = stats.borrow_mut();
        f(&mut guard);
    });
}
/// Non-test build: intentionally does nothing, so the closure passed by callers is
/// never invoked and stats recording costs nothing outside tests.
#[cfg(not(test))]
#[inline]
fn record_overlay_select_stats(_f: impl FnOnce(&mut OverlaySelectStats)) {}
/// Data-free tag for a fragment layout. The variants mirror the three
/// `OverlayFragment` layouts (`SparseOffsets`, `DenseRange`, `RunRange`).
// NOTE(review): its users are outside this part of the file — confirm intended use there.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum OverlayFragmentShape {
Sparse,
Dense,
Run,
}
/// Fixed-length scratch buffer used while merging overlay layers.
///
/// `present[i]` records whether slot `i` was written by an overlay; `values[i]` is the
/// written value, where `None` means "overlaid with null".
struct OverlaySlots<T> {
    present: Vec<bool>,
    values: Vec<Option<T>>,
    any_present: bool,
}

impl<T> OverlaySlots<T> {
    /// Creates `len` untouched slots.
    fn new(len: usize) -> Self {
        // `T` is not required to be `Clone`, so the `None` entries are pushed one by one
        // instead of using `vec![None; len]`.
        let mut values = Vec::with_capacity(len);
        for _ in 0..len {
            values.push(None);
        }
        Self {
            present: vec![false; len],
            values,
            any_present: false,
        }
    }

    /// Marks slot `idx` as overlaid with `value`. Out-of-range indices are ignored.
    #[inline]
    fn set(&mut self, idx: usize, value: Option<T>) {
        let (Some(flag), Some(slot)) = (self.present.get_mut(idx), self.values.get_mut(idx))
        else {
            return;
        };
        *flag = true;
        *slot = value;
        self.any_present = true;
    }

    /// `true` once at least one slot has been written through `set`.
    #[inline]
    fn any_present(&self) -> bool {
        self.any_present
    }
}
/// Read-only view over two overlay layers. Scalar lookups try `user` first and fall
/// back to `computed`.
pub(crate) struct OverlayCascade<'a> {
/// Layer consulted first.
user: &'a Overlay,
/// Layer consulted when `user` has no value.
computed: &'a Overlay,
}
impl<'a> OverlayCascade<'a> {
#[inline]
pub(crate) fn new(user: &'a Overlay, computed: &'a Overlay) -> Self {
Self { user, computed }
}
/// Looks up `off` in the user layer, falling back to the computed layer on a miss.
#[inline]
pub(crate) fn get_scalar(&self, off: usize) -> Option<OverlayScalar<'a>> {
    match self.user.get_scalar(off) {
        found @ Some(_) => found,
        None => self.computed.get_scalar(off),
    }
}
/// Reports whether either layer covers an offset inside `range`; the user layer is
/// checked first.
#[inline]
pub(crate) fn has_any_in_range(&self, range: core::ops::Range<usize>) -> bool {
    if self.user.has_any_in_range(range.clone()) {
        return true;
    }
    self.computed.has_any_in_range(range)
}
/// Produces the numeric array for `range` with both overlay layers applied over `base`.
///
/// Resolution order:
/// 1. a user fragment (dense, then run) that fully covers `range` answers directly;
/// 2. if the user layer touches nothing in `range`, a fully covering computed fragment
///    answers directly;
/// 3. if neither layer touches `range`, a clone of `base` is returned;
/// 4. otherwise per-slot values are gathered (computed first, user on top) and merged
///    with `base` through `zip_select`.
///
/// # Panics
/// Panics if `zip_select` fails or returns a non-`Float64Array`.
pub(crate) fn select_numbers(
    &self,
    range: core::ops::Range<usize>,
    base: &Float64Array,
) -> Arc<Float64Array> {
    if let Some(fragment) = self.user.full_cover_dense_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
        return Self::dense_numbers(fragment, range);
    }
    if let Some(fragment) = self.user.full_cover_run_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
        return Self::run_numbers(fragment, range);
    }
    let user_is_silent = !self.user.has_any_in_range(range.clone());
    if user_is_silent {
        if let Some(fragment) = self.computed.full_cover_dense_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
            return Self::dense_numbers(fragment, range);
        }
        if let Some(fragment) = self.computed.full_cover_run_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
            return Self::run_numbers(fragment, range);
        }
    }
    if !self.has_any_in_range(range.clone()) {
        return Arc::new(base.clone());
    }

    record_overlay_select_stats(|stats| stats.partial_overlay_builds += 1);
    let span = range.end.saturating_sub(range.start);
    let mut merged = OverlaySlots::<f64>::new(span);
    // Computed goes first so that user writes land on top of it.
    Self::apply_number_layer(self.computed, range.clone(), &mut merged);
    Self::apply_number_layer(self.user, range, &mut merged);
    if !merged.any_present() {
        return Arc::new(base.clone());
    }

    let mut mask_builder = BooleanBuilder::with_capacity(span);
    let mut value_builder = Float64Builder::with_capacity(span);
    for (overlaid, slot) in merged.present.iter().zip(merged.values.iter()) {
        mask_builder.append_value(*overlaid);
        match slot {
            Some(number) => value_builder.append_value(*number),
            None => value_builder.append_null(),
        }
    }
    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let values = value_builder.finish();
    let zipped =
        crate::compute_prelude::zip_select(&mask, &values, base).expect("zip numeric overlay");
    let typed = zipped
        .as_any()
        .downcast_ref::<Float64Array>()
        .expect("numeric overlay zip type");
    Arc::new(typed.clone())
}
/// Produces the boolean array for `range` with both overlay layers applied over `base`.
///
/// Resolution order:
/// 1. a user fragment (dense, then run) that fully covers `range` answers directly;
/// 2. if the user layer touches nothing in `range`, a fully covering computed fragment
///    answers directly;
/// 3. if neither layer touches `range`, a clone of `base` is returned;
/// 4. otherwise per-slot values are gathered (computed first, user on top) and merged
///    with `base` through `zip_select`.
///
/// # Panics
/// Panics if `zip_select` fails or returns a non-`BooleanArray`.
pub(crate) fn select_booleans(
    &self,
    range: core::ops::Range<usize>,
    base: &BooleanArray,
) -> Arc<BooleanArray> {
    if let Some(fragment) = self.user.full_cover_dense_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
        return Self::dense_booleans(fragment, range);
    }
    if let Some(fragment) = self.user.full_cover_run_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
        return Self::run_booleans(fragment, range);
    }
    let user_is_silent = !self.user.has_any_in_range(range.clone());
    if user_is_silent {
        if let Some(fragment) = self.computed.full_cover_dense_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
            return Self::dense_booleans(fragment, range);
        }
        if let Some(fragment) = self.computed.full_cover_run_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
            return Self::run_booleans(fragment, range);
        }
    }
    if !self.has_any_in_range(range.clone()) {
        return Arc::new(base.clone());
    }

    record_overlay_select_stats(|stats| stats.partial_overlay_builds += 1);
    let span = range.end.saturating_sub(range.start);
    let mut merged = OverlaySlots::<bool>::new(span);
    // Computed goes first so that user writes land on top of it.
    Self::apply_boolean_layer(self.computed, range.clone(), &mut merged);
    Self::apply_boolean_layer(self.user, range, &mut merged);
    if !merged.any_present() {
        return Arc::new(base.clone());
    }

    let mut mask_builder = BooleanBuilder::with_capacity(span);
    let mut value_builder = BooleanBuilder::with_capacity(span);
    for (overlaid, slot) in merged.present.iter().zip(merged.values.iter()) {
        mask_builder.append_value(*overlaid);
        match slot {
            Some(flag) => value_builder.append_value(*flag),
            None => value_builder.append_null(),
        }
    }
    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let values = value_builder.finish();
    let zipped =
        crate::compute_prelude::zip_select(&mask, &values, base).expect("zip boolean overlay");
    let typed = zipped
        .as_any()
        .downcast_ref::<BooleanArray>()
        .expect("boolean overlay zip type");
    Arc::new(typed.clone())
}
/// Produces the text array for `range` with both overlay layers applied over `base`.
///
/// Resolution order:
/// 1. a user fragment (dense, then run) that fully covers `range` answers directly;
/// 2. if the user layer touches nothing in `range`, a fully covering computed fragment
///    answers directly;
/// 3. if neither layer touches `range`, a clone of `base` is returned;
/// 4. otherwise per-slot values are gathered (computed first, user on top) and merged
///    with `base` through `zip_select`, whose result is returned as-is.
///
/// # Panics
/// Panics if `zip_select` fails.
pub(crate) fn select_text(
    &self,
    range: core::ops::Range<usize>,
    base: &StringArray,
) -> ArrayRef {
    if let Some(fragment) = self.user.full_cover_dense_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
        return Self::dense_text(fragment, range);
    }
    if let Some(fragment) = self.user.full_cover_run_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
        return Self::run_text(fragment, range);
    }
    let user_is_silent = !self.user.has_any_in_range(range.clone());
    if user_is_silent {
        if let Some(fragment) = self.computed.full_cover_dense_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
            return Self::dense_text(fragment, range);
        }
        if let Some(fragment) = self.computed.full_cover_run_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
            return Self::run_text(fragment, range);
        }
    }
    if !self.has_any_in_range(range.clone()) {
        return Arc::new(base.clone()) as ArrayRef;
    }

    record_overlay_select_stats(|stats| stats.partial_overlay_builds += 1);
    let span = range.end.saturating_sub(range.start);
    let mut merged = OverlaySlots::<String>::new(span);
    // Computed goes first so that user writes land on top of it.
    Self::apply_text_layer(self.computed, range.clone(), &mut merged);
    Self::apply_text_layer(self.user, range, &mut merged);
    if !merged.any_present() {
        return Arc::new(base.clone()) as ArrayRef;
    }

    let mut mask_builder = BooleanBuilder::with_capacity(span);
    // Eight bytes per slot is only an initial guess for the string data buffer.
    let mut value_builder = StringBuilder::with_capacity(span, span.saturating_mul(8));
    for (overlaid, slot) in merged.present.iter().zip(merged.values.iter()) {
        mask_builder.append_value(*overlaid);
        match slot {
            Some(text) => value_builder.append_value(text),
            None => value_builder.append_null(),
        }
    }
    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let values = value_builder.finish();
    crate::compute_prelude::zip_select(&mask, &values, base).expect("zip text overlay")
}
/// Produces the error-code array for `range` with both overlay layers applied over `base`.
///
/// Resolution order:
/// 1. a user fragment (dense, then run) that fully covers `range` answers directly;
/// 2. if the user layer touches nothing in `range`, a fully covering computed fragment
///    answers directly;
/// 3. if neither layer touches `range`, a clone of `base` is returned;
/// 4. otherwise per-slot values are gathered (computed first, user on top) and merged
///    with `base` through `zip_select`.
///
/// # Panics
/// Panics if `zip_select` fails or returns a non-`UInt8Array`.
pub(crate) fn select_errors(
    &self,
    range: core::ops::Range<usize>,
    base: &UInt8Array,
) -> Arc<UInt8Array> {
    if let Some(fragment) = self.user.full_cover_dense_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
        return Self::dense_errors(fragment, range);
    }
    if let Some(fragment) = self.user.full_cover_run_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
        return Self::run_errors(fragment, range);
    }
    let user_is_silent = !self.user.has_any_in_range(range.clone());
    if user_is_silent {
        if let Some(fragment) = self.computed.full_cover_dense_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
            return Self::dense_errors(fragment, range);
        }
        if let Some(fragment) = self.computed.full_cover_run_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
            return Self::run_errors(fragment, range);
        }
    }
    if !self.has_any_in_range(range.clone()) {
        return Arc::new(base.clone());
    }

    record_overlay_select_stats(|stats| stats.partial_overlay_builds += 1);
    let span = range.end.saturating_sub(range.start);
    let mut merged = OverlaySlots::<u8>::new(span);
    // Computed goes first so that user writes land on top of it.
    Self::apply_error_layer(self.computed, range.clone(), &mut merged);
    Self::apply_error_layer(self.user, range, &mut merged);
    if !merged.any_present() {
        return Arc::new(base.clone());
    }

    let mut mask_builder = BooleanBuilder::with_capacity(span);
    let mut value_builder = UInt8Builder::with_capacity(span);
    for (overlaid, slot) in merged.present.iter().zip(merged.values.iter()) {
        mask_builder.append_value(*overlaid);
        match slot {
            Some(code) => value_builder.append_value(*code),
            None => value_builder.append_null(),
        }
    }
    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let values = value_builder.finish();
    let zipped =
        crate::compute_prelude::zip_select(&mask, &values, base).expect("zip error overlay");
    let typed = zipped
        .as_any()
        .downcast_ref::<UInt8Array>()
        .expect("error overlay zip type");
    Arc::new(typed.clone())
}
/// Builds the type-tag lane for `range`, layering overlay values over `base`.
///
/// Follows the same order as the other `select_*` methods: user full-cover fragment,
/// then (if the user layer is absent from the range) computed full-cover fragment,
/// then `base` unchanged when no overlay touches the range, and finally a per-row
/// slot build (computed layer first, user layer second) passed to `zip_select`.
///
/// # Panics
/// Panics if `zip_select` fails or does not produce a `UInt8Array`.
pub(crate) fn select_type_tags(
    &self,
    range: core::ops::Range<usize>,
    base: &UInt8Array,
) -> Arc<UInt8Array> {
    // User-layer fast paths.
    if let Some(dense) = self.user.full_cover_dense_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
        return Self::dense_type_tags(dense, range);
    }
    if let Some(runs) = self.user.full_cover_run_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
        return Self::run_type_tags(runs, range);
    }

    // Computed-layer fast paths, only when the user layer has nothing in the range.
    let user_touches_range = self.user.has_any_in_range(range.clone());
    if !user_touches_range {
        if let Some(dense) = self.computed.full_cover_dense_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
            return Self::dense_type_tags(dense, range);
        }
        if let Some(runs) = self.computed.full_cover_run_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
            return Self::run_type_tags(runs, range);
        }
    }

    if !self.has_any_in_range(range.clone()) {
        return Arc::new(base.clone());
    }

    // Partial coverage: gather per-row tag overrides.
    record_overlay_select_stats(|stats| stats.partial_overlay_builds += 1);
    let row_count = range.end.saturating_sub(range.start);
    let mut slots = OverlaySlots::<u8>::new(row_count);
    Self::apply_type_tag_layer(self.computed, range.clone(), &mut slots);
    Self::apply_type_tag_layer(self.user, range.clone(), &mut slots);
    if !slots.any_present() {
        return Arc::new(base.clone());
    }

    let mut mask_builder = BooleanBuilder::with_capacity(row_count);
    let mut tag_builder = UInt8Builder::with_capacity(row_count);
    for row in 0..row_count {
        mask_builder.append_value(slots.present[row]);
        if let Some(tag) = slots.values[row] {
            tag_builder.append_value(tag);
        } else {
            tag_builder.append_null();
        }
    }

    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let tags = tag_builder.finish();
    let merged =
        crate::compute_prelude::zip_select(&mask, &tags, base).expect("zip type-tag overlay");
    let typed = merged
        .as_any()
        .downcast_ref::<UInt8Array>()
        .expect("type-tag overlay zip type");
    Arc::new(typed.clone())
}
/// Builds the lowered-text lane for `range`, layering overlay values over `base`.
///
/// Order of attempts: user full-cover fragment; computed full-cover fragment (only if
/// the user layer has nothing in the range); `base` unchanged when no overlay touches
/// the range; the point-scalar path when neither layer holds any fragments; otherwise
/// a per-row slot build (computed first, user second) passed to `zip_select`.
///
/// # Panics
/// Panics if `zip_select` fails or does not produce a `StringArray`.
pub(crate) fn select_lowered_text(
    &self,
    range: core::ops::Range<usize>,
    base: &StringArray,
) -> Arc<StringArray> {
    // User-layer fast paths.
    if let Some(dense) = self.user.full_cover_dense_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
        return Self::dense_lowered_text(dense, range);
    }
    if let Some(runs) = self.user.full_cover_run_fragment(range.clone()) {
        record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
        return Self::run_lowered_text(runs, range);
    }

    // Computed-layer fast paths, only when the user layer has nothing in the range.
    let user_touches_range = self.user.has_any_in_range(range.clone());
    if !user_touches_range {
        if let Some(dense) = self.computed.full_cover_dense_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_dense_slices += 1);
            return Self::dense_lowered_text(dense, range);
        }
        if let Some(runs) = self.computed.full_cover_run_fragment(range.clone()) {
            record_overlay_select_stats(|stats| stats.direct_run_materializations += 1);
            return Self::run_lowered_text(runs, range);
        }
    }

    if !self.has_any_in_range(range.clone()) {
        return Arc::new(base.clone());
    }

    // With no fragments in either layer only point entries remain.
    let no_fragments = self.user.fragments.is_empty() && self.computed.fragments.is_empty();
    if no_fragments {
        return self.select_lowered_text_point_scalar(range, base);
    }

    // Partial coverage: gather per-row lowered strings.
    record_overlay_select_stats(|stats| stats.partial_overlay_builds += 1);
    let row_count = range.end.saturating_sub(range.start);
    let mut slots = OverlaySlots::<String>::new(row_count);
    Self::apply_lowered_text_layer(self.computed, range.clone(), &mut slots);
    Self::apply_lowered_text_layer(self.user, range.clone(), &mut slots);
    if !slots.any_present() {
        return Arc::new(base.clone());
    }

    let mut mask_builder = BooleanBuilder::with_capacity(row_count);
    let mut text_builder = StringBuilder::with_capacity(row_count, row_count.saturating_mul(8));
    for row in 0..row_count {
        mask_builder.append_value(slots.present[row]);
        if let Some(text) = &slots.values[row] {
            text_builder.append_value(text);
        } else {
            text_builder.append_null();
        }
    }

    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let texts = text_builder.finish();
    let merged = crate::compute_prelude::zip_select(&mask, &texts, base)
        .expect("zip lowered text overlay");
    let typed = merged
        .as_any()
        .downcast_ref::<StringArray>()
        .expect("lowered text overlay zip type");
    Arc::new(typed.clone())
}
/// Lowered-text selection that resolves every row of `range` through `get_scalar`.
///
/// A row with an overlay scalar is marked in the mask and contributes its
/// `lowered_text_value()` (or null when that is `None`); a row without one is left
/// unmasked. The mask, the values and `base` are then passed to `zip_select`.
///
/// # Panics
/// Panics if `zip_select` fails or does not produce a `StringArray`.
fn select_lowered_text_point_scalar(
    &self,
    range: core::ops::Range<usize>,
    base: &StringArray,
) -> Arc<StringArray> {
    let row_count = range.end.saturating_sub(range.start);
    let mut mask_builder = BooleanBuilder::with_capacity(row_count);
    let mut text_builder = StringBuilder::with_capacity(row_count, row_count.saturating_mul(8));
    record_overlay_select_stats(|stats| stats.row_scalar_fallbacks += row_count);

    for off in range {
        match self.get_scalar(off) {
            Some(scalar) => {
                mask_builder.append_value(true);
                match scalar.lowered_text_value() {
                    Some(lowered) => text_builder.append_value(&lowered),
                    None => text_builder.append_null(),
                }
                record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
            }
            None => {
                mask_builder.append_value(false);
                text_builder.append_null();
            }
        }
    }

    record_overlay_select_stats(|stats| stats.zip_select_calls += 1);
    let mask = mask_builder.finish();
    let texts = text_builder.finish();
    let merged = crate::compute_prelude::zip_select(&mask, &texts, base)
        .expect("zip lowered text overlay");
    let typed = merged
        .as_any()
        .downcast_ref::<StringArray>()
        .expect("lowered text overlay zip type");
    Arc::new(typed.clone())
}
/// Numeric lane of a dense fragment restricted to `range`.
///
/// `fragment` must be a `DenseRange`; `dense_payload_window` is `unreachable!` otherwise.
fn dense_numbers(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<Float64Array> {
    let (offset, count, lanes) = Self::dense_payload_window(fragment, range);
    Self::payload_numbers_slice(lanes, offset, count)
}
/// Boolean lane of a dense fragment restricted to `range`.
///
/// `fragment` must be a `DenseRange`; `dense_payload_window` is `unreachable!` otherwise.
fn dense_booleans(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<BooleanArray> {
    let (offset, count, lanes) = Self::dense_payload_window(fragment, range);
    Self::payload_booleans_slice(lanes, offset, count)
}
/// Text lane of a dense fragment restricted to `range`.
///
/// `fragment` must be a `DenseRange`; `dense_payload_window` is `unreachable!` otherwise.
fn dense_text(fragment: &OverlayFragment, range: core::ops::Range<usize>) -> ArrayRef {
    let (offset, count, lanes) = Self::dense_payload_window(fragment, range);
    Self::payload_text_slice(lanes, offset, count)
}
/// Error-code lane of a dense fragment restricted to `range`.
///
/// `fragment` must be a `DenseRange`; `dense_payload_window` is `unreachable!` otherwise.
fn dense_errors(fragment: &OverlayFragment, range: core::ops::Range<usize>) -> Arc<UInt8Array> {
    let (offset, count, lanes) = Self::dense_payload_window(fragment, range);
    Self::payload_errors_slice(lanes, offset, count)
}
/// Type-tag lane of a dense fragment restricted to `range`.
///
/// `fragment` must be a `DenseRange`; `dense_payload_window` is `unreachable!` otherwise.
fn dense_type_tags(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<UInt8Array> {
    let (offset, count, lanes) = Self::dense_payload_window(fragment, range);
    Self::payload_type_tags_slice(lanes, offset, count)
}
/// Lowered-text strings of a dense fragment restricted to `range`, built row by row.
///
/// `fragment` must be a `DenseRange`; `dense_payload_window` is `unreachable!` otherwise.
fn dense_lowered_text(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<StringArray> {
    let (offset, count, lanes) = Self::dense_payload_window(fragment, range);
    Self::payload_lowered_text_materialize(lanes, offset, count)
}
/// Translates `range` into a window over a dense fragment's payload.
///
/// Returns `(offset, count, payload)`, where `offset` is `range.start` minus the
/// fragment's start (saturating at 0) and `count` is the length of `range`.
///
/// # Panics
/// Hits `unreachable!` when `fragment` is not a `DenseRange`.
fn dense_payload_window(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> (usize, usize, &OverlayFragmentPayload) {
    match fragment {
        OverlayFragment::DenseRange { start, payload, .. } => {
            let fragment_start = *start as usize;
            let offset = range.start.saturating_sub(fragment_start);
            let count = range.end.saturating_sub(range.start);
            (offset, count, payload)
        }
        _ => unreachable!("dense payload window requires DenseRange"),
    }
}
/// Expands the numeric lane of a run fragment over `range`.
///
/// Each run overlapping the range contributes its number (or null when the payload
/// has none for that run) repeated once per covered row.
fn run_numbers(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<Float64Array> {
    let rows = range.end.saturating_sub(range.start);
    let mut out = Float64Builder::with_capacity(rows);
    Self::for_each_run_payload_index(fragment, range, |lanes, run, count| {
        match lanes.number_at(run) {
            Some(number) => {
                for _ in 0..count {
                    out.append_value(number);
                }
            }
            None => {
                for _ in 0..count {
                    out.append_null();
                }
            }
        }
    });
    Arc::new(out.finish())
}
/// Expands the boolean lane of a run fragment over `range`.
///
/// Each run overlapping the range contributes its boolean (or null when the payload
/// has none for that run) repeated once per covered row.
fn run_booleans(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<BooleanArray> {
    let rows = range.end.saturating_sub(range.start);
    let mut out = BooleanBuilder::with_capacity(rows);
    Self::for_each_run_payload_index(fragment, range, |lanes, run, count| {
        match lanes.boolean_at(run) {
            Some(flag) => {
                for _ in 0..count {
                    out.append_value(flag);
                }
            }
            None => {
                for _ in 0..count {
                    out.append_null();
                }
            }
        }
    });
    Arc::new(out.finish())
}
/// Expands the text lane of a run fragment over `range`.
///
/// Each run overlapping the range contributes its text (or null when the payload
/// has none for that run) repeated once per covered row.
fn run_text(fragment: &OverlayFragment, range: core::ops::Range<usize>) -> ArrayRef {
    let rows = range.end.saturating_sub(range.start);
    let mut out = StringBuilder::with_capacity(rows, rows.saturating_mul(8));
    Self::for_each_run_payload_index(fragment, range, |lanes, run, count| {
        match lanes.text_at(run) {
            Some(text) => {
                for _ in 0..count {
                    out.append_value(text);
                }
            }
            None => {
                for _ in 0..count {
                    out.append_null();
                }
            }
        }
    });
    Arc::new(out.finish()) as ArrayRef
}
/// Expands the error-code lane of a run fragment over `range`.
///
/// Each run overlapping the range contributes its error code (or null when the
/// payload has none for that run) repeated once per covered row.
fn run_errors(fragment: &OverlayFragment, range: core::ops::Range<usize>) -> Arc<UInt8Array> {
    let rows = range.end.saturating_sub(range.start);
    let mut out = UInt8Builder::with_capacity(rows);
    Self::for_each_run_payload_index(fragment, range, |lanes, run, count| {
        match lanes.error_at(run) {
            Some(code) => {
                for _ in 0..count {
                    out.append_value(code);
                }
            }
            None => {
                for _ in 0..count {
                    out.append_null();
                }
            }
        }
    });
    Arc::new(out.finish())
}
/// Expands the type tags of a run fragment over `range`.
///
/// A run whose tag cannot be read is emitted as `TypeTag::Empty`; this lane never
/// contains nulls.
fn run_type_tags(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<UInt8Array> {
    let rows = range.end.saturating_sub(range.start);
    let mut out = UInt8Builder::with_capacity(rows);
    Self::for_each_run_payload_index(fragment, range, |lanes, run, count| {
        let tag_byte = match lanes.type_tag_at(run) {
            Some(tag) => tag as u8,
            None => TypeTag::Empty as u8,
        };
        for _ in 0..count {
            out.append_value(tag_byte);
        }
    });
    Arc::new(out.finish())
}
/// Expands the lowered-text form of a run fragment over `range`.
///
/// The lowered string is computed once per run via `payload_lowered_text_at` and then
/// appended once per covered row; runs without a lowered form yield nulls.
fn run_lowered_text(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
) -> Arc<StringArray> {
    let rows = range.end.saturating_sub(range.start);
    let mut out = StringBuilder::with_capacity(rows, rows.saturating_mul(8));
    Self::for_each_run_payload_index(fragment, range, |lanes, run, count| {
        match Self::payload_lowered_text_at(lanes, run) {
            Some(lowered) => {
                for _ in 0..count {
                    out.append_value(&lowered);
                }
            }
            None => {
                for _ in 0..count {
                    out.append_null();
                }
            }
        }
    });
    Arc::new(out.finish())
}
/// Rows `[start, start + len)` of the payload's numeric lane, or an all-null array of
/// `len` rows when the payload carries no numeric lane.
///
/// # Panics
/// Panics if the sliced lane is not a `Float64Array`.
fn payload_numbers_slice(
    payload: &OverlayFragmentPayload,
    start: usize,
    len: usize,
) -> Arc<Float64Array> {
    let Some(lane) = &payload.numbers else {
        return Self::null_numbers(len);
    };
    let window = lane.slice(start, len);
    let typed = window.as_any().downcast_ref::<Float64Array>().unwrap();
    Arc::new(typed.clone())
}
/// Rows `[start, start + len)` of the payload's boolean lane, or an all-null array of
/// `len` rows when the payload carries no boolean lane.
///
/// # Panics
/// Panics if the sliced lane is not a `BooleanArray`.
fn payload_booleans_slice(
    payload: &OverlayFragmentPayload,
    start: usize,
    len: usize,
) -> Arc<BooleanArray> {
    let Some(lane) = &payload.booleans else {
        return Self::null_booleans(len);
    };
    let window = lane.slice(start, len);
    let typed = window.as_any().downcast_ref::<BooleanArray>().unwrap();
    Arc::new(typed.clone())
}
/// Rows `[start, start + len)` of the payload's text lane, or an all-null `Utf8` array
/// of `len` rows when the payload carries no text lane.
fn payload_text_slice(payload: &OverlayFragmentPayload, start: usize, len: usize) -> ArrayRef {
    match &payload.text {
        Some(lane) => lane.slice(start, len),
        None => new_null_array(&DataType::Utf8, len),
    }
}
/// Rows `[start, start + len)` of the payload's error-code lane, or an all-null array
/// of `len` rows when the payload carries no error lane.
///
/// # Panics
/// Panics if the sliced lane is not a `UInt8Array`.
fn payload_errors_slice(
    payload: &OverlayFragmentPayload,
    start: usize,
    len: usize,
) -> Arc<UInt8Array> {
    let Some(lane) = &payload.errors else {
        return Self::null_errors(len);
    };
    let window = lane.slice(start, len);
    let typed = window.as_any().downcast_ref::<UInt8Array>().unwrap();
    Arc::new(typed.clone())
}
/// Rows `[start, start + len)` of the payload's type-tag lane.
///
/// # Panics
/// Panics if the sliced lane is not a `UInt8Array`.
fn payload_type_tags_slice(
    payload: &OverlayFragmentPayload,
    start: usize,
    len: usize,
) -> Arc<UInt8Array> {
    let window = payload.type_tags.slice(start, len);
    let typed = window.as_any().downcast_ref::<UInt8Array>().unwrap();
    Arc::new(typed.clone())
}
/// Builds a `StringArray` holding the lowered-text form of payload rows
/// `[start, start + len)`; rows without a lowered form become nulls.
fn payload_lowered_text_materialize(
    payload: &OverlayFragmentPayload,
    start: usize,
    len: usize,
) -> Arc<StringArray> {
    let mut out = StringBuilder::with_capacity(len, len.saturating_mul(8));
    let stop = start.saturating_add(len);
    for row in start..stop {
        match Self::payload_lowered_text_at(payload, row) {
            Some(lowered) => out.append_value(&lowered),
            None => out.append_null(),
        }
    }
    Arc::new(out.finish())
}
/// Lowered-text form of the payload entry at `idx`, chosen by its type tag.
///
/// * text entries are lower-cased;
/// * number, date-time and duration entries use the numeric lane's `to_string()`;
/// * booleans become `"true"` / `"false"`;
/// * empty, error and pending entries, and entries with no readable tag, give `None`.
fn payload_lowered_text_at(payload: &OverlayFragmentPayload, idx: usize) -> Option<String> {
    let tag = payload.type_tag_at(idx)?;
    match tag {
        TypeTag::Empty | TypeTag::Error | TypeTag::Pending => None,
        TypeTag::Text => payload.text_at(idx).map(|text| text.to_lowercase()),
        TypeTag::Number | TypeTag::DateTime | TypeTag::Duration => {
            let number = payload.number_at(idx)?;
            Some(number.to_string())
        }
        TypeTag::Boolean => {
            let flag = payload.boolean_at(idx)?;
            let word = if flag { "true" } else { "false" };
            Some(word.to_string())
        }
    }
}
/// All-null `Float64Array` with `len` rows.
fn null_numbers(len: usize) -> Arc<Float64Array> {
    let nulls = new_null_array(&DataType::Float64, len);
    let typed = nulls.as_any().downcast_ref::<Float64Array>().unwrap();
    Arc::new(typed.clone())
}
/// All-null `BooleanArray` with `len` rows.
fn null_booleans(len: usize) -> Arc<BooleanArray> {
    let nulls = new_null_array(&DataType::Boolean, len);
    let typed = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();
    Arc::new(typed.clone())
}
/// All-null `UInt8Array` with `len` rows, used as the empty error-code lane.
fn null_errors(len: usize) -> Arc<UInt8Array> {
    let nulls = new_null_array(&DataType::UInt8, len);
    let typed = nulls.as_any().downcast_ref::<UInt8Array>().unwrap();
    Arc::new(typed.clone())
}
/// Writes the numeric-lane view of `layer` into `slots` for rows inside `range`.
///
/// Fragment values are written first; point entries are written afterwards, so a point
/// at the same row is the last value handed to `slots.set`.
fn apply_number_layer(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<f64>,
) {
    Self::apply_fragment_layer(layer, range.clone(), slots, |lanes, at| lanes.number_at(at));
    for (off, value) in layer.iter_points() {
        if !range.contains(off) {
            continue;
        }
        let slot = *off - range.start;
        slots.set(slot, value.numeric_lane_value());
        record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
    }
}
/// Writes the boolean-lane view of `layer` into `slots` for rows inside `range`.
///
/// Fragment values are written first; point entries are written afterwards, so a point
/// at the same row is the last value handed to `slots.set`.
fn apply_boolean_layer(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<bool>,
) {
    Self::apply_fragment_layer(layer, range.clone(), slots, |lanes, at| lanes.boolean_at(at));
    for (off, value) in layer.iter_points() {
        if !range.contains(off) {
            continue;
        }
        let slot = *off - range.start;
        slots.set(slot, value.boolean_lane_value());
        record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
    }
}
/// Writes the text-lane view of `layer` into `slots` for rows inside `range`.
///
/// Fragment text is copied into owned strings first; point entries are written
/// afterwards, so a point at the same row is the last value handed to `slots.set`.
fn apply_text_layer(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<String>,
) {
    Self::apply_fragment_layer(layer, range.clone(), slots, |lanes, at| {
        lanes.text_at(at).map(ToString::to_string)
    });
    for (off, value) in layer.iter_points() {
        if !range.contains(off) {
            continue;
        }
        let slot = *off - range.start;
        let owned = value.text_lane_value().map(ToString::to_string);
        slots.set(slot, owned);
        record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
    }
}
/// Writes the error-code view of `layer` into `slots` for rows inside `range`.
///
/// Fragment values are written first; point entries are written afterwards, so a point
/// at the same row is the last value handed to `slots.set`.
fn apply_error_layer(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<u8>,
) {
    Self::apply_fragment_layer(layer, range.clone(), slots, |lanes, at| lanes.error_at(at));
    for (off, value) in layer.iter_points() {
        if !range.contains(off) {
            continue;
        }
        let slot = *off - range.start;
        slots.set(slot, value.error_lane_value());
        record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
    }
}
/// Writes the type-tag view of `layer` into `slots` for rows inside `range`.
///
/// Fragment tags are written first; point entries follow and always carry a tag
/// (`Some(type_tag as u8)`).
fn apply_type_tag_layer(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<u8>,
) {
    Self::apply_fragment_layer(layer, range.clone(), slots, |lanes, at| {
        lanes.type_tag_at(at).map(|tag| tag as u8)
    });
    for (off, value) in layer.iter_points() {
        if !range.contains(off) {
            continue;
        }
        let slot = *off - range.start;
        let tag_byte = value.type_tag() as u8;
        slots.set(slot, Some(tag_byte));
        record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
    }
}
/// Writes the lowered-text view of `layer` into `slots` for rows inside `range`.
///
/// Fragment rows go through `payload_lowered_text_at`; point entries are written
/// afterwards using their own `lowered_text_value()`.
fn apply_lowered_text_layer(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<String>,
) {
    Self::apply_fragment_layer(layer, range.clone(), slots, Self::payload_lowered_text_at);
    for (off, value) in layer.iter_points() {
        if !range.contains(off) {
            continue;
        }
        let slot = *off - range.start;
        slots.set(slot, value.lowered_text_value());
        record_overlay_select_stats(|stats| stats.point_entries_applied += 1);
    }
}
/// Visits every fragment of `layer` that intersects `range` and stores, for each
/// covered row, the value `value_at(payload, payload_idx)` into `slots`.
///
/// Fragments are processed in the order they are stored in `layer.fragments`; one
/// intersection statistic is recorded per visited fragment.
fn apply_fragment_layer<T>(
    layer: &Overlay,
    range: core::ops::Range<usize>,
    slots: &mut OverlaySlots<T>,
    mut value_at: impl FnMut(&OverlayFragmentPayload, usize) -> Option<T>,
) {
    for fragment in &layer.fragments {
        if fragment.has_any_in_range(range.clone()) {
            Self::record_fragment_intersection(fragment);
            Self::for_each_fragment_payload_index(
                fragment,
                range.clone(),
                |slot, lanes, lane_idx| {
                    let extracted = value_at(lanes, lane_idx);
                    slots.set(slot, extracted);
                },
            );
        }
    }
}
/// Records one partial-intersection statistic for `fragment`: the overall counter plus
/// the counter matching the fragment's shape (sparse, dense or run).
fn record_fragment_intersection(fragment: &OverlayFragment) {
    record_overlay_select_stats(|stats| {
        stats.fragment_intersections += 1;
        match fragment {
            OverlayFragment::SparseOffsets { .. } => stats.partial_sparse_intersections += 1,
            OverlayFragment::DenseRange { .. } => stats.partial_dense_intersections += 1,
            OverlayFragment::RunRange { .. } => stats.partial_run_intersections += 1,
        }
    });
}
/// Calls `f(out_idx, payload, payload_idx)` for every row of `fragment` that lies
/// inside `range`.
///
/// * `out_idx` is the row position relative to `range.start`.
/// * `payload_idx` is the index to read from the payload lanes: the entry position for
///   `SparseOffsets`, the fragment-relative row for `DenseRange`, and the run index for
///   `RunRange` (the same run index is reported once per covered row).
///
/// An empty `range` produces no calls.
fn for_each_fragment_payload_index(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
    mut f: impl FnMut(usize, &OverlayFragmentPayload, usize),
) {
    if range.is_empty() {
        return;
    }
    match fragment {
        OverlayFragment::SparseOffsets { offsets, payload } => {
            // Both bounds are compared in `usize`. Clamping `range.start` into `u32`
            // would treat a start beyond `u32::MAX` as `u32::MAX` and wrongly admit an
            // offset equal to `u32::MAX` at output index 0.
            // NOTE(review): assumes `offsets` is sorted ascending, which
            // `partition_point` needs to give a meaningful answer.
            let lo = offsets.partition_point(|off| (*off as usize) < range.start);
            let hi = offsets.partition_point(|off| (*off as usize) < range.end);
            for (idx, off) in offsets.iter().enumerate().take(hi).skip(lo) {
                let out_idx = (*off as usize).saturating_sub(range.start);
                f(out_idx, payload, idx);
            }
        }
        OverlayFragment::DenseRange {
            start,
            len,
            payload,
        } => {
            // Intersect the fragment's absolute interval with the requested range.
            let frag_start = *start as usize;
            let frag_end = frag_start.saturating_add(*len as usize);
            let inter_start = frag_start.max(range.start);
            let inter_end = frag_end.min(range.end);
            if inter_start >= inter_end {
                return;
            }
            for abs in inter_start..inter_end {
                f(abs - range.start, payload, abs - frag_start);
            }
        }
        OverlayFragment::RunRange {
            start,
            len,
            run_ends,
            payload,
        } => {
            let frag_start = *start as usize;
            let frag_end = frag_start.saturating_add(*len as usize);
            let inter_start = frag_start.max(range.start);
            let inter_end = frag_end.min(range.end);
            if inter_start >= inter_end {
                return;
            }
            // `run_ends` holds fragment-relative exclusive end positions; each run
            // begins where the previous one ended.
            let mut prev_end = 0usize;
            for (run_idx, run_end) in run_ends.iter().enumerate() {
                let run_start_abs = frag_start.saturating_add(prev_end);
                let run_end_abs = frag_start.saturating_add(*run_end as usize);
                let start_abs = run_start_abs.max(inter_start);
                let end_abs = run_end_abs.min(inter_end);
                if start_abs < end_abs {
                    for abs in start_abs..end_abs {
                        f(abs - range.start, payload, run_idx);
                    }
                }
                prev_end = *run_end as usize;
                // Later runs start at or after the end of the intersection.
                if run_end_abs >= inter_end {
                    break;
                }
            }
        }
    }
}
/// Calls `f(payload, run_idx, repeat)` once for each run of a `RunRange` fragment that
/// overlaps `range`, where `repeat` is the number of rows of that run inside the range.
///
/// # Panics
/// Hits `unreachable!` when `fragment` is not a `RunRange`.
fn for_each_run_payload_index(
    fragment: &OverlayFragment,
    range: core::ops::Range<usize>,
    mut f: impl FnMut(&OverlayFragmentPayload, usize, usize),
) {
    let (start, len, run_ends, payload) = match fragment {
        OverlayFragment::RunRange {
            start,
            len,
            run_ends,
            payload,
        } => (start, len, run_ends, payload),
        _ => unreachable!("run payload iteration requires RunRange"),
    };

    // Absolute window shared by the fragment and the requested range.
    let origin = *start as usize;
    let window_lo = origin.max(range.start);
    let window_hi = origin.saturating_add(*len as usize).min(range.end);
    if window_lo >= window_hi {
        return;
    }

    // Each run starts where the previous one ended; the first starts at the origin.
    let mut run_lo = origin;
    for (run_idx, rel_end) in run_ends.iter().enumerate() {
        let run_hi = origin.saturating_add(*rel_end as usize);
        let clipped_lo = run_lo.max(window_lo);
        let clipped_hi = run_hi.min(window_hi);
        if clipped_lo < clipped_hi {
            f(payload, run_idx, clipped_hi - clipped_lo);
        }
        if run_hi >= window_hi {
            break;
        }
        run_lo = run_hi;
    }
}
}
impl OverlayFragmentPayload {
    /// Type tag stored at `idx`, or `None` when `idx` is out of bounds or the tag
    /// entry is null.
    #[inline]
    fn type_tag_at(&self, idx: usize) -> Option<TypeTag> {
        let tags = &self.type_tags;
        let readable = idx < tags.len() && !tags.is_null(idx);
        if readable {
            Some(TypeTag::from_u8(tags.value(idx)))
        } else {
            None
        }
    }
}
impl Overlay {
    /// The single dense fragment that alone covers all of `range`, if there is one.
    fn full_cover_dense_fragment(
        &self,
        range: core::ops::Range<usize>,
    ) -> Option<&OverlayFragment> {
        self.full_cover_single_fragment(range, OverlayFragmentShape::Dense)
    }

    /// The single run fragment that alone covers all of `range`, if there is one.
    fn full_cover_run_fragment(&self, range: core::ops::Range<usize>) -> Option<&OverlayFragment> {
        self.full_cover_single_fragment(range, OverlayFragmentShape::Run)
    }

    /// Finds the one fragment of the requested `shape` that covers `range` entirely.
    ///
    /// Returns `None` when the range is empty, when any point entry lies inside it,
    /// or when the fragments intersecting the range are anything other than exactly
    /// one fragment of `shape` whose `interval_coverage()` contains the whole range.
    fn full_cover_single_fragment(
        &self,
        range: core::ops::Range<usize>,
        shape: OverlayFragmentShape,
    ) -> Option<&OverlayFragment> {
        if range.is_empty() {
            return None;
        }
        // A point entry inside the range rules out a single-fragment answer.
        if self.points.keys().any(|off| range.contains(off)) {
            return None;
        }

        let mut candidate: Option<&OverlayFragment> = None;
        for fragment in &self.fragments {
            if fragment.has_any_in_range(range.clone()) {
                let right_shape = matches!(
                    (shape, fragment),
                    (
                        OverlayFragmentShape::Dense,
                        OverlayFragment::DenseRange { .. }
                    ) | (OverlayFragmentShape::Run, OverlayFragment::RunRange { .. })
                );
                let spans_range = fragment
                    .interval_coverage()
                    .is_some_and(|own| own.start <= range.start && range.end <= own.end);
                // Reject on a second intersecting fragment, a wrong shape, or
                // incomplete coverage.
                if candidate.is_some() || !right_shape || !spans_range {
                    return None;
                }
                candidate = Some(fragment);
            }
        }
        candidate
    }
}
fn append_overlay_value_to_lane_builders(
ov: &OverlayValue,
tag_b: &mut UInt8Builder,
nb: &mut Float64Builder,
bb: &mut BooleanBuilder,
sb: &mut StringBuilder,
eb: &mut UInt8Builder,
non_num: &mut usize,
non_bool: &mut usize,
non_text: &mut usize,
non_err: &mut usize,
) {
match ov {
OverlayValue::Empty => {
tag_b.append_value(TypeTag::Empty as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_null();
}
OverlayValue::Number(n) => {
tag_b.append_value(TypeTag::Number as u8);
nb.append_value(*n);
*non_num += 1;
bb.append_null();
sb.append_null();
eb.append_null();
}
OverlayValue::DateTime(serial) => {
tag_b.append_value(TypeTag::DateTime as u8);
nb.append_value(*serial);
*non_num += 1;
bb.append_null();
sb.append_null();
eb.append_null();
}
OverlayValue::Duration(serial) => {
tag_b.append_value(TypeTag::Duration as u8);
nb.append_value(*serial);
*non_num += 1;
bb.append_null();
sb.append_null();
eb.append_null();
}
OverlayValue::Boolean(b) => {
tag_b.append_value(TypeTag::Boolean as u8);
nb.append_null();
bb.append_value(*b);
*non_bool += 1;
sb.append_null();
eb.append_null();
}
OverlayValue::Text(s) => {
tag_b.append_value(TypeTag::Text as u8);
nb.append_null();
bb.append_null();
sb.append_value(s);
*non_text += 1;
eb.append_null();
}
OverlayValue::Error(code) => {
tag_b.append_value(TypeTag::Error as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_value(*code);
*non_err += 1;
}
OverlayValue::Pending => {
tag_b.append_value(TypeTag::Pending as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_null();
}
}
}
impl ArrowSheet {
/// Creates a sheet with `ncols` columns that hold no chunks yet, then reserves row
/// capacity for `nrows` rows via `ensure_row_capacity`.
///
/// `chunk_rows` is raised to at least 1.
pub fn new_sparse(sheet_name: &str, ncols: usize, nrows: usize, chunk_rows: usize) -> Self {
    let rows_per_chunk = chunk_rows.max(1);
    let empty_column = |idx: usize| ArrowColumn {
        chunks: Vec::new(),
        sparse_chunks: FxHashMap::default(),
        index: idx as u32,
    };
    let mut sheet = Self {
        name: Arc::from(sheet_name.to_string()),
        columns: (0..ncols).map(empty_column).collect(),
        nrows: 0,
        chunk_starts: Vec::new(),
        chunk_rows: rows_per_chunk,
    };
    sheet.ensure_row_capacity(nrows);
    sheet
}
/// Stores `value` in the overlay of the cell at (`abs_row`, `abs_col`), growing the
/// sheet as needed.
///
/// Missing columns up to `abs_col` are appended empty, row capacity is extended to
/// include `abs_row`, and the target chunk is created on demand. Returns whatever
/// the chunk overlay's `set` returns, or `0` when no chunk could be resolved.
pub fn set_sparse_overlay_value(
    &mut self,
    abs_row: usize,
    abs_col: usize,
    value: OverlayValue,
) -> isize {
    let known_cols = self.columns.len();
    if abs_col >= known_cols {
        let missing = (known_cols..=abs_col).map(|idx| ArrowColumn {
            chunks: Vec::new(),
            sparse_chunks: FxHashMap::default(),
            index: idx as u32,
        });
        self.columns.extend(missing);
    }

    if abs_row >= self.nrows as usize {
        self.ensure_row_capacity(abs_row + 1);
    }

    let Some((ch_idx, in_off)) = self.chunk_of_row(abs_row) else {
        return 0;
    };
    match self.ensure_column_chunk_mut(abs_col, ch_idx) {
        Some(chunk) => chunk.overlay.set(in_off, value),
        None => 0,
    }
}
/// Summarizes every column: its index, number of dense chunks, the sheet row count,
/// and whether any dense chunk's metadata reports non-null numbers, booleans, text
/// or errors.
///
/// Only `chunks` is inspected; `sparse_chunks` is not consulted here.
pub fn shape(&self) -> Vec<ColumnShape> {
    let rows = self.nrows as usize;
    let mut shapes = Vec::with_capacity(self.columns.len());
    for col in self.columns.iter() {
        let mut has_num = false;
        let mut has_bool = false;
        let mut has_text = false;
        let mut has_err = false;
        for ch in col.chunks.iter() {
            has_num |= ch.meta.non_null_num > 0;
            has_bool |= ch.meta.non_null_bool > 0;
            has_text |= ch.meta.non_null_text > 0;
            has_err |= ch.meta.non_null_err > 0;
        }
        shapes.push(ColumnShape {
            index: col.index,
            chunks: col.chunks.len(),
            rows,
            has_num,
            has_bool,
            has_text,
            has_err,
        });
    }
    shapes
}
/// Creates a borrowed `RangeView` over the inclusive rectangle
/// (`sr`, `sc`) ..= (`er`, `ec`).
///
/// When the end precedes the start on either axis the view is given a 0 x 0 size;
/// the corner coordinates are still passed through unchanged.
pub fn range_view(
    &self,
    sr: usize,
    sc: usize,
    er: usize,
    ec: usize,
) -> crate::engine::range_view::RangeView<'_> {
    let row_span = if er >= sr { er - sr + 1 } else { 0 };
    let col_span = if ec >= sc { ec - sc + 1 } else { 0 };
    let degenerate = row_span == 0 || col_span == 0;
    let (rows, cols) = if degenerate {
        (0, 0)
    } else {
        (row_span, col_span)
    };
    crate::engine::range_view::RangeView::new(
        crate::engine::range_view::RangeBacking::Borrowed(self),
        sr,
        sc,
        er,
        ec,
        rows,
        cols,
    )
}
#[inline]
pub fn get_cell_value(&self, abs_row: usize, abs_col: usize) -> LiteralValue {
let sheet_rows = self.nrows as usize;
if abs_row >= sheet_rows {
return LiteralValue::Empty;
}
if abs_col >= self.columns.len() {
return LiteralValue::Empty;
}
let Some((ch_idx, in_off)) = self.chunk_of_row(abs_row) else {
return LiteralValue::Empty;
};
let col_ref = &self.columns[abs_col];
let Some(ch) = col_ref.chunk(ch_idx) else {
return LiteralValue::Empty;
};
let cascade = OverlayCascade::new(&ch.overlay, &ch.computed_overlay);
if let Some(ov) = cascade.get_scalar(in_off) {
return ov.to_literal();
}
let tag_u8 = ch.type_tag.value(in_off);
match TypeTag::from_u8(tag_u8) {
TypeTag::Empty => LiteralValue::Empty,
TypeTag::Number => {
if let Some(arr) = &ch.numbers {
if arr.is_null(in_off) {
return LiteralValue::Empty;
}
LiteralValue::Number(arr.value(in_off))
} else {
LiteralValue::Empty
}
}
TypeTag::DateTime => {
if let Some(arr) = &ch.numbers {
if arr.is_null(in_off) {
return LiteralValue::Empty;
}
LiteralValue::from_serial_number(arr.value(in_off))
} else {
LiteralValue::Empty
}
}
TypeTag::Duration => {
if let Some(arr) = &ch.numbers {
if arr.is_null(in_off) {
return LiteralValue::Empty;
}
let serial = arr.value(in_off);
let nanos_f = serial * 86_400.0 * 1_000_000_000.0;
let nanos = nanos_f.round().clamp(i64::MIN as f64, i64::MAX as f64) as i64;
LiteralValue::Duration(chrono::Duration::nanoseconds(nanos))
} else {
LiteralValue::Empty
}
}
TypeTag::Boolean => {
if let Some(arr) = &ch.booleans {
if arr.is_null(in_off) {
return LiteralValue::Empty;
}
LiteralValue::Boolean(arr.value(in_off))
} else {
LiteralValue::Empty
}
}
TypeTag::Text => {
if let Some(arr) = &ch.text {
if arr.is_null(in_off) {
return LiteralValue::Empty;
}
let sa = arr
.as_any()
.downcast_ref::<arrow_array::StringArray>()
.unwrap();
LiteralValue::Text(sa.value(in_off).to_string())
} else {
LiteralValue::Empty
}
}
TypeTag::Error => {
if let Some(arr) = &ch.errors {
if arr.is_null(in_off) {
return LiteralValue::Empty;
}
let kind = unmap_error_code(arr.value(in_off));
LiteralValue::Error(ExcelError::new(kind))
} else {
LiteralValue::Empty
}
}
TypeTag::Pending => LiteralValue::Pending,
}
}
/// Grows the sheet so that it holds at least `target_rows` rows.
///
/// Does nothing when the sheet already has that many rows. Otherwise new chunk start
/// offsets are appended every `chunk_rows` rows (at least 1) until `target_rows` is
/// covered, `nrows` is updated, and every existing dense and sparse chunk is grown to
/// the length its chunk index now requires.
pub fn ensure_row_capacity(&mut self, target_rows: usize) {
    // NOTE(review): `target_rows` is narrowed with `as u32`, so values above
    // `u32::MAX` wrap; callers are presumably expected to stay below that — confirm.
    if target_rows as u32 <= self.nrows {
        return;
    }
    let chunk_size = self.chunk_rows.max(1);
    if self.chunk_starts.is_empty() {
        self.chunk_starts.push(0);
    }
    let mut next_start = self
        .chunk_starts
        .last()
        .copied()
        .unwrap_or(0)
        .saturating_add(chunk_size);
    while next_start < target_rows {
        self.chunk_starts.push(next_start);
        next_start = next_start.saturating_add(chunk_size);
    }
    self.nrows = target_rows as u32;

    // `chunk_starts` and `columns` are disjoint fields, so the starts can be borrowed
    // while the columns are mutated; no per-call copy of the start table is needed.
    let nrows = self.nrows as usize;
    let starts = &self.chunk_starts;
    let required_len_for = |ch_idx: usize| -> Option<usize> {
        let start = *starts.get(ch_idx)?;
        // The last chunk extends to the end of the sheet.
        let end = starts.get(ch_idx + 1).copied().unwrap_or(nrows);
        Some(end.saturating_sub(start))
    };
    for col in &mut self.columns {
        for (idx, ch) in col.chunks.iter_mut().enumerate() {
            if let Some(req) = required_len_for(idx) {
                ch.grow_len_to(req);
            }
        }
        // Sparse chunks are visited in place instead of collecting their keys first.
        for (idx, ch) in col.sparse_chunks.iter_mut() {
            if let Some(req) = required_len_for(*idx) {
                ch.grow_len_to(req);
            }
        }
    }
}
/// Returns a mutable reference to chunk `ch_idx` of column `col_idx`, creating an
/// empty sparse chunk of the right length when the column has neither a dense chunk
/// at that index nor an existing sparse one.
///
/// Returns `None` when `ch_idx` has no entry in `chunk_starts` or `col_idx` is out of
/// bounds.
pub fn ensure_column_chunk_mut(
    &mut self,
    col_idx: usize,
    ch_idx: usize,
) -> Option<&mut ColumnChunk> {
    let chunk_start = *self.chunk_starts.get(ch_idx)?;
    // The last chunk extends to the end of the sheet.
    let chunk_end = match self.chunk_starts.get(ch_idx + 1) {
        Some(next_start) => *next_start,
        None => self.nrows as usize,
    };
    let chunk_len = chunk_end.saturating_sub(chunk_start);

    let column = self.columns.get_mut(col_idx)?;
    let chunk = if ch_idx < column.chunks.len() {
        &mut column.chunks[ch_idx]
    } else {
        column
            .sparse_chunks
            .entry(ch_idx)
            .or_insert_with(|| Self::make_empty_chunk(chunk_len))
    };
    Some(chunk)
}
/// Maps an absolute row to `(chunk index, offset inside that chunk)`.
///
/// Returns `None` when `abs_row` is outside the sheet. It also returns `None`, rather
/// than panicking, when `chunk_starts` has no entry for the row (for example when it
/// is empty while `nrows > 0`) or when the located chunk starts after `abs_row`.
pub fn chunk_of_row(&self, abs_row: usize) -> Option<(usize, usize)> {
    if abs_row >= self.nrows as usize {
        return None;
    }
    // An exact hit is the chunk itself; otherwise the row belongs to the chunk just
    // before the insertion point (clamped to the first chunk).
    let ch_idx = match self.chunk_starts.binary_search(&abs_row) {
        Ok(i) => i,
        Err(i) => i.saturating_sub(1),
    };
    let start = *self.chunk_starts.get(ch_idx)?;
    let in_off = abs_row.checked_sub(start)?;
    Some((ch_idx, in_off))
}
/// Rebuilds `chunk_starts` from the dense chunks of the first column: each chunk
/// starts where the running total of the preceding chunks' `type_tag` lengths ends.
///
/// Leaves `chunk_starts` empty when the sheet has no columns.
fn recompute_chunk_starts(&mut self) {
    self.chunk_starts.clear();
    let Some(first_column) = self.columns.first() else {
        return;
    };
    let starts = first_column.chunks.iter().scan(0usize, |running, ch| {
        let chunk_start = *running;
        *running += ch.type_tag.len();
        Some(chunk_start)
    });
    self.chunk_starts.extend(starts);
}
fn make_empty_chunk(len: usize) -> ColumnChunk {
ColumnChunk {
numbers: None,
booleans: None,
text: None,
errors: None,
type_tag: Arc::new(UInt8Array::from(vec![TypeTag::Empty as u8; len])),
formula_id: None,
meta: ColumnChunkMeta {
len,
non_null_num: 0,
non_null_bool: 0,
non_null_text: 0,
non_null_err: 0,
},
lazy_null_numbers: OnceCell::new(),
lazy_null_booleans: OnceCell::new(),
lazy_null_text: OnceCell::new(),
lazy_null_errors: OnceCell::new(),
lowered_text: OnceCell::new(),
overlay: Overlay::new(),
computed_overlay: Overlay::new(),
}
}
/// Produces a new chunk holding rows `[off, off + len)` of `ch`.
///
/// Each value lane is sliced and dropped (set to `None`) when the slice contains only
/// nulls. Both overlays are sliced to the same window, the metadata counters are
/// recomputed from the surviving lanes, `formula_id` is reset to `None`, and all lazy
/// caches start unset.
///
/// # Panics
/// Panics if a lane does not downcast to its expected Arrow array type.
fn slice_chunk(ch: &ColumnChunk, off: usize, len: usize) -> ColumnChunk {
    use arrow_array::Array;

    let tag_window = Array::slice(ch.type_tag.as_ref(), off, len);
    let type_tag: Arc<UInt8Array> = Arc::new(
        tag_window
            .as_any()
            .downcast_ref::<UInt8Array>()
            .unwrap()
            .clone(),
    );

    let numbers: Option<Arc<Float64Array>> = match ch.numbers.as_ref() {
        None => None,
        Some(lane) => {
            let window = Array::slice(lane.as_ref(), off, len);
            let typed = window.as_any().downcast_ref::<Float64Array>().unwrap().clone();
            let non_null = len.saturating_sub(typed.null_count());
            (non_null != 0).then(|| Arc::new(typed))
        }
    };
    let booleans: Option<Arc<BooleanArray>> = match ch.booleans.as_ref() {
        None => None,
        Some(lane) => {
            let window = Array::slice(lane.as_ref(), off, len);
            let typed = window.as_any().downcast_ref::<BooleanArray>().unwrap().clone();
            let non_null = len.saturating_sub(typed.null_count());
            (non_null != 0).then(|| Arc::new(typed))
        }
    };
    let text: Option<ArrayRef> = match ch.text.as_ref() {
        None => None,
        Some(lane) => {
            let window = Array::slice(lane.as_ref(), off, len);
            let typed = window.as_any().downcast_ref::<StringArray>().unwrap().clone();
            let non_null = len.saturating_sub(typed.null_count());
            (non_null != 0).then(|| Arc::new(typed) as ArrayRef)
        }
    };
    let errors: Option<Arc<UInt8Array>> = match ch.errors.as_ref() {
        None => None,
        Some(lane) => {
            let window = Array::slice(lane.as_ref(), off, len);
            let typed = window.as_any().downcast_ref::<UInt8Array>().unwrap().clone();
            let non_null = len.saturating_sub(typed.null_count());
            (non_null != 0).then(|| Arc::new(typed))
        }
    };

    let overlay = ch.overlay.slice(off, len);
    let computed_overlay = ch.computed_overlay.slice(off, len);

    // Counters reflect only the lanes that survived the all-null check above.
    let meta = ColumnChunkMeta {
        len,
        non_null_num: numbers.as_ref().map_or(0, |a| len - a.null_count()),
        non_null_bool: booleans.as_ref().map_or(0, |a| len - a.null_count()),
        non_null_text: text.as_ref().map_or(0, |a| len - a.null_count()),
        non_null_err: errors.as_ref().map_or(0, |a| len - a.null_count()),
    };

    ColumnChunk {
        numbers,
        booleans,
        text,
        errors,
        type_tag,
        formula_id: None,
        meta,
        lazy_null_numbers: OnceCell::new(),
        lazy_null_booleans: OnceCell::new(),
        lazy_null_text: OnceCell::new(),
        lazy_null_errors: OnceCell::new(),
        lowered_text: OnceCell::new(),
        overlay,
        computed_overlay,
    }
}
pub fn maybe_compact_chunk(
&mut self,
col_idx: usize,
ch_idx: usize,
abs_threshold: usize,
frac_den: usize,
) -> usize {
if col_idx >= self.columns.len() {
return 0;
}
let (len, tags, numbers, booleans, text, errors, non_num, non_bool, non_text, non_err) = {
let Some(ch_ref) = self.columns[col_idx].chunk(ch_idx) else {
return 0;
};
let len = ch_ref.type_tag.len();
if len == 0 {
return 0;
}
let ov_len = ch_ref.overlay.len();
let den = frac_den.max(1);
let trig = ov_len > (len / den) || ov_len > abs_threshold;
if !trig {
return 0;
}
let mut tag_b = UInt8Builder::with_capacity(len);
let mut nb = Float64Builder::with_capacity(len);
let mut bb = BooleanBuilder::with_capacity(len);
let mut sb = StringBuilder::with_capacity(len, len * 8);
let mut eb = UInt8Builder::with_capacity(len);
let mut non_num = 0usize;
let mut non_bool = 0usize;
let mut non_text = 0usize;
let mut non_err = 0usize;
for i in 0..len {
if let Some(ov) = ch_ref.overlay.get_scalar(i) {
let ov = ov.to_overlay_value();
append_overlay_value_to_lane_builders(
&ov,
&mut tag_b,
&mut nb,
&mut bb,
&mut sb,
&mut eb,
&mut non_num,
&mut non_bool,
&mut non_text,
&mut non_err,
);
} else {
let tag = TypeTag::from_u8(ch_ref.type_tag.value(i));
match tag {
TypeTag::Empty => {
tag_b.append_value(TypeTag::Empty as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_null();
}
TypeTag::Number | TypeTag::DateTime | TypeTag::Duration => {
tag_b.append_value(tag as u8);
if let Some(a) = &ch_ref.numbers {
let fa = a.as_any().downcast_ref::<Float64Array>().unwrap();
if fa.is_null(i) {
nb.append_null();
} else {
nb.append_value(fa.value(i));
non_num += 1;
}
} else {
nb.append_null();
}
bb.append_null();
sb.append_null();
eb.append_null();
}
TypeTag::Boolean => {
tag_b.append_value(TypeTag::Boolean as u8);
nb.append_null();
if let Some(a) = &ch_ref.booleans {
let ba = a.as_any().downcast_ref::<BooleanArray>().unwrap();
if ba.is_null(i) {
bb.append_null();
} else {
bb.append_value(ba.value(i));
non_bool += 1;
}
} else {
bb.append_null();
}
sb.append_null();
eb.append_null();
}
TypeTag::Text => {
tag_b.append_value(TypeTag::Text as u8);
nb.append_null();
bb.append_null();
if let Some(a) = &ch_ref.text {
let sa = a.as_any().downcast_ref::<StringArray>().unwrap();
if sa.is_null(i) {
sb.append_null();
} else {
sb.append_value(sa.value(i));
non_text += 1;
}
} else {
sb.append_null();
}
eb.append_null();
}
TypeTag::Error => {
tag_b.append_value(TypeTag::Error as u8);
nb.append_null();
bb.append_null();
sb.append_null();
if let Some(a) = &ch_ref.errors {
let ea = a.as_any().downcast_ref::<UInt8Array>().unwrap();
if ea.is_null(i) {
eb.append_null();
} else {
eb.append_value(ea.value(i));
non_err += 1;
}
} else {
eb.append_null();
}
}
TypeTag::Pending => {
tag_b.append_value(TypeTag::Pending as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_null();
}
}
}
}
let tags = Arc::new(tag_b.finish());
let numbers = {
let a = nb.finish();
if non_num == 0 {
None
} else {
Some(Arc::new(a))
}
};
let booleans = {
let a = bb.finish();
if non_bool == 0 {
None
} else {
Some(Arc::new(a))
}
};
let text = {
let a = sb.finish();
if non_text == 0 {
None
} else {
Some(Arc::new(a) as ArrayRef)
}
};
let errors = {
let a = eb.finish();
if non_err == 0 {
None
} else {
Some(Arc::new(a))
}
};
(
len, tags, numbers, booleans, text, errors, non_num, non_bool, non_text, non_err,
)
};
let Some(ch_mut) = self.columns[col_idx].chunk_mut(ch_idx) else {
return 0;
};
ch_mut.type_tag = tags;
ch_mut.numbers = numbers;
ch_mut.booleans = booleans;
ch_mut.text = text;
ch_mut.errors = errors;
let freed = ch_mut.overlay.clear();
ch_mut.lowered_text = OnceCell::new();
ch_mut.meta.len = len;
ch_mut.meta.non_null_num = non_num;
ch_mut.meta.non_null_bool = non_bool;
ch_mut.meta.non_null_text = non_text;
ch_mut.meta.non_null_err = non_err;
freed
}
pub fn compact_computed_overlay_chunk(&mut self, col_idx: usize, ch_idx: usize) -> usize {
if col_idx >= self.columns.len() {
return 0;
}
let (len, tags, numbers, booleans, text, errors, non_num, non_bool, non_text, non_err) = {
let Some(ch_ref) = self.columns[col_idx].chunk(ch_idx) else {
return 0;
};
let len = ch_ref.type_tag.len();
if len == 0 || ch_ref.computed_overlay.is_empty() {
return 0;
}
let mut tag_b = UInt8Builder::with_capacity(len);
let mut nb = Float64Builder::with_capacity(len);
let mut bb = BooleanBuilder::with_capacity(len);
let mut sb = StringBuilder::with_capacity(len, len * 8);
let mut eb = UInt8Builder::with_capacity(len);
let mut non_num = 0usize;
let mut non_bool = 0usize;
let mut non_text = 0usize;
let mut non_err = 0usize;
for i in 0..len {
if let Some(ov) = ch_ref.computed_overlay.get_scalar(i) {
let ov = ov.to_overlay_value();
append_overlay_value_to_lane_builders(
&ov,
&mut tag_b,
&mut nb,
&mut bb,
&mut sb,
&mut eb,
&mut non_num,
&mut non_bool,
&mut non_text,
&mut non_err,
);
} else {
let tag = TypeTag::from_u8(ch_ref.type_tag.value(i));
match tag {
TypeTag::Empty => {
tag_b.append_value(TypeTag::Empty as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_null();
}
TypeTag::Number | TypeTag::DateTime | TypeTag::Duration => {
tag_b.append_value(tag as u8);
if let Some(a) = &ch_ref.numbers {
let fa = a.as_any().downcast_ref::<Float64Array>().unwrap();
if fa.is_null(i) {
nb.append_null();
} else {
nb.append_value(fa.value(i));
non_num += 1;
}
} else {
nb.append_null();
}
bb.append_null();
sb.append_null();
eb.append_null();
}
TypeTag::Boolean => {
tag_b.append_value(TypeTag::Boolean as u8);
nb.append_null();
if let Some(a) = &ch_ref.booleans {
let ba = a.as_any().downcast_ref::<BooleanArray>().unwrap();
if ba.is_null(i) {
bb.append_null();
} else {
bb.append_value(ba.value(i));
non_bool += 1;
}
} else {
bb.append_null();
}
sb.append_null();
eb.append_null();
}
TypeTag::Text => {
tag_b.append_value(TypeTag::Text as u8);
nb.append_null();
bb.append_null();
if let Some(a) = &ch_ref.text {
let sa = a.as_any().downcast_ref::<StringArray>().unwrap();
if sa.is_null(i) {
sb.append_null();
} else {
sb.append_value(sa.value(i));
non_text += 1;
}
} else {
sb.append_null();
}
eb.append_null();
}
TypeTag::Error => {
tag_b.append_value(TypeTag::Error as u8);
nb.append_null();
bb.append_null();
sb.append_null();
if let Some(a) = &ch_ref.errors {
let ea = a.as_any().downcast_ref::<UInt8Array>().unwrap();
if ea.is_null(i) {
eb.append_null();
} else {
eb.append_value(ea.value(i));
non_err += 1;
}
} else {
eb.append_null();
}
}
TypeTag::Pending => {
tag_b.append_value(TypeTag::Pending as u8);
nb.append_null();
bb.append_null();
sb.append_null();
eb.append_null();
}
}
}
}
let tags = Arc::new(tag_b.finish());
let numbers = {
let a = nb.finish();
if non_num == 0 {
None
} else {
Some(Arc::new(a))
}
};
let booleans = {
let a = bb.finish();
if non_bool == 0 {
None
} else {
Some(Arc::new(a))
}
};
let text = {
let a = sb.finish();
if non_text == 0 {
None
} else {
Some(Arc::new(a) as ArrayRef)
}
};
let errors = {
let a = eb.finish();
if non_err == 0 {
None
} else {
Some(Arc::new(a))
}
};
(
len, tags, numbers, booleans, text, errors, non_num, non_bool, non_text, non_err,
)
};
let Some(ch_mut) = self.columns[col_idx].chunk_mut(ch_idx) else {
return 0;
};
ch_mut.type_tag = tags;
ch_mut.numbers = numbers;
ch_mut.booleans = booleans;
ch_mut.text = text;
ch_mut.errors = errors;
let freed = ch_mut.computed_overlay.clear();
ch_mut.lowered_text = OnceCell::new();
ch_mut.meta.len = len;
ch_mut.meta.non_null_num = non_num;
ch_mut.meta.non_null_bool = non_bool;
ch_mut.meta.non_null_text = non_text;
ch_mut.meta.non_null_err = non_err;
freed
}
pub fn compact_computed_overlay_sparse_chunk(
&mut self,
col_idx: usize,
ch_idx: usize,
) -> usize {
self.compact_computed_overlay_chunk(col_idx, ch_idx)
}
/// Inserts `count` empty rows before row index `before`.
///
/// `before` is clamped to the current row count, so an out-of-range index appends at
/// the end. A zero `count` is a no-op. On an empty sheet only `nrows` and the first
/// chunk start are set.
///
/// Two layouts are handled:
/// * **dense-aligned** (no column has sparse chunks and every column has exactly one
///   dense chunk per entry of `chunk_starts`): the chunk holding the insertion point is
///   split around a new empty chunk of `count` rows, then chunk starts are recomputed;
/// * **otherwise**: a slice plan is derived from `chunk_starts` and replayed per column.
///   The inserted rows get no materialized chunk, and the resulting chunks are split
///   again into a dense prefix and a sparse map.
pub fn insert_rows(&mut self, before: usize, count: usize) {
    if count == 0 {
        return;
    }
    let total_rows = self.nrows as usize;
    if total_rows == 0 {
        self.nrows = count as u32;
        if self.nrows > 0 && self.chunk_starts.is_empty() {
            self.chunk_starts.push(0);
        }
        return;
    }
    if self.chunk_starts.is_empty() {
        self.chunk_starts.push(0);
    }
    let dense_aligned = self
        .columns
        .iter()
        .all(|c| c.sparse_chunks.is_empty() && c.chunks.len() == self.chunk_starts.len());
    let insert_at = before.min(total_rows);
    // Locate the chunk to split and the offset inside it; appending targets the end of
    // the last chunk.
    let (split_idx, split_off) = if insert_at == total_rows {
        let last_idx = self.chunk_starts.len() - 1;
        let last_start = self.chunk_starts[last_idx];
        let last_len = total_rows.saturating_sub(last_start);
        (last_idx, last_len)
    } else {
        self.chunk_of_row(insert_at).unwrap_or((0, 0))
    };
    if dense_aligned {
        for col in &mut self.columns {
            // Move the old chunks out so untouched ones are reused instead of cloned.
            let old_chunks = std::mem::take(&mut col.chunks);
            let mut new_chunks: Vec<ColumnChunk> = Vec::with_capacity(old_chunks.len() + 2);
            for (i, chunk) in old_chunks.into_iter().enumerate() {
                if i != split_idx {
                    new_chunks.push(chunk);
                    continue;
                }
                let len = chunk.type_tag.len();
                if split_off > 0 {
                    new_chunks.push(Self::slice_chunk(&chunk, 0, split_off));
                }
                new_chunks.push(Self::make_empty_chunk(count));
                if split_off < len {
                    new_chunks.push(Self::slice_chunk(&chunk, split_off, len - split_off));
                }
            }
            col.chunks = new_chunks;
            col.sparse_chunks.clear();
        }
        self.nrows = (total_rows + count) as u32;
        self.recompute_chunk_starts();
        return;
    }
    #[derive(Clone, Copy)]
    enum PlanItem {
        Slice {
            old_idx: usize,
            off: usize,
            len: usize,
        },
        Empty {
            len: usize,
        },
    }
    // Describe the new chunk sequence once; it is the same for every column.
    let mut plan: Vec<PlanItem> = Vec::with_capacity(self.chunk_starts.len() + 2);
    for old_idx in 0..self.chunk_starts.len() {
        let ch_start = self.chunk_starts[old_idx];
        let ch_end = self
            .chunk_starts
            .get(old_idx + 1)
            .copied()
            .unwrap_or(total_rows);
        let ch_len = ch_end.saturating_sub(ch_start);
        if ch_len == 0 {
            continue;
        }
        if old_idx != split_idx {
            plan.push(PlanItem::Slice {
                old_idx,
                off: 0,
                len: ch_len,
            });
            continue;
        }
        let left_len = split_off.min(ch_len);
        let right_len = ch_len.saturating_sub(left_len);
        if left_len > 0 {
            plan.push(PlanItem::Slice {
                old_idx,
                off: 0,
                len: left_len,
            });
        }
        plan.push(PlanItem::Empty { len: count });
        if right_len > 0 {
            plan.push(PlanItem::Slice {
                old_idx,
                off: left_len,
                len: right_len,
            });
        }
    }
    let mut new_starts: Vec<usize> = Vec::with_capacity(plan.len());
    let mut cur = 0usize;
    for item in &plan {
        let len = match *item {
            PlanItem::Slice { len, .. } => len,
            PlanItem::Empty { len } => len,
        };
        if len == 0 {
            continue;
        }
        new_starts.push(cur);
        cur = cur.saturating_add(len);
    }
    debug_assert_eq!(cur, total_rows.saturating_add(count));
    self.nrows = (total_rows + count) as u32;
    self.chunk_starts = new_starts;
    for col in &mut self.columns {
        let old_dense = std::mem::take(&mut col.chunks);
        let old_sparse = std::mem::take(&mut col.sparse_chunks);
        // Old chunks live either in the dense prefix or in the sparse map.
        let get_old = |idx: usize| -> Option<&ColumnChunk> {
            if idx < old_dense.len() {
                Some(&old_dense[idx])
            } else {
                old_sparse.get(&idx)
            }
        };
        let mut dense: Vec<ColumnChunk> = Vec::new();
        let mut sparse: FxHashMap<usize, ColumnChunk> = FxHashMap::default();
        // Chunks go to `dense` only while they form a gap-free prefix.
        let mut dense_prefix = true;
        for (new_idx, item) in plan.iter().enumerate() {
            let produced: Option<ColumnChunk> = match *item {
                PlanItem::Empty { .. } => None,
                PlanItem::Slice { old_idx, off, len } => match get_old(old_idx) {
                    Some(orig) => {
                        if off == 0 && len == orig.type_tag.len() {
                            Some(orig.clone())
                        } else {
                            Some(Self::slice_chunk(orig, off, len))
                        }
                    }
                    None => None,
                },
            };
            if let Some(ch) = produced {
                if dense_prefix && new_idx == dense.len() {
                    dense.push(ch);
                } else {
                    sparse.insert(new_idx, ch);
                    dense_prefix = false;
                }
            } else if dense_prefix && new_idx == dense.len() {
                dense_prefix = false;
            }
        }
        col.chunks = dense;
        col.sparse_chunks = sparse;
    }
}
/// Deletes up to `count` rows starting at row index `start`.
///
/// The deleted range is clamped to the end of the sheet; `start + count` saturates
/// instead of overflowing, so a very large `count` means "delete to the end". Nothing
/// happens when `count` is zero, the sheet is empty, or `start` is past the last row.
///
/// Two layouts are handled:
/// * **dense-aligned** (no column has sparse chunks and every column has exactly one
///   dense chunk per entry of `chunk_starts`): chunks overlapping the range are trimmed
///   to their surviving left/right parts, then chunk starts are recomputed;
/// * **otherwise**: a slice plan is derived from `chunk_starts` and replayed per column,
///   and the resulting chunks are split again into a dense prefix and a sparse map.
pub fn delete_rows(&mut self, start: usize, count: usize) {
    if count == 0 || self.nrows == 0 {
        return;
    }
    let total_rows = self.nrows as usize;
    if start >= total_rows {
        return;
    }
    let end = start.saturating_add(count).min(total_rows);
    let del_len = end.saturating_sub(start);
    if del_len == 0 {
        return;
    }
    if total_rows > 0 && self.chunk_starts.is_empty() {
        self.chunk_starts.push(0);
    }
    let dense_aligned = self
        .columns
        .iter()
        .all(|c| c.sparse_chunks.is_empty() && c.chunks.len() == self.chunk_starts.len());
    if dense_aligned {
        for col in &mut self.columns {
            // Move the old chunks out so untouched ones are reused instead of cloned.
            let old_chunks = std::mem::take(&mut col.chunks);
            let mut new_chunks: Vec<ColumnChunk> = Vec::with_capacity(old_chunks.len() + 1);
            let mut cur_start = 0usize;
            for ch in old_chunks {
                let len = ch.type_tag.len();
                let ch_end = cur_start + len;
                if ch_end <= start || cur_start >= end {
                    new_chunks.push(ch);
                } else {
                    let del_start = start.max(cur_start);
                    let del_end = end.min(ch_end);
                    let left_len = del_start.saturating_sub(cur_start);
                    let right_len = ch_end.saturating_sub(del_end);
                    if left_len > 0 {
                        new_chunks.push(Self::slice_chunk(&ch, 0, left_len));
                    }
                    if right_len > 0 {
                        let off = len - right_len;
                        new_chunks.push(Self::slice_chunk(&ch, off, right_len));
                    }
                }
                cur_start = ch_end;
            }
            col.chunks = new_chunks;
            col.sparse_chunks.clear();
        }
        self.nrows = (total_rows - del_len) as u32;
        self.recompute_chunk_starts();
        return;
    }
    #[derive(Clone, Copy)]
    enum PlanItem {
        Slice {
            old_idx: usize,
            off: usize,
            len: usize,
        },
    }
    // Describe the surviving chunk pieces once; the plan is the same for every column.
    let mut plan: Vec<PlanItem> = Vec::with_capacity(self.chunk_starts.len());
    for old_idx in 0..self.chunk_starts.len() {
        let ch_start = self.chunk_starts[old_idx];
        let ch_end = self
            .chunk_starts
            .get(old_idx + 1)
            .copied()
            .unwrap_or(total_rows);
        let ch_len = ch_end.saturating_sub(ch_start);
        if ch_len == 0 {
            continue;
        }
        if ch_end <= start || ch_start >= end {
            plan.push(PlanItem::Slice {
                old_idx,
                off: 0,
                len: ch_len,
            });
            continue;
        }
        if start > ch_start {
            let left_end = start.min(ch_end);
            let left_len = left_end.saturating_sub(ch_start);
            if left_len > 0 {
                plan.push(PlanItem::Slice {
                    old_idx,
                    off: 0,
                    len: left_len,
                });
            }
        }
        if end < ch_end {
            let right_off = end.saturating_sub(ch_start);
            let right_len = ch_end.saturating_sub(end);
            if right_len > 0 {
                plan.push(PlanItem::Slice {
                    old_idx,
                    off: right_off,
                    len: right_len,
                });
            }
        }
    }
    let mut new_starts: Vec<usize> = Vec::with_capacity(plan.len());
    let mut cur = 0usize;
    for item in &plan {
        let len = match *item {
            PlanItem::Slice { len, .. } => len,
        };
        if len == 0 {
            continue;
        }
        new_starts.push(cur);
        cur = cur.saturating_add(len);
    }
    debug_assert_eq!(cur, total_rows.saturating_sub(del_len));
    self.nrows = (total_rows - del_len) as u32;
    self.chunk_starts = new_starts;
    for col in &mut self.columns {
        let old_dense = std::mem::take(&mut col.chunks);
        let old_sparse = std::mem::take(&mut col.sparse_chunks);
        // Old chunks live either in the dense prefix or in the sparse map.
        let get_old = |idx: usize| -> Option<&ColumnChunk> {
            if idx < old_dense.len() {
                Some(&old_dense[idx])
            } else {
                old_sparse.get(&idx)
            }
        };
        let mut dense: Vec<ColumnChunk> = Vec::new();
        let mut sparse: FxHashMap<usize, ColumnChunk> = FxHashMap::default();
        // Chunks go to `dense` only while they form a gap-free prefix.
        let mut dense_prefix = true;
        for (new_idx, item) in plan.iter().enumerate() {
            let produced: Option<ColumnChunk> = match *item {
                PlanItem::Slice { old_idx, off, len } => match get_old(old_idx) {
                    Some(orig) => {
                        if off == 0 && len == orig.type_tag.len() {
                            Some(orig.clone())
                        } else {
                            Some(Self::slice_chunk(orig, off, len))
                        }
                    }
                    None => None,
                },
            };
            if let Some(ch) = produced {
                if dense_prefix && new_idx == dense.len() {
                    dense.push(ch);
                } else {
                    sparse.insert(new_idx, ch);
                    dense_prefix = false;
                }
            } else if dense_prefix && new_idx == dense.len() {
                dense_prefix = false;
            }
        }
        col.chunks = dense;
        col.sparse_chunks = sparse;
    }
}
pub fn insert_columns(&mut self, before: usize, count: usize) {
if count == 0 {
return;
}
let empty_col = |lens: &[usize]| -> ArrowColumn {
let mut chunks = Vec::with_capacity(lens.len());
for &l in lens {
chunks.push(Self::make_empty_chunk(l));
}
ArrowColumn {
chunks,
sparse_chunks: FxHashMap::default(),
index: 0,
}
};
let dense_aligned = !self.columns.is_empty()
&& self
.columns
.iter()
.all(|c| c.sparse_chunks.is_empty() && c.chunks.len() == self.chunk_starts.len());
let lens: Vec<usize> = if dense_aligned {
self.columns[0]
.chunks
.iter()
.map(|c| c.type_tag.len())
.collect()
} else if self.columns.is_empty() {
if self.nrows > 0 {
vec![self.nrows as usize]
} else {
Vec::new()
}
} else {
Vec::new()
};
let mut cols_new: Vec<ArrowColumn> = Vec::with_capacity(self.columns.len() + count);
let before_idx = before.min(self.columns.len());
for (i, col) in self.columns.iter_mut().enumerate() {
if i == before_idx {
for _ in 0..count {
cols_new.push(empty_col(&lens));
}
}
cols_new.push(col.clone());
}
if before_idx == self.columns.len() {
for _ in 0..count {
cols_new.push(empty_col(&lens));
}
}
for (idx, col) in cols_new.iter_mut().enumerate() {
col.index = idx as u32;
}
self.columns = cols_new;
}
/// Removes up to `count` columns starting at column index `start`.
///
/// The range is clamped to the existing columns; `start + count` saturates instead of
/// overflowing, so a very large `count` means "delete to the end". Nothing happens when
/// `count` is zero, there are no columns, or `start` is past the last column. The
/// remaining columns have their `index` renumbered to their new positions.
pub fn delete_columns(&mut self, start: usize, count: usize) {
    if count == 0 || self.columns.is_empty() {
        return;
    }
    let end = start.saturating_add(count).min(self.columns.len());
    if start >= end {
        return;
    }
    self.columns.drain(start..end);
    for (idx, col) in self.columns.iter_mut().enumerate() {
        col.index = idx as u32;
    }
}
}
/// Plain, copyable summary of one column.
///
/// NOTE(review): no producer or consumer of this type is visible in this part of the
/// file; the field notes below are inferred from names only — confirm against the code
/// that fills them in.
#[derive(Debug, Clone, Copy)]
pub struct ColumnShape {
/// Presumably the column's position in the sheet.
pub index: u32,
/// Presumably the number of chunks the column holds.
pub chunks: usize,
/// Presumably the number of rows the column covers.
pub rows: usize,
/// Presumably whether the column has any numeric values.
pub has_num: bool,
/// Presumably whether the column has any boolean values.
pub has_bool: bool,
/// Presumably whether the column has any text values.
pub has_text: bool,
/// Presumably whether the column has any error values.
pub has_err: bool,
}
#[cfg(test)]
mod tests {
use super::*;
use arrow_array::Array;
use arrow_schema::DataType;
use chrono::Datelike;
fn add_overlay_stats(into: &mut OverlayDebugStats, next: OverlayDebugStats) {
into.points += next.points;
into.sparse_fragments += next.sparse_fragments;
into.dense_fragments += next.dense_fragments;
into.run_fragments += next.run_fragments;
into.covered_len += next.covered_len;
}
/// Sums overlay debug stats over every chunk (dense first, then sparse) of a column.
///
/// `computed` selects the computed overlay; otherwise the user overlay is inspected.
/// An unknown `col_idx` yields default (all-zero) stats.
fn column_overlay_stats(sheet: &ArrowSheet, col_idx: usize, computed: bool) -> OverlayDebugStats {
    let mut totals = OverlayDebugStats::default();
    if let Some(column) = sheet.columns.get(col_idx) {
        for chunk in column.chunks.iter().chain(column.sparse_chunks.values()) {
            let chunk_stats = if computed {
                chunk.computed_overlay.debug_stats()
            } else {
                chunk.overlay.debug_stats()
            };
            add_overlay_stats(&mut totals, chunk_stats);
        }
    }
    totals
}
/// Asserts that both overlays of every chunk in column `col_idx` are normalized and that
/// their tracked byte estimate equals a freshly recomputed one.
///
/// Dense chunks are checked first, then sparse chunks. Panics if `col_idx` is out of range.
fn assert_column_overlays_normalized(sheet: &ArrowSheet, col_idx: usize) {
    let column = &sheet.columns[col_idx];
    let every_chunk = column.chunks.iter().chain(column.sparse_chunks.values());
    for chunk in every_chunk {
        let user = &chunk.overlay;
        let computed = &chunk.computed_overlay;
        assert!(user.debug_is_normalized());
        assert!(computed.debug_is_normalized());
        assert_eq!(
            user.estimated_bytes(),
            user.debug_recomputed_estimated_bytes()
        );
        assert_eq!(
            computed.estimated_bytes(),
            computed.debug_recomputed_estimated_bytes()
        );
    }
}
/// Saturating sum of the computed-overlay byte estimates over all chunks (dense and
/// sparse) of column `col_idx`; `0` when the column does not exist.
fn column_computed_overlay_estimated_bytes(sheet: &ArrowSheet, col_idx: usize) -> usize {
    let Some(column) = sheet.columns.get(col_idx) else {
        return 0;
    };
    let mut total = 0usize;
    for chunk in column.chunks.iter().chain(column.sparse_chunks.values()) {
        total = total.saturating_add(chunk.computed_overlay.estimated_bytes());
    }
    total
}
/// Overlay shapes exercised by the Phase 4 observability probe.
#[derive(Debug, Clone, Copy)]
enum Phase4ProbeFixture {
    PointNumeric,
    DenseNumeric,
    RunNumeric,
    SparseNumeric,
    EmptyRun,
    MixedDense,
}

impl Phase4ProbeFixture {
    /// Stable snake_case label used in the probe's output rows.
    fn name(self) -> &'static str {
        match self {
            Self::PointNumeric => "point_numeric",
            Self::DenseNumeric => "dense_numeric",
            Self::RunNumeric => "run_numeric",
            Self::SparseNumeric => "sparse_numeric",
            Self::EmptyRun => "empty_run",
            Self::MixedDense => "mixed_dense",
        }
    }
}
/// Result of one timed probe pass, as filled in by the `measure_probe_*` helpers.
#[derive(Debug, serde::Serialize)]
struct Phase4ProbeOp {
/// Elapsed wall-clock time of the pass, in milliseconds.
ms: f64,
/// Number of segments visited by the pass.
segments: usize,
/// Number of per-column arrays visited across all segments.
arrays: usize,
/// Sum of the row lengths of the visited segments (or the rows scanned cell by cell).
rows_scanned: usize,
/// Running sum over the values seen; what is summed depends on the measuring helper.
checksum: f64,
/// Number of valid (non-null) entries counted by the pass.
non_null: usize,
}
/// One serialized output row of the Phase 4 probe: the fixture, the computed-overlay
/// statistics of column 0, and the measurements of each access path.
#[derive(Debug, serde::Serialize)]
struct Phase4ProbeRow {
/// Label of the fixture (`Phase4ProbeFixture::name`).
fixture: &'static str,
/// Number of rows the fixture sheet was built with.
rows: usize,
// The next five fields are copied from the column's computed-overlay debug stats.
points: usize,
sparse_fragments: usize,
dense_fragments: usize,
run_fragments: usize,
covered_len: usize,
/// Summed byte estimate of the column's computed overlays.
overlay_estimated_bytes: usize,
/// Measurement of the numbers-slices pass.
numbers: Phase4ProbeOp,
/// Measurement of the type-tags-slices pass.
type_tags: Phase4ProbeOp,
/// Measurement of the lowered-text-slices pass.
lowered_text: Phase4ProbeOp,
/// Measurement of the cell-by-cell `get_cell` scan.
get_cell_scan: Phase4ProbeOp,
/// Overlay select counters snapshotted after the three slice passes and before the
/// `get_cell` scan.
select_stats: OverlaySelectStats,
}
/// Builds a one-column sheet holding the numbers `1..=rows` in a single chunk and loads
/// the chunk's computed overlay according to `fixture`.
///
/// `PointNumeric` writes one scalar per row; every other fixture applies exactly one
/// fragment (dense, run or sparse) to the computed overlay.
fn build_phase4_probe_sheet(rows: usize, fixture: Phase4ProbeFixture) -> ArrowSheet {
    let mut builder =
        IngestBuilder::new("S", 1, rows.max(1), crate::engine::DateSystem::Excel1900);
    for row in 0..rows {
        let cell = LiteralValue::Number((row + 1) as f64);
        builder.append_row(&[cell]).unwrap();
    }
    let mut sheet = builder.finish();
    let chunk = sheet.columns[0].chunk_mut(0).unwrap();
    // Overlay value mirroring the base value of `row`.
    let numbered = |row: usize| OverlayValue::Number((row + 1) as f64);
    let fragment = match fixture {
        Phase4ProbeFixture::PointNumeric => {
            for row in 0..rows {
                chunk.computed_overlay.set_scalar(row, numbered(row));
            }
            None
        }
        Phase4ProbeFixture::DenseNumeric => {
            Some(OverlayFragment::dense_range(0, (0..rows).map(numbered).collect()).unwrap())
        }
        Phase4ProbeFixture::RunNumeric => {
            Some(OverlayFragment::run_range(0, vec![OverlayValue::Number(1.0); rows]).unwrap())
        }
        Phase4ProbeFixture::SparseNumeric => {
            let every_tenth = (0..rows)
                .step_by(10)
                .map(|row| (row, OverlayValue::Number(10.0)))
                .collect();
            Some(OverlayFragment::sparse_offsets(every_tenth).unwrap())
        }
        Phase4ProbeFixture::EmptyRun => {
            Some(OverlayFragment::run_range(0, vec![OverlayValue::Empty; rows]).unwrap())
        }
        Phase4ProbeFixture::MixedDense => {
            let pattern = [
                OverlayValue::Number(1.0),
                OverlayValue::Boolean(true),
                OverlayValue::Text(Arc::from("Alpha")),
                OverlayValue::Empty,
                OverlayValue::Error(map_error_code(ExcelErrorKind::Div)),
                OverlayValue::Pending,
                OverlayValue::DateTime(45000.25),
                OverlayValue::Duration(0.5),
            ];
            // Repeat the pattern until `rows` values have been produced.
            let values = pattern.iter().cycle().take(rows).cloned().collect();
            Some(OverlayFragment::dense_range(0, values).unwrap())
        }
    };
    if let Some(fragment) = fragment {
        chunk.computed_overlay.apply_fragment(fragment);
    }
    sheet
}
/// Times a full pass over the numbers slices of column 0, rows `0..rows`.
///
/// The checksum is the sum of all valid numeric values, in visiting order.
fn measure_probe_numbers(sheet: &ArrowSheet, rows: usize) -> Phase4ProbeOp {
    let view = sheet.range_view(0, 0, rows.saturating_sub(1), 0);
    let started = std::time::Instant::now();
    let (mut segments, mut arrays, mut rows_scanned, mut non_null) =
        (0usize, 0usize, 0usize, 0usize);
    let mut checksum = 0.0;
    for segment in view.numbers_slices() {
        let (_, row_len, cols) = segment.unwrap();
        segments += 1;
        rows_scanned += row_len;
        for lane in cols {
            arrays += 1;
            for idx in (0..lane.len()).filter(|&idx| lane.is_valid(idx)) {
                checksum += lane.value(idx);
                non_null += 1;
            }
        }
    }
    let ms = started.elapsed().as_secs_f64() * 1000.0;
    Phase4ProbeOp {
        ms,
        segments,
        arrays,
        rows_scanned,
        checksum,
        non_null,
    }
}
/// Times a full pass over the type-tag slices of column 0, rows `0..rows`.
///
/// The checksum is the sum of all valid tag values converted to `f64`, in visiting order.
fn measure_probe_type_tags(sheet: &ArrowSheet, rows: usize) -> Phase4ProbeOp {
    let view = sheet.range_view(0, 0, rows.saturating_sub(1), 0);
    let started = std::time::Instant::now();
    let (mut segments, mut arrays, mut rows_scanned, mut non_null) =
        (0usize, 0usize, 0usize, 0usize);
    let mut checksum = 0.0;
    for segment in view.type_tags_slices() {
        let (_, row_len, cols) = segment.unwrap();
        segments += 1;
        rows_scanned += row_len;
        for lane in cols {
            arrays += 1;
            for idx in (0..lane.len()).filter(|&idx| lane.is_valid(idx)) {
                checksum += lane.value(idx) as f64;
                non_null += 1;
            }
        }
    }
    let ms = started.elapsed().as_secs_f64() * 1000.0;
    Phase4ProbeOp {
        ms,
        segments,
        arrays,
        rows_scanned,
        checksum,
        non_null,
    }
}
/// Times a full pass over the lowered-text slices of column 0, rows `0..rows`.
///
/// The checksum is the sum of the byte lengths of all valid strings, in visiting order.
fn measure_probe_lowered_text(sheet: &ArrowSheet, rows: usize) -> Phase4ProbeOp {
    let view = sheet.range_view(0, 0, rows.saturating_sub(1), 0);
    let started = std::time::Instant::now();
    let (mut segments, mut arrays, mut rows_scanned, mut non_null) =
        (0usize, 0usize, 0usize, 0usize);
    let mut checksum = 0.0;
    for segment in view.lowered_text_slices() {
        let (_, row_len, cols) = segment.unwrap();
        segments += 1;
        rows_scanned += row_len;
        for lane in cols {
            arrays += 1;
            for idx in (0..lane.len()).filter(|&idx| lane.is_valid(idx)) {
                checksum += lane.value(idx).len() as f64;
                non_null += 1;
            }
        }
    }
    let ms = started.elapsed().as_secs_f64() * 1000.0;
    Phase4ProbeOp {
        ms,
        segments,
        arrays,
        rows_scanned,
        checksum,
        non_null,
    }
}
/// Maps a cell value to a number so a scan can be folded into a checksum.
///
/// Markers use fixed weights (`Empty` 0, `Error` -1, `Pending` -2); numbers count as
/// themselves, booleans as 0/1, text and arrays by length, and temporal values by their
/// day, timestamp or second counts.
fn literal_probe_weight(value: LiteralValue) -> f64 {
    match value {
        // Fixed-weight markers.
        LiteralValue::Empty => 0.0,
        LiteralValue::Error(_) => -1.0,
        LiteralValue::Pending => -2.0,
        // Scalars.
        LiteralValue::Number(number) => number,
        LiteralValue::Int(int) => int as f64,
        LiteralValue::Boolean(flag) => {
            if flag {
                1.0
            } else {
                0.0
            }
        }
        // Sized values.
        LiteralValue::Text(text) => text.len() as f64,
        LiteralValue::Array(items) => items.len() as f64,
        // Temporal values.
        LiteralValue::Date(date) => date.num_days_from_ce() as f64,
        LiteralValue::DateTime(stamp) => stamp.and_utc().timestamp() as f64,
        LiteralValue::Time(time) => time.num_seconds_from_midnight() as f64,
        LiteralValue::Duration(span) => span.num_seconds() as f64,
    }
}
/// Times a cell-by-cell `get_cell` scan of column 0, rows `0..rows`.
///
/// The scan is reported as a single segment with no arrays; `non_null` is reported as
/// `rows` without inspecting the cells.
fn measure_probe_get_cell(sheet: &ArrowSheet, rows: usize) -> Phase4ProbeOp {
    let view = sheet.range_view(0, 0, rows.saturating_sub(1), 0);
    let started = std::time::Instant::now();
    let checksum = (0..rows).fold(0.0, |acc, row| {
        acc + literal_probe_weight(view.get_cell(row, 0))
    });
    let ms = started.elapsed().as_secs_f64() * 1000.0;
    Phase4ProbeOp {
        ms,
        segments: 1,
        arrays: 0,
        rows_scanned: rows,
        checksum,
        non_null: rows,
    }
}
/// Builds the sheet for `fixture`, checks its overlays are normalized, and runs every
/// probe pass against it.
///
/// Overlay select counters are reset before the three slice passes and snapshotted right
/// after them, so the `get_cell` scan is not included in `select_stats`.
fn run_phase4_probe_fixture(rows: usize, fixture: Phase4ProbeFixture) -> Phase4ProbeRow {
    let sheet = build_phase4_probe_sheet(rows, fixture);
    assert_column_overlays_normalized(&sheet, 0);
    let overlay_stats = column_overlay_stats(&sheet, 0, true);

    reset_overlay_select_stats();
    let numbers = measure_probe_numbers(&sheet, rows);
    let type_tags = measure_probe_type_tags(&sheet, rows);
    let lowered_text = measure_probe_lowered_text(&sheet, rows);
    let select_stats = snapshot_overlay_select_stats();

    let get_cell_scan = measure_probe_get_cell(&sheet, rows);
    let overlay_estimated_bytes = column_computed_overlay_estimated_bytes(&sheet, 0);

    Phase4ProbeRow {
        fixture: fixture.name(),
        rows,
        points: overlay_stats.points,
        sparse_fragments: overlay_stats.sparse_fragments,
        dense_fragments: overlay_stats.dense_fragments,
        run_fragments: overlay_stats.run_fragments,
        covered_len: overlay_stats.covered_len,
        overlay_estimated_bytes,
        numbers,
        type_tags,
        lowered_text,
        get_cell_scan,
        select_stats,
    }
}
/// Manual probe: prints one JSON row per fixture. The row count comes from
/// `FORMUALIZER_OVERLAY_PROBE_ROWS` (default 100 000, minimum 1).
#[test]
#[ignore = "manual Phase 4 observability probe; run with --ignored --nocapture"]
fn phase4_overlay_rangeview_observability_probe() {
    const FIXTURES: [Phase4ProbeFixture; 6] = [
        Phase4ProbeFixture::PointNumeric,
        Phase4ProbeFixture::DenseNumeric,
        Phase4ProbeFixture::RunNumeric,
        Phase4ProbeFixture::SparseNumeric,
        Phase4ProbeFixture::EmptyRun,
        Phase4ProbeFixture::MixedDense,
    ];
    // An unset or unparsable variable falls back to the default row count.
    let requested = match std::env::var("FORMUALIZER_OVERLAY_PROBE_ROWS") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(100_000),
        Err(_) => 100_000,
    };
    let rows = requested.max(1);
    for fixture in FIXTURES {
        let report = run_phase4_probe_fixture(rows, fixture);
        println!("{}", serde_json::to_string(&report).unwrap());
    }
}
/// Ingesting one value of each basic kind yields the matching type tags and four value
/// lanes that each hold exactly one valid entry.
#[test]
fn ingest_mixed_rows_into_lanes_and_tags() {
    let rows = [
        LiteralValue::Number(42.5),
        LiteralValue::Empty,
        LiteralValue::Text(String::new()),
        LiteralValue::Boolean(true),
        LiteralValue::Error(ExcelError::new_value()),
    ];
    let mut builder =
        IngestBuilder::new("Sheet1", 1, 1024, crate::engine::DateSystem::Excel1900);
    for value in &rows {
        builder.append_row(std::slice::from_ref(value)).unwrap();
    }
    let sheet = builder.finish();
    assert_eq!(sheet.nrows, 5);
    assert_eq!(sheet.columns.len(), 1);
    assert_eq!(sheet.columns[0].chunks.len(), 1);

    let chunk = &sheet.columns[0].chunks[0];
    let tags = chunk.type_tag.values();
    assert_eq!(tags.len(), 5);
    let expected_tags = [
        TypeTag::Number,
        TypeTag::Empty,
        TypeTag::Text,
        TypeTag::Boolean,
        TypeTag::Error,
    ];
    for (row, expected) in expected_tags.into_iter().enumerate() {
        assert_eq!(tags[row], expected as u8);
    }

    // Every lane spans all five rows but holds a value only at its own row.
    let check_lane = |lane: &dyn Array, valid_row: usize| {
        assert_eq!(lane.len(), 5);
        assert_eq!(lane.null_count(), 4);
        assert!(lane.is_valid(valid_row));
    };
    check_lane(&**chunk.numbers.as_ref().unwrap(), 0);
    check_lane(&**chunk.booleans.as_ref().unwrap(), 3);
    check_lane(&**chunk.text.as_ref().unwrap(), 2);
    check_lane(&**chunk.errors.as_ref().unwrap(), 4);
}
/// `get_cell` returns stored values inside the sheet and `Empty` outside it, and the
/// numbers slices follow the chunk boundaries (chunk size 2 over 3 rows).
#[test]
fn range_view_get_cell_and_padding() {
    let mut builder = IngestBuilder::new("S", 2, 2, crate::engine::DateSystem::Excel1900);
    let input = [
        [LiteralValue::Number(1.0), LiteralValue::Text("".into())],
        [LiteralValue::Empty, LiteralValue::Text("x".into())],
        [LiteralValue::Boolean(true), LiteralValue::Empty],
    ];
    for row in &input {
        builder.append_row(row).unwrap();
    }
    let sheet = builder.finish();
    let view = sheet.range_view(0, 0, 2, 1);
    assert_eq!(view.dims(), (3, 2));

    let expected_cells = [
        (0, 0, LiteralValue::Number(1.0)),
        (0, 1, LiteralValue::Text(String::new())),
        (1, 0, LiteralValue::Empty),
        (2, 0, LiteralValue::Boolean(true)),
        // Out-of-range row and column read as empty.
        (3, 0, LiteralValue::Empty),
        (0, 2, LiteralValue::Empty),
    ];
    for (row, col, expected) in expected_cells {
        assert_eq!(view.get_cell(row, col), expected);
    }

    let segments: Vec<_> = view.numbers_slices().map(|r| r.unwrap()).collect();
    let spans: Vec<_> = segments.iter().map(|seg| (seg.0, seg.1)).collect();
    assert_eq!(spans, vec![(0, 2), (2, 1)]);
}
/// A computed overlay value overrides the base value, and a user overlay value overrides
/// both — for `get_cell` as well as for the numbers slices.
#[test]
fn overlay_precedence_user_over_computed() {
    let mut builder = IngestBuilder::new("S", 1, 8, crate::engine::DateSystem::Excel1900);
    builder.append_row(&[LiteralValue::Number(1.0)]).unwrap();
    builder.append_row(&[LiteralValue::Empty]).unwrap();
    builder.append_row(&[LiteralValue::Empty]).unwrap();
    let mut sheet = builder.finish();
    let (chunk_idx, offset) = sheet.chunk_of_row(0).unwrap();

    // Reads cell (0, 0) through both access paths.
    let observe = |sheet: &ArrowSheet| {
        let view = sheet.range_view(0, 0, 0, 0);
        let cell = view.get_cell(0, 0);
        let lanes: Vec<_> = view.numbers_slices().map(|r| r.unwrap()).collect();
        assert_eq!(lanes.len(), 1);
        (cell, lanes[0].2[0].value(0))
    };

    sheet.columns[0].chunks[chunk_idx]
        .computed_overlay
        .set(offset, OverlayValue::Number(2.0));
    let (cell, lane_value) = observe(&sheet);
    assert_eq!(cell, LiteralValue::Number(2.0));
    assert_eq!(lane_value, 2.0);

    sheet.columns[0].chunks[chunk_idx]
        .overlay
        .set(offset, OverlayValue::Number(3.0));
    let (cell, lane_value) = observe(&sheet);
    assert_eq!(cell, LiteralValue::Number(3.0));
    assert_eq!(lane_value, 3.0);
}
/// Slicing an overlay rebases offsets to the slice start, keeps explicit `Empty`
/// entries, and drops entries outside the window.
#[test]
fn overlay_slice_preserves_explicit_empty_and_offsets() {
    let mut source = Overlay::new();
    let entries = [
        (2, OverlayValue::Number(2.0)),
        (4, OverlayValue::Empty),
        (6, OverlayValue::Text(Arc::from("outside"))),
    ];
    for (row, value) in entries {
        source.set(row, value);
    }
    let window = source.slice(1, 4);
    let literal_at = |idx| window.get_scalar(idx).map(|v| v.to_literal());
    assert_eq!(literal_at(0), None);
    assert_eq!(literal_at(1), Some(LiteralValue::Number(2.0)));
    assert_eq!(literal_at(3), Some(LiteralValue::Empty));
    assert_eq!(literal_at(5), None);
}
/// An explicit `Empty` in the user layer hides a computed value at the same offset and
/// still counts as an entry for range queries.
#[test]
fn overlay_cascade_user_empty_masks_computed_and_base() {
    let mut computed_layer = Overlay::new();
    computed_layer.set(1, OverlayValue::Number(42.0));
    let mut user_layer = Overlay::new();
    user_layer.set(1, OverlayValue::Empty);

    let cascade = OverlayCascade::new(&user_layer, &computed_layer);
    let seen = cascade.get_scalar(1).map(|v| v.to_literal());
    assert_eq!(seen, Some(LiteralValue::Empty));
    assert!(cascade.has_any_in_range(1..2));
}
/// Scalar set / replace / remove behave like a plain point map and report byte deltas
/// with the expected sign.
#[test]
fn overlay_storage_pointmap_backward_compat_get_set_remove() {
    let mut overlay = Overlay::new();
    assert!(overlay.is_empty());

    // Insert.
    let inserted = overlay.set_scalar(1, OverlayValue::Number(10.0));
    assert!(inserted > 0);
    assert_eq!(overlay.len(), 1);
    assert_eq!(
        overlay.get_scalar(1).map(|v| v.to_literal()),
        Some(LiteralValue::Number(10.0))
    );

    // Replace in place.
    let replaced = overlay.set_scalar(1, OverlayValue::Text(Arc::from("x")));
    assert_ne!(replaced, 0);
    assert_eq!(overlay.len(), 1);
    assert_eq!(
        overlay.get_scalar(1).map(|v| v.to_literal()),
        Some(LiteralValue::Text("x".into()))
    );

    // Remove.
    let removed = overlay.remove_scalar(1);
    assert!(removed < 0);
    assert!(overlay.is_empty());
    assert!(overlay.get_scalar(1).is_none());
}
/// Removing a range from an overlay that mixes a dense fragment with points clears
/// exactly that range, keeps everything outside it, and leaves the overlay normalized
/// with a consistent byte estimate.
#[test]
fn overlay_remove_range_splits_fragments_and_points() {
    let mut overlay = Overlay::new();
    overlay.set_scalar(2, OverlayValue::Number(20.0));
    let dense_values = (0..6)
        .map(|i| OverlayValue::Number(i as f64))
        .collect::<Vec<_>>();
    overlay.apply_fragment(OverlayFragment::dense_range(0, dense_values).unwrap());
    overlay.set_scalar(3, OverlayValue::Number(30.0));
    overlay.set_scalar(8, OverlayValue::Number(80.0));

    let delta = overlay.remove_range(2..5);
    assert!(delta < 0);

    let expected = [
        (0, Some(0.0)),
        (1, Some(1.0)),
        (2, None),
        (3, None),
        (4, None),
        (5, Some(5.0)),
        (8, Some(80.0)),
    ];
    for (idx, want) in expected {
        assert_eq!(
            overlay.get_scalar(idx).map(|v| v.to_literal()),
            want.map(LiteralValue::Number)
        );
    }
    assert!(overlay.debug_is_normalized());
    assert_eq!(
        overlay.estimated_bytes(),
        overlay.debug_recomputed_estimated_bytes()
    );
}
/// With only point entries, range queries and slicing see exactly the set offsets,
/// including an explicit `Empty`.
#[test]
fn overlay_storage_no_fragments_behavior_matches_old_map() {
    let mut overlay = Overlay::new();
    overlay.set_scalar(0, OverlayValue::Number(1.0));
    overlay.set_scalar(3, OverlayValue::Empty);

    let occupancy = [(0..1, true), (1..3, false), (3..4, true)];
    for (range, expected) in occupancy {
        assert_eq!(overlay.has_any_in_range(range), expected);
    }

    let window = overlay.slice(2, 3);
    assert!(window.get_scalar(0).is_none());
    assert_eq!(
        window.get_scalar(1).map(|v| v.to_literal()),
        Some(LiteralValue::Empty)
    );
}
/// A user-layer scalar written before a computed-layer fragment at the same
/// offset is still the value the cascade returns.
#[test]
fn overlay_cascade_user_layer_masks_computed_fragment_regardless_of_sequence() {
    let mut user = Overlay::new();
    let mut computed = Overlay::new();

    // User write happens first, computed fragment second.
    user.set_scalar(0, OverlayValue::Number(3.0));
    let computed_fragment =
        OverlayFragment::dense_range(0, vec![OverlayValue::Number(2.0)]).unwrap();
    computed.apply_fragment(computed_fragment);

    let cascade = OverlayCascade::new(&user, &computed);
    let resolved = cascade.get_scalar(0).unwrap().to_literal();
    assert_eq!(resolved, LiteralValue::Number(3.0));
}
/// A point write applied after a dense fragment replaces only the targeted
/// cell; its neighbours keep the fragment values.
#[test]
fn overlay_same_layer_later_point_replaces_fragment_cell() {
    let mut overlay = Overlay::new();
    let base_values: Vec<_> = [1.0, 2.0, 3.0]
        .iter()
        .map(|&v| OverlayValue::Number(v))
        .collect();
    overlay.apply_fragment(OverlayFragment::dense_range(0, base_values).unwrap());
    overlay.set_scalar(1, OverlayValue::Number(99.0));

    for (offset, expected) in [(0, 1.0), (1, 99.0), (2, 3.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
}
/// A dense fragment applied over three existing points leaves zero points and
/// a single dense fragment, and reads return the fragment values.
#[test]
fn overlay_same_layer_later_fragment_replaces_point_range() {
    let mut overlay = Overlay::new();
    for (offset, value) in [(0, 1.0), (1, 2.0), (2, 3.0)] {
        overlay.set_scalar(offset, OverlayValue::Number(value));
    }

    let replacement: Vec<_> = [10.0, 20.0, 30.0]
        .iter()
        .map(|&v| OverlayValue::Number(v))
        .collect();
    overlay.apply_fragment(OverlayFragment::dense_range(0, replacement).unwrap());

    let stats = overlay.debug_stats();
    assert_eq!(stats.points, 0);
    assert_eq!(stats.dense_fragments, 1);
    assert!(overlay.debug_is_normalized());

    for (offset, expected) in [(0, 10.0), (1, 20.0), (2, 30.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
}
/// A sparse fragment at offsets 0 and 1000 applied around a dense fragment at
/// 100..110 leaves one dense and one sparse fragment, with all values readable.
#[test]
fn overlay_sparse_far_apart_replacement_does_not_rewrite_unrelated_dense_fragment() {
    let mut overlay = Overlay::new();

    let dense = OverlayFragment::dense_range(100, vec![OverlayValue::Number(1.0); 10]).unwrap();
    overlay.apply_fragment(dense);

    let sparse = OverlayFragment::sparse_offsets(vec![
        (0, OverlayValue::Empty),
        (1000, OverlayValue::Number(1000.0)),
    ])
    .unwrap();
    overlay.apply_fragment(sparse);

    let stats = overlay.debug_stats();
    assert_eq!(stats.dense_fragments, 1);
    assert_eq!(stats.sparse_fragments, 1);
    assert_eq!(stats.run_fragments, 0);
    assert!(overlay.debug_is_normalized());

    let expectations = [
        (105, LiteralValue::Number(1.0)),
        (0, LiteralValue::Empty),
        (1000, LiteralValue::Number(1000.0)),
    ];
    for (offset, expected) in expectations {
        assert_eq!(overlay.get_scalar(offset).unwrap().to_literal(), expected);
    }
}
/// Sparse input given out of order with a duplicated offset ends up as one
/// sparse fragment with two entries; the later duplicate (33.0) is kept.
#[test]
fn overlay_sparse_offsets_are_sorted_unique_last_write_wins() {
    let unsorted_with_duplicate = vec![
        (3, OverlayValue::Number(3.0)),
        (1, OverlayValue::Number(1.0)),
        (3, OverlayValue::Number(33.0)),
    ];

    let mut overlay = Overlay::new();
    overlay.apply_fragment(OverlayFragment::sparse_offsets(unsorted_with_duplicate).unwrap());

    let stats = overlay.debug_stats();
    assert_eq!(stats.sparse_fragments, 1);
    assert_eq!(overlay.len(), 2);

    for (offset, expected) in [(1, 1.0), (3, 33.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
    assert!(overlay.debug_is_normalized());
}
/// Overwriting one interior cell of a six-cell dense fragment leaves one point
/// and two dense fragments, with no sparse fragments.
#[test]
fn overlay_dense_point_replacement_splits_dense_not_sparse() {
    let mut overlay = Overlay::new();
    let ramp: Vec<_> = (0..6).map(|i| OverlayValue::Number(i as f64)).collect();
    overlay.apply_fragment(OverlayFragment::dense_range(0, ramp).unwrap());

    overlay.set_scalar(3, OverlayValue::Number(99.0));

    let stats = overlay.debug_stats();
    assert_eq!(stats.points, 1);
    assert_eq!(stats.dense_fragments, 2);
    assert_eq!(stats.sparse_fragments, 0);
    assert!(overlay.debug_is_normalized());

    // The replaced cell and both of its neighbours.
    for (offset, expected) in [(2, 2.0), (3, 99.0), (4, 4.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
}
/// Applying a two-cell dense fragment inside an eight-cell dense fragment
/// yields three dense fragments and no points or sparse fragments.
#[test]
fn overlay_dense_fragment_replacement_splits_left_and_right_dense() {
    let mut overlay = Overlay::new();

    let outer: Vec<_> = (0..8).map(|i| OverlayValue::Number(i as f64)).collect();
    overlay.apply_fragment(OverlayFragment::dense_range(0, outer).unwrap());

    let inner = vec![OverlayValue::Number(30.0), OverlayValue::Number(40.0)];
    overlay.apply_fragment(OverlayFragment::dense_range(3, inner).unwrap());

    let stats = overlay.debug_stats();
    assert_eq!(stats.points, 0);
    assert_eq!(stats.dense_fragments, 3);
    assert_eq!(stats.sparse_fragments, 0);
    assert!(overlay.debug_is_normalized());

    // Boundary cells on both sides of the replaced window plus the window itself.
    for (offset, expected) in [(2, 2.0), (3, 30.0), (4, 40.0), (5, 5.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
}
/// Overwriting one interior cell of a ten-cell run fragment leaves one point
/// and two run fragments, with no sparse fragments.
#[test]
fn overlay_run_point_replacement_splits_run_not_sparse() {
    let mut overlay = Overlay::new();
    let constant_run = OverlayFragment::run_range(0, vec![OverlayValue::Number(1.0); 10]).unwrap();
    overlay.apply_fragment(constant_run);

    overlay.set_scalar(5, OverlayValue::Number(99.0));

    let stats = overlay.debug_stats();
    assert_eq!(stats.points, 1);
    assert_eq!(stats.run_fragments, 2);
    assert_eq!(stats.sparse_fragments, 0);
    assert!(overlay.debug_is_normalized());

    for (offset, expected) in [(4, 1.0), (5, 99.0), (6, 1.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
}
/// A two-cell dense fragment applied inside a twelve-cell run fragment leaves
/// two run fragments and one dense fragment.
#[test]
fn overlay_run_fragment_replacement_splits_left_and_right_run() {
    let mut overlay = Overlay::new();

    // Three runs of four cells each: 1.0 x4, 2.0 x4, 3.0 x4.
    let run_values: Vec<_> = [1.0, 2.0, 3.0]
        .iter()
        .flat_map(|&v| std::iter::repeat(OverlayValue::Number(v)).take(4))
        .collect();
    overlay.apply_fragment(OverlayFragment::run_range(0, run_values).unwrap());

    let patch = vec![OverlayValue::Number(50.0), OverlayValue::Number(60.0)];
    overlay.apply_fragment(OverlayFragment::dense_range(5, patch).unwrap());

    let stats = overlay.debug_stats();
    assert_eq!(stats.run_fragments, 2);
    assert_eq!(stats.dense_fragments, 1);
    assert_eq!(stats.sparse_fragments, 0);
    assert!(overlay.debug_is_normalized());

    for (offset, expected) in [(4, 2.0), (5, 50.0), (6, 60.0), (7, 2.0)] {
        assert_eq!(
            overlay.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
}
/// Slicing inside a dense fragment yields a dense fragment, and slicing inside
/// a run fragment yields a run fragment; neither slice reports sparse fragments.
#[test]
fn overlay_slice_preserves_dense_and_run_encodings() {
    let mut overlay = Overlay::new();

    // Dense fragment covering offsets 10..15 with values 0.0..=4.0.
    let ramp: Vec<_> = (0..5).map(|i| OverlayValue::Number(i as f64)).collect();
    overlay.apply_fragment(OverlayFragment::dense_range(10, ramp).unwrap());

    // Run fragment covering offsets 20..26: 1.0 x3 followed by 2.0 x3.
    let mut run_values = vec![OverlayValue::Number(1.0); 3];
    run_values.extend(vec![OverlayValue::Number(2.0); 3]);
    overlay.apply_fragment(OverlayFragment::run_range(20, run_values).unwrap());

    let dense_slice = overlay.slice(12, 2);
    let dense_stats = dense_slice.debug_stats();
    assert_eq!(dense_stats.dense_fragments, 1);
    assert_eq!(dense_stats.sparse_fragments, 0);
    for (offset, expected) in [(0, 2.0), (1, 3.0)] {
        assert_eq!(
            dense_slice.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
    assert!(dense_slice.debug_is_normalized());

    let run_slice = overlay.slice(22, 3);
    let run_stats = run_slice.debug_stats();
    assert_eq!(run_stats.run_fragments, 1);
    assert_eq!(run_stats.sparse_fragments, 0);
    for (offset, expected) in [(0, 1.0), (1, 2.0), (2, 2.0)] {
        assert_eq!(
            run_slice.get_scalar(offset).unwrap().to_literal(),
            LiteralValue::Number(expected)
        );
    }
    assert!(run_slice.debug_is_normalized());
}
/// A computed-overlay run of `Empty` values over three numeric base rows makes
/// `get_cell_value` return `Empty` for each of those rows.
#[test]
fn overlay_computed_empty_run_masks_non_empty_base() {
    let mut builder = IngestBuilder::new("S", 1, 8, crate::engine::DateSystem::Excel1900);
    for n in [1.0, 2.0, 3.0] {
        builder.append_row(&[LiteralValue::Number(n)]).unwrap();
    }
    let mut sheet = builder.finish();

    let (chunk_idx, _) = sheet.chunk_of_row(0).unwrap();
    let empty_run = OverlayFragment::run_range(0, vec![OverlayValue::Empty; 3]).unwrap();
    sheet.columns[0].chunks[chunk_idx]
        .computed_overlay
        .apply_fragment(empty_run);

    for row in 0..3 {
        assert_eq!(sheet.get_cell_value(row, 0), LiteralValue::Empty);
    }
}
/// Round-trips one value of every `OverlayValue` variant through dense and
/// sparse fragments, and two value kinds through a run fragment, comparing
/// what `get_scalar(..).to_overlay_value()` returns with what was stored.
#[test]
fn overlay_fragments_reconstruct_scalars_from_typed_lanes() {
    let values = vec![
        OverlayValue::Empty,
        OverlayValue::Number(1.5),
        OverlayValue::DateTime(45000.25),
        OverlayValue::Duration(0.5),
        OverlayValue::Boolean(true),
        OverlayValue::Text(Arc::from("Hello")),
        OverlayValue::Error(map_error_code(ExcelErrorKind::Div)),
        OverlayValue::Pending,
    ];

    // Dense: value i lives at offset i.
    let mut dense = Overlay::new();
    dense.apply_fragment(OverlayFragment::dense_range(0, values.clone()).unwrap());
    for (idx, expected) in values.iter().enumerate() {
        let restored = dense.get_scalar(idx).unwrap().to_overlay_value();
        assert_eq!(&restored, expected);
    }

    // Sparse: value i lives at offset 2 * i.
    let mut sparse = Overlay::new();
    let spaced = values
        .iter()
        .enumerate()
        .map(|(idx, value)| (idx * 2, value.clone()))
        .collect();
    sparse.apply_fragment(OverlayFragment::sparse_offsets(spaced).unwrap());
    for (idx, expected) in values.iter().enumerate() {
        let restored = sparse.get_scalar(idx * 2).unwrap().to_overlay_value();
        assert_eq!(&restored, expected);
    }

    // Run: two numbers followed by two texts; the first cell of each pair is probed.
    let mut run = Overlay::new();
    let mut run_values = vec![OverlayValue::Number(7.0); 2];
    run_values.extend(vec![OverlayValue::Text(Arc::from("run")); 2]);
    run.apply_fragment(OverlayFragment::run_range(0, run_values).unwrap());

    let first_number = run.get_scalar(0).unwrap().to_overlay_value();
    assert_eq!(first_number, OverlayValue::Number(7.0));
    let first_text = run.get_scalar(2).unwrap().to_overlay_value();
    assert_eq!(first_text, OverlayValue::Text(Arc::from("run")));
}
/// `iter()` yields both fragment cells and point cells as `(offset, value)`
/// pairs, while `iter_points()` counts only the single point entry.
#[test]
fn overlay_iter_returns_complete_logical_entries() {
    let mut overlay = Overlay::new();
    let fragment_values = vec![OverlayValue::Number(2.0), OverlayValue::Number(3.0)];
    overlay.apply_fragment(OverlayFragment::dense_range(2, fragment_values).unwrap());
    overlay.set_scalar(5, OverlayValue::Text(Arc::from("point")));

    let logical: Vec<_> = overlay.iter().collect();
    let expected = vec![
        (2, OverlayValue::Number(2.0)),
        (3, OverlayValue::Number(3.0)),
        (5, OverlayValue::Text(Arc::from("point"))),
    ];
    assert_eq!(logical, expected);

    let point_count = overlay.iter_points().count();
    assert_eq!(point_count, 1);
}
/// Compares `estimated_bytes()` across encodings: dense is smaller than the
/// same cells stored as points, a single run costs the same for 8 and 4096
/// rows, and a sparse fragment with more entries costs more.
#[test]
fn overlay_fragment_estimates_follow_encoded_shapes() {
    const CELLS: usize = 512;

    // The same 512 numeric cells, once as individual points...
    let mut points = Overlay::new();
    for idx in 0..CELLS {
        points.set_scalar(idx, OverlayValue::Number(idx as f64));
    }
    // ...and once as a single dense fragment.
    let mut dense = Overlay::new();
    let dense_values: Vec<_> = (0..CELLS)
        .map(|idx| OverlayValue::Number(idx as f64))
        .collect();
    dense.apply_fragment(OverlayFragment::dense_range(0, dense_values).unwrap());

    assert_eq!(
        dense.estimated_bytes(),
        dense.debug_recomputed_estimated_bytes()
    );
    assert!(
        dense.estimated_bytes() < points.estimated_bytes(),
        "dense fragment should account like encoded lanes, not point-map entries"
    );

    // Builds an overlay holding one constant run of `len` cells.
    let single_run = |len: usize| {
        let mut overlay = Overlay::new();
        overlay.apply_fragment(
            OverlayFragment::run_range(0, vec![OverlayValue::Number(1.0); len]).unwrap(),
        );
        overlay
    };
    let short_run = single_run(8);
    let long_run = single_run(4096);

    assert_eq!(
        short_run.estimated_bytes(),
        short_run.debug_recomputed_estimated_bytes()
    );
    assert_eq!(
        long_run.estimated_bytes(),
        long_run.debug_recomputed_estimated_bytes()
    );
    assert_eq!(
        short_run.estimated_bytes(),
        long_run.estimated_bytes(),
        "single-run estimate should scale with run count, not covered rows"
    );

    // Builds a sparse fragment with `count` entries placed at every third offset.
    let sparse_with = |count: usize| {
        OverlayFragment::sparse_offsets(
            (0..count)
                .map(|idx| (idx * 3, OverlayValue::Number(idx as f64)))
                .collect(),
        )
        .unwrap()
    };
    let sparse10 = sparse_with(10);
    let sparse20 = sparse_with(20);
    assert!(sparse20.estimated_bytes() > sparse10.estimated_bytes());
}
/// After each mutation (dense apply, point split, sparse apply, clear) the
/// tracked byte estimate equals the recomputed one; clearing frees a positive
/// amount and leaves the overlay empty with a zero estimate.
#[test]
fn overlay_estimated_bytes_stay_consistent_after_split_and_clear() {
    let mut overlay = Overlay::new();

    let ramp: Vec<_> = (0..16)
        .map(|idx| OverlayValue::Number(idx as f64))
        .collect();
    overlay.apply_fragment(OverlayFragment::dense_range(0, ramp).unwrap());
    assert_eq!(
        overlay.estimated_bytes(),
        overlay.debug_recomputed_estimated_bytes()
    );

    // Point write in the middle of the dense fragment.
    overlay.set_scalar(8, OverlayValue::Text(Arc::from("split")));
    assert!(overlay.debug_is_normalized());
    assert_eq!(
        overlay.estimated_bytes(),
        overlay.debug_recomputed_estimated_bytes()
    );

    // Sparse fragment touching both ends of the original range.
    let edges = vec![(0, OverlayValue::Empty), (15, OverlayValue::Boolean(true))];
    overlay.apply_fragment(OverlayFragment::sparse_offsets(edges).unwrap());
    assert!(overlay.debug_is_normalized());
    assert_eq!(
        overlay.estimated_bytes(),
        overlay.debug_recomputed_estimated_bytes()
    );

    let freed_bytes = overlay.clear_all();
    assert!(freed_bytes > 0);
    assert_eq!(overlay.estimated_bytes(), 0);
    assert_eq!(overlay.debug_recomputed_estimated_bytes(), 0);
    assert!(overlay.is_empty());
}
/// `select_numbers` keeps the base number where no overlay entry exists and
/// yields null where the user overlay holds text, empty, error, or pending.
#[test]
fn overlay_segment_numbers_masks_base_for_non_numeric_overlays() {
    let mut user = Overlay::new();
    let non_numeric = [
        (1, OverlayValue::Text(Arc::from("x"))),
        (2, OverlayValue::Empty),
        (3, OverlayValue::Error(map_error_code(ExcelErrorKind::Div))),
        (4, OverlayValue::Pending),
    ];
    for (offset, value) in non_numeric {
        user.set(offset, value);
    }
    let computed = Overlay::new();
    let cascade = OverlayCascade::new(&user, &computed);

    let base = Float64Array::from(vec![10.0, 20.0, 30.0, 40.0, 50.0]);
    let selected = cascade.select_numbers(0..5, &base);

    assert_eq!(selected.value(0), 10.0);
    for masked in 1..5 {
        assert!(selected.is_null(masked));
    }
}
/// `select_type_tags` reports `DateTime` and `Duration` tags for computed
/// overlay entries of those kinds over an all-`Empty` base.
#[test]
fn overlay_segment_type_tags_preserve_temporal_tags() {
    let user = Overlay::new();
    let mut computed = Overlay::new();
    computed.set(0, OverlayValue::DateTime(45000.5));
    computed.set(1, OverlayValue::Duration(0.25));

    let cascade = OverlayCascade::new(&user, &computed);
    let empty_tags = UInt8Array::from(vec![TypeTag::Empty as u8; 2]);
    let selected = cascade.select_type_tags(0..2, &empty_tags);

    let expected_tags = [TypeTag::DateTime as u8, TypeTag::Duration as u8];
    for (offset, expected) in expected_tags.iter().enumerate() {
        assert_eq!(selected.value(offset), *expected);
    }
}
/// `select_lowered_text` lowercases overlay text, renders the number and the
/// boolean as "1.5" and "true", and yields null for an `Empty` overlay entry.
#[test]
fn overlay_lowered_text_matches_existing_overlay_semantics() {
    let mut user = Overlay::new();
    let entries = [
        (0, OverlayValue::Text(Arc::from("HeLLo"))),
        (1, OverlayValue::Number(1.5)),
        (2, OverlayValue::Boolean(true)),
        (3, OverlayValue::Empty),
    ];
    for (offset, value) in entries {
        user.set(offset, value);
    }
    let computed = Overlay::new();
    let cascade = OverlayCascade::new(&user, &computed);

    let base = StringArray::from(vec![Some("A"), Some("B"), Some("C"), Some("D")]);
    let selected = cascade.select_lowered_text(0..4, &base);

    for (offset, expected) in [(0, "hello"), (1, "1.5"), (2, "true")] {
        assert_eq!(selected.value(offset), expected);
    }
    assert!(selected.is_null(3));
}
/// Builds a one-column sheet named "S" whose rows hold the numbers
/// `1.0, 2.0, …, rows as f64`. The third `IngestBuilder::new` argument is
/// `rows.max(1)`.
fn numeric_sheet(rows: usize) -> ArrowSheet {
    let mut builder =
        IngestBuilder::new("S", 1, rows.max(1), crate::engine::DateSystem::Excel1900);
    for value in (1..=rows).map(|n| LiteralValue::Number(n as f64)) {
        builder.append_row(&[value]).unwrap();
    }
    builder.finish()
}
/// Returns the numbers array for rows `sr..=er` of column 0, asserting that
/// the range view produces exactly one segment with exactly one column.
fn numbers_for_range(sheet: &ArrowSheet, sr: usize, er: usize) -> Arc<Float64Array> {
    let segments: Vec<_> = sheet
        .range_view(sr, 0, er, 0)
        .numbers_slices()
        .map(Result::unwrap)
        .collect();
    assert_eq!(segments.len(), 1);
    let (_, _, columns) = &segments[0];
    assert_eq!(columns.len(), 1);
    Arc::clone(&columns[0])
}
/// Returns the type-tag array for rows `sr..=er` of column 0, asserting that
/// the range view produces exactly one segment with exactly one column.
fn type_tags_for_range(sheet: &ArrowSheet, sr: usize, er: usize) -> Arc<UInt8Array> {
    let segments: Vec<_> = sheet
        .range_view(sr, 0, er, 0)
        .type_tags_slices()
        .map(Result::unwrap)
        .collect();
    assert_eq!(segments.len(), 1);
    let (_, _, columns) = &segments[0];
    assert_eq!(columns.len(), 1);
    Arc::clone(&columns[0])
}
/// Returns the lowered-text array for rows `sr..=er` of column 0, asserting
/// that the range view produces exactly one segment with exactly one column.
fn lowered_for_range(sheet: &ArrowSheet, sr: usize, er: usize) -> Arc<StringArray> {
    let segments: Vec<_> = sheet
        .range_view(sr, 0, er, 0)
        .lowered_text_slices()
        .map(Result::unwrap)
        .collect();
    assert_eq!(segments.len(), 1);
    let (_, _, columns) = &segments[0];
    assert_eq!(columns.len(), 1);
    Arc::clone(&columns[0])
}
/// A computed dense fragment of text over four numeric rows makes the numbers
/// lane all-null; the select stats record one direct dense slice and no zip.
#[test]
fn rangeview_dense_text_masks_base_numbers() {
    let mut sheet = numeric_sheet(4);
    let text_cells: Vec<_> = ["x", "y", "z", "w"]
        .iter()
        .map(|label| OverlayValue::Text(Arc::from(*label)))
        .collect();
    let fragment = OverlayFragment::dense_range(0, text_cells).unwrap();
    sheet.columns[0].chunks[0]
        .computed_overlay
        .apply_fragment(fragment);

    // Counters are reset right before the selector runs and read right after.
    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 0, 3);
    assert_eq!(numbers.null_count(), 4);

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 1);
    assert_eq!(stats.zip_select_calls, 0);
}
/// A computed dense fragment of `Empty` over three numeric rows nulls the
/// numbers and lowered-text lanes and sets every type tag to `Empty`; the three
/// selector calls record three direct dense slices and no zip.
#[test]
fn rangeview_empty_dense_masks_base_all_selectors() {
    let mut sheet = numeric_sheet(3);
    let all_empty = OverlayFragment::dense_range(0, vec![OverlayValue::Empty; 3]).unwrap();
    sheet.columns[0].chunks[0]
        .computed_overlay
        .apply_fragment(all_empty);

    reset_overlay_select_stats();
    // Selector order is kept: numbers, then type tags, then lowered text.
    let numbers = numbers_for_range(&sheet, 0, 2);
    let type_tags = type_tags_for_range(&sheet, 0, 2);
    let lowered = lowered_for_range(&sheet, 0, 2);

    assert_eq!(numbers.null_count(), 3);
    assert_eq!(lowered.null_count(), 3);
    assert_eq!(type_tags.values(), &[TypeTag::Empty as u8; 3]);

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 3);
    assert_eq!(stats.zip_select_calls, 0);
}
/// A computed dense fragment of `Pending` over two numeric rows nulls the
/// numbers and lowered-text lanes while every type tag reads `Pending`.
#[test]
fn rangeview_pending_masks_base_type_tag_present_lanes_null() {
    let mut sheet = numeric_sheet(2);
    let all_pending = OverlayFragment::dense_range(0, vec![OverlayValue::Pending; 2]).unwrap();
    sheet.columns[0].chunks[0]
        .computed_overlay
        .apply_fragment(all_pending);

    reset_overlay_select_stats();
    // Selector order is kept: numbers, then type tags, then lowered text.
    let numbers = numbers_for_range(&sheet, 0, 1);
    let type_tags = type_tags_for_range(&sheet, 0, 1);
    let lowered = lowered_for_range(&sheet, 0, 1);

    assert_eq!(numbers.null_count(), 2);
    assert_eq!(lowered.null_count(), 2);
    assert_eq!(type_tags.values(), &[TypeTag::Pending as u8; 2]);

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 3);
    assert_eq!(stats.zip_select_calls, 0);
}
/// Reading rows 2..=6 from inside a ten-row computed dense fragment returns
/// the fragment values and records one direct dense slice with no zip.
#[test]
fn rangeview_subrange_inside_dense_fragment_uses_direct_path() {
    let mut sheet = numeric_sheet(10);
    // Fragment values are 10.0..=19.0, i.e. row index + 10.
    let shifted = (0..10)
        .map(|row| OverlayValue::Number((row + 10) as f64))
        .collect();
    let fragment = OverlayFragment::dense_range(0, shifted).unwrap();
    sheet.columns[0].chunks[0]
        .computed_overlay
        .apply_fragment(fragment);

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 2, 6);
    assert_eq!(numbers.len(), 5);
    for (idx, expected) in [(0, 12.0), (4, 16.0)] {
        assert_eq!(numbers.value(idx), expected);
    }

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 1);
    assert_eq!(stats.zip_select_calls, 0);
}
/// Reading rows 2..=6 from inside a ten-row computed run fragment returns the
/// run value everywhere and records one direct run materialization, no zip.
#[test]
fn rangeview_subrange_inside_run_fragment_uses_direct_path() {
    let mut sheet = numeric_sheet(10);
    let constant_run = OverlayFragment::run_range(0, vec![OverlayValue::Number(7.0); 10]).unwrap();
    sheet.columns[0].chunks[0]
        .computed_overlay
        .apply_fragment(constant_run);

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 2, 6);
    assert_eq!(numbers.len(), 5);
    (0..numbers.len()).for_each(|idx| assert_eq!(numbers.value(idx), 7.0));

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_run_materializations, 1);
    assert_eq!(stats.zip_select_calls, 0);
}
/// A one-cell user text fragment inside a five-row computed numeric fragment
/// nulls only that row; stats show one zip call, two partial dense
/// intersections, and no direct dense slice.
#[test]
fn rangeview_user_partial_wrong_type_masks_computed_numeric() {
    let mut sheet = numeric_sheet(5);
    let chunk = &mut sheet.columns[0].chunks[0];

    let computed_values = (0..5)
        .map(|row| OverlayValue::Number((row + 10) as f64))
        .collect();
    chunk
        .computed_overlay
        .apply_fragment(OverlayFragment::dense_range(0, computed_values).unwrap());

    let user_mask = vec![OverlayValue::Text(Arc::from("mask"))];
    chunk
        .overlay
        .apply_fragment(OverlayFragment::dense_range(2, user_mask).unwrap());

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 0, 4);
    for (idx, expected) in [(0, 10.0), (1, 11.0)] {
        assert_eq!(numbers.value(idx), expected);
    }
    assert!(numbers.is_null(2));
    for (idx, expected) in [(3, 13.0), (4, 14.0)] {
        assert_eq!(numbers.value(idx), expected);
    }

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 0);
    assert_eq!(stats.zip_select_calls, 1);
    assert_eq!(stats.partial_dense_intersections, 2);
}
/// With a computed dense fragment covering rows 0..5 and a user point at
/// offset 10, reading rows 0..=4 returns the computed values and records one
/// direct dense slice with no zip.
#[test]
fn rangeview_computed_full_cover_user_no_overlap_uses_computed_direct() {
    let mut sheet = numeric_sheet(5);
    let chunk = &mut sheet.columns[0].chunks[0];

    let full_cover = OverlayFragment::dense_range(0, vec![OverlayValue::Number(3.0); 5]).unwrap();
    chunk.computed_overlay.apply_fragment(full_cover);
    // User entry at an offset beyond the rows that are read below.
    chunk
        .overlay
        .set_scalar(10, OverlayValue::Text(Arc::from("outside")));

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 0, 4);
    for idx in [0, 4] {
        assert_eq!(numbers.value(idx), 3.0);
    }

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 1);
    assert_eq!(stats.zip_select_calls, 0);
}
/// When a user text fragment covers the same four rows as a computed numeric
/// fragment, the numbers lane is all-null and stats show one direct dense
/// slice with no zip.
#[test]
fn rangeview_user_full_cover_ignores_computed() {
    let mut sheet = numeric_sheet(4);
    let chunk = &mut sheet.columns[0].chunks[0];

    let computed_cover =
        OverlayFragment::dense_range(0, vec![OverlayValue::Number(99.0); 4]).unwrap();
    chunk.computed_overlay.apply_fragment(computed_cover);

    let user_cover =
        OverlayFragment::dense_range(0, vec![OverlayValue::Text(Arc::from("user")); 4]).unwrap();
    chunk.overlay.apply_fragment(user_cover);

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 0, 3);
    assert_eq!(numbers.null_count(), 4);

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 1);
    assert_eq!(stats.zip_select_calls, 0);
}
/// A single computed text point at row 1 nulls only that row in the numbers
/// lane; stats show one zip call, one point entry applied, and no row-scalar
/// fallbacks.
#[test]
fn rangeview_point_overlay_still_matches_legacy_scalar_path() {
    let mut sheet = numeric_sheet(3);
    let text_point = OverlayValue::Text(Arc::from("point"));
    sheet.columns[0].chunks[0]
        .computed_overlay
        .set_scalar(1, text_point);

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 0, 2);
    assert_eq!(numbers.value(0), 1.0);
    assert!(numbers.is_null(1));
    assert_eq!(numbers.value(2), 3.0);

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.zip_select_calls, 1);
    assert_eq!(stats.point_entries_applied, 1);
    assert_eq!(stats.row_scalar_fallbacks, 0);
}
/// Two adjacent computed dense fragments that together cover rows 0..4 return
/// the right values, but stats show one zip call and two partial dense
/// intersections rather than a direct dense slice.
#[test]
fn rangeview_multi_fragment_full_union_does_not_use_direct_path() {
    let mut sheet = numeric_sheet(4);
    let chunk = &mut sheet.columns[0].chunks[0];
    // Two back-to-back fragments: rows 0..2 hold 10.0, rows 2..4 hold 20.0.
    for (start, value) in [(0, 10.0), (2, 20.0)] {
        let fragment =
            OverlayFragment::dense_range(start, vec![OverlayValue::Number(value); 2]).unwrap();
        chunk.computed_overlay.apply_fragment(fragment);
    }

    reset_overlay_select_stats();
    let numbers = numbers_for_range(&sheet, 0, 3);
    for (idx, expected) in [(0, 10.0), (1, 10.0), (2, 20.0), (3, 20.0)] {
        assert_eq!(numbers.value(idx), expected);
    }

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 0);
    assert_eq!(stats.zip_select_calls, 1);
    assert_eq!(stats.partial_dense_intersections, 2);
}
/// Checks the lowered-text lane for a computed dense fragment holding one
/// value of each kind: text is lowercased, number/date-time/duration/boolean
/// are rendered as strings, and empty/error/pending are null.
#[test]
fn rangeview_lowered_text_fragment_semantics_match_scalar_semantics() {
    let mut sheet = numeric_sheet(8);
    let mixed = vec![
        OverlayValue::Text(Arc::from("HeLLo")),
        OverlayValue::Number(1.5),
        OverlayValue::DateTime(45000.25),
        OverlayValue::Duration(0.5),
        OverlayValue::Boolean(true),
        OverlayValue::Empty,
        OverlayValue::Error(map_error_code(ExcelErrorKind::Div)),
        OverlayValue::Pending,
    ];
    let fragment = OverlayFragment::dense_range(0, mixed).unwrap();
    sheet.columns[0].chunks[0]
        .computed_overlay
        .apply_fragment(fragment);

    reset_overlay_select_stats();
    let lowered = lowered_for_range(&sheet, 0, 7);

    let rendered = [
        (0, "hello"),
        (1, "1.5"),
        (2, "45000.25"),
        (3, "0.5"),
        (4, "true"),
    ];
    for (idx, expected) in rendered {
        assert_eq!(lowered.value(idx), expected);
    }
    // Rows 5..8 hold Empty, Error, and Pending.
    for idx in 5..8 {
        assert!(lowered.is_null(idx));
    }

    let stats = snapshot_overlay_select_stats();
    assert_eq!(stats.direct_dense_slices, 1);
    assert_eq!(stats.zip_select_calls, 0);
}
/// Three rows ingested with `IngestBuilder::new("S", 2, 2, ..)` come back from
/// `iter_row_chunks` as two slices: rows 0..2 and row 2, each with two columns.
#[test]
fn row_chunk_slices_shape() {
    let mut builder = IngestBuilder::new("S", 2, 2, crate::engine::DateSystem::Excel1900);
    for (label, number) in [("a", 1.0), ("b", 2.0), ("c", 3.0)] {
        builder
            .append_row(&[
                LiteralValue::Text(label.into()),
                LiteralValue::Number(number),
            ])
            .unwrap();
    }
    let sheet = builder.finish();

    let view = sheet.range_view(0, 0, 2, 1);
    let slices: Vec<_> = view.iter_row_chunks().map(|r| r.unwrap()).collect();

    // (row_start, row_len, column count) for each slice, in order.
    let shapes: Vec<_> = slices
        .iter()
        .map(|slice| (slice.row_start, slice.row_len, slice.cols.len()))
        .collect();
    assert_eq!(shapes, vec![(0, 2, 2), (2, 1, 2)]);
}
/// A range view asking for three columns of a two-column sheet reports
/// dimensions (2, 3), and every per-lane slice iterator yields three columns.
#[test]
fn oob_columns_are_padded() {
    let mut builder = IngestBuilder::new("S", 2, 2, crate::engine::DateSystem::Excel1900);
    for (number, label) in [(1.0, "a"), (2.0, "b")] {
        builder
            .append_row(&[
                LiteralValue::Number(number),
                LiteralValue::Text(label.into()),
            ])
            .unwrap();
    }
    let sheet = builder.finish();

    // Column index 2 is requested although only columns 0 and 1 were ingested.
    let rv = sheet.range_view(0, 0, 1, 2);
    assert_eq!(rv.dims(), (2, 3));

    let row_chunks: Vec<_> = rv.iter_row_chunks().map(|r| r.unwrap()).collect();
    assert!(!row_chunks.is_empty());
    for chunk in row_chunks.iter() {
        assert_eq!(chunk.cols.len(), 3);
    }

    for (_, _, cols) in rv.numbers_slices().map(|res| res.unwrap()) {
        assert_eq!(cols.len(), 3);
    }
    for (_, _, cols) in rv.booleans_slices().map(|res| res.unwrap()) {
        assert_eq!(cols.len(), 3);
    }
    for (_, _, cols) in rv.text_slices().map(|res| res.unwrap()) {
        assert_eq!(cols.len(), 3);
    }
    for (_, _, cols) in rv.errors_slices().map(|res| res.unwrap()) {
        assert_eq!(cols.len(), 3);
    }
    for (_, _, cols) in rv.lowered_text_slices().map(|res| res.unwrap()) {
        assert_eq!(cols.len(), 3);
    }
}
/// `range_view(3, 0, 1, 0)` has dimensions (0, 0), yields no row chunks, and
/// returns `Empty` from `get_cell(0, 0)`.
#[test]
fn reversed_range_is_empty() {
    let mut builder = IngestBuilder::new("S", 1, 4, crate::engine::DateSystem::Excel1900);
    for number in [1.0, 2.0] {
        builder.append_row(&[LiteralValue::Number(number)]).unwrap();
    }
    let sheet = builder.finish();

    // First argument (3) is larger than the third (1).
    let reversed = sheet.range_view(3, 0, 1, 0);
    assert_eq!(reversed.dims(), (0, 0));
    assert!(reversed.iter_row_chunks().next().is_none());
    assert_eq!(reversed.get_cell(0, 0), LiteralValue::Empty);
}
/// Five rows ingested with `IngestBuilder::new("S", 3, 2, ..)` give chunk
/// starts `[0, 2, 4]`, and all three columns have identical per-chunk
/// type-tag lengths.
#[test]
fn chunk_alignment_invariant() {
    let mut builder = IngestBuilder::new("S", 3, 2, crate::engine::DateSystem::Excel1900);
    for r in 0..5 {
        // Third column alternates: Empty on even rows, Boolean(true) on odd rows.
        let flag = match r % 2 {
            0 => LiteralValue::Empty,
            _ => LiteralValue::Boolean(true),
        };
        builder
            .append_row(&[
                LiteralValue::Number(r as f64),
                LiteralValue::Text(format!("{r}")),
                flag,
            ])
            .unwrap();
    }
    let sheet = builder.finish();
    assert_eq!(sheet.chunk_starts, vec![0, 2, 4]);

    let per_column_lens: Vec<Vec<usize>> = sheet
        .columns
        .iter()
        .map(|col| col.chunks.iter().map(|ch| ch.type_tag.len()).collect())
        .collect();
    let (reference, others) = per_column_lens.split_first().unwrap();
    for lens in others {
        assert_eq!(lens, reference);
    }
}
/// Three rows ingested with `IngestBuilder::new("S", 2, 2, ..)` produce two
/// chunks per column; column 0's numbers lanes have lengths 2 and 1.
#[test]
fn chunking_splits_rows() {
    let mut builder = IngestBuilder::new("S", 2, 2, crate::engine::DateSystem::Excel1900);
    let rows = [
        [LiteralValue::Number(1.0), LiteralValue::Text("a".into())],
        [LiteralValue::Empty, LiteralValue::Text("b".into())],
        [LiteralValue::Boolean(true), LiteralValue::Empty],
    ];
    for row in &rows {
        builder.append_row(row).unwrap();
    }
    let sheet = builder.finish();

    for column in &sheet.columns[..2] {
        assert_eq!(column.chunks.len(), 2);
    }
    let first_column = &sheet.columns[0];
    assert_eq!(first_column.chunks[0].numbers_or_null().len(), 2);
    assert_eq!(first_column.chunks[1].numbers_or_null().len(), 1);
}
/// An ingested `Pending` value gets the `Pending` type tag and leaves the
/// errors lane null for that row.
#[test]
fn pending_is_not_error() {
    let mut builder = IngestBuilder::new("S", 1, 8, crate::engine::DateSystem::Excel1900);
    builder.append_row(&[LiteralValue::Pending]).unwrap();
    let sheet = builder.finish();

    let chunk = &sheet.columns[0].chunks[0];
    let first_tag = chunk.type_tag.values()[0];
    assert_eq!(first_tag, super::TypeTag::Pending as u8);

    let error_lane = chunk.errors_or_null();
    assert_eq!(error_lane.null_count(), 1);
}
/// For a chunk holding only text rows, `numbers_or_null()` returns a
/// `Float64` array of the chunk's length in which every slot is null.
#[test]
fn all_null_numeric_lane_uses_null_array() {
    let mut builder = IngestBuilder::new("S", 1, 16, crate::engine::DateSystem::Excel1900);
    for text in ["a", "", "b"] {
        builder.append_row(&[LiteralValue::Text(text.into())]).unwrap();
    }
    let sheet = builder.finish();

    let numeric_lane = sheet.columns[0].chunks[0].numbers_or_null();
    assert_eq!(numeric_lane.len(), 3);
    assert_eq!(numeric_lane.null_count(), 3);
    assert_eq!(numeric_lane.data_type(), &DataType::Float64);
}
/// Inserts and deletes rows in a ten-row sheet with user overlay values at
/// rows 3 and 4, then checks the shifted cell values, matching per-chunk
/// lengths, strictly increasing chunk starts, and that the last chunk ends at
/// `nrows`.
#[test]
fn row_insert_delete_across_chunk_boundaries_with_overlays() {
    let mut builder = IngestBuilder::new("S", 1, 4, crate::engine::DateSystem::Excel1900);
    for _ in 0..10 {
        builder.append_row(&[LiteralValue::Empty]).unwrap();
    }
    let mut sheet = builder.finish();

    // User overlay values at rows 3 and 4.
    for (row, value) in [(3, 30.0), (4, 40.0)] {
        let (chunk_idx, offset) = sheet.chunk_of_row(row).unwrap();
        sheet.columns[0].chunks[chunk_idx]
            .overlay
            .set(offset, OverlayValue::Number(value));
    }

    // Two rows inserted at row 4: the value from row 4 is now read at row 6.
    sheet.insert_rows(4, 2);
    assert_eq!(sheet.nrows, 12);
    let after_insert = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    let expected_after_insert = [
        (3, LiteralValue::Number(30.0)),
        (4, LiteralValue::Empty),
        (6, LiteralValue::Number(40.0)),
    ];
    for (row, expected) in expected_after_insert {
        assert_eq!(after_insert.get_cell(row, 0), expected);
    }

    // Three rows deleted starting at row 3: the 40.0 value is now read at row 3.
    sheet.delete_rows(3, 3);
    assert_eq!(sheet.nrows, 9);
    let after_delete = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    assert_eq!(after_delete.get_cell(3, 0), LiteralValue::Number(40.0));

    // Every column has the same per-chunk lengths as column 0.
    let reference_lens: Vec<usize> = sheet.columns[0]
        .chunks
        .iter()
        .map(|ch| ch.type_tag.len())
        .collect();
    for column in sheet.columns.iter() {
        let lens: Vec<usize> = column.chunks.iter().map(|ch| ch.type_tag.len()).collect();
        assert_eq!(lens, reference_lens);
    }

    // Chunk starts are strictly increasing.
    let starts = &sheet.chunk_starts;
    assert!(starts
        .iter()
        .zip(starts.iter().skip(1))
        .all(|(prev, next)| prev < next));

    // Last chunk start plus its length equals the row count.
    let last_start = starts.last().copied().unwrap_or(0);
    let last_len = sheet.columns[0]
        .chunks
        .last()
        .map_or(0, |chunk| chunk.type_tag.len());
    assert_eq!(last_start + last_len, sheet.nrows as usize);
}
/// Inserting and then deleting two rows inside a three-cell user dense
/// fragment keeps the values at the expected rows, never produces sparse
/// fragments, and keeps the column overlays normalized.
#[test]
fn row_insert_delete_preserves_user_dense_fragments() {
    let mut builder = IngestBuilder::new("S", 1, 4, crate::engine::DateSystem::Excel1900);
    for _ in 0..10 {
        builder.append_row(&[LiteralValue::Empty]).unwrap();
    }
    let mut sheet = builder.finish();

    // Dense user fragment 10/20/30 starting at row 1.
    let (chunk_idx, offset) = sheet.chunk_of_row(1).unwrap();
    let fragment_values: Vec<_> = [10.0, 20.0, 30.0]
        .iter()
        .map(|&v| OverlayValue::Number(v))
        .collect();
    let fragment = OverlayFragment::dense_range(offset, fragment_values).unwrap();
    sheet.columns[0]
        .chunk_mut(chunk_idx)
        .unwrap()
        .overlay
        .apply_fragment(fragment);

    let before = column_overlay_stats(&sheet, 0, false);
    assert_eq!(before.dense_fragments, 1);
    assert_eq!(before.sparse_fragments, 0);
    assert_column_overlays_normalized(&sheet, 0);

    // Insert two rows at row 2: 20 and 30 move to rows 4 and 5.
    sheet.insert_rows(2, 2);
    assert_eq!(sheet.nrows, 12);
    let view = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    let expected_after_insert = [
        (1, LiteralValue::Number(10.0)),
        (2, LiteralValue::Empty),
        (3, LiteralValue::Empty),
        (4, LiteralValue::Number(20.0)),
        (5, LiteralValue::Number(30.0)),
    ];
    for (row, expected) in expected_after_insert {
        assert_eq!(view.get_cell(row, 0), expected);
    }
    let after_insert = column_overlay_stats(&sheet, 0, false);
    assert_eq!(after_insert.sparse_fragments, 0);
    assert!(after_insert.dense_fragments >= 2);
    assert_column_overlays_normalized(&sheet, 0);

    // Delete the two inserted rows: values are contiguous again at rows 1..=3.
    sheet.delete_rows(2, 2);
    assert_eq!(sheet.nrows, 10);
    let view = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    for (row, expected) in [(1, 10.0), (2, 20.0), (3, 30.0)] {
        assert_eq!(view.get_cell(row, 0), LiteralValue::Number(expected));
    }
    let after_delete = column_overlay_stats(&sheet, 0, false);
    assert_eq!(after_delete.sparse_fragments, 0);
    assert!(after_delete.dense_fragments >= 1);
    assert_column_overlays_normalized(&sheet, 0);
}
/// Inserting and then deleting one row inside a three-cell computed run of
/// `Empty` keeps the expected cell values, never produces sparse fragments,
/// and keeps the column overlays normalized.
#[test]
fn row_insert_delete_preserves_computed_empty_run_fragments() {
    let mut builder = IngestBuilder::new("S", 1, 4, crate::engine::DateSystem::Excel1900);
    for row in 0..8 {
        builder
            .append_row(&[LiteralValue::Number((row + 1) as f64)])
            .unwrap();
    }
    let mut sheet = builder.finish();

    // Computed run of three Empty values starting at row 1.
    let (chunk_idx, offset) = sheet.chunk_of_row(1).unwrap();
    let empty_run = OverlayFragment::run_range(offset, vec![OverlayValue::Empty; 3]).unwrap();
    sheet.columns[0]
        .chunk_mut(chunk_idx)
        .unwrap()
        .computed_overlay
        .apply_fragment(empty_run);

    let before = column_overlay_stats(&sheet, 0, true);
    assert_eq!(before.run_fragments, 1);
    assert_eq!(before.sparse_fragments, 0);
    assert_column_overlays_normalized(&sheet, 0);

    // Insert one row at row 2: rows 1..=4 read Empty, row 5 reads base value 5.0.
    sheet.insert_rows(2, 1);
    assert_eq!(sheet.nrows, 9);
    let view = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    for row in 1..5 {
        assert_eq!(view.get_cell(row, 0), LiteralValue::Empty);
    }
    assert_eq!(view.get_cell(5, 0), LiteralValue::Number(5.0));
    let after_insert = column_overlay_stats(&sheet, 0, true);
    assert_eq!(after_insert.sparse_fragments, 0);
    assert!(after_insert.run_fragments >= 2);
    assert_column_overlays_normalized(&sheet, 0);

    // Delete the inserted row: rows 1..=3 read Empty, row 4 reads 5.0.
    sheet.delete_rows(2, 1);
    assert_eq!(sheet.nrows, 8);
    let view = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    for row in 1..4 {
        assert_eq!(view.get_cell(row, 0), LiteralValue::Empty);
    }
    assert_eq!(view.get_cell(4, 0), LiteralValue::Number(5.0));
    let after_delete = column_overlay_stats(&sheet, 0, true);
    assert_eq!(after_delete.sparse_fragments, 0);
    assert!(after_delete.run_fragments >= 1);
    assert_column_overlays_normalized(&sheet, 0);
}
/// Verifies that inserting and then deleting columns leaves every column with
/// the same per-chunk row counts as the original first column, and that the
/// delete leaves `chunk_starts` unchanged.
#[test]
fn column_insert_delete_retains_chunk_alignment() {
    let mut builder = IngestBuilder::new("S", 3, 3, crate::engine::DateSystem::Excel1900);
    for _ in 0..5 {
        let blank_row = [
            LiteralValue::Empty,
            LiteralValue::Empty,
            LiteralValue::Empty,
        ];
        builder.append_row(&blank_row).unwrap();
    }
    let mut sheet = builder.finish();

    // Chunk lengths of the first column are the reference layout.
    let expected_lens: Vec<usize> = sheet.columns[0]
        .chunks
        .iter()
        .map(|chunk| chunk.type_tag.len())
        .collect();

    sheet.insert_columns(1, 2);
    assert_eq!(sheet.columns.len(), 5);
    sheet.columns.iter().for_each(|column| {
        let actual: Vec<usize> = column
            .chunks
            .iter()
            .map(|chunk| chunk.type_tag.len())
            .collect();
        assert_eq!(actual, expected_lens);
    });

    let starts_after_insert = sheet.chunk_starts.clone();
    sheet.delete_columns(2, 2);
    assert_eq!(sheet.columns.len(), 3);
    sheet.columns.iter().for_each(|column| {
        let actual: Vec<usize> = column
            .chunks
            .iter()
            .map(|chunk| chunk.type_tag.len())
            .collect();
        assert_eq!(actual, expected_lens);
    });
    assert_eq!(sheet.chunk_starts, starts_after_insert);
}
/// Applies two adjacent row inserts and one row delete to a sheet whose first
/// column carries overlay values of several types (number, text, boolean,
/// error, explicit empty), checking after every step that each overlay value
/// is found at its shifted row.
///
/// Finally verifies the structural invariants: all columns share the same
/// per-chunk lengths, `chunk_starts` is strictly increasing, and the last
/// chunk ends exactly at `nrows`.
#[test]
fn multiple_adjacent_row_ops_overlay_mixed_types() {
    use formualizer_common::ExcelErrorKind;
    let mut b = IngestBuilder::new("S", 2, 3, crate::engine::DateSystem::Excel1900);
    for _ in 0..9 {
        b.append_row(&[LiteralValue::Empty, LiteralValue::Empty])
            .unwrap();
    }
    let mut sheet = b.finish();

    // Writes a single overlay value into column 0 at the given absolute row.
    let set_ov = |sh: &mut ArrowSheet, row: usize, ov: OverlayValue| {
        let (ch_i, off) = sh.chunk_of_row(row).unwrap();
        let _ = sh.columns[0].chunks[ch_i].overlay.set(off, ov);
    };
    set_ov(&mut sheet, 2, OverlayValue::Number(12.5));
    set_ov(&mut sheet, 3, OverlayValue::Text(Arc::from("hello")));
    set_ov(&mut sheet, 5, OverlayValue::Boolean(true));
    set_ov(
        &mut sheet,
        6,
        OverlayValue::Error(map_error_code(ExcelErrorKind::Div)),
    );
    set_ov(&mut sheet, 8, OverlayValue::Empty);

    // Step 1: one row inserted at 3 — overlays at rows >= 3 move down by one.
    sheet.insert_rows(3, 1);
    let av1 = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    assert_eq!(av1.get_cell(2, 0), LiteralValue::Number(12.5));
    assert_eq!(av1.get_cell(4, 0), LiteralValue::Text("hello".into()));
    assert_eq!(av1.get_cell(6, 0), LiteralValue::Boolean(true));
    match av1.get_cell(7, 0) {
        LiteralValue::Error(e) => assert_eq!(e.kind, ExcelErrorKind::Div),
        other => panic!("expected error at row 7, got {other:?}"),
    }
    assert_eq!(av1.get_cell(9, 0), LiteralValue::Empty);

    // Step 2: two rows inserted at 4 — overlays at rows >= 4 move down by two.
    sheet.insert_rows(4, 2);
    let av2 = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    assert_eq!(av2.get_cell(2, 0), LiteralValue::Number(12.5));
    assert_eq!(av2.get_cell(6, 0), LiteralValue::Text("hello".into()));
    assert_eq!(av2.get_cell(8, 0), LiteralValue::Boolean(true));
    match av2.get_cell(9, 0) {
        LiteralValue::Error(e) => assert_eq!(e.kind, ExcelErrorKind::Div),
        other => panic!("expected error at row 9, got {other:?}"),
    }
    assert_eq!(av2.get_cell(11, 0), LiteralValue::Empty);

    // Step 3: rows 6 and 7 deleted — the text overlay is removed and the
    // overlays below it move up by two.
    sheet.delete_rows(6, 2);
    let av3 = sheet.range_view(0, 0, (sheet.nrows - 1) as usize, 0);
    assert_eq!(av3.get_cell(2, 0), LiteralValue::Number(12.5));
    assert_eq!(av3.get_cell(6, 0), LiteralValue::Boolean(true));
    match av3.get_cell(7, 0) {
        LiteralValue::Error(e) => assert_eq!(e.kind, ExcelErrorKind::Div),
        // The message names the row actually inspected (7).
        other => panic!("expected error at row 7, got {other:?}"),
    }
    assert_eq!(av3.get_cell(9, 0), LiteralValue::Empty);

    // Every column must have the same chunk layout as column 0.
    let lens0: Vec<usize> = sheet.columns[0]
        .chunks
        .iter()
        .map(|ch| ch.type_tag.len())
        .collect();
    for col in &sheet.columns {
        let lens: Vec<usize> = col.chunks.iter().map(|ch| ch.type_tag.len()).collect();
        assert_eq!(lens, lens0);
    }

    // Chunk starts are strictly increasing and the last chunk ends at `nrows`.
    assert!(sheet.chunk_starts.windows(2).all(|w| w[0] < w[1]));
    let last_start = *sheet.chunk_starts.last().unwrap_or(&0);
    let last_len = sheet.columns[0]
        .chunks
        .last()
        .map(|c| c.type_tag.len())
        .unwrap_or(0);
    assert_eq!(last_start + last_len, sheet.nrows as usize);
}
/// Runs two column inserts followed by up to two column deletes and checks
/// that every surviving column keeps the per-chunk row counts of the original
/// first column, with `chunk_starts` unchanged across the deletes.
#[test]
fn multiple_adjacent_column_ops_alignment() {
    let mut builder = IngestBuilder::new("S", 2, 2, crate::engine::DateSystem::Excel1900);
    for _ in 0..5 {
        let blank_row = [LiteralValue::Empty, LiteralValue::Empty];
        builder.append_row(&blank_row).unwrap();
    }
    let mut sheet = builder.finish();

    // Chunk lengths of the first column are the reference layout.
    let expected_lens: Vec<usize> = sheet.columns[0]
        .chunks
        .iter()
        .map(|chunk| chunk.type_tag.len())
        .collect();

    sheet.insert_columns(0, 1);
    sheet.insert_columns(2, 2);
    assert_eq!(sheet.columns.len(), 5);
    sheet.columns.iter().for_each(|column| {
        let actual: Vec<usize> = column
            .chunks
            .iter()
            .map(|chunk| chunk.type_tag.len())
            .collect();
        assert_eq!(actual, expected_lens);
    });

    let starts_after_insert = sheet.chunk_starts.clone();
    sheet.delete_columns(1, 1);
    // Drop the trailing two columns only when at least three remain.
    let remaining = sheet.columns.len();
    if remaining >= 3 {
        sheet.delete_columns(remaining - 2, 2);
    }
    sheet.columns.iter().for_each(|column| {
        let actual: Vec<usize> = column
            .chunks
            .iter()
            .map(|chunk| chunk.type_tag.len())
            .collect();
        assert_eq!(actual, expected_lens);
    });
    assert_eq!(sheet.chunk_starts, starts_after_insert);
}
/// Places overlay values on rows 2 and 3 of three columns, inserts a row
/// between them, then deletes the middle column, checking that the overlay
/// values follow their cells and that all columns keep identical chunk
/// lengths.
#[test]
fn overlays_on_multiple_columns_row_col_ops() {
    let mut builder = IngestBuilder::new("S", 3, 3, crate::engine::DateSystem::Excel1900);
    for _ in 0..6 {
        let blank_row = [
            LiteralValue::Empty,
            LiteralValue::Empty,
            LiteralValue::Empty,
        ];
        builder.append_row(&blank_row).unwrap();
    }
    let mut sheet = builder.finish();

    // (column, row, overlay value) seeds, applied in this order.
    let seeds: [(usize, usize, OverlayValue); 6] = [
        (0, 2, OverlayValue::Number(12.0)),
        (1, 2, OverlayValue::Text(Arc::from("xx"))),
        (2, 2, OverlayValue::Boolean(true)),
        (0, 3, OverlayValue::Number(33.0)),
        (1, 3, OverlayValue::Text(Arc::from("yy"))),
        (2, 3, OverlayValue::Boolean(false)),
    ];
    for (col, row, value) in seeds {
        let (chunk_idx, offset) = sheet.chunk_of_row(row).unwrap();
        let _ = sheet.columns[col].chunks[chunk_idx]
            .overlay
            .set(offset, value);
    }

    // Inserting at row 3 leaves row 2 in place and pushes old row 3 to row 4.
    sheet.insert_rows(3, 1);
    let last_row = (sheet.nrows - 1) as usize;
    let view = sheet.range_view(0, 0, last_row, 2);
    let expected_after_insert = [
        (2, 0, LiteralValue::Number(12.0)),
        (2, 1, LiteralValue::Text("xx".into())),
        (2, 2, LiteralValue::Boolean(true)),
        (3, 0, LiteralValue::Empty),
        (4, 0, LiteralValue::Number(33.0)),
        (4, 1, LiteralValue::Text("yy".into())),
        (4, 2, LiteralValue::Boolean(false)),
    ];
    for (row, col, want) in expected_after_insert {
        assert_eq!(view.get_cell(row, col), want, "cell ({row}, {col})");
    }

    // Deleting column 1 drops the text column; the boolean column becomes column 1.
    sheet.delete_columns(1, 1);
    let last_row = (sheet.nrows - 1) as usize;
    let view = sheet.range_view(0, 0, last_row, 1);
    let expected_after_delete = [
        (2, 0, LiteralValue::Number(12.0)),
        (2, 1, LiteralValue::Boolean(true)),
        (4, 0, LiteralValue::Number(33.0)),
        (4, 1, LiteralValue::Boolean(false)),
    ];
    for (row, col, want) in expected_after_delete {
        assert_eq!(view.get_cell(row, col), want, "cell ({row}, {col})");
    }

    // All remaining columns must share column 0's chunk layout.
    let reference_lens: Vec<usize> = sheet.columns[0]
        .chunks
        .iter()
        .map(|chunk| chunk.type_tag.len())
        .collect();
    sheet.columns.iter().for_each(|column| {
        let actual: Vec<usize> = column
            .chunks
            .iter()
            .map(|chunk| chunk.type_tag.len())
            .collect();
        assert_eq!(actual, reference_lens);
    });
}
/// Checks that overlay values take precedence over base values in the typed
/// slice iterators for numbers and text.
///
/// Base data is 1.0..=6.0 in one column. Row 1 is overlaid with text `"X"`
/// (so it must be null in the numeric slices and `"X"` in the text slices);
/// row 4 is overlaid with the number 99.0.
#[test]
fn effective_slices_overlay_precedence_numbers_text() {
    let mut builder = IngestBuilder::new("S", 1, 3, crate::engine::DateSystem::Excel1900);
    for n in 1..=6_i32 {
        builder
            .append_row(&[LiteralValue::Number(f64::from(n))])
            .unwrap();
    }
    let mut sheet = builder.finish();

    let (text_chunk, text_off) = sheet.chunk_of_row(1).unwrap();
    sheet.columns[0].chunks[text_chunk]
        .overlay
        .set(text_off, OverlayValue::Text(Arc::from("X")));
    let (num_chunk, num_off) = sheet.chunk_of_row(4).unwrap();
    sheet.columns[0].chunks[num_chunk]
        .overlay
        .set(num_off, OverlayValue::Number(99.0));

    let view = sheet.range_view(0, 0, 5, 0);

    // Flatten the numeric slices into one per-row vector (None = null).
    let mut numeric: Vec<Option<f64>> = vec![None; 6];
    for slice in view.numbers_slices() {
        let (start, len, cols) = slice.unwrap();
        let column = &cols[0];
        for (i, slot) in numeric[start..start + len].iter_mut().enumerate() {
            *slot = (!column.is_null(i)).then(|| column.value(i));
        }
    }
    assert_eq!(
        numeric,
        vec![
            Some(1.0),
            None,
            Some(3.0),
            Some(4.0),
            Some(99.0),
            Some(6.0),
        ]
    );

    // Same flattening for the text slices.
    let mut texts: Vec<Option<String>> = vec![None; 6];
    for slice in view.text_slices() {
        let (start, len, cols) = slice.unwrap();
        let strings = cols[0].as_any().downcast_ref::<StringArray>().unwrap();
        for (i, slot) in texts[start..start + len].iter_mut().enumerate() {
            *slot = (!strings.is_null(i)).then(|| strings.value(i).to_string());
        }
    }
    assert_eq!(
        texts,
        vec![None, Some("X".to_string()), None, None, None, None]
    );
}
/// Checks overlay precedence in the boolean slice iterator.
///
/// Base data alternates `true`/`false` starting with `true` at row 0. Row 1 is
/// overlaid with `Boolean(true)` (replacing the base `false`), and row 2 is
/// overlaid with text, which must surface as null in the boolean slices.
#[test]
fn effective_slices_overlay_precedence_booleans() {
    let mut builder = IngestBuilder::new("S", 1, 4, crate::engine::DateSystem::Excel1900);
    for i in 0..6 {
        // Even rows are true, odd rows are false.
        builder
            .append_row(&[LiteralValue::Boolean(i % 2 == 0)])
            .unwrap();
    }
    let mut sheet = builder.finish();

    let (bool_chunk, bool_off) = sheet.chunk_of_row(1).unwrap();
    sheet.columns[0].chunks[bool_chunk]
        .overlay
        .set(bool_off, OverlayValue::Boolean(true));
    let (text_chunk, text_off) = sheet.chunk_of_row(2).unwrap();
    sheet.columns[0].chunks[text_chunk]
        .overlay
        .set(text_off, OverlayValue::Text(Arc::from("T")));

    let view = sheet.range_view(0, 0, 5, 0);

    // Flatten the boolean slices into one per-row vector (None = null).
    let mut bools: Vec<Option<bool>> = vec![None; 6];
    for slice in view.booleans_slices() {
        let (start, len, cols) = slice.unwrap();
        let column = &cols[0];
        for (i, slot) in bools[start..start + len].iter_mut().enumerate() {
            *slot = (!column.is_null(i)).then(|| column.value(i));
        }
    }
    assert_eq!(bools[..4], [Some(true), Some(true), None, Some(false)]);
}
/// Checks that an error overlay is visible through the error slice iterator.
///
/// Base data is 1.0..=6.0; row 4 is overlaid with the `Div` error code. The
/// error slices must report that code at row 4 and null at row 3.
#[test]
fn effective_slices_overlay_precedence_errors() {
    let mut builder = IngestBuilder::new("S", 1, 3, crate::engine::DateSystem::Excel1900);
    for n in 1..=6_i32 {
        builder
            .append_row(&[LiteralValue::Number(f64::from(n))])
            .unwrap();
    }
    let mut sheet = builder.finish();

    let (err_chunk, err_off) = sheet.chunk_of_row(4).unwrap();
    sheet.columns[0].chunks[err_chunk]
        .overlay
        .set(err_off, OverlayValue::Error(map_error_code(ExcelErrorKind::Div)));

    let view = sheet.range_view(0, 0, 5, 0);

    // Flatten the error slices into one per-row vector (None = null).
    let mut errs: Vec<Option<u8>> = vec![None; 6];
    for slice in view.errors_slices() {
        let (start, len, cols) = slice.unwrap();
        let column = &cols[0];
        for (i, slot) in errs[start..start + len].iter_mut().enumerate() {
            *slot = (!column.is_null(i)).then(|| column.value(i));
        }
    }
    assert_eq!(errs[4], Some(map_error_code(ExcelErrorKind::Div)));
    assert_eq!(errs[3], None);
}
}