Skip to main content

nodedb_array/tile/
promotion.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Sparse → dense promotion at fill ratio > [`DENSE_PROMOTION_THRESHOLD`].
4//!
5//! The threshold is fixed in code rather than in the schema because
6//! it's a storage-level decision, not a workload one — at fill ratios
7//! above 70%, the per-cell overhead of coordinate columns exceeds the
8//! cost of storing nulls densely.
9
10use super::dense_tile::{DenseTile, cells_per_tile};
11use super::mbr::MbrBuilder;
12use super::sparse_tile::SparseTile;
13use crate::error::{ArrayError, ArrayResult};
14use crate::schema::ArraySchema;
15use crate::types::cell_value::value::CellValue;
16
17/// Fill ratio above which a sparse tile is rewritten into a dense
18/// tile (auto-promotion threshold).
19pub const DENSE_PROMOTION_THRESHOLD: f64 = 0.7;
20
21/// True if `nnz / cells_per_tile > threshold`.
22pub fn should_promote_to_dense(tile: &SparseTile, schema: &ArraySchema) -> bool {
23    let total = cells_per_tile(&schema.tile_extents);
24    if total == 0 {
25        return false;
26    }
27    (tile.nnz() as f64) / (total as f64) > DENSE_PROMOTION_THRESHOLD
28}
29
30/// Convert a sparse tile to a dense tile by materialising every cell.
31///
32/// Cells absent in the sparse payload become [`CellValue::Null`]. The
33/// caller is responsible for the integer-only-dim precondition (dense
34/// indexing relies on integer cell offsets) — this is enforced
35/// because non-`Int64`/`TimestampMs` dims do not have well-defined
36/// `tile_extent` semantics in dense layout.
37pub fn sparse_to_dense(tile: &SparseTile, schema: &ArraySchema) -> ArrayResult<DenseTile> {
38    use crate::schema::dim_spec::DimType;
39    use crate::types::coord::value::CoordValue;
40    for d in &schema.dims {
41        if !matches!(d.dtype, DimType::Int64 | DimType::TimestampMs) {
42            return Err(ArrayError::InvalidSchema {
43                array: schema.name.clone(),
44                detail: format!(
45                    "dense promotion requires integer dims; '{}' is {:?}",
46                    d.name, d.dtype
47                ),
48            });
49        }
50    }
51    let mut dense = DenseTile::empty(schema);
52    let mut mbr = MbrBuilder::new(schema.arity(), schema.attrs.len());
53    let n_rows = tile.nnz() as usize;
54    for row in 0..n_rows {
55        // Reconstruct row's coord from the dim dictionaries.
56        let coord: Vec<CoordValue> = schema
57            .dims
58            .iter()
59            .enumerate()
60            .map(|(i, _)| {
61                let dict = &tile.dim_dicts[i];
62                let idx = dict.indices[row] as usize;
63                dict.values[idx].clone()
64            })
65            .collect();
66        let attrs: Vec<CellValue> = (0..schema.attrs.len())
67            .map(|i| tile.attr_cols[i][row].clone())
68            .collect();
69        let flat = flat_index_for_coord(schema, &coord)?;
70        for (i, a) in attrs.iter().enumerate() {
71            dense.attr_cols[i][flat] = a.clone();
72        }
73        mbr.fold(&coord, &attrs);
74    }
75    dense.mbr = mbr.build();
76    Ok(dense)
77}
78
79/// Row-major flat index `((c0 - lo0) * extent1 * extent2 ...) + ...`.
80fn flat_index_for_coord(
81    schema: &ArraySchema,
82    coord: &[crate::types::coord::value::CoordValue],
83) -> ArrayResult<usize> {
84    use crate::schema::dim_spec::DimType;
85    use crate::types::coord::value::CoordValue;
86    use crate::types::domain::DomainBound;
87    let mut flat: usize = 0;
88    for (i, dim) in schema.dims.iter().enumerate() {
89        let extent = schema.tile_extents[i] as usize;
90        let lo = match (&dim.dtype, &dim.domain.lo) {
91            (DimType::Int64, DomainBound::Int64(v))
92            | (DimType::TimestampMs, DomainBound::TimestampMs(v)) => *v,
93            _ => 0,
94        };
95        let off = match coord.get(i) {
96            Some(CoordValue::Int64(v)) | Some(CoordValue::TimestampMs(v)) => {
97                ((*v - lo) as usize) % extent
98            }
99            _ => {
100                return Err(ArrayError::CoordOutOfDomain {
101                    array: schema.name.clone(),
102                    dim: dim.name.clone(),
103                    detail: "non-integer coord in dense promotion".to_string(),
104                });
105            }
106        };
107        flat = flat * extent + off;
108    }
109    Ok(flat)
110}
111
112#[cfg(test)]
113mod tests {
114    use super::*;
115    use crate::schema::ArraySchemaBuilder;
116    use crate::schema::attr_spec::{AttrSpec, AttrType};
117    use crate::schema::dim_spec::{DimSpec, DimType};
118    use crate::tile::sparse_tile::SparseTileBuilder;
119    use crate::types::coord::value::CoordValue;
120    use crate::types::domain::{Domain, DomainBound};
121
122    fn schema_2x2() -> ArraySchema {
123        ArraySchemaBuilder::new("g")
124            .dim(DimSpec::new(
125                "x",
126                DimType::Int64,
127                Domain::new(DomainBound::Int64(0), DomainBound::Int64(1)),
128            ))
129            .dim(DimSpec::new(
130                "y",
131                DimType::Int64,
132                Domain::new(DomainBound::Int64(0), DomainBound::Int64(1)),
133            ))
134            .attr(AttrSpec::new("v", AttrType::Int64, true))
135            .tile_extents(vec![2, 2])
136            .build()
137            .unwrap()
138    }
139
140    fn build_tile(s: &ArraySchema, n: usize) -> SparseTile {
141        let mut b = SparseTileBuilder::new(s);
142        let cells = [(0i64, 0i64, 10i64), (0, 1, 20), (1, 0, 30), (1, 1, 40)];
143        for (x, y, v) in cells.iter().take(n) {
144            b.push(
145                &[CoordValue::Int64(*x), CoordValue::Int64(*y)],
146                &[CellValue::Int64(*v)],
147            )
148            .unwrap();
149        }
150        b.build()
151    }
152
153    #[test]
154    fn promotes_above_threshold() {
155        let s = schema_2x2();
156        let t = build_tile(&s, 3); // 3/4 = 0.75 > 0.7
157        assert!(should_promote_to_dense(&t, &s));
158    }
159
160    #[test]
161    fn does_not_promote_at_or_below_threshold() {
162        let s = schema_2x2();
163        let t = build_tile(&s, 2); // 2/4 = 0.5
164        assert!(!should_promote_to_dense(&t, &s));
165    }
166
167    #[test]
168    fn dense_conversion_places_cells_at_flat_index() {
169        let s = schema_2x2();
170        let t = build_tile(&s, 4);
171        let d = sparse_to_dense(&t, &s).unwrap();
172        // Row-major (x, y) for 2x2 with extents [2, 2]: (0,0)=0,
173        // (0,1)=1, (1,0)=2, (1,1)=3.
174        assert_eq!(d.attr_cols[0][0], CellValue::Int64(10));
175        assert_eq!(d.attr_cols[0][1], CellValue::Int64(20));
176        assert_eq!(d.attr_cols[0][2], CellValue::Int64(30));
177        assert_eq!(d.attr_cols[0][3], CellValue::Int64(40));
178        assert_eq!(d.mbr.nnz, 4);
179    }
180
181    #[test]
182    fn dense_conversion_leaves_absent_cells_null() {
183        let s = schema_2x2();
184        let t = build_tile(&s, 2);
185        let d = sparse_to_dense(&t, &s).unwrap();
186        // Two cells populated, two should remain Null.
187        let nulls = d.attr_cols[0]
188            .iter()
189            .filter(|c| matches!(c, CellValue::Null))
190            .count();
191        assert_eq!(nulls, 2);
192    }
193}