Skip to main content

nodedb_array/query/
rechunk.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Re-bucket cells into a different tile-extent layout.
4//!
5//! The same schema may be re-tiled (different `tile_extents`) for a
6//! query that needs a different access pattern — e.g. a slice along
7//! one axis benefits from longer extents on that axis. Rechunk takes
8//! a single source tile and emits the (possibly multiple) target
9//! tiles that contain its cells.
10//!
11//! Both source and target schemas must share name, dim arity, attrs
12//! and dim domains; only `tile_extents` may differ. The caller is
13//! responsible for that constraint — this is a pure re-bucket.
14
15use std::collections::BTreeMap;
16
17use crate::error::ArrayResult;
18use crate::schema::ArraySchema;
19use crate::tile::layout::tile_id_for_cell;
20use crate::tile::sparse_tile::{RowKind, SparseRow, SparseTile, SparseTileBuilder};
21use crate::types::TileId;
22use crate::types::cell_value::value::CellValue;
23use crate::types::coord::value::CoordValue;
24
25/// Re-bucket every cell in `tile` according to `target_schema.tile_extents`.
26/// Returns one entry per resulting target tile, ordered by [`TileId`].
27pub fn rechunk_sparse(
28    target_schema: &ArraySchema,
29    tile: &SparseTile,
30) -> ArrayResult<Vec<(TileId, SparseTile)>> {
31    let n = tile.row_count();
32    let mut live_idx = 0usize;
33    let mut buckets: BTreeMap<TileId, SparseTileBuilder<'_>> = BTreeMap::new();
34    for row in 0..n {
35        // Sentinel rows are not re-bucketed; rechunk is a purely spatial
36        // operation on live cell data.
37        if tile.row_kind(row)? != RowKind::Live {
38            continue;
39        }
40        let attr_row = live_idx;
41        live_idx += 1;
42        let coord: Vec<CoordValue> = tile
43            .dim_dicts
44            .iter()
45            .map(|d| d.values[d.indices[row] as usize].clone())
46            .collect();
47        let attrs: Vec<CellValue> = tile
48            .attr_cols
49            .iter()
50            .map(|col| col[attr_row].clone())
51            .collect();
52        let surrogate = tile
53            .surrogates
54            .get(row)
55            .copied()
56            .unwrap_or(nodedb_types::Surrogate::ZERO);
57        let valid_from_ms = tile.valid_from_ms.get(row).copied().unwrap_or(0);
58        let valid_until_ms = tile
59            .valid_until_ms
60            .get(row)
61            .copied()
62            .unwrap_or(nodedb_types::OPEN_UPPER);
63        let tid = tile_id_for_cell(target_schema, &coord, 0)?;
64        let entry = buckets
65            .entry(tid)
66            .or_insert_with(|| SparseTileBuilder::new(target_schema));
67        entry.push_row(SparseRow {
68            coord: &coord,
69            attrs: &attrs,
70            surrogate,
71            valid_from_ms,
72            valid_until_ms,
73            kind: crate::tile::sparse_tile::RowKind::Live,
74        })?;
75    }
76    Ok(buckets.into_iter().map(|(k, v)| (k, v.build())).collect())
77}
78
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::ArraySchemaBuilder;
    use crate::schema::attr_spec::{AttrSpec, AttrType};
    use crate::schema::dim_spec::{DimSpec, DimType};
    use crate::types::domain::{Domain, DomainBound};

    /// Two-dimensional Int64 schema named "g" with dims "x" and "y" (both
    /// bounded by 0 and 99), one Int64 attribute "v", and the given tile
    /// extents.
    fn schema(extents: Vec<u64>) -> ArraySchema {
        let axis = |name: &'static str| {
            DimSpec::new(
                name,
                DimType::Int64,
                Domain::new(DomainBound::Int64(0), DomainBound::Int64(99)),
            )
        };
        ArraySchemaBuilder::new("g")
            .dim(axis("x"))
            .dim(axis("y"))
            .attr(AttrSpec::new("v", AttrType::Int64, true))
            .tile_extents(extents)
            .build()
            .unwrap()
    }

    /// Builds a sparse tile under `schema` from `(x, y, v)` triples, pushed
    /// in the order given.
    fn tile_of(schema: &ArraySchema, cells: &[(i64, i64, i64)]) -> SparseTile {
        let mut builder = SparseTileBuilder::new(schema);
        for &(x, y, v) in cells {
            builder
                .push(
                    &[CoordValue::Int64(x), CoordValue::Int64(y)],
                    &[CellValue::Int64(v)],
                )
                .unwrap();
        }
        builder.build()
    }

    // Two cells that share one 100x100 tile must land in two distinct
    // 10x10 tiles, each holding exactly one cell.
    #[test]
    fn rechunk_splits_into_smaller_tiles() {
        let source = schema(vec![100, 100]);
        let target = schema(vec![10, 10]);
        let big = tile_of(&source, &[(5, 5, 1), (50, 50, 2)]);

        let rechunked = rechunk_sparse(&target, &big).unwrap();

        assert_eq!(rechunked.len(), 2);
        for small in rechunked.iter().map(|(_, t)| t) {
            assert_eq!(small.nnz(), 1);
        }
    }

    // Re-tiling from 16x16 to 4x4 must neither drop nor duplicate cells.
    #[test]
    fn rechunk_preserves_total_cells() {
        let source = schema(vec![16, 16]);
        let target = schema(vec![4, 4]);
        let diagonal: Vec<(i64, i64, i64)> = (0..8i64).map(|i| (i, i, i)).collect();
        let big = tile_of(&source, &diagonal);

        let rechunked = rechunk_sparse(&target, &big).unwrap();

        let mut total: u32 = 0;
        for (_, small) in &rechunked {
            total += small.nnz();
        }
        assert_eq!(total, 8);
    }
}