nodedb_array/query/
rechunk.rs1use std::collections::BTreeMap;
16
17use crate::error::ArrayResult;
18use crate::schema::ArraySchema;
19use crate::tile::layout::tile_id_for_cell;
20use crate::tile::sparse_tile::{RowKind, SparseRow, SparseTile, SparseTileBuilder};
21use crate::types::TileId;
22use crate::types::cell_value::value::CellValue;
23use crate::types::coord::value::CoordValue;
24
25pub fn rechunk_sparse(
28 target_schema: &ArraySchema,
29 tile: &SparseTile,
30) -> ArrayResult<Vec<(TileId, SparseTile)>> {
31 let n = tile.row_count();
32 let mut live_idx = 0usize;
33 let mut buckets: BTreeMap<TileId, SparseTileBuilder<'_>> = BTreeMap::new();
34 for row in 0..n {
35 if tile.row_kind(row)? != RowKind::Live {
38 continue;
39 }
40 let attr_row = live_idx;
41 live_idx += 1;
42 let coord: Vec<CoordValue> = tile
43 .dim_dicts
44 .iter()
45 .map(|d| d.values[d.indices[row] as usize].clone())
46 .collect();
47 let attrs: Vec<CellValue> = tile
48 .attr_cols
49 .iter()
50 .map(|col| col[attr_row].clone())
51 .collect();
52 let surrogate = tile
53 .surrogates
54 .get(row)
55 .copied()
56 .unwrap_or(nodedb_types::Surrogate::ZERO);
57 let valid_from_ms = tile.valid_from_ms.get(row).copied().unwrap_or(0);
58 let valid_until_ms = tile
59 .valid_until_ms
60 .get(row)
61 .copied()
62 .unwrap_or(nodedb_types::OPEN_UPPER);
63 let tid = tile_id_for_cell(target_schema, &coord, 0)?;
64 let entry = buckets
65 .entry(tid)
66 .or_insert_with(|| SparseTileBuilder::new(target_schema));
67 entry.push_row(SparseRow {
68 coord: &coord,
69 attrs: &attrs,
70 surrogate,
71 valid_from_ms,
72 valid_until_ms,
73 kind: crate::tile::sparse_tile::RowKind::Live,
74 })?;
75 }
76 Ok(buckets.into_iter().map(|(k, v)| (k, v.build())).collect())
77}
78
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::ArraySchemaBuilder;
    use crate::schema::attr_spec::{AttrSpec, AttrType};
    use crate::schema::dim_spec::{DimSpec, DimType};
    use crate::types::domain::{Domain, DomainBound};

    /// Build a two-dimensional (`x`, `y`) Int64 schema named `"g"` over
    /// `[0, 99]` on each axis, with a single Int64 attribute `v` and the
    /// given tile extents.
    fn schema_with_extents(extents: Vec<u64>) -> ArraySchema {
        let axis = |name: &'static str| {
            DimSpec::new(
                name,
                DimType::Int64,
                Domain::new(DomainBound::Int64(0), DomainBound::Int64(99)),
            )
        };
        ArraySchemaBuilder::new("g")
            .dim(axis("x"))
            .dim(axis("y"))
            .attr(AttrSpec::new("v", AttrType::Int64, true))
            .tile_extents(extents)
            .build()
            .unwrap()
    }

    #[test]
    fn rechunk_splits_into_smaller_tiles() {
        let coarse = schema_with_extents(vec![100, 100]);
        let fine = schema_with_extents(vec![10, 10]);

        // Two cells that share the single 100x100 tile but fall into
        // different 10x10 tiles.
        let mut builder = SparseTileBuilder::new(&coarse);
        for (pos, val) in [(5i64, 1i64), (50, 2)] {
            builder
                .push(
                    &[CoordValue::Int64(pos), CoordValue::Int64(pos)],
                    &[CellValue::Int64(val)],
                )
                .unwrap();
        }
        let source_tile = builder.build();

        let rechunked = rechunk_sparse(&fine, &source_tile).unwrap();
        assert_eq!(rechunked.len(), 2);
        for (_, small) in rechunked.iter() {
            assert_eq!(small.nnz(), 1);
        }
    }

    #[test]
    fn rechunk_preserves_total_cells() {
        let coarse = schema_with_extents(vec![16, 16]);
        let fine = schema_with_extents(vec![4, 4]);

        // Eight cells along the diagonal (0,0) .. (7,7).
        let mut builder = SparseTileBuilder::new(&coarse);
        for diag in 0i64..8 {
            let coord = [CoordValue::Int64(diag), CoordValue::Int64(diag)];
            let attrs = [CellValue::Int64(diag)];
            builder.push(&coord, &attrs).unwrap();
        }
        let source_tile = builder.build();

        let rechunked = rechunk_sparse(&fine, &source_tile).unwrap();
        let mut total: u32 = 0;
        for (_, small) in &rechunked {
            total += small.nnz();
        }
        assert_eq!(total, 8);
    }
}