1use crate::error::{Error, Result};
12use crate::storage::Storage;
13
/// Location and metadata of one stored chunk of a chunked dataset.
///
/// Values of this type are produced by the chunk-index collectors in this
/// module. The type is a plain value: it can be cloned, printed for
/// diagnostics and compared field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkEntry {
    /// Address at which the chunk's bytes start.
    pub address: u64,
    /// Size of the stored chunk in bytes. Collectors in this module set it
    /// to `0` when the index record carries no size (non-filtered records).
    pub size: u64,
    /// Filter mask recorded for the chunk; `0` when the index records none.
    pub filter_mask: u32,
    /// Offset of the chunk's origin along each dimension, in elements.
    pub offsets: Vec<u64>,
}
27
/// Reports whether a chunk lies inside an optional, inclusive chunk-index window.
///
/// * `offsets` - element offset of the chunk's origin along each dimension.
/// * `chunk_dims` - chunk extent along each dimension, in elements.
/// * `chunk_bounds` - `(first, last)` chunk indices per dimension, both
///   inclusive. `None` means "no window": every chunk is accepted.
///
/// Each offset is converted to a chunk index by dividing it by the chunk
/// extent of the same dimension; the chunk is accepted only if every index
/// falls within `first[dim]..=last[dim]`.
///
/// The function never panics on malformed geometry. A dimension whose chunk
/// extent is zero, or for which `chunk_dims` or either bound has no entry,
/// cannot be placed in the window and makes the function return `false`.
fn chunk_overlaps_bounds(
    offsets: &[u64],
    chunk_dims: &[u32],
    chunk_bounds: Option<(&[u64], &[u64])>,
) -> bool {
    let Some((first_chunk, last_chunk)) = chunk_bounds else {
        return true;
    };

    offsets.iter().enumerate().all(|(dim, &offset)| {
        // Checked lookups instead of indexing: a rank mismatch must reject the
        // chunk rather than abort the whole read.
        let (Some(&extent), Some(&first), Some(&last)) = (
            chunk_dims.get(dim),
            first_chunk.get(dim),
            last_chunk.get(dim),
        ) else {
            return false;
        };

        // `checked_div` yields `None` for a zero extent instead of panicking.
        offset
            .checked_div(u64::from(extent))
            .is_some_and(|chunk_index| (first..=last).contains(&chunk_index))
    })
}
42
43fn checked_mul_u64(lhs: u64, rhs: u64, context: &str) -> Result<u64> {
44 lhs.checked_mul(rhs)
45 .ok_or_else(|| Error::InvalidData(format!("{context} overflows u64")))
46}
47
48fn checked_add_u64(lhs: u64, rhs: u64, context: &str) -> Result<u64> {
49 lhs.checked_add(rhs)
50 .ok_or_else(|| Error::InvalidData(format!("{context} overflows u64")))
51}
52
53fn checked_usize(value: u64, context: &str) -> Result<usize> {
54 usize::try_from(value).map_err(|_| {
55 Error::InvalidData(format!(
56 "{context} value {value} exceeds platform usize capacity"
57 ))
58 })
59}
60
61fn chunk_linear_index(chunk_indices: &[u64], chunks_per_dim: &[u64]) -> Result<u64> {
62 let mut linear = 0u64;
63 for (dim, chunk_index) in chunk_indices.iter().enumerate() {
64 linear = checked_mul_u64(linear, chunks_per_dim[dim], "implicit chunk linear index")?;
65 linear = checked_add_u64(linear, *chunk_index, "implicit chunk linear index")?;
66 }
67 Ok(linear)
68}
69
70pub fn collect_v2_chunk_entries(
72 data: &[u8],
73 btree_address: u64,
74 offset_size: u8,
75 length_size: u8,
76 ndim: u32,
77 chunk_dims: &[u32],
78 chunk_bounds: Option<(&[u64], &[u64])>,
79) -> Result<Vec<ChunkEntry>> {
80 let mut cursor = crate::io::Cursor::new(data);
81 cursor.set_position(btree_address);
82 let header = crate::btree_v2::BTreeV2Header::parse(&mut cursor, offset_size, length_size)?;
83
84 let records = crate::btree_v2::collect_btree_v2_records(
85 data,
86 &header,
87 offset_size,
88 length_size,
89 Some(ndim),
90 chunk_dims,
91 chunk_bounds,
92 )?;
93
94 let mut entries = Vec::with_capacity(records.len());
95 for record in records {
96 match record {
97 crate::btree_v2::BTreeV2Record::ChunkedNonFiltered { address, offsets }
98 if chunk_overlaps_bounds(&offsets, chunk_dims, chunk_bounds) =>
99 {
100 entries.push(ChunkEntry {
101 address,
102 size: 0, filter_mask: 0,
104 offsets,
105 });
106 }
107 crate::btree_v2::BTreeV2Record::ChunkedFiltered {
108 address,
109 chunk_size,
110 filter_mask,
111 offsets,
112 } if chunk_overlaps_bounds(&offsets, chunk_dims, chunk_bounds) => {
113 entries.push(ChunkEntry {
114 address,
115 size: chunk_size,
116 filter_mask,
117 offsets,
118 });
119 }
120 _ => {
121 }
123 }
124 }
125
126 Ok(entries)
127}
128
129pub fn collect_v2_chunk_entries_storage(
131 storage: &dyn Storage,
132 btree_address: u64,
133 offset_size: u8,
134 length_size: u8,
135 ndim: u32,
136 chunk_dims: &[u32],
137 chunk_bounds: Option<(&[u64], &[u64])>,
138) -> Result<Vec<ChunkEntry>> {
139 let header = crate::btree_v2::BTreeV2Header::parse_at_storage(
140 storage,
141 btree_address,
142 offset_size,
143 length_size,
144 )?;
145 let records = crate::btree_v2::collect_btree_v2_records_storage(
146 storage,
147 &header,
148 offset_size,
149 length_size,
150 Some(ndim),
151 chunk_dims,
152 chunk_bounds,
153 )?;
154
155 let mut entries = Vec::with_capacity(records.len());
156 for record in records {
157 match record {
158 crate::btree_v2::BTreeV2Record::ChunkedNonFiltered { address, offsets }
159 if chunk_overlaps_bounds(&offsets, chunk_dims, chunk_bounds) =>
160 {
161 entries.push(ChunkEntry {
162 address,
163 size: 0,
164 filter_mask: 0,
165 offsets,
166 });
167 }
168 crate::btree_v2::BTreeV2Record::ChunkedFiltered {
169 address,
170 chunk_size,
171 filter_mask,
172 offsets,
173 } if chunk_overlaps_bounds(&offsets, chunk_dims, chunk_bounds) => {
174 entries.push(ChunkEntry {
175 address,
176 size: chunk_size,
177 filter_mask,
178 offsets,
179 });
180 }
181 _ => {}
182 }
183 }
184
185 Ok(entries)
186}
187
188pub fn collect_implicit_chunk_entries(
193 start_address: u64,
194 dataset_shape: &[u64],
195 chunk_dims: &[u32],
196 elem_size: usize,
197 chunk_bounds: Option<(&[u64], &[u64])>,
198) -> Result<Vec<ChunkEntry>> {
199 let chunk_elements = chunk_dims.iter().try_fold(1u64, |acc, &dim| {
200 checked_mul_u64(acc, u64::from(dim), "implicit chunk element count")
201 })?;
202 let elem_size = u64::try_from(elem_size).map_err(|_| {
203 Error::InvalidData("implicit chunk element size exceeds u64 capacity".to_string())
204 })?;
205 let chunk_bytes = checked_mul_u64(chunk_elements, elem_size, "implicit chunk byte size")?;
206 let ndim = dataset_shape.len();
207
208 let mut chunks_per_dim = Vec::with_capacity(ndim);
210 for i in 0..ndim {
211 let chunk_dim = u64::from(chunk_dims[i]);
212 if chunk_dim == 0 {
213 return Err(Error::InvalidData(format!(
214 "implicit chunk dimension {i} has zero extent"
215 )));
216 }
217 chunks_per_dim.push(dataset_shape[i].div_ceil(chunk_dim));
218 }
219
220 if ndim == 0 {
221 return Ok(vec![ChunkEntry {
222 address: start_address,
223 size: chunk_bytes,
224 filter_mask: 0,
225 offsets: Vec::new(),
226 }]);
227 }
228
229 let (first_chunk, last_chunk): (Vec<u64>, Vec<u64>) = match chunk_bounds {
230 Some((first, last)) => (first.to_vec(), last.to_vec()),
231 None => (
232 vec![0u64; ndim],
233 chunks_per_dim
234 .iter()
235 .map(|count| count.saturating_sub(1))
236 .collect(),
237 ),
238 };
239
240 let mut chunk_counts = Vec::with_capacity(ndim);
241 for dim in 0..ndim {
242 let selected = last_chunk[dim]
243 .checked_sub(first_chunk[dim])
244 .and_then(|value| value.checked_add(1))
245 .ok_or_else(|| {
246 Error::InvalidData("implicit chunk selection bounds are invalid".to_string())
247 })?;
248 chunk_counts.push(selected);
249 }
250 let total_selected_chunks = chunk_counts.iter().try_fold(1u64, |acc, &count| {
251 checked_mul_u64(acc, count, "implicit selected chunk count")
252 })?;
253 let mut entries = Vec::with_capacity(checked_usize(
254 total_selected_chunks,
255 "implicit selected chunk count",
256 )?);
257 let mut chunk_indices = first_chunk.clone();
258
259 loop {
260 let chunk_idx = chunk_linear_index(&chunk_indices, &chunks_per_dim)?;
261 let offsets = chunk_indices
262 .iter()
263 .enumerate()
264 .map(|(dim, chunk_index)| {
265 checked_mul_u64(
266 *chunk_index,
267 u64::from(chunk_dims[dim]),
268 "implicit chunk offset",
269 )
270 })
271 .collect::<Result<Vec<_>>>()?;
272 let chunk_data_offset =
273 checked_mul_u64(chunk_idx, chunk_bytes, "implicit chunk byte offset")?;
274
275 entries.push(ChunkEntry {
276 address: checked_add_u64(start_address, chunk_data_offset, "implicit chunk address")?,
277 size: chunk_bytes,
278 filter_mask: 0,
279 offsets,
280 });
281
282 let mut advanced = false;
283 for dim in (0..ndim).rev() {
284 if chunk_indices[dim] < last_chunk[dim] {
285 chunk_indices[dim] += 1;
286 if dim + 1 < ndim {
287 chunk_indices[(dim + 1)..ndim].copy_from_slice(&first_chunk[(dim + 1)..ndim]);
288 }
289 advanced = true;
290 break;
291 }
292 }
293
294 if !advanced {
295 break;
296 }
297 }
298
299 Ok(entries)
300}
301
302pub fn single_chunk_entry(
306 address: u64,
307 filtered_size: u64,
308 filter_mask: u32,
309 ndim: usize,
310) -> ChunkEntry {
311 ChunkEntry {
312 address,
313 size: filtered_size,
314 filter_mask,
315 offsets: vec![0u64; ndim],
316 }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// `ChunkEntry` must remain cloneable and printable for diagnostics.
    #[test]
    fn chunk_entry_debug_clone() {
        let entry = ChunkEntry {
            address: 0x1000,
            size: 4096,
            filter_mask: 0,
            offsets: vec![0, 0],
        };
        let entry2 = entry.clone();
        assert_eq!(entry2.address, 0x1000);
        let _ = format!("{:?}", entry);
    }

    /// A 10x20 dataset with 5x10 chunks of 4-byte elements has four chunks of
    /// 200 bytes each, laid out back to back in row-major chunk order.
    #[test]
    fn implicit_chunk_entries() {
        let entries = collect_implicit_chunk_entries(1000, &[10, 20], &[5, 10], 4, None).unwrap();
        assert_eq!(entries.len(), 4);

        let expected: [(u64, Vec<u64>); 4] = [
            (1000, vec![0, 0]),
            (1000 + 200, vec![0, 10]),
            (1000 + 400, vec![5, 0]),
            (1000 + 600, vec![5, 10]),
        ];
        for (entry, (address, offsets)) in entries.iter().zip(&expected) {
            assert_eq!(entry.address, *address);
            assert_eq!(entry.offsets, *offsets);
            assert_eq!(entry.size, 200);
            assert_eq!(entry.filter_mask, 0);
        }
    }

    /// Selection bounds restrict the listing to the requested chunk window.
    #[test]
    fn implicit_chunk_entries_respect_bounds() {
        let entries =
            collect_implicit_chunk_entries(1000, &[10, 20], &[5, 10], 4, Some((&[1, 0], &[1, 1])))
                .unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].address, 1000 + 400);
        assert_eq!(entries[0].offsets, vec![5, 0]);
        assert_eq!(entries[1].address, 1000 + 600);
        assert_eq!(entries[1].offsets, vec![5, 10]);
    }

    /// A rank-0 dataset is a single chunk located at the start address.
    #[test]
    fn implicit_chunk_entries_scalar_dataset() {
        let entries = collect_implicit_chunk_entries(64, &[], &[], 8, None).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].address, 64);
        assert_eq!(entries[0].size, 8);
        assert!(entries[0].offsets.is_empty());
    }

    #[test]
    fn implicit_chunk_entries_reject_chunk_byte_overflow() {
        let err = collect_implicit_chunk_entries(1000, &[10, 10], &[u32::MAX, u32::MAX], 2, None)
            .unwrap_err();
        assert!(err.to_string().contains("implicit chunk byte size"));
    }

    #[test]
    fn implicit_chunk_entries_reject_address_overflow() {
        let err = collect_implicit_chunk_entries(u64::MAX, &[2], &[1], 1, Some((&[1], &[1])))
            .unwrap_err();
        assert!(err.to_string().contains("implicit chunk address"));
    }

    #[test]
    fn implicit_chunk_entries_reject_zero_chunk_extent() {
        let err = collect_implicit_chunk_entries(0, &[4], &[0], 1, None).unwrap_err();
        assert!(err.to_string().contains("zero extent"));
    }

    /// `first > last` in any dimension is an invalid selection.
    #[test]
    fn implicit_chunk_entries_reject_inverted_bounds() {
        let err =
            collect_implicit_chunk_entries(0, &[2], &[1], 1, Some((&[1], &[0]))).unwrap_err();
        assert!(err
            .to_string()
            .contains("implicit chunk selection bounds are invalid"));
    }

    #[test]
    fn single_chunk_entry_uses_origin_offsets() {
        let entry = single_chunk_entry(0x2000, 8192, 0, 3);
        assert_eq!(entry.address, 0x2000);
        assert_eq!(entry.size, 8192);
        assert_eq!(entry.filter_mask, 0);
        assert_eq!(entry.offsets, vec![0, 0, 0]);
    }
}