Skip to main content

nodedb_array/codec/
limits.rs

// SPDX-License-Identifier: Apache-2.0

// Defensive size caps for codec decoders.
//
// Every length field read from segment bytes drives an allocation or a
// loop. A corrupted segment can declare values up to `u32::MAX` and trick
// the decoder into allocating tens of GB or running for billions of
// iterations. These caps reject impossibly large values as
// `SegmentCorruption` errors before the allocation happens.
//
// The caps are sized comfortably above legitimate workloads:
//   * Memtable flush threshold is 4096 cells; ~1M cells per tile is
//     already 250x the typical case, and the 8M `MAX_CELLS_PER_TILE` cap
//     sits well above that.
//   * Schemas have <=64 dims and <=256 attrs in practice.
//   * Dictionary cardinality is bounded by cell count.
//
// If a real workload ever needs more, the cap is raised in this one
// place and the rationale stays auditable.
19
20use crate::error::{ArrayError, ArrayResult};
21
/// Maximum cells in a single sparse tile.
///
/// Several other caps in this file are defined in terms of this value, so
/// raising it raises them as well.
pub const MAX_CELLS_PER_TILE: usize = 8_000_000;

/// Maximum coordinate axes (dimensions) per tile.
pub const MAX_AXES_PER_TILE: usize = 1_024;

/// Maximum attribute columns per tile.
pub const MAX_ATTRS_PER_TILE: usize = 1_024;

/// Maximum distinct values in a per-axis dictionary.
///
/// Dictionary cardinality is bounded by cell count, so this cap is derived
/// from [`MAX_CELLS_PER_TILE`] rather than repeating the literal.
pub const MAX_DICT_CARDINALITY: usize = MAX_CELLS_PER_TILE;

/// Maximum runs in an RLE-encoded index stream.
pub const MAX_RLE_RUNS: usize = 8_000_000;

/// Maximum length of a single RLE run (bounded by tile cell count).
pub const MAX_RLE_RUN_LEN: usize = MAX_CELLS_PER_TILE;

/// Maximum entries in any column codec output (timestamps, surrogates, attrs).
pub const MAX_COLUMN_ENTRIES: usize = MAX_CELLS_PER_TILE;
42
43/// Reject a length value that exceeds `cap`. Used everywhere a `usize`
44/// length is decoded from segment bytes.
45pub fn check_decoded_size(value: usize, cap: usize, what: &str) -> ArrayResult<()> {
46    if value > cap {
47        return Err(ArrayError::SegmentCorruption {
48            detail: format!(
49                "decoded {what} = {value} exceeds hard cap {cap} (segment likely corrupt)"
50            ),
51        });
52    }
53    Ok(())
54}