Skip to main content

lsm_tree/table/block/
identity.rs

// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026-present, Structured World Foundation

//! Identity context threaded through the Block I/O API.
//!
//! Every call to [`crate::table::block::Block::write_into`] /
//! [`crate::table::block::Block::from_reader`] /
//! [`crate::table::block::Block::from_file`] carries a
//! `BlockIdentity` describing which block, of which table, with
//! which compression context. The Block layer uses this to
//! construct AAD (Additional Authenticated Data) for AEAD
//! encryption — see the AAD-bound wire format spec for the
//! cryptographic role each field plays.
//!
//! **Why a context struct vs. inline arguments.** The natural
//! alternative — passing `aad: &[u8]` directly to the Block API — got
//! a previous attempt into trouble: callers wrote `aad: &[]`
//! everywhere because composing the right AAD bytes at each call
//! site is fiddly and the type system couldn't enforce
//! correctness. With `BlockIdentity`, every call site contributes
//! its OWN local context (the writer/scanner/reader already knows
//! its table id, codec context, etc.) and the Block layer
//! computes AAD once, internally. Adding a new AAD-relevant field
//! later means adding it to `BlockIdentity` rather than chasing
//! down 90+ call sites.
//!
//! **Field requirements.** Production call sites SHOULD populate
//! every field with the real value from their local context. Test
//! call sites that don't exercise AAD-sensitive paths may use
//! `BlockIdentity::for_test`, which defaults `dict_id` and
//! `window_log` to zero.
//!
//! **Allowed zero exceptions in production code** (each individually
//! documented at the call site):
//!
//! - `table_id = 0` is allowed when reading a META block that
//!   itself CARRIES the `table_id` field — there's no way to
//!   know the id before the block is parsed (chicken-and-egg).
//!   Cross-store substitution is still prevented because the
//!   meta payload's own id field is part of the verified body.
//!
//! **Neither block position nor tree id is part of the identity.**
//! AAD binds `table_id` plus the codec context, but never a per-block
//! byte offset or the owning tree id. Offset-independent AAD lets a
//! writer encrypt every block of a table in parallel (the on-disk
//! offset isn't known until placement). The tree id is a
//! process-ephemeral counter, not durable across reopen, so binding it
//! would fail AEAD verify after a restart; cross-tree substitution is
//! instead prevented by per-tree key isolation (a tree's blocks decrypt
//! only under its own key). The cost of dropping the offset is that two
//! blocks of the SAME table are interchangeable at the AEAD layer;
//! block-position integrity is supplied one layer up by the
//! authenticated index (key-range -> offset) plus the structural file
//! layout, not by per-block AEAD.

use crate::table::block::BlockType;

58/// Identifies a block for encryption AAD and audit purposes.
59///
60/// Carried through the Block I/O API instead of separate
61/// `block_type` / `aad: &[u8]` arguments — see the module
62/// docstring for the rationale.
63#[derive(Clone, Copy, Debug)]
64pub struct BlockIdentity {
65    /// Identifier of the owning store unit — for SST blocks this is
66    /// the per-tree [`crate::TableId`] (a `u64` alias); for blob
67    /// files it is the `crate::vlog::BlobFileId` (also a `u64`
68    /// alias). Bound into the AAD so a block cannot be substituted for
69    /// one from a different table. The owning tree id is deliberately
70    /// NOT part of the identity (it is process-ephemeral, not durable
71    /// across reopen); cross-tree substitution is prevented by per-tree
72    /// key isolation instead. See the module docstring.
73    pub table_id: u64,
74
75    /// Whether this is a Data, Filter, Index, or Meta block.
76    /// Was previously a separate `block_type: BlockType`
77    /// argument on the Block API; now lives here so the call site
78    /// only computes one context value.
79    pub block_type: BlockType,
80
81    /// Zstd dictionary id used for this block, or `0` if no
82    /// dictionary applies. Binds the block to a specific
83    /// dictionary version so that decompressing with a different
84    /// dictionary (whether by mistake or by attack) surfaces as
85    /// an AEAD authentication failure rather than as silently
86    /// wrong plaintext.
87    pub dict_id: u32,
88
89    /// Zstd `window_log` advertised in the frame header, or `0` if
90    /// no zstd compression applies. Binds the block to a
91    /// specific decompression-memory budget; attempts to substitute
92    /// a block with a different `window_log` (a known "window bomb"
93    /// vector) fail AEAD authentication.
94    pub window_log: u8,
95}
96
97impl BlockIdentity {
98    /// Test-only constructor with conservative defaults for the
99    /// compression-context fields (`dict_id = 0`, `window_log = 0`).
100    /// Use this in test fixtures that don't exercise zstd
101    /// dictionary or window-budget paths; in production code,
102    /// populate every field explicitly from the local context.
103    #[cfg(test)]
104    #[must_use]
105    pub(crate) const fn for_test(table_id: u64, block_type: BlockType) -> Self {
106        Self {
107            table_id,
108            block_type,
109            dict_id: 0,
110            window_log: 0,
111        }
112    }
113}