Skip to main content

lsm_tree/
seqno.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2024-present, fjall-rs
3// Copyright (c) 2026-present, Structured World Foundation
4
5use alloc::sync::Arc;
6use core::fmt::Debug;
7use core::sync::atomic::Ordering::{AcqRel, Acquire, Release};
8
9use crate::SeqNo;
10use portable_atomic::AtomicU64;
11
12/// The maximum allowed sequence number value.
13///
14/// The MSB (`0x8000_0000_0000_0000`) is reserved for internal use by the
15/// transaction layer, so all sequence numbers must be strictly less than
16/// this boundary. Values returned by [`SequenceNumberGenerator::next`]
17/// must be at most `MAX_SEQNO - 1` (`0x7FFF_FFFF_FFFF_FFFE`), leaving
18/// room for `seqno + 1` operations used internally by the engine.
19pub const MAX_SEQNO: SeqNo = 0x7FFF_FFFF_FFFF_FFFF;
20
21/// Trait for custom sequence number generation.
22///
23/// Implementations must be thread-safe and provide atomic operations
24/// for sequence number management.
25///
26/// # Invariants
27///
28/// Implementors **must** respect the following invariants, which are
29/// relied upon by the storage engine (e.g., for MVCC and transactions):
30///
31/// - The most-significant bit (MSB) of a sequence number is reserved
32///   for internal use by the transaction layer.
33/// - All sequence numbers produced by this generator **must** therefore
34///   be strictly less than `0x8000_0000_0000_0000`.
35/// - Calls to [`SequenceNumberGenerator::next`] on a given generator
36///   instance must return monotonically increasing values (each `next`
37///   returns a value that is strictly greater than any value previously
38///   returned by `next` on the same instance) until the reserved MSB
39///   boundary is reached.
40/// - In practice, this means `next`-generated sequence numbers are
41///   unique for the lifetime of the generator (modulo wrap-around, which
42///   must not occur within the reserved MSB range).
43///
44/// Implementors are also responsible for ensuring that [`Self::get`] does not
45/// return values that violate these invariants, and that [`Self::set`] and
46/// [`Self::fetch_max`] do not violate them (e.g., by setting the counter to a
47/// value at or above `0x8000_0000_0000_0000`, or by moving the counter
48/// backwards such that subsequent calls to [`Self::next`] would observe
49/// non-monotonic sequence numbers).
50///
51/// The [`Self::set`] method is a special-case escape hatch intended for use
52/// during initialization or recovery. It may move the counter forwards
53/// or backwards and therefore **may** cause subsequent calls to
54/// [`Self::next`] to observe non-monotonic sequence numbers. This is
55/// permitted; callers are responsible for using [`Self::fetch_max`] (or other
56/// application-level logic) after `set` if they need to reestablish any
57/// monotonicity guarantees.
58///
59/// # Supertraits
60///
61/// `UnwindSafe + RefUnwindSafe` are required because generators may be
62/// captured inside `catch_unwind` (e.g., during ingestion error recovery).
63///
64/// The default implementation is [`SequenceNumberCounter`], which uses
65/// an `Arc<AtomicU64>` for lock-free atomic operations. It enforces the
66/// MSB boundary in `new`, `set`, and `fetch_max` (via assert/clamp),
67/// and panics in `next` if the counter reaches the reserved range.
68pub trait SequenceNumberGenerator:
69    Send + Sync + Debug + core::panic::UnwindSafe + core::panic::RefUnwindSafe + 'static
70{
71    /// Gets the next sequence number, atomically incrementing the counter.
72    ///
73    /// This must:
74    /// - Return a value strictly greater than any value previously returned
75    ///   by `next` on the same generator instance (monotonic increase).
76    /// - Never return a value greater than `MAX_SEQNO - 1`, leaving room
77    ///   for internal `seqno + 1` operations that must remain strictly
78    ///   less than [`MAX_SEQNO`].
79    #[must_use]
80    fn next(&self) -> SeqNo;
81
82    /// Gets the current sequence number without incrementing.
83    ///
84    /// This should be consistent with the value that will be used as the
85    /// basis for the next call to [`Self::next`], and must not itself alter the
86    /// monotonicity guarantees.
87    #[must_use]
88    fn get(&self) -> SeqNo;
89
90    /// Sets the sequence number to the given value.
91    ///
92    /// Implementations must ensure that `value` is strictly less than
93    /// `0x8000_0000_0000_0000` (the MSB is reserved).
94    ///
95    /// This method is intended for direct overrides (e.g., during
96    /// initialization or recovery) and is **not** required to preserve
97    /// monotonicity with respect to values previously returned by
98    /// [`Self::next`]. In particular, calling `set` with a value lower than a
99    /// previously returned `next` value may cause subsequent calls to
100    /// [`Self::next`] to observe non-monotonic sequence numbers. This is
101    /// allowed; callers that need to advance the sequence number in a
102    /// monotonic fashion should prefer [`Self::fetch_max`] instead of `set`.
103    fn set(&self, value: SeqNo);
104
105    /// Atomically updates the sequence number to the maximum of
106    /// the current value and the given value.
107    ///
108    /// The resulting stored value must:
109    /// - Remain strictly less than `0x8000_0000_0000_0000`.
110    /// - Preserve the monotonicity guarantees expected by [`Self::next`].
111    fn fetch_max(&self, value: SeqNo);
112}
113
114/// A shared, cloneable sequence number generator.
115pub type SharedSequenceNumberGenerator = Arc<dyn SequenceNumberGenerator>;
116
117/// Thread-safe sequence number generator
118///
119/// # Examples
120///
121/// ```
122/// # use lsm_tree::{AbstractTree, Config, SequenceNumberCounter};
123/// #
124/// # let path = tempfile::tempdir()?;
125///
126/// let seqno = SequenceNumberCounter::default();
127/// let visible_seqno = SequenceNumberCounter::default();
128///
129/// let tree = Config::new(path, seqno.clone(), visible_seqno.clone()).open()?;
130///
131/// // Do some inserts...
132/// for k in [b"a", b"b", b"c"] {
133///     let batch_seqno = seqno.next();
134///     tree.insert(k, "abc", batch_seqno);
135///     visible_seqno.fetch_max(batch_seqno + 1);
136/// }
137///
138/// // Create a batch
139/// let batch_seqno = seqno.next();
140/// tree.remove("a".as_bytes(), batch_seqno);
141/// tree.remove("b".as_bytes(), batch_seqno);
142/// tree.remove("c".as_bytes(), batch_seqno);
143/// visible_seqno.fetch_max(batch_seqno + 1);
144/// #
145/// # assert!(tree.is_empty(visible_seqno.get(), None)?);
146/// # Ok::<(), lsm_tree::Error>(())
147/// ```
148#[derive(Clone, Default, Debug)]
149pub struct SequenceNumberCounter(Arc<AtomicU64>);
150
151impl SequenceNumberCounter {
152    /// Creates a new counter, setting it to some previous value.
153    ///
154    /// # Panics
155    ///
156    /// Panics if `prev > MAX_SEQNO` (reserved MSB range).
157    ///
158    /// Note: `prev == MAX_SEQNO` is allowed but means the counter is
159    /// exhausted — any subsequent call to [`next()`](Self::next) will panic.
160    #[must_use]
161    pub fn new(prev: SeqNo) -> Self {
162        assert!(
163            prev <= MAX_SEQNO,
164            "Sequence number must not use the reserved MSB"
165        );
166        Self(Arc::new(AtomicU64::new(prev)))
167    }
168
169    /// Allocates and returns the next sequence number, atomically
170    /// incrementing the internal counter.
171    ///
172    /// The returned value is the counter's value **before** the increment
173    /// (i.e., pre-increment / fetch-then-add semantics).
174    ///
175    /// # Panics
176    ///
177    /// Panics if the current value is already [`MAX_SEQNO`] (i.e., when
178    /// advancing past it would enter the reserved MSB range).
179    #[must_use]
180    pub fn next(&self) -> SeqNo {
181        <Self as SequenceNumberGenerator>::next(self)
182    }
183
184    /// Returns the current internal counter value without incrementing.
185    ///
186    /// This is the value that the next call to [`next()`](Self::next) will
187    /// return (and then advance past).
188    #[must_use]
189    pub fn get(&self) -> SeqNo {
190        <Self as SequenceNumberGenerator>::get(self)
191    }
192
193    /// Sets the sequence number to the given value.
194    ///
195    /// # Panics
196    ///
197    /// Panics if `value > MAX_SEQNO` (reserved MSB range).
198    pub fn set(&self, value: SeqNo) {
199        <Self as SequenceNumberGenerator>::set(self, value);
200    }
201
202    /// Atomically updates the sequence number to the maximum of
203    /// the current value and the provided value.
204    pub fn fetch_max(&self, value: SeqNo) {
205        <Self as SequenceNumberGenerator>::fetch_max(self, value);
206    }
207}
208
209impl SequenceNumberGenerator for SequenceNumberCounter {
210    fn next(&self) -> SeqNo {
211        // We use fetch_update (CAS loop) instead of fetch_add to ensure the
212        // internal counter never enters the reserved MSB range. With fetch_add,
213        // a caught panic (via catch_unwind, which the trait requires via
214        // UnwindSafe) would leave the counter permanently stuck at an invalid
215        // value. fetch_update only stores the new value on success.
216        match self.0.fetch_update(AcqRel, Acquire, |current| {
217            if current >= MAX_SEQNO {
218                None
219            } else {
220                Some(current + 1)
221            }
222        }) {
223            Ok(seqno) => seqno,
224            Err(current) => panic!("Ran out of sequence numbers (current: {current})"),
225        }
226    }
227
228    fn get(&self) -> SeqNo {
229        self.0.load(Acquire)
230    }
231
232    fn set(&self, value: SeqNo) {
233        assert!(
234            value <= MAX_SEQNO,
235            "Sequence number must not use the reserved MSB"
236        );
237        self.0.store(value, Release);
238    }
239
240    fn fetch_max(&self, value: SeqNo) {
241        // Clamp to the maximum allowed sequence number to avoid storing
242        // a value in the reserved MSB range.
243        let clamped = value.min(MAX_SEQNO);
244        self.0.fetch_max(clamped, AcqRel);
245    }
246}
247
248// Orphan rules satisfied: SequenceNumberGenerator is a local trait,
249// so Arc<dyn SequenceNumberGenerator> is covered by a local type parameter.
250impl From<SequenceNumberCounter> for SharedSequenceNumberGenerator {
251    fn from(counter: SequenceNumberCounter) -> Self {
252        Arc::new(counter)
253    }
254}
255
256#[cfg(test)]
257mod tests;