Skip to main content

oxgraph_db/
state.rs

1//! Canonical record value types and the id-allocator watermark.
2//!
3//! This module owns the durable record VALUE types the base and overlay both
4//! materialize ([`ElementRecord`], [`RelationRecord`], [`IncidenceRecord`],
5//! [`PropertySubject`]) plus the nine-value monotonic id allocator watermark
6//! ([`NextIds`]). The live read surface is the merged overlay-over-base view in
7//! [`crate::overlay`].
8//!
9//! # Performance
10//!
11//! `perf: unspecified`; this module defines value types and `O(1)` watermark
12//! arithmetic.
13
14use std::{collections::BTreeSet, sync::Arc};
15
16use serde::{Deserialize, Serialize};
17
18use crate::{
19    ElementId, IncidenceId, IndexId, LabelId, ProjectionId, PropertyKeyId, RelationId,
20    RelationTypeId, RoleId, backing::DbHeader, catalog::PropertyFamily,
21};
22
23/// Shared, copy-on-write set of labels carried by one record.
24///
25/// Wraps an `Arc<BTreeSet<LabelId>>`. Cloning a record — and therefore the
26/// overlay delta maps that hold records, which a fresh writer seeds by cloning
27/// its parent's — shares the same allocation instead of deep-copying the set.
28/// The first [`Self::insert`] into a SHARED set copies it once
29/// ([`Arc::make_mut`]); a uniquely owned set mutates in place. The serde
30/// representation is the plain label set, unchanged from the owned form.
31///
32/// # Performance
33///
34/// `clone` is `O(1)` (an `Arc` increment). [`Self::insert`] is `O(log labels)`
35/// plus a one-time `O(labels)` copy when the set is still shared
36/// (copy-on-write). [`Self::contains`] is `O(log labels)`; [`Self::len`] and
37/// [`Self::is_empty`] are `O(1)`.
38#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
39#[serde(transparent)]
40pub struct LabelSet(Arc<BTreeSet<LabelId>>);
41
42impl LabelSet {
43    /// Iterates the labels in ascending order.
44    ///
45    /// # Performance
46    ///
47    /// Creating the iterator is `O(1)`; a full walk is `O(labels)`.
48    pub fn iter(&self) -> impl Iterator<Item = LabelId> + '_ {
49        self.0.iter().copied()
50    }
51
52    /// Returns whether `label` is present.
53    ///
54    /// # Performance
55    ///
56    /// This method is `O(log labels)`.
57    #[must_use]
58    pub fn contains(&self, label: &LabelId) -> bool {
59        self.0.contains(label)
60    }
61
62    /// Returns the number of labels.
63    ///
64    /// # Performance
65    ///
66    /// This method is `O(1)`.
67    #[must_use]
68    pub fn len(&self) -> usize {
69        self.0.len()
70    }
71
72    /// Returns whether the set is empty.
73    ///
74    /// # Performance
75    ///
76    /// This method is `O(1)`.
77    #[must_use]
78    pub fn is_empty(&self) -> bool {
79        self.0.is_empty()
80    }
81
82    /// Inserts `label`, returning whether it was newly added.
83    ///
84    /// A no-op insert (the label is already present) never copies, so
85    /// re-asserting a label on a record whose set is still shared with a
86    /// parent overlay stays `O(log labels)`.
87    ///
88    /// # Performance
89    ///
90    /// This method is `O(log labels)`, plus a one-time `O(labels)`
91    /// copy-on-write when the set is shared and the label is new.
92    pub fn insert(&mut self, label: LabelId) -> bool {
93        if self.0.contains(&label) {
94            return false;
95        }
96        Arc::make_mut(&mut self.0).insert(label)
97    }
98}
99
100impl FromIterator<LabelId> for LabelSet {
101    /// Collects labels into a freshly owned (unshared) set.
102    ///
103    /// # Performance
104    ///
105    /// This function is `O(labels × log labels)`.
106    fn from_iter<I: IntoIterator<Item = LabelId>>(iter: I) -> Self {
107        Self(Arc::new(iter.into_iter().collect()))
108    }
109}
110
111/// One visible canonical element.
112///
113/// # Performance
114///
115/// Cloning is `O(1)`: the label set is shared copy-on-write ([`LabelSet`]).
116#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
117pub struct ElementRecord {
118    /// Stable element identifier.
119    pub id: ElementId,
120    /// Labels assigned to this element.
121    pub labels: LabelSet,
122}
123
124/// One visible canonical relation.
125///
126/// # Performance
127///
128/// Cloning is `O(1)`: the label set is shared copy-on-write ([`LabelSet`]).
129#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
130pub struct RelationRecord {
131    /// Stable relation identifier.
132    pub id: RelationId,
133    /// Optional relation type.
134    pub relation_type: Option<RelationTypeId>,
135    /// Labels assigned to this relation.
136    pub labels: LabelSet,
137}
138
139/// One visible incidence in canonical database coordinates.
140///
141/// # Performance
142///
143/// Copying and comparing this record are `O(1)`.
144#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
145pub struct IncidenceRecord {
146    /// Stable incidence id.
147    pub id: IncidenceId,
148    /// Stable relation id containing the incidence.
149    pub relation: RelationId,
150    /// Stable element id participating in the relation.
151    pub element: ElementId,
152    /// Structural role of the incidence.
153    pub role: RoleId,
154}
155
156/// Subject that can own properties.
157///
158/// # Invariant
159///
160/// The variant declaration order (`Element`, `Relation`, `Incidence`) is
161/// load-bearing: the derived `Ord` ranks them in that order, and that ranking
162/// MUST match the wire kind tags `0`/`1`/`2` produced by the wire
163/// `encode_subject` codec. The frozen base writes property records in
164/// `BTreeMap<PropertySubject, …>` order — sorted by `(subject_kind, subject_id,
165/// key)` — and the read path materializes them back into that same keyed order;
166/// reordering these variants would silently desynchronize the two. See the wire
167/// `encode_subject` docs for the full contract and the open-time assertion in
168/// `backing` that guards it.
169///
170/// # Performance
171///
172/// Copying, comparing, ordering, and hashing are `O(1)`.
173#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
174pub enum PropertySubject {
175    /// Element property subject.
176    Element(ElementId),
177    /// Relation property subject.
178    Relation(RelationId),
179    /// Incidence property subject.
180    Incidence(IncidenceId),
181}
182
183impl PropertySubject {
184    /// Returns the property family for this subject.
185    ///
186    /// # Performance
187    ///
188    /// This function is `O(1)`.
189    #[must_use]
190    pub const fn family(self) -> PropertyFamily {
191        match self {
192            Self::Element(_id) => PropertyFamily::Element,
193            Self::Relation(_id) => PropertyFamily::Relation,
194            Self::Incidence(_id) => PropertyFamily::Incidence,
195        }
196    }
197}
198
199impl From<ElementId> for PropertySubject {
200    /// Wraps an element id as an element property subject.
201    ///
202    /// # Performance
203    ///
204    /// This function is `O(1)`.
205    fn from(id: ElementId) -> Self {
206        Self::Element(id)
207    }
208}
209
210impl From<RelationId> for PropertySubject {
211    /// Wraps a relation id as a relation property subject.
212    ///
213    /// # Performance
214    ///
215    /// This function is `O(1)`.
216    fn from(id: RelationId) -> Self {
217        Self::Relation(id)
218    }
219}
220
221impl From<IncidenceId> for PropertySubject {
222    /// Wraps an incidence id as an incidence property subject.
223    ///
224    /// # Performance
225    ///
226    /// This function is `O(1)`.
227    fn from(id: IncidenceId) -> Self {
228        Self::Incidence(id)
229    }
230}
231
232/// The nine monotonic id allocators, captured for the store header and every
233/// dirty commit's watermark op in a fixed order. Recovery folds the elementwise
234/// maximum of the base header and every replayed frame's watermark so a
235/// canonical id is never reissued across a crash.
236///
237/// # Performance
238///
239/// Copying is `O(1)`.
240#[derive(Clone, Copy, Debug, Eq, PartialEq)]
241pub(crate) struct NextIds {
242    /// Next element id candidate.
243    pub(crate) element: ElementId,
244    /// Next relation id candidate.
245    pub(crate) relation: RelationId,
246    /// Next incidence id candidate.
247    pub(crate) incidence: IncidenceId,
248    /// Next role id candidate.
249    pub(crate) role: RoleId,
250    /// Next label id candidate.
251    pub(crate) label: LabelId,
252    /// Next relation-type id candidate.
253    pub(crate) relation_type: RelationTypeId,
254    /// Next property-key id candidate.
255    pub(crate) property_key: PropertyKeyId,
256    /// Next projection id candidate.
257    pub(crate) projection: ProjectionId,
258    /// Next index id candidate.
259    pub(crate) index: IndexId,
260}
261
262impl NextIds {
263    /// The watermark of an empty store: every allocator starts at `1`, so the
264    /// `0` sentinel ([`crate::wire::RELATION_TYPE_NONE`]) can never collide with
265    /// a real id.
266    ///
267    /// # Performance
268    ///
269    /// `perf: unspecified`; this is a compile-time constant.
270    pub(crate) const INITIAL: Self = Self {
271        element: ElementId::new(1),
272        relation: RelationId::new(1),
273        incidence: IncidenceId::new(1),
274        role: RoleId::new(1),
275        label: LabelId::new(1),
276        relation_type: RelationTypeId::new(1),
277        property_key: PropertyKeyId::new(1),
278        projection: ProjectionId::new(1),
279        index: IndexId::new(1),
280    };
281
282    /// Reads the watermark out of a base header's nine `next_*` allocator
283    /// snapshots.
284    ///
285    /// # Performance
286    ///
287    /// This function is `O(1)`.
288    pub(crate) const fn from_header(header: &DbHeader) -> Self {
289        Self {
290            element: ElementId::new(header.next_element),
291            relation: RelationId::new(header.next_relation),
292            incidence: IncidenceId::new(header.next_incidence),
293            role: RoleId::new(header.next_role),
294            label: LabelId::new(header.next_label),
295            relation_type: RelationTypeId::new(header.next_relation_type),
296            property_key: PropertyKeyId::new(header.next_property_key),
297            projection: ProjectionId::new(header.next_projection),
298            index: IndexId::new(header.next_index),
299        }
300    }
301
302    /// Returns the elementwise maximum of two watermarks, allocator by
303    /// allocator. Recovery folds this over the base header and every replayed
304    /// frame so the recovered watermark sits at or above every id ever issued —
305    /// canonical ids are never reused.
306    ///
307    /// # Performance
308    ///
309    /// This function is `O(1)`.
310    pub(crate) fn elementwise_max(self, other: Self) -> Self {
311        Self {
312            element: self.element.max(other.element),
313            relation: self.relation.max(other.relation),
314            incidence: self.incidence.max(other.incidence),
315            role: self.role.max(other.role),
316            label: self.label.max(other.label),
317            relation_type: self.relation_type.max(other.relation_type),
318            property_key: self.property_key.max(other.property_key),
319            projection: self.projection.max(other.projection),
320            index: self.index.max(other.index),
321        }
322    }
323}