chroma_types/log.rs
1use crate::CollectionUuid;
2
3////////////////////////////////////// constants and variables /////////////////////////////////////
4
/// Builds the dirty-log path for the given host.
///
/// The result is the literal prefix `dirty-` immediately followed by `hostname`; the hostname
/// is appended verbatim, without any validation or escaping.
pub fn dirty_log_path_from_hostname(hostname: &str) -> String {
    ["dirty-", hostname].concat()
}
8
9//////////////////////////////////////////// DirtyMarker ///////////////////////////////////////////
10
11/// Markers for tracking collection compaction state changes.
12///
13/// DirtyMarker represents state transitions in the compaction lifecycle of collections.
14/// The enum is designed for forwards/backwards compatibility - new variants can be added
15/// and handled independently while maintaining compatibility with older code.
16#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
17// NOTE(rescrv): This is intentionally an enum for easy forwards/backwards compatibility. Add a
18// new variant, handle both variants, cycle logs, stop handling old variant.
19// TODO(rescrv): Dedupe with log-service crate.
20pub enum DirtyMarker {
21 /// Marks a collection as needing compaction due to new records.
22 #[serde(rename = "mark_dirty")]
23 MarkDirty {
24 /// The collection requiring compaction.
25 collection_id: CollectionUuid,
26 /// The position in the write-ahead log where this marker was created.
27 log_position: u64,
28 /// The total number of records in the collection.
29 num_records: u64,
30 /// The number of times this collection has been reinserted into the heap.
31 reinsert_count: u64,
32 /// The epoch time in microseconds when this collection was first marked dirty.
33 initial_insertion_epoch_us: u64,
34 },
35 /// Removes all compaction scheduling for a collection.
36 #[serde(rename = "purge")]
37 Purge {
38 /// The collection to purge from the compaction heap.
39 collection_id: CollectionUuid,
40 },
41 // A Cleared marker is a no-op. It exists so that a log consisting of mark-dirty markers that
42 // map onto purge markers will be cleared and can be erased.
43 /// A no-op marker used for log compaction.
44 ///
45 /// When a log contains mark-dirty markers that have been purged, those entries
46 /// can be replaced with Cleared markers to allow log truncation.
47 #[serde(rename = "clear")]
48 Cleared,
49}
50
51impl DirtyMarker {
52 /// The collection ID for a given dirty marker.
53 pub fn collection_id(&self) -> CollectionUuid {
54 match self {
55 DirtyMarker::MarkDirty { collection_id, .. } => *collection_id,
56 DirtyMarker::Purge { collection_id } => *collection_id,
57 DirtyMarker::Cleared => CollectionUuid::default(),
58 }
59 }
60
61 /// Increment any reinsert counter on the variant.
62 pub fn reinsert(&mut self) {
63 if let DirtyMarker::MarkDirty {
64 collection_id: _,
65 log_position: _,
66 num_records: _,
67 reinsert_count,
68 initial_insertion_epoch_us: _,
69 } = self
70 {
71 *reinsert_count = reinsert_count.saturating_add(1);
72 }
73 }
74}