crabka_remote_storage/lib.rs
1//! KIP-405 tiered-storage SPI and reference implementations for Crabka.
2//!
3//! This crate is the foundation layer for Crabka's tiered storage: it
4//! defines the two plugin SPIs and the data model exchanged across them,
5//! and ships the two reference implementations that the rest of the
6//! tiered-storage stack is built and tested against. It mirrors the shapes
7//! of Apache Kafka's `storage-api` module
8//! (`org.apache.kafka.server.log.remote.storage`).
9//!
10//! ## What this crate provides
11//!
12//! - [`RemoteStorageManager`] — copy / fetch / delete of segment data and
13//! indexes to and from the remote tier.
14//! - [`RemoteLogMetadataManager`] — persistence and querying of
15//! remote-segment metadata, with a strict lifecycle state machine.
16//! - The data model: [`TopicIdPartition`], [`RemoteLogSegmentId`],
17//! [`RemoteLogSegmentMetadata`] / [`RemoteLogSegmentMetadataUpdate`],
18//! [`RemoteLogSegmentState`], [`LogSegmentData`], [`IndexType`],
19//! [`CustomMetadata`], and the partition-delete lifecycle
20//! ([`RemotePartitionDeleteMetadata`] / [`RemotePartitionDeleteState`]).
21//! - [`LocalTieredStorage`] — a filesystem [`RemoteStorageManager`].
22//! - [`InmemoryRemoteLogMetadataManager`] — a process-memory
23//! [`RemoteLogMetadataManager`].
24//!
25//! ## Boundary with the broker
26//!
27//! This crate is the SPI and reference-implementation layer. Broker-specific
28//! behavior such as segment-copy scheduling, `Fetch` remote reads,
29//! local-vs-remote retention policy, and topic config parsing lives in the
30//! broker.
31//!
32//! The SPIs are intentionally **synchronous**: they mirror Kafka's blocking
33//! `RemoteStorageManager` / `RemoteLogMetadataManager` interfaces, and the
34//! broker drives them from a thread pool by wrapping calls in
35//! `spawn_blocking`. Keeping them sync keeps this crate free of the async
36//! runtime.
37//!
38//! ## Filesystem-backed remote tier
39//!
40//! ```no_run
41//! use bytes::Bytes;
42//! use crabka_remote_storage::{
43//! IndexType, LocalTieredStorage, LogSegmentData, RemoteLogSegmentId,
44//! RemoteLogSegmentMetadata, RemoteLogSegmentState, RemoteStorageManager, TopicIdPartition,
45//! };
46//! use std::collections::BTreeMap;
47//! use std::path::PathBuf;
48//! use uuid::Uuid;
49//!
50//! # fn run() -> Result<(), Box<dyn std::error::Error>> {
51//! let storage = LocalTieredStorage::new(PathBuf::from("/var/lib/crabka-remote"));
52//! let topic_partition = TopicIdPartition::new(Uuid::new_v4(), "orders", 0);
53//! let segment_id = RemoteLogSegmentId::new(topic_partition, Uuid::new_v4());
54//! let mut leader_epochs = BTreeMap::new();
55//! leader_epochs.insert(0, 0);
56//! let metadata = RemoteLogSegmentMetadata::new(
57//! segment_id,
58//! 0,
59//! 999,
60//! 1_713_000_000_000,
61//! 1,
62//! 1_713_000_000_000,
63//! 1_048_576,
64//! RemoteLogSegmentState::CopySegmentStarted,
65//! leader_epochs,
66//! )?;
67//!
68//! // The broker fills these paths from a closed local log segment.
69//! let segment = LogSegmentData {
70//! log_segment: PathBuf::from("/var/lib/crabka/orders-0/00000000000000000000.log"),
71//! offset_index: PathBuf::from("/var/lib/crabka/orders-0/00000000000000000000.index"),
72//! time_index: PathBuf::from("/var/lib/crabka/orders-0/00000000000000000000.timeindex"),
73//! transaction_index: None,
74//! producer_snapshot_index: None,
75//! leader_epoch_index: Bytes::new(),
76//! };
77//! let _custom_metadata = storage.copy_log_segment_data(&metadata, &segment)?;
78//! let bytes = storage.fetch_index(&metadata, IndexType::Offset)?;
79//! # let _ = bytes;
80//! # Ok(())
81//! # }
82//! ```
83
84#![doc(html_root_url = "https://docs.rs/crabka-remote-storage/0.3.5")]
85
86mod cache;
87pub mod dump;
88mod error;
89mod inmemory;
90mod local;
91mod metadata;
92mod metadata_manager;
93mod s3;
94mod storage_manager;
95
96pub use dump::{PartitionDump, RlmmCacheDump};
97pub use error::RemoteStorageError;
98pub use inmemory::InmemoryRemoteLogMetadataManager;
99pub use local::LocalTieredStorage;
100pub use metadata::{
101 CustomMetadata, RemoteLogSegmentId, RemoteLogSegmentMetadata, RemoteLogSegmentMetadataUpdate,
102 RemoteLogSegmentState, RemotePartitionDeleteMetadata, RemotePartitionDeleteState,
103 TopicIdPartition,
104};
105pub use metadata_manager::RemoteLogMetadataManager;
106pub use s3::{
107 DEFAULT_MULTIPART_CHUNK_SIZE, DEFAULT_MULTIPART_THRESHOLD, S3Config, S3RemoteStorage,
108};
109pub use storage_manager::{IndexType, LogSegmentData, RemoteStorageManager};