Skip to main content

common/storage/
config.rs

1//! Storage configuration types.
2//!
3//! This module provides configuration structures for different storage backends,
4//! allowing services to configure storage type (InMemory or SlateDB) via config files
5//! or environment variables.
6
7use serde::{Deserialize, Serialize};
8
/// Top-level storage configuration.
///
/// Serialized as an internally tagged enum: the `type` field selects the
/// variant (`InMemory` or `SlateDb`); for `SlateDb` the remaining fields are
/// those of [`SlateDbStorageConfig`].
///
/// Defaults to `SlateDb` with path `data` on a local object store rooted at
/// the relative directory `.data`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type")]
pub enum StorageConfig {
    /// In-memory storage; the variant carries no configuration fields.
    InMemory,
    /// SlateDB-backed storage, configured by [`SlateDbStorageConfig`].
    SlateDb(SlateDbStorageConfig),
}
18
19impl Default for StorageConfig {
20    fn default() -> Self {
21        StorageConfig::SlateDb(SlateDbStorageConfig {
22            path: "data".to_string(),
23            object_store: ObjectStoreConfig::Local(LocalObjectStoreConfig {
24                path: ".data".to_string(),
25            }),
26            settings_path: None,
27            block_cache: None,
28        })
29    }
30}
31
/// SlateDB-specific configuration.
///
/// This is the payload of [`StorageConfig::SlateDb`]. `path` and
/// `object_store` carry no serde default and must be present when
/// deserializing; `settings_path` and `block_cache` are optional and are
/// left out of serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct SlateDbStorageConfig {
    /// Path prefix for SlateDB data in the object store.
    pub path: String,

    /// Object store provider configuration.
    pub object_store: ObjectStoreConfig,

    /// Optional path to SlateDB settings file (TOML/YAML/JSON).
    ///
    /// If not provided, uses SlateDB's `Settings::load()` which checks for
    /// `SlateDb.toml`, `SlateDb.json`, `SlateDb.yaml` in the working directory
    /// and merges any `SLATEDB_` prefixed environment variables.
    ///
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub settings_path: Option<String>,

    /// Optional block cache for SST block lookups.
    ///
    /// When configured, reduces object store reads by caching hot blocks
    /// in memory and/or on local disk.
    ///
    /// Falls back to `None` when the key is absent and is skipped during
    /// serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub block_cache: Option<BlockCacheConfig>,
}
56
/// Block cache configuration for SlateDB.
///
/// Internally tagged: the `type` field names the cache implementation and
/// the remaining fields belong to that implementation's config struct.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type")]
pub enum BlockCacheConfig {
    /// Two-tier cache using foyer: in-memory + on-disk (ideally NVMe).
    FoyerHybrid(FoyerHybridCacheConfig),
}
64
/// Configuration for foyer's hybrid (memory + disk) block cache.
///
/// All three fields are required; none has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FoyerHybridCacheConfig {
    /// In-memory cache capacity in bytes.
    pub memory_capacity: u64,
    /// On-disk cache capacity in bytes.
    pub disk_capacity: u64,
    /// Path for the on-disk cache directory.
    pub disk_path: String,
}
75
76impl Default for SlateDbStorageConfig {
77    fn default() -> Self {
78        Self {
79            path: "data".to_string(),
80            object_store: ObjectStoreConfig::default(),
81            settings_path: None,
82            block_cache: None,
83        }
84    }
85}
86
87impl StorageConfig {
88    /// Returns a new config with the path modified by appending a suffix.
89    ///
90    /// For SlateDB storage, appends the suffix to the path (e.g., "data" -> "data/0").
91    /// For InMemory storage, returns a clone unchanged.
92    pub fn with_path_suffix(&self, suffix: &str) -> Self {
93        match self {
94            StorageConfig::InMemory => StorageConfig::InMemory,
95            StorageConfig::SlateDb(config) => StorageConfig::SlateDb(SlateDbStorageConfig {
96                path: format!("{}/{}", config.path, suffix),
97                object_store: config.object_store.clone(),
98                settings_path: config.settings_path.clone(),
99                block_cache: config.block_cache.clone(),
100            }),
101        }
102    }
103}
104
/// Object store provider configuration for SlateDB.
///
/// Internally tagged: the `type` field selects the provider (`InMemory`,
/// `Aws` or `Local`). `InMemory` is the `Default` variant.
#[derive(Default, Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type")]
pub enum ObjectStoreConfig {
    /// In-memory object store (useful for testing and development).
    #[default]
    InMemory,

    /// AWS S3 object store.
    Aws(AwsObjectStoreConfig),

    /// Local filesystem object store.
    Local(LocalObjectStoreConfig),
}
119
/// AWS S3 object store configuration.
///
/// Payload of [`ObjectStoreConfig::Aws`]; both fields are required.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AwsObjectStoreConfig {
    /// AWS region (e.g., "us-west-2").
    pub region: String,

    /// S3 bucket name.
    pub bucket: String,
}
129
/// Local filesystem object store configuration.
///
/// Payload of [`ObjectStoreConfig::Local`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct LocalObjectStoreConfig {
    /// Path to the local directory for storage.
    pub path: String,
}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a YAML document into a [`StorageConfig`], panicking on invalid input.
    #[track_caller]
    fn parse(yaml: &str) -> StorageConfig {
        serde_yaml::from_str(yaml).unwrap()
    }

    /// Unwraps the `SlateDb` payload, panicking with `message` for any other variant.
    #[track_caller]
    fn expect_slatedb(config: StorageConfig, message: &str) -> SlateDbStorageConfig {
        match config {
            StorageConfig::SlateDb(slate_config) => slate_config,
            StorageConfig::InMemory => panic!("{}", message),
        }
    }

    /// Shorthand for a local object store rooted at `path`.
    fn local_store(path: &str) -> ObjectStoreConfig {
        ObjectStoreConfig::Local(LocalObjectStoreConfig {
            path: path.to_string(),
        })
    }

    #[test]
    fn should_default_to_slatedb_with_local_data_dir() {
        // given/when
        let config = StorageConfig::default();

        // then
        let slate_config = expect_slatedb(config, "Expected SlateDb config as default");
        assert_eq!(slate_config.path, "data");
        assert_eq!(slate_config.object_store, local_store(".data"));
    }

    #[test]
    fn should_deserialize_in_memory_config() {
        // given
        let yaml = r#"type: InMemory"#;

        // when
        let config = parse(yaml);

        // then
        assert_eq!(config, StorageConfig::InMemory);
    }

    #[test]
    fn should_deserialize_slatedb_config_with_local_object_store() {
        // given
        let yaml = r#"
type: SlateDb
path: my-data
object_store:
  type: Local
  path: /tmp/slatedb
"#;

        // when
        let config = parse(yaml);

        // then
        let slate_config = expect_slatedb(config, "Expected SlateDb config");
        assert_eq!(slate_config.path, "my-data");
        assert_eq!(slate_config.object_store, local_store("/tmp/slatedb"));
        assert!(slate_config.settings_path.is_none());
    }

    #[test]
    fn should_deserialize_slatedb_config_with_aws_object_store() {
        // given
        let yaml = r#"
type: SlateDb
path: my-data
object_store:
  type: Aws
  region: us-west-2
  bucket: my-bucket
settings_path: slatedb.toml
"#;

        // when
        let config = parse(yaml);

        // then
        let slate_config = expect_slatedb(config, "Expected SlateDb config");
        let expected_store = ObjectStoreConfig::Aws(AwsObjectStoreConfig {
            region: "us-west-2".to_string(),
            bucket: "my-bucket".to_string(),
        });
        assert_eq!(slate_config.path, "my-data");
        assert_eq!(slate_config.object_store, expected_store);
        assert_eq!(slate_config.settings_path, Some("slatedb.toml".to_string()));
    }

    #[test]
    fn should_deserialize_slatedb_config_with_in_memory_object_store() {
        // given
        let yaml = r#"
type: SlateDb
path: test-data
object_store:
  type: InMemory
"#;

        // when
        let config = parse(yaml);

        // then
        let slate_config = expect_slatedb(config, "Expected SlateDb config");
        assert_eq!(slate_config.path, "test-data");
        assert_eq!(slate_config.object_store, ObjectStoreConfig::InMemory);
    }

    #[test]
    fn should_serialize_slatedb_config() {
        // given
        let slate_config = SlateDbStorageConfig {
            path: "my-data".to_string(),
            object_store: local_store("/tmp/slatedb"),
            settings_path: None,
            block_cache: None,
        };
        let config = StorageConfig::SlateDb(slate_config);

        // when
        let yaml = serde_yaml::to_string(&config).unwrap();

        // then
        assert!(yaml.contains("type: SlateDb"));
        assert!(yaml.contains("path: my-data"));
        assert!(yaml.contains("type: Local"));
        // settings_path and block_cache should be omitted when None
        assert!(!yaml.contains("settings_path"));
        assert!(!yaml.contains("block_cache"));
    }

    #[test]
    fn should_deserialize_block_cache_config() {
        // given
        let yaml = r#"
type: SlateDb
path: data
object_store:
  type: InMemory
block_cache:
  type: FoyerHybrid
  memory_capacity: 8589934592
  disk_capacity: 150323855360
  disk_path: /mnt/nvme/block-cache
"#;

        // when
        let slate_config = expect_slatedb(parse(yaml), "Expected SlateDb config");

        // then
        // `FoyerHybrid` is the only variant, so this pattern is irrefutable.
        let BlockCacheConfig::FoyerHybrid(foyer) =
            slate_config.block_cache.expect("block_cache should be set");
        assert_eq!(foyer.memory_capacity, 8_589_934_592);
        assert_eq!(foyer.disk_capacity, 150_323_855_360);
        assert_eq!(foyer.disk_path, "/mnt/nvme/block-cache");
    }

    #[test]
    fn should_default_block_cache_to_none() {
        // given
        let yaml = r#"
type: SlateDb
path: data
object_store:
  type: InMemory
"#;

        // when
        let slate_config = expect_slatedb(parse(yaml), "Expected SlateDb config");

        // then
        assert!(slate_config.block_cache.is_none());
    }
}