1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// ─── engine/open.rs ───────────────────────────────────────────────────────────
// Native (non-WASM) constructor for the Db struct.
// Opens or creates a database at the given file path.
// ─────────────────────────────────────────────────────────────────────────────
#[cfg(not(target_arch = "wasm32"))]
use std::sync::Arc;
#[cfg(not(target_arch = "wasm32"))]
use dashmap::{DashMap, DashSet};
#[cfg(not(target_arch = "wasm32"))]
use tokio::sync::broadcast;
use crate::engine::Db;
#[cfg(not(target_arch = "wasm32"))]
use crate::engine::DbError;
#[cfg(not(target_arch = "wasm32"))]
use crate::engine::config::DbConfig;
#[cfg(not(target_arch = "wasm32"))]
use crate::engine::storage;
impl Db {
/// Open (or create) a database at the given file path.
/// Only available on native (non-WASM) builds.
///
/// `sync_mode` — if true, use SyncDiskStorage (flush on every write).
/// if false, use AsyncDiskStorage (flush every 50ms).
/// Ignored when `tiered_mode` is true.
/// `tiered_mode` — if true, use TieredStorage (hot + cold two-tier backend).
/// Hot writes go to the active log; cold data is archived and
/// read via mmap on startup. Best for large datasets (100k+ docs).
/// Enable with STORAGE_MODE=tiered environment variable.
/// `encryption_key` — if Some, wrap the storage in EncryptedStorage.
/// if None, data is stored in plaintext (not recommended).
#[cfg(not(target_arch = "wasm32"))]
pub fn open(config: DbConfig) -> Result<Self, DbError> {
let path = &config.path;
let sync_mode = config.sync_mode;
let tiered_mode = config.tiered_mode;
let hot_threshold = config.hot_threshold;
let rate_limit_requests = config.rate_limit_requests.unwrap_or(1000);
let rate_limit_window = config.rate_limit_window.unwrap_or(60);
let max_body_size = config.max_body_size;
let max_keys_per_request = config.max_keys_per_request;
let encryption_key = config.encryption_key;
let post_backup_script = config.post_backup_script;
let in_memory = config.in_memory;
// Create the shared in-memory state containers.
let state = Arc::new(DashMap::new());
// Create the broadcast channel with a buffer of 100 messages.
// If the buffer fills up (no subscribers reading), old messages are dropped.
let (tx, _rx) = broadcast::channel(1000);
let indexes: Arc<DashMap<String, DashMap<String, DashSet<String>>>> =
Arc::new(Default::default());
let query_heatmap = Arc::new(Default::default());
#[cfg(feature = "schema")]
let schemas = Arc::new(DashMap::new());
// Ensure the parent directory exists (skipped in in-memory mode — no file is created).
if !in_memory {
if let Some(parent) = std::path::Path::new(path).parent() {
std::fs::create_dir_all(parent)?;
}
}
// Choose the base storage backend based on the configured mode.
//
// in_memory = true → InMemoryStorage: all data lives in the DashMap only.
// No disk I/O at all. Data is lost on exit.
// Ideal for ephemeral caches and CI environments.
//
// tiered_mode = true → TieredStorage: hot log (async writes) + cold log
// (mmap reads). Best for large datasets. The cold log
// accumulates promoted hot data and is paged by the OS.
//
// sync_mode = true → SyncDiskStorage: every write is flushed to disk
// immediately. Zero data loss, lower throughput.
//
// default → AsyncDiskStorage: writes buffered in memory, flushed
// every 50ms. Highest throughput, up to 50ms data loss.
let base_storage: Arc<dyn crate::engine::storage::StorageBackend> = if in_memory {
Arc::new(storage::InMemoryStorage)
} else if tiered_mode {
Arc::new(storage::TieredStorage::new(path)?)
} else if sync_mode {
Arc::new(storage::SyncDiskStorage::new(path)?)
} else {
Arc::new(storage::AsyncDiskStorage::new(path)?)
};
// Optionally wrap the base storage in EncryptedStorage.
// Encryption is skipped in in-memory mode — there is nothing to encrypt on disk.
// EncryptedStorage is transparent — it encrypts on write and decrypts
// on read, so the rest of the engine doesn't know encryption is happening.
let storage: Arc<dyn crate::engine::storage::StorageBackend> = if !in_memory {
if let Some(key) = encryption_key {
Arc::new(storage::EncryptedStorage::new(base_storage, &key))
} else {
base_storage
}
} else {
base_storage
};
// Replay the log (or snapshot + delta) into the in-memory state.
// After this call, `state` and `indexes` reflect the persisted data.
storage::stream_into_state(
&*storage,
&state,
&indexes,
#[cfg(feature = "schema")] &schemas,
)?;
Ok(Self {
state,
storage,
tx,
indexes,
query_heatmap,
hot_threshold,
rate_limit_requests,
rate_limit_window,
max_body_size,
max_keys_per_request,
#[cfg(feature = "schema")]
schemas,
post_backup_script,
tiered_mode,
#[cfg(not(target_arch = "wasm32"))]
started_at: std::time::Instant::now(),
})
}
}