1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
use clap::{ArgGroup, Parser, Subcommand, ValueEnum};
use std::collections::HashMap;
use std::time::Duration;
use uuid::Uuid;
// Top-level command line arguments: an optional .env file, the database root
// path inside the object store, and the subcommand to execute.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros treat
// `///` doc comments as help/about text, so adding them here would change the
// generated `--help` output.
#[derive(Parser)]
#[command(name = "slatedb", version = "0.1.0", about, long_about = None)]
pub(crate) struct CliArgs {
    // Exposed as `-e` / `--env-file`.
    #[arg(short, long, help = "A .env file to use to supply environment variables")]
    pub(crate) env_file: Option<String>,

    // Exposed as `-p` / `--path`; required because the field is not an `Option`.
    #[arg(
        short,
        long,
        help = "The path in the object store to the root directory, starting from within the object store bucket (specified when configuring the object store provider)"
    )]
    pub(crate) path: String,

    // The selected subcommand together with its own arguments.
    #[command(subcommand)]
    pub(crate) command: CliCommands,
}
// Subcommands understood by the CLI. Each variant becomes one clap subcommand
// and each field one of its arguments.
//
// NOTE: the `///` doc comments inside this enum are consumed by clap's derive
// macro as user-facing help text, so they are part of the program's output.
// Maintainer notes therefore use plain `//` comments.
#[derive(Subcommand, Debug)]
pub(crate) enum CliCommands {
    /// Reads the latest manifest file and outputs a readable
    /// String representation
    ReadManifest {
        /// Specify a specific manifest id to read, if this is
        /// not specified the latest manifest will be returned
        #[arg(short, long)]
        id: Option<u64>,
    },
    /// Lists all available manifests
    ListManifests {
        /// Optionally specify a start id for the range of manifests to lookup
        #[arg(short, long)]
        start: Option<u64>,
        /// Optionally specify an end id for the range of manifests to lookup
        #[arg(short, long)]
        end: Option<u64>,
    },
    /// Create a new checkpoint pointing to the database's current state.
    CreateCheckpoint {
        /// Optionally specify a lifetime for the created checkpoint. You can specify the lifetime
        /// in a human-friendly format that uses years/days/min/s, e.g. "7days 30min 10s". The
        /// checkpoint's expiry time will be set to the current wallclock time plus the specified
        /// lifetime. If the lifetime is not specified, then the checkpoint is set with no expiry
        /// and must be explicitly removed.
        #[arg(short, long, value_parser = humantime::parse_duration)]
        lifetime: Option<Duration>,
        /// Optionally specify the id (e.g. 01740ee5-6459-44af-9a45-85deb6e468e3) of an existing
        /// checkpoint to use as the base for the newly created checkpoint. If not provided then
        /// the checkpoint will be taken against the latest manifest.
        #[arg(short, long, value_parser = Uuid::parse_str)]
        source: Option<Uuid>,
    },
    /// Refresh an existing checkpoint's expiry time. This command will look for an existing
    /// checkpoint and update its expiry time using the specified lifetime.
    RefreshCheckpoint {
        /// The id of the checkpoint (e.g. 01740ee5-6459-44af-9a45-85deb6e468e3) to refresh.
        #[arg(short, long, value_parser = Uuid::parse_str)]
        id: Uuid,
        /// Optionally specify a new lifetime for the checkpoint. You can specify the lifetime in a
        /// human-friendly format that uses years/days/min/s, e.g. "7days 30min 10s". The
        /// checkpoint's expiry time will be set to the current wallclock time plus the specified
        /// lifetime. If the lifetime is not specified, then the checkpoint is updated with no
        /// expiry and must be explicitly removed.
        #[arg(short, long, value_parser = humantime::parse_duration)]
        lifetime: Option<Duration>,
    },
    /// Delete an existing checkpoint.
    DeleteCheckpoint {
        /// The id of the checkpoint (e.g. 01740ee5-6459-44af-9a45-85deb6e468e3) to delete.
        #[arg(short, long, value_parser = Uuid::parse_str)]
        id: Uuid,
    },
    /// List the current checkpoints of the db.
    ListCheckpoints {},
    /// Runs a garbage collection for a specific resource type once
    RunGarbageCollection {
        /// the type of resource to clean up (manifest, wal, compacted)
        #[arg(short, long)]
        resource: GcResource,
        /// the minimum age of the resource before considering it for GC
        #[arg(short, long, value_parser = humantime::parse_duration)]
        min_age: Duration,
    },
    /// Schedules a periodic garbage collection job
    // The argument group makes at least one of --manifest / --wal / --compacted
    // mandatory while still allowing several of them to be combined.
    #[command(group(
        ArgGroup::new("gc_config")
            .args(["manifest", "wal", "compacted"])
            .multiple(true)
            .required(true)
    ))]
    ScheduleGarbageCollection {
        /// Configuration for manifest garbage collection should be set in the
        /// format min_age=<duration>,period=<duration> -- the min_age is the
        /// minimum manifest age that should be considered for collection and
        /// the period is how often to attempt a GC
        #[arg(long, value_parser = parse_gc_schedule)]
        manifest: Option<GcSchedule>,
        /// Configuration for WAL garbage collection should be set in the
        /// format min_age=<duration>,period=<duration> -- the min_age is the
        /// minimum WAL age that should be considered for collection and
        /// the period is how often to attempt a GC
        #[arg(long, value_parser = parse_gc_schedule)]
        wal: Option<GcSchedule>,
        /// Configuration for compacted SST garbage collection should be set in the
        /// format min_age=<duration>,period=<duration> -- the min_age is the
        /// minimum SST age that should be considered for collection and
        /// the period is how often to attempt a GC
        #[arg(long, value_parser = parse_gc_schedule)]
        compacted: Option<GcSchedule>,
    },
}
// Resource kinds that can be passed as the `resource` argument of
// `CliCommands::RunGarbageCollection`.
//
// NOTE: plain `//` comments are used on purpose. The `ValueEnum` derive turns
// the variants into the accepted command line values and reads `///` doc
// comments as their help text, so doc comments here would alter CLI output.
#[derive(Debug, Clone, ValueEnum)]
pub(crate) enum GcResource {
Manifest,
Wal,
Compacted,
}
fn parse_gc_schedule(s: &str) -> Result<GcSchedule, String> {
let parts: HashMap<String, String> = s
.split(',')
.filter_map(|kv| {
let mut parts = kv.splitn(2, '=');
match (parts.next(), parts.next()) {
(Some(key), Some(value)) => Some((key.to_string(), value.to_string())),
_ => None,
}
})
.collect();
let min_age = parts
.get("min_age")
.ok_or_else(|| "Missing or invalid 'min_age'".to_string())
.and_then(|v| {
humantime::parse_duration(v).map_err(|e| {
"Could not parse min_age as duration: "
.to_string()
.to_owned()
+ e.to_string().as_str()
})
})?;
let period = parts
.get("period")
.ok_or_else(|| "Missing or invalid 'period'".to_string())
.and_then(|v| humantime::parse_duration(v).map_err(|e| e.to_string()))?;
Ok(GcSchedule { min_age, period })
}
/// Garbage collection settings for one resource type, as produced by
/// `parse_gc_schedule` from a `min_age=<duration>,period=<duration>` argument.
#[derive(Clone, Debug)]
pub(crate) struct GcSchedule {
    /// How old a resource must be before it is considered for collection.
    pub(crate) min_age: Duration,
    /// Interval at which the garbage collection is run.
    pub(crate) period: Duration,
}
pub(crate) fn parse_args() -> CliArgs {
CliArgs::parse()
}
#[cfg(test)]
mod tests {
    use super::parse_gc_schedule;
    use rstest::rstest;
    use std::time::Duration;

    // Table-driven checks for `parse_gc_schedule`. A case is either a success
    // (both expected durations set, no error) or a failure (no durations, and
    // a substring the returned error message must contain).
    #[rstest]
    #[case(
        "min_age=10m,period=1m",
        Some(Duration::from_secs(600)),
        Some(Duration::from_secs(60)),
        None
    )]
    #[case(
        "min_age=10m,period=1m,ignored=5m",
        Some(Duration::from_secs(600)),
        Some(Duration::from_secs(60)),
        None
    )]
    #[case("", None, None, Some("Missing or invalid 'min_age'"))]
    #[case("period=1m", None, None, Some("Missing or invalid 'min_age'"))]
    #[case("min_age=10m", None, None, Some("Missing or invalid 'period'"))]
    #[case(
        "min_age=invalid,period=1m",
        None,
        None,
        Some("Could not parse min_age as duration")
    )]
    #[case(
        "min_age=,period=1m",
        None,
        None,
        Some("Could not parse min_age as duration: value was empty")
    )]
    // Only the parser's own message is pinned here, so the case holds no
    // matter how the surrounding error text is worded.
    #[case("min_age=10m,period=", None, None, Some("value was empty"))]
    fn parse_gc_schedule_tests(
        #[case] input: &str,
        #[case] expected_min_age: Option<Duration>,
        #[case] expected_period: Option<Duration>,
        #[case] expected_error: Option<&str>,
    ) {
        let result = parse_gc_schedule(input);
        match (result, expected_min_age, expected_period, expected_error) {
            // Valid case: min_age and period are parsed correctly
            (Ok(schedule), Some(min_age), Some(period), None) => {
                assert_eq!(schedule.min_age, min_age);
                assert_eq!(schedule.period, period);
            }
            // Error case: check if the error message matches
            (Err(err), None, None, Some(expected_msg)) => {
                assert!(
                    err.contains(expected_msg),
                    "Expected error to contain '{}', got '{}'",
                    expected_msg,
                    err
                );
            }
            // Any unexpected combination fails the test
            unexpected => panic!("Unexpected test case result. {:?}", unexpected),
        }
    }
}