s4-server 0.8.0

S4 — Squished S3 — GPU-accelerated transparent compression S3-compatible storage gateway (cargo install s4-server installs the `s4` binary).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
//! Object Lock (WORM) enforcement layer (v0.5 #30).
//!
//! AWS S3 Object Lock holds objects in a "Write Once Read Many" state by
//! attaching a *retention configuration* (mode + retain-until date) and/or a
//! *legal hold* flag to each version. While locked, DELETE / overwrite must
//! be refused with HTTP 403 `AccessDenied`. Two retention modes exist:
//!
//! * **Governance** — a privileged caller can override the lock by sending
//!   `x-amz-bypass-governance-retention: true` (paired in real AWS with the
//!   `s3:BypassGovernanceRetention` IAM permission; in S4 we honour the
//!   header alone because policy gating is the operator's responsibility).
//! * **Compliance** — never overridable until the retain-until date has
//!   passed. Even root/admin cannot delete, including via the bypass header.
//!
//! Legal hold is independent of either mode: while `legal_hold_on == true`
//! the object is locked, regardless of retain-until / mode. Setting it back
//! to `false` is permitted at any time.
//!
//! ## scope (v0.5 #30)
//!
//! - in-memory only (single-instance scope) with optional JSON snapshot for
//!   restart-recoverable state — same shape as `versioning.rs`'s
//!   `--versioning-state-file`.
//! - per-object lock state is keyed by `(bucket, key)` — version-id granular
//!   locking is deferred (current behaviour: a lock on a key blocks DELETE
//!   regardless of version-id; v0.6+ may attach state per (bucket, key,
//!   version-id) to mirror AWS exactly).
//! - per-bucket default config, when set, auto-applies to **new** objects on
//!   PUT (a key whose state already carries a mode or retain-until date is
//!   left alone; a legal-hold-only entry still receives the default).

use std::collections::HashMap;
use std::sync::RwLock;

use chrono::{DateTime, Duration, Utc};
use serde::{Deserialize, Serialize};

/// Retention mode for an object lock. Mirrors AWS S3 (`GOVERNANCE` /
/// `COMPLIANCE`).
///
/// The type is `Copy` and serde-(de)serialisable, so it can be embedded
/// directly in [`ObjectLockState`] and [`BucketObjectLockDefault`] and
/// carried through the JSON snapshot.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum LockMode {
    /// Override-able with `x-amz-bypass-governance-retention: true`.
    Governance,
    /// Never overridable until `retain_until` expires (immutable: once set,
    /// the mode cannot be downgraded to Governance and `retain_until` cannot
    /// be shortened). The enum itself does not enforce those transition
    /// rules; per the docs on `ObjectLockManager::set` they are validated by
    /// the caller before the state is persisted.
    Compliance,
}

impl LockMode {
    /// Canonical upper-case spelling of the mode as it appears on the S3
    /// wire (`"GOVERNANCE"` / `"COMPLIANCE"`).
    #[must_use]
    pub fn as_aws_str(self) -> &'static str {
        match self {
            Self::Compliance => "COMPLIANCE",
            Self::Governance => "GOVERNANCE",
        }
    }

    /// Inverse of [`Self::as_aws_str`]: map a wire string to its
    /// [`LockMode`], ignoring ASCII case. Returns `None` for anything that
    /// is not one of the two known spellings.
    #[must_use]
    pub fn from_aws_str(s: &str) -> Option<Self> {
        // Compare against each variant's own wire string so the two
        // directions of the conversion share a single source of truth.
        [Self::Governance, Self::Compliance]
            .iter()
            .copied()
            .find(|candidate| s.eq_ignore_ascii_case(candidate.as_aws_str()))
    }
}

/// Per-object lock state. All fields are optional so a "legal hold only"
/// state (`mode = None`, `retain_until = None`, `legal_hold_on = true`) is
/// representable, matching S3 semantics where a legal hold can exist without
/// any retention.
///
/// The derived `Default` is the fully unlocked state: no mode, no
/// retain-until date, legal hold off.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ObjectLockState {
    /// Retention mode. Retention only gates access when both `mode` and
    /// `retain_until` are `Some` (see [`ObjectLockState::is_locked`]).
    pub mode: Option<LockMode>,
    /// UTC instant until which retention applies. The lock counts as
    /// expired once this is no longer strictly in the future.
    pub retain_until: Option<DateTime<Utc>>,
    /// Legal-hold flag. While `true` the object is locked regardless of
    /// `mode` / `retain_until`.
    pub legal_hold_on: bool,
}

impl ObjectLockState {
    /// Whether the object is currently protected against delete /
    /// overwrite at instant `now`.
    ///
    /// A legal hold locks unconditionally. Otherwise the object is locked
    /// only when a mode is set *and* `retain_until` lies strictly after
    /// `now`.
    #[must_use]
    pub fn is_locked(&self, now: DateTime<Utc>) -> bool {
        self.legal_hold_on
            || matches!(
                (self.mode, self.retain_until),
                (Some(_), Some(until)) if until > now
            )
    }

    /// Whether a DELETE / overwrite is permitted at instant `now`.
    ///
    /// Decision table:
    ///
    /// | state                                   | result              |
    /// |-----------------------------------------|---------------------|
    /// | legal hold on                           | denied, no bypass   |
    /// | mode or retain-until missing            | allowed             |
    /// | retain-until not in the future          | allowed (expired)   |
    /// | Compliance, retain-until in the future  | denied, no bypass   |
    /// | Governance, retain-until in the future  | `bypass_governance` |
    #[must_use]
    pub fn can_delete(&self, now: DateTime<Utc>, bypass_governance: bool) -> bool {
        // A legal hold trumps everything, including the governance bypass.
        if self.legal_hold_on {
            return false;
        }
        // Retention needs both halves to be present to have any effect.
        let (Some(mode), Some(until)) = (self.mode, self.retain_until) else {
            return true;
        };
        // Retention window already over: nothing left to enforce.
        if until <= now {
            return true;
        }
        match mode {
            LockMode::Compliance => false,
            LockMode::Governance => bypass_governance,
        }
    }
}

/// Per-bucket default retention. Applied automatically to objects on PUT by
/// [`ObjectLockManager::apply_default_on_put`], which skips any `(bucket,
/// key)` whose existing state already carries a mode or a retain-until date
/// (a legal-hold-only entry still receives the default).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BucketObjectLockDefault {
    /// Retention mode stamped onto the object.
    pub mode: LockMode,
    /// Length of the retention window in days; the object's `retain_until`
    /// becomes the PUT timestamp plus this many days.
    pub retention_days: u32,
}

/// Snapshot wrapper used by [`ObjectLockManager::to_json`] /
/// [`ObjectLockManager::from_json`]. This is the on-disk (JSON) shape of the
/// manager; it holds plain owned data with no locks.
#[derive(Debug, Default, Serialize, Deserialize)]
struct ObjectLockSnapshot {
    /// `(bucket, key) → state` flattened into a `Vec` so JSON stays
    /// human-readable (tuple keys can't roundtrip through `HashMap` JSON).
    states: Vec<((String, String), ObjectLockState)>,
    /// `bucket → default retention`. String keys serialise as a regular
    /// JSON object, so this map is stored as-is.
    bucket_defaults: HashMap<String, BucketObjectLockDefault>,
}

/// Top-level manager. Owns per-(bucket, key) lock state and per-bucket
/// default configuration. Each map sits behind its own `RwLock`, so every
/// method takes `&self` and the manager can be shared between threads.
///
/// The type itself is not `Clone`; the expected handle shape is
/// `Arc<ObjectLockManager>`, whose clones are cheap.
#[derive(Debug, Default)]
pub struct ObjectLockManager {
    /// Lock state keyed by `(bucket, key)`.
    states: RwLock<HashMap<(String, String), ObjectLockState>>,
    /// Default retention keyed by bucket name.
    bucket_defaults: RwLock<HashMap<String, BucketObjectLockDefault>>,
}

impl ObjectLockManager {
    /// Empty manager — no objects locked, no bucket defaults.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Replace (or create) the lock state for `(bucket, key)`. `service.rs`'s
    /// `put_object_retention` handler calls this directly after validating
    /// the immutability rules (Compliance is one-way; once set, mode cannot
    /// be downgraded and retain-until cannot be shortened — the caller
    /// validates, this method just persists).
    pub fn set(&self, bucket: &str, key: &str, state: ObjectLockState) {
        self.states
            .write()
            .expect("object-lock state RwLock poisoned")
            .insert((bucket.to_owned(), key.to_owned()), state);
    }

    /// Return a clone of the current state for `(bucket, key)`, if any.
    #[must_use]
    pub fn get(&self, bucket: &str, key: &str) -> Option<ObjectLockState> {
        self.states
            .read()
            .expect("object-lock state RwLock poisoned")
            .get(&(bucket.to_owned(), key.to_owned()))
            .cloned()
    }

    /// Toggle the legal-hold flag on `(bucket, key)`. Creates a default-empty
    /// state if no entry exists yet (legal hold is allowed even without
    /// retention).
    pub fn set_legal_hold(&self, bucket: &str, key: &str, on: bool) {
        let mut guard = self
            .states
            .write()
            .expect("object-lock state RwLock poisoned");
        let entry = guard
            .entry((bucket.to_owned(), key.to_owned()))
            .or_default();
        entry.legal_hold_on = on;
    }

    /// Install (or replace) the bucket-default retention config. New PUTs to
    /// this bucket without explicit retention pick this up via
    /// [`Self::apply_default_on_put`].
    pub fn set_bucket_default(&self, bucket: &str, default: BucketObjectLockDefault) {
        self.bucket_defaults
            .write()
            .expect("object-lock bucket-default RwLock poisoned")
            .insert(bucket.to_owned(), default);
    }

    /// Look up the bucket-default retention config, if any.
    #[must_use]
    pub fn bucket_default(&self, bucket: &str) -> Option<BucketObjectLockDefault> {
        self.bucket_defaults
            .read()
            .expect("object-lock bucket-default RwLock poisoned")
            .get(bucket)
            .copied()
    }

    /// On PUT: when the bucket has a default config and no per-object state
    /// already exists for this key, materialise a fresh state with
    /// `retain_until = now + retention_days`. Existing state (e.g. an
    /// earlier explicit `put_object_retention`) is left unchanged so we
    /// don't accidentally re-arm a cleared retention on overwrite.
    pub fn apply_default_on_put(&self, bucket: &str, key: &str, now: DateTime<Utc>) {
        let Some(default) = self.bucket_default(bucket) else {
            return;
        };
        let mut guard = self
            .states
            .write()
            .expect("object-lock state RwLock poisoned");
        let key_pair = (bucket.to_owned(), key.to_owned());
        // Skip if any retention is already in effect — auto-apply must not
        // shorten an existing Compliance lock or wipe a legal hold.
        if let Some(existing) = guard.get(&key_pair)
            && (existing.mode.is_some() || existing.retain_until.is_some())
        {
            return;
        }
        let retain_until = now + Duration::days(i64::from(default.retention_days));
        let entry = guard.entry(key_pair).or_default();
        entry.mode = Some(default.mode);
        entry.retain_until = Some(retain_until);
    }

    /// Drop any lock state attached to `(bucket, key)`. Called by
    /// `service.rs` after a successful (= permitted) physical delete so the
    /// freed key can be re-armed by a future PUT under the bucket default.
    pub fn clear(&self, bucket: &str, key: &str) {
        self.states
            .write()
            .expect("object-lock state RwLock poisoned")
            .remove(&(bucket.to_owned(), key.to_owned()));
    }

    /// JSON snapshot for restart-recoverable state. Pair with
    /// [`Self::from_json`].
    pub fn to_json(&self) -> Result<String, serde_json::Error> {
        let states: Vec<((String, String), ObjectLockState)> = self
            .states
            .read()
            .expect("object-lock state RwLock poisoned")
            .iter()
            .map(|(k, v)| (k.clone(), v.clone()))
            .collect();
        let bucket_defaults = self
            .bucket_defaults
            .read()
            .expect("object-lock bucket-default RwLock poisoned")
            .clone();
        let snap = ObjectLockSnapshot {
            states,
            bucket_defaults,
        };
        serde_json::to_string(&snap)
    }

    /// Restore from a JSON snapshot produced by [`Self::to_json`].
    pub fn from_json(s: &str) -> Result<Self, serde_json::Error> {
        let snap: ObjectLockSnapshot = serde_json::from_str(s)?;
        let mut states = HashMap::with_capacity(snap.states.len());
        for (k, v) in snap.states {
            states.insert(k, v);
        }
        Ok(Self {
            states: RwLock::new(states),
            bucket_defaults: RwLock::new(snap.bucket_defaults),
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Current wall-clock time in UTC. Each test captures it once so the
    /// instant used to build a state is the same one it is checked against.
    fn now() -> DateTime<Utc> {
        Utc::now()
    }

    /// Retention-only state (legal hold off) with the given mode and expiry.
    fn retention(mode: LockMode, retain_until: DateTime<Utc>) -> ObjectLockState {
        ObjectLockState {
            mode: Some(mode),
            retain_until: Some(retain_until),
            legal_hold_on: false,
        }
    }

    #[test]
    fn is_locked_future_retain_until() {
        let t = now();
        let s = retention(LockMode::Governance, t + Duration::hours(1));
        assert!(s.is_locked(t));
    }

    #[test]
    fn is_locked_past_retain_until_is_unlocked() {
        let t = now();
        let s = retention(LockMode::Governance, t - Duration::hours(1));
        assert!(!s.is_locked(t));
    }

    #[test]
    fn compliance_cannot_be_bypassed() {
        let t = now();
        let s = retention(LockMode::Compliance, t + Duration::days(7));
        // Even with bypass=true, Compliance refuses delete until expiry.
        assert!(!s.can_delete(t, true));
        assert!(!s.can_delete(t, false));
    }

    #[test]
    fn governance_can_be_bypassed_with_header() {
        let t = now();
        let s = retention(LockMode::Governance, t + Duration::days(7));
        assert!(s.can_delete(t, true), "bypass=true should permit delete");
        assert!(!s.can_delete(t, false), "bypass=false should refuse delete");
    }

    #[test]
    fn expired_governance_retention_permits_delete_without_bypass() {
        let t = now();
        let s = retention(LockMode::Governance, t - Duration::hours(1));
        assert!(s.can_delete(t, false));
    }

    #[test]
    fn legal_hold_blocks_delete_independent_of_retention() {
        // No retention at all, just a legal hold → still locked.
        let t = now();
        let s = ObjectLockState {
            mode: None,
            retain_until: None,
            legal_hold_on: true,
        };
        assert!(s.is_locked(t));
        assert!(!s.can_delete(t, true), "legal hold cannot be bypassed");
        assert!(!s.can_delete(t, false));
    }

    #[test]
    fn legal_hold_overrides_governance_bypass() {
        // Governance retention with bypass=true would normally permit delete,
        // but a legal hold present at the same time blocks it.
        let t = now();
        let s = ObjectLockState {
            legal_hold_on: true,
            ..retention(LockMode::Governance, t + Duration::days(7))
        };
        assert!(!s.can_delete(t, true));
    }

    #[test]
    fn no_lock_no_block() {
        let t = now();
        let s = ObjectLockState::default();
        assert!(!s.is_locked(t));
        assert!(s.can_delete(t, false));
    }

    #[test]
    fn apply_default_materialises_state_on_first_put() {
        let m = ObjectLockManager::new();
        m.set_bucket_default(
            "b",
            BucketObjectLockDefault {
                mode: LockMode::Governance,
                retention_days: 30,
            },
        );
        let t = now();
        m.apply_default_on_put("b", "k", t);
        let state = m.get("b", "k").expect("state must be materialised");
        assert_eq!(state.mode, Some(LockMode::Governance));
        // `t` is injected, so the expiry is exact — no clock slack needed.
        assert_eq!(state.retain_until, Some(t + Duration::days(30)));
        assert!(!state.legal_hold_on);
    }

    #[test]
    fn apply_default_does_not_overwrite_existing_retention() {
        let m = ObjectLockManager::new();
        let t = now();
        let existing = retention(LockMode::Compliance, t + Duration::days(365));
        m.set("b", "k", existing.clone());
        m.set_bucket_default(
            "b",
            BucketObjectLockDefault {
                mode: LockMode::Governance,
                retention_days: 1,
            },
        );
        m.apply_default_on_put("b", "k", t);
        // Existing Compliance + 365-day retain must be preserved untouched.
        assert_eq!(m.get("b", "k"), Some(existing));
    }

    #[test]
    fn apply_default_keeps_legal_hold_on_hold_only_state() {
        // A legal-hold-only entry has no retention yet, so the bucket
        // default is applied — but the hold itself must not be wiped.
        let m = ObjectLockManager::new();
        m.set_legal_hold("b", "k", true);
        m.set_bucket_default(
            "b",
            BucketObjectLockDefault {
                mode: LockMode::Governance,
                retention_days: 2,
            },
        );
        let t = now();
        m.apply_default_on_put("b", "k", t);
        let state = m.get("b", "k").expect("state present");
        assert!(state.legal_hold_on);
        assert_eq!(state.mode, Some(LockMode::Governance));
        assert_eq!(state.retain_until, Some(t + Duration::days(2)));
    }

    #[test]
    fn apply_default_no_op_without_bucket_default() {
        let m = ObjectLockManager::new();
        m.apply_default_on_put("b", "k", now());
        assert!(m.get("b", "k").is_none());
    }

    #[test]
    fn set_legal_hold_creates_state_when_missing() {
        let m = ObjectLockManager::new();
        m.set_legal_hold("b", "k", true);
        let s = m.get("b", "k").expect("state created");
        assert!(s.legal_hold_on);
        assert!(s.mode.is_none());
        assert!(s.retain_until.is_none());
        m.set_legal_hold("b", "k", false);
        let s2 = m.get("b", "k").unwrap();
        assert!(!s2.legal_hold_on);
    }

    #[test]
    fn snapshot_roundtrip() {
        let m = ObjectLockManager::new();
        let original = ObjectLockState {
            mode: Some(LockMode::Compliance),
            retain_until: Some(now() + Duration::days(10)),
            legal_hold_on: true,
        };
        let default = BucketObjectLockDefault {
            mode: LockMode::Governance,
            retention_days: 7,
        };
        m.set("b1", "k1", original.clone());
        m.set_bucket_default("b1", default);

        let json = m.to_json().expect("to_json");
        let m2 = ObjectLockManager::from_json(&json).expect("from_json");

        // Compare whole values so the retain-until timestamp is covered too,
        // not just the mode and the legal-hold flag.
        let s = m2.get("b1", "k1").expect("state survives roundtrip");
        assert_eq!(s, original);
        let d = m2.bucket_default("b1").expect("default survives roundtrip");
        assert_eq!(d, default);
    }

    #[test]
    fn from_json_rejects_invalid_snapshot() {
        assert!(ObjectLockManager::from_json("not json").is_err());
    }

    #[test]
    fn lock_mode_aws_string_roundtrip() {
        for mode in [LockMode::Governance, LockMode::Compliance] {
            assert_eq!(LockMode::from_aws_str(mode.as_aws_str()), Some(mode));
        }
        assert_eq!(
            LockMode::from_aws_str("governance"),
            Some(LockMode::Governance)
        );
        assert!(LockMode::from_aws_str("nope").is_none());
    }

    #[test]
    fn clear_removes_state() {
        let m = ObjectLockManager::new();
        m.set(
            "b",
            "k",
            retention(LockMode::Governance, now() + Duration::days(1)),
        );
        assert!(m.get("b", "k").is_some());
        m.clear("b", "k");
        assert!(m.get("b", "k").is_none());
    }
}