Skip to main content

fakecloud_s3/
state.rs

1use bytes::Bytes;
2use chrono::{DateTime, Utc};
3use fakecloud_persistence::cache::{BodyCache, BodyKey};
4use fakecloud_persistence::BodyRef;
5use parking_lot::RwLock;
6use std::collections::BTreeMap;
7use std::io::{self, Read, Seek, SeekFrom};
8use std::sync::Arc;
9
/// An ACL grant entry.
///
/// Pairs a description of the grantee with the permission granted to it.
/// Every value is held as a plain string; this type performs no validation.
#[derive(Debug, Clone)]
pub struct AclGrant {
    /// Kind of grantee: "CanonicalUser" or "Group".
    pub grantee_type: String,
    /// Identifier of the grantee, when one is recorded.
    pub grantee_id: Option<String>,
    /// Display name of the grantee, when one is recorded.
    pub grantee_display_name: Option<String>,
    /// URI of the grantee, when one is recorded.
    pub grantee_uri: Option<String>,
    /// Granted permission: READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL.
    pub permission: String,
}
19
/// A stored object together with its metadata.
///
/// The payload itself is not embedded; `body` is a [`BodyRef`] that points at
/// bytes held in memory or on disk. Entries with `is_delete_marker` set
/// presumably stand for delete markers in a version history — confirm against
/// the code that writes `S3Bucket::object_versions`.
#[derive(Debug, Clone, Default)]
pub struct S3Object {
    /// Object key.
    pub key: String,
    /// Reference to the object's payload.
    pub body: BodyRef,
    pub content_type: String,
    pub etag: String,
    /// Size recorded for the object.
    pub size: u64,
    pub last_modified: DateTime<Utc>,
    /// Metadata key/value pairs, kept sorted by key.
    pub metadata: BTreeMap<String, String>,
    pub storage_class: String,
    /// Object tags, kept sorted by key.
    pub tags: BTreeMap<String, String>,
    /// ACL grants attached to this object.
    pub acl_grants: Vec<AclGrant>,
    /// Owner id recorded for the object's ACL, when set.
    pub acl_owner_id: Option<String>,
    /// If created from multipart upload, the number of parts.
    pub parts_count: Option<u32>,
    /// Per-part sizes for multipart objects (part_number, size).
    pub part_sizes: Option<Vec<(u32, u64)>>,
    /// Server-side encryption algorithm.
    pub sse_algorithm: Option<String>,
    /// KMS key ID for SSE-KMS.
    pub sse_kms_key_id: Option<String>,
    /// Whether bucket key is enabled.
    pub bucket_key_enabled: Option<bool>,
    /// Version identifier, when the object has one.
    pub version_id: Option<String>,
    pub is_delete_marker: bool,
    pub content_encoding: Option<String>,
    pub website_redirect_location: Option<String>,
    /// Glacier restore: ongoing request status.
    pub restore_ongoing: Option<bool>,
    /// Glacier restore: expiry date string.
    pub restore_expiry: Option<String>,
    /// Checksum algorithm (CRC32, SHA1, SHA256).
    pub checksum_algorithm: Option<String>,
    /// Base64-encoded checksum value.
    pub checksum_value: Option<String>,
    /// Object lock mode (GOVERNANCE or COMPLIANCE).
    pub lock_mode: Option<String>,
    /// Object lock retain-until date (ISO 8601).
    pub lock_retain_until: Option<DateTime<Utc>>,
    /// Legal hold status (ON or OFF).
    pub lock_legal_hold: Option<String>,
}
62
/// A part uploaded via the multipart upload API.
///
/// Parts are stored inside [`MultipartUpload::parts`], keyed by part number.
#[derive(Debug, Clone)]
pub struct UploadPart {
    /// Part number; also the key under which the part is stored.
    pub part_number: u32,
    /// Reference to the part's payload (in memory or on disk).
    pub body: BodyRef,
    pub etag: String,
    /// Size recorded for the part.
    pub size: u64,
    pub last_modified: DateTime<Utc>,
}
72
/// An in-progress multipart upload.
///
/// Stored in [`S3Bucket::multipart_uploads`], keyed by `upload_id`. Besides
/// the uploaded parts it carries the object attributes captured alongside the
/// upload (metadata, content type, storage class, encryption, ACL, ...).
#[derive(Debug, Clone)]
pub struct MultipartUpload {
    /// Identifier of this upload.
    pub upload_id: String,
    /// Key of the object being uploaded.
    pub key: String,
    /// When the upload was initiated.
    pub initiated: DateTime<Utc>,
    /// Parts keyed by part number.
    pub parts: BTreeMap<u32, UploadPart>,
    /// Metadata provided at CreateMultipartUpload time.
    pub metadata: BTreeMap<String, String>,
    pub content_type: String,
    pub storage_class: String,
    pub sse_algorithm: Option<String>,
    pub sse_kms_key_id: Option<String>,
    /// Tagging value kept as a single string.
    /// NOTE(review): presumably the raw tagging header/query string — confirm.
    pub tagging: Option<String>,
    pub acl_grants: Vec<AclGrant>,
    pub checksum_algorithm: Option<String>,
}
91
/// In-memory representation of one bucket.
///
/// Holds the bucket's objects, per-key version lists, in-progress multipart
/// uploads, ACL data and the various per-bucket configuration documents.
/// Configuration documents are stored as strings (several are documented
/// below as XML bodies); `None` / an empty map means nothing has been set.
#[derive(Debug, Clone)]
pub struct S3Bucket {
    pub name: String,
    pub creation_date: DateTime<Utc>,
    pub region: String,
    /// Objects keyed by their full key path.
    pub objects: BTreeMap<String, S3Object>,
    /// Bucket tags, kept sorted by key.
    pub tags: BTreeMap<String, String>,
    /// ACL grants on the bucket. `S3Bucket::new` seeds a single FULL_CONTROL
    /// grant for the owner.
    pub acl_grants: Vec<AclGrant>,
    /// Owner id recorded for the bucket's ACL.
    pub acl_owner_id: String,
    /// In-progress multipart uploads keyed by upload ID.
    pub multipart_uploads: BTreeMap<String, MultipartUpload>,
    /// Versioning status: None = never enabled, Some("Enabled"), Some("Suspended").
    pub versioning: Option<String>,
    /// Object versions keyed by key, each value is a list of versions.
    pub object_versions: BTreeMap<String, Vec<S3Object>>,
    /// Bucket ACL (canned or XML).
    pub acl: Option<String>,
    pub encryption_config: Option<String>,
    pub lifecycle_config: Option<String>,
    /// Value of the `x-amz-transition-default-minimum-object-size` header
    /// supplied on PutBucketLifecycleConfiguration. Echoed back as a header
    /// on the corresponding GET (and PUT) response. Real AWS defaults to
    /// `all_storage_classes_128K` for general purpose buckets.
    pub lifecycle_transition_default_min_size: Option<String>,
    pub policy: Option<String>,
    pub cors_config: Option<String>,
    pub notification_config: Option<String>,
    pub logging_config: Option<String>,
    pub website_config: Option<String>,
    pub accelerate_status: Option<String>,
    pub public_access_block: Option<String>,
    pub object_lock_config: Option<String>,
    pub replication_config: Option<String>,
    pub ownership_controls: Option<String>,
    /// Inventory configurations.
    /// NOTE(review): presumably keyed by configuration id like the other
    /// per-id maps below — confirm.
    pub inventory_configs: BTreeMap<String, String>,
    /// Whether EventBridge notifications are enabled for this bucket.
    pub eventbridge_enabled: bool,
    /// Per-id analytics configurations (XML body).
    pub analytics_configs: BTreeMap<String, String>,
    /// Per-id intelligent-tiering configurations (XML body).
    pub intelligent_tiering_configs: BTreeMap<String, String>,
    /// Per-id metrics configurations (XML body).
    pub metrics_configs: BTreeMap<String, String>,
    /// Request payment configuration (XML body).
    pub request_payment: Option<String>,
    /// Per-bucket ABAC config (XML body) — see PutBucketAbac/GetBucketAbac.
    pub abac_config: Option<String>,
    /// Bucket-level metadata configuration (S3 metadata table v2).
    pub metadata_configuration: Option<String>,
    /// Bucket-level metadata table configuration (S3 metadata table v1).
    pub metadata_table_configuration: Option<String>,
}
145
146impl S3Bucket {
147    pub fn new(name: &str, region: &str, owner_id: &str) -> Self {
148        Self {
149            name: name.to_string(),
150            creation_date: Utc::now(),
151            region: region.to_string(),
152            objects: BTreeMap::new(),
153            tags: BTreeMap::new(),
154            acl_grants: vec![AclGrant {
155                grantee_type: "CanonicalUser".to_string(),
156                grantee_id: Some(owner_id.to_string()),
157                grantee_display_name: Some(owner_id.to_string()),
158                grantee_uri: None,
159                permission: "FULL_CONTROL".to_string(),
160            }],
161            acl_owner_id: owner_id.to_string(),
162            multipart_uploads: BTreeMap::new(),
163            versioning: None,
164            object_versions: BTreeMap::new(),
165            acl: None,
166            encryption_config: None,
167            lifecycle_config: None,
168            lifecycle_transition_default_min_size: None,
169            policy: None,
170            cors_config: None,
171            notification_config: None,
172            logging_config: None,
173            website_config: None,
174            accelerate_status: None,
175            public_access_block: None,
176            object_lock_config: None,
177            replication_config: None,
178            ownership_controls: None,
179            inventory_configs: BTreeMap::new(),
180            eventbridge_enabled: false,
181            analytics_configs: BTreeMap::new(),
182            intelligent_tiering_configs: BTreeMap::new(),
183            metrics_configs: BTreeMap::new(),
184            request_payment: None,
185            abac_config: None,
186            metadata_configuration: None,
187            metadata_table_configuration: None,
188        }
189    }
190}
191
/// A recorded S3 notification event for introspection.
///
/// Events are appended to `S3State::notification_events` and dropped by
/// `S3State::reset`.
#[derive(Debug, Clone)]
pub struct S3NotificationEvent {
    /// Name of the bucket the event refers to.
    pub bucket: String,
    /// Key of the object the event refers to.
    pub key: String,
    /// Event type string, e.g. "s3:ObjectCreated:Put".
    pub event_type: String,
    /// Timestamp recorded for the event.
    pub timestamp: DateTime<Utc>,
}
200
/// Stored response from a Lambda function invoked via S3 Object Lambda.
/// Keyed by `request_token` in [`S3State::object_lambda_responses`].
///
/// Unlike object bodies, the response payload is held inline as a `Vec<u8>`.
#[derive(Debug, Clone)]
pub struct ObjectLambdaResponse {
    /// Request route supplied with the response.
    pub route: String,
    /// Request token supplied with the response.
    pub token: String,
    /// Response payload bytes.
    pub body: Vec<u8>,
    pub content_type: Option<String>,
    /// Status code to forward, when one was supplied.
    /// NOTE(review): presumably an HTTP status code — confirm against caller.
    pub fwd_status: Option<u16>,
    /// Error message to forward, when one was supplied.
    pub fwd_error_message: Option<String>,
    /// Metadata key/value pairs, kept sorted by key.
    pub metadata: BTreeMap<String, String>,
    pub encryption: Option<String>,
    pub kms_key_id: Option<String>,
    /// When this response was stored.
    pub stored_at: DateTime<Utc>,
}
216
/// An S3 access point record, stored in `S3State::access_points`.
///
/// An access point refers to its bucket by name (`bucket`); it does not hold
/// the bucket itself.
#[derive(Debug, Clone)]
pub struct S3AccessPoint {
    /// Access point name.
    pub name: String,
    /// Name of the bucket this access point is attached to.
    pub bucket: String,
    /// Account id recorded for the access point.
    pub account_id: String,
    /// Network origin, stored as a string.
    /// NOTE(review): presumably "Internet" or "VPC" as in AWS — confirm.
    pub network_origin: String,
    /// VPC configuration, when one was supplied.
    pub vpc_configuration: Option<String>,
    pub creation_date: DateTime<Utc>,
    /// Public access block configuration, when one was supplied.
    pub public_access_block: Option<String>,
    /// Account id of the bucket's owner, when recorded.
    pub bucket_account_id: Option<String>,
}
228
/// S3 state for one account/region pair.
///
/// Instances are created per account through the `AccountState`
/// implementation further down in this file and are shared behind
/// `SharedS3State`.
pub struct S3State {
    /// Account this state belongs to.
    pub account_id: String,
    /// Region this state was created for.
    pub region: String,
    /// Buckets held by this state.
    /// NOTE(review): presumably keyed by bucket name — confirm.
    pub buckets: BTreeMap<String, S3Bucket>,
    /// Recorded notification events, in insertion order.
    pub notification_events: Vec<S3NotificationEvent>,
    /// Optional cache of whole object bodies consulted by `read_body`.
    /// `None` until `set_body_cache` or `inherit_from` installs one.
    pub body_cache: Option<Arc<BodyCache>>,
    /// Object Lambda responses keyed by request token.
    pub object_lambda_responses: BTreeMap<String, ObjectLambdaResponse>,
    /// Access points held by this state.
    /// NOTE(review): presumably keyed by access point name — confirm.
    pub access_points: BTreeMap<String, S3AccessPoint>,
}
239
240impl S3State {
241    pub fn new(account_id: &str, region: &str) -> Self {
242        Self {
243            account_id: account_id.to_string(),
244            region: region.to_string(),
245            buckets: BTreeMap::new(),
246            notification_events: Vec::new(),
247            body_cache: None,
248            object_lambda_responses: BTreeMap::new(),
249            access_points: BTreeMap::new(),
250        }
251    }
252
253    pub fn set_body_cache(&mut self, cache: Arc<BodyCache>) {
254        self.body_cache = Some(cache);
255    }
256
257    pub fn reset(&mut self) {
258        self.buckets.clear();
259        self.notification_events.clear();
260        self.object_lambda_responses.clear();
261    }
262
263    /// Read the full body referenced by a [`BodyRef`] without touching the
264    /// [`BodyCache`] or any `S3State`. Because it borrows nothing from state,
265    /// callers can read part bodies after dropping the global S3 lock — used by
266    /// CompleteMultipartUpload to assemble a multi-GB object off-lock instead of
267    /// serializing every other S3 operation behind the assembly (bug-audit
268    /// 2026-05-28, 4.7). Disk bodies are read straight from their path.
269    pub fn read_body_uncached(body: &BodyRef) -> io::Result<Bytes> {
270        match body {
271            BodyRef::Memory(b) => Ok(b.clone()),
272            BodyRef::Disk { path, .. } => Ok(Bytes::from(std::fs::read(path)?)),
273        }
274    }
275
276    /// Read the full body referenced by a [`BodyRef`], consulting the
277    /// persistent [`BodyCache`] when one is configured.
278    pub fn read_body(&self, body: &BodyRef) -> io::Result<Bytes> {
279        match body {
280            BodyRef::Memory(b) => Ok(b.clone()),
281            BodyRef::Disk {
282                bucket,
283                key,
284                version,
285                path,
286                ..
287            } => {
288                let cache_key = BodyKey::new(bucket.clone(), key.clone(), version.clone());
289                if let Some(cache) = &self.body_cache {
290                    if let Some(hit) = cache.get(&cache_key) {
291                        return Ok(hit);
292                    }
293                }
294                let data = std::fs::read(path)?;
295                let bytes = Bytes::from(data);
296                if let Some(cache) = &self.body_cache {
297                    cache.insert(cache_key, bytes.clone());
298                }
299                Ok(bytes)
300            }
301        }
302    }
303
304    /// Read a byte range from the body without loading the full object into
305    /// memory. Memory bodies are sliced directly; disk bodies are seek+read'd.
306    /// Ranges bypass the body cache (the cache stores whole objects only).
307    pub fn read_body_range(&self, body: &BodyRef, offset: u64, len: u64) -> io::Result<Bytes> {
308        match body {
309            BodyRef::Memory(b) => {
310                let start = offset as usize;
311                let end = start.saturating_add(len as usize).min(b.len());
312                if start > b.len() {
313                    return Ok(Bytes::new());
314                }
315                Ok(b.slice(start..end))
316            }
317            BodyRef::Disk { path, .. } => {
318                let mut f = std::fs::File::open(path)?;
319                f.seek(SeekFrom::Start(offset))?;
320                let mut buf = vec![0u8; len as usize];
321                f.read_exact(&mut buf)?;
322                Ok(Bytes::from(buf))
323            }
324        }
325    }
326}
327
328impl fakecloud_core::multi_account::AccountState for S3State {
329    fn new_for_account(account_id: &str, region: &str, _endpoint: &str) -> Self {
330        Self::new(account_id, region)
331    }
332
333    fn inherit_from(&mut self, sibling: &Self) {
334        if let Some(cache) = &sibling.body_cache {
335            self.body_cache = Some(cache.clone());
336        }
337    }
338}
339
/// Shared handle to the S3 state of all accounts: a
/// `MultiAccountState<S3State>` guarded by a `parking_lot::RwLock` and
/// reference-counted with `Arc`.
pub type SharedS3State = Arc<RwLock<fakecloud_core::multi_account::MultiAccountState<S3State>>>;
341
/// Construct a memory-backed [`BodyRef`] from [`Bytes`].
///
/// The bytes are moved into the `BodyRef::Memory` variant as-is.
pub fn memory_body(bytes: Bytes) -> BodyRef {
    BodyRef::Memory(bytes)
}
346
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// State used by the body-reading tests.
    fn fresh_state() -> S3State {
        S3State::new("123", "us-east-1")
    }

    /// Write `contents` to a fresh temp file and build a disk-backed body
    /// over it. The temp file handle is returned as well so the file stays
    /// on disk for as long as the caller holds it.
    fn disk_body(contents: &[u8]) -> (tempfile::NamedTempFile, BodyRef) {
        let file = tempfile::NamedTempFile::new().unwrap();
        file.as_file().write_all(contents).unwrap();
        let body = BodyRef::Disk {
            bucket: "b".to_string(),
            key: "k".to_string(),
            version: None,
            path: file.path().to_path_buf(),
            size: contents.len() as _,
        };
        (file, body)
    }

    #[test]
    fn new_bucket_seeds_full_control_acl() {
        let bucket = S3Bucket::new("my-bucket", "us-east-1", "owner-id");

        assert_eq!(bucket.name, "my-bucket");
        assert_eq!(bucket.region, "us-east-1");
        assert_eq!(bucket.acl_owner_id, "owner-id");

        // Exactly one grant: full control for the owner.
        assert_eq!(bucket.acl_grants.len(), 1);
        let grant = &bucket.acl_grants[0];
        assert_eq!(grant.permission, "FULL_CONTROL");
        assert_eq!(grant.grantee_type, "CanonicalUser");

        assert!(!bucket.eventbridge_enabled);
        assert!(bucket.versioning.is_none());
    }

    #[test]
    fn s3state_new_and_reset_clears_buckets() {
        let mut state = S3State::new("123456789012", "us-east-1");
        assert!(state.buckets.is_empty());

        let bucket = S3Bucket::new("b", "us-east-1", "owner");
        state.buckets.insert("b".to_string(), bucket);

        let event = S3NotificationEvent {
            bucket: "b".to_string(),
            key: "k".to_string(),
            event_type: "s3:ObjectCreated:Put".to_string(),
            timestamp: Utc::now(),
        };
        state.notification_events.push(event);

        state.reset();

        assert!(state.buckets.is_empty());
        assert!(state.notification_events.is_empty());
    }

    #[test]
    fn read_body_from_memory_returns_bytes() {
        let body = memory_body(Bytes::from_static(b"hello"));
        let got = fresh_state().read_body(&body).unwrap();
        assert_eq!(got, &b"hello"[..]);
    }

    #[test]
    fn read_body_from_disk_reads_file() {
        let (_file, body) = disk_body(b"file-body");
        let got = fresh_state().read_body(&body).unwrap();
        assert_eq!(got, &b"file-body"[..]);
    }

    #[test]
    fn read_body_uncached_reads_memory_and_disk() {
        // bug-audit 4.7: the off-lock multipart assembler relies on this
        // state-free reader for both body kinds.
        let mem = memory_body(Bytes::from_static(b"hello"));
        let from_memory = S3State::read_body_uncached(&mem).unwrap();
        assert_eq!(from_memory, &b"hello"[..]);

        let (_file, disk) = disk_body(b"file-body");
        let from_disk = S3State::read_body_uncached(&disk).unwrap();
        assert_eq!(from_disk, &b"file-body"[..]);
    }

    #[test]
    fn read_body_range_slices_memory() {
        let body = memory_body(Bytes::from_static(b"abcdefghij"));
        let got = fresh_state().read_body_range(&body, 2, 4).unwrap();
        assert_eq!(got, &b"cdef"[..]);
    }

    #[test]
    fn read_body_range_memory_beyond_length_returns_empty() {
        let body = memory_body(Bytes::from_static(b"abc"));
        let got = fresh_state().read_body_range(&body, 100, 4).unwrap();
        assert!(got.is_empty());
    }

    #[test]
    fn read_body_range_memory_clamps_to_length() {
        let body = memory_body(Bytes::from_static(b"abcdef"));
        let got = fresh_state().read_body_range(&body, 4, 100).unwrap();
        assert_eq!(got, &b"ef"[..]);
    }

    #[test]
    fn read_body_range_from_disk() {
        let (_file, body) = disk_body(b"0123456789");
        let got = fresh_state().read_body_range(&body, 3, 4).unwrap();
        assert_eq!(got, &b"3456"[..]);
    }

    #[test]
    fn account_state_impl_new_for_account() {
        use fakecloud_core::multi_account::AccountState;

        let state = <S3State as AccountState>::new_for_account(
            "111122223333",
            "eu-west-1",
            "http://x",
        );
        assert_eq!(state.account_id, "111122223333");
        assert_eq!(state.region, "eu-west-1");
    }
}