Skip to main content

fakecloud_dynamodb/
streams_dataplane.rs

1//! DynamoDB Streams data plane (`DynamoDBStreams_20120810`).
2//!
3//! Lambda event source mappings against `arn:aws:dynamodb:.../stream/...`
4//! depend on `ListStreams`, `DescribeStream`, `GetShardIterator`, and
5//! `GetRecords`. The control plane's `EnableStream` / `DescribeTable`
6//! already populate `DynamoTable::stream_records` on every mutation;
7//! this module is the consumer side that surfaces those records.
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use http::StatusCode;
13use serde_json::{json, Value};
14
15use fakecloud_core::service::{AwsRequest, AwsResponse, AwsService, AwsServiceError};
16
17use crate::state::{DynamoTable, SharedDynamoDbState};
18
19pub struct DynamoDbStreamsService {
20    state: SharedDynamoDbState,
21}
22
23impl DynamoDbStreamsService {
24    pub fn new(state: SharedDynamoDbState) -> Self {
25        Self { state }
26    }
27}
28
29#[async_trait]
30impl AwsService for DynamoDbStreamsService {
31    fn service_name(&self) -> &str {
32        "dynamodbstreams"
33    }
34
35    async fn handle(&self, req: AwsRequest) -> Result<AwsResponse, AwsServiceError> {
36        let body: Value = serde_json::from_slice(&req.body).unwrap_or_default();
37        match req.action.as_str() {
38            "ListStreams" => self.list_streams(&req, &body),
39            "DescribeStream" => self.describe_stream(&req, &body),
40            "GetShardIterator" => self.get_shard_iterator(&req, &body),
41            "GetRecords" => self.get_records(&req, &body),
42            _ => Err(AwsServiceError::action_not_implemented(
43                "dynamodbstreams",
44                &req.action,
45            )),
46        }
47    }
48
49    fn supported_actions(&self) -> &[&str] {
50        &[
51            "ListStreams",
52            "DescribeStream",
53            "GetShardIterator",
54            "GetRecords",
55        ]
56    }
57}
58
59impl DynamoDbStreamsService {
60    fn list_streams(&self, req: &AwsRequest, body: &Value) -> Result<AwsResponse, AwsServiceError> {
61        let table_filter = body["TableName"].as_str();
62        let accounts = self.state.read();
63        let state = match accounts.get(&req.account_id) {
64            Some(s) => s,
65            None => return Ok(AwsResponse::ok_json(json!({ "Streams": [] }))),
66        };
67        let mut streams = Vec::new();
68        for table in state.tables.values() {
69            if let Some(name) = table_filter {
70                if table.name != name {
71                    continue;
72                }
73            }
74            if !table.stream_enabled {
75                continue;
76            }
77            let Some(arn) = table.stream_arn.as_ref() else {
78                continue;
79            };
80            let label = stream_label(arn);
81            streams.push(json!({
82                "StreamArn": arn,
83                "TableName": table.name,
84                "StreamLabel": label,
85            }));
86        }
87        Ok(AwsResponse::ok_json(json!({ "Streams": streams })))
88    }
89
90    fn describe_stream(
91        &self,
92        req: &AwsRequest,
93        body: &Value,
94    ) -> Result<AwsResponse, AwsServiceError> {
95        let stream_arn = require_string(body, "StreamArn")?;
96        let accounts = self.state.read();
97        let state = accounts
98            .get(&req.account_id)
99            .ok_or_else(|| not_found("Stream", &stream_arn))?;
100        let table = state
101            .tables
102            .values()
103            .find(|t| t.stream_arn.as_deref() == Some(stream_arn.as_str()))
104            .ok_or_else(|| not_found("Stream", &stream_arn))?;
105
106        let view_type = table
107            .stream_view_type
108            .clone()
109            .unwrap_or_else(|| "NEW_AND_OLD_IMAGES".to_string());
110        let label = stream_label(&stream_arn);
111        let key_schema: Vec<Value> = table
112            .key_schema
113            .iter()
114            .map(|k| {
115                json!({
116                    "AttributeName": k.attribute_name,
117                    "KeyType": k.key_type,
118                })
119            })
120            .collect();
121
122        let body = json!({
123            "StreamDescription": {
124                "StreamArn": stream_arn,
125                "StreamLabel": label,
126                "StreamStatus": "ENABLED",
127                "StreamViewType": view_type,
128                "CreationRequestDateTime": table.created_at.timestamp() as f64,
129                "TableName": table.name,
130                "KeySchema": key_schema,
131                "Shards": [{
132                    "ShardId": "shardId-00000000000000000000-00000001",
133                    "SequenceNumberRange": {
134                        "StartingSequenceNumber": "0",
135                    },
136                }],
137            }
138        });
139        Ok(AwsResponse::ok_json(body))
140    }
141
142    fn get_shard_iterator(
143        &self,
144        req: &AwsRequest,
145        body: &Value,
146    ) -> Result<AwsResponse, AwsServiceError> {
147        let stream_arn = require_string(body, "StreamArn")?;
148        let shard_id = require_string(body, "ShardId")?;
149        let iterator_type = require_string(body, "ShardIteratorType")?;
150
151        let accounts = self.state.read();
152        let state = accounts
153            .get(&req.account_id)
154            .ok_or_else(|| not_found("Stream", &stream_arn))?;
155        let table = state
156            .tables
157            .values()
158            .find(|t| t.stream_arn.as_deref() == Some(stream_arn.as_str()))
159            .ok_or_else(|| not_found("Stream", &stream_arn))?;
160
161        // The iterator is anchored to an *exclusive start sequence number*,
162        // not a positional index into the records Vec. `add_stream_record`
163        // physically trims expired records off the front (`records.retain`),
164        // which would shift every index; a sequence-number anchor is stable
165        // across trims (like native Kinesis advancing by stored seq, not by
166        // Vec position). GetRecords returns records whose sequence_number is
167        // strictly greater than this anchor. bug-audit 2026-06-15, 4.4.
168        let records = table.stream_records.read();
169        let after_seq: String = match iterator_type.as_str() {
170            // Oldest retained record: anchor below every sequence number so the
171            // first GetRecords returns the whole retained window.
172            "TRIM_HORIZON" => "0".to_string(),
173            // After the newest record: anchor at the max current sequence so
174            // only records added later are returned.
175            "LATEST" => records
176                .iter()
177                .map(|r| r.dynamodb.sequence_number.clone())
178                .max_by(|a, b| cmp_seq(a, b))
179                .unwrap_or_else(|| "0".to_string()),
180            // Inclusive of the named record: anchor just below it.
181            "AT_SEQUENCE_NUMBER" => {
182                let seq = require_string(body, "SequenceNumber")?;
183                if !records.iter().any(|r| r.dynamodb.sequence_number == seq) {
184                    return Err(invalid_argument("SequenceNumber not found"));
185                }
186                exclusive_before(&seq)
187            }
188            // Exclusive of the named record: anchor exactly at it.
189            "AFTER_SEQUENCE_NUMBER" => {
190                let seq = require_string(body, "SequenceNumber")?;
191                if !records.iter().any(|r| r.dynamodb.sequence_number == seq) {
192                    return Err(invalid_argument("SequenceNumber not found"));
193                }
194                seq
195            }
196            other => {
197                return Err(invalid_argument(&format!(
198                    "Unsupported ShardIteratorType: {other}",
199                )))
200            }
201        };
202
203        let token = format!("{stream_arn}|{shard_id}|{after_seq}");
204        Ok(AwsResponse::ok_json(json!({ "ShardIterator": token })))
205    }
206
207    fn get_records(&self, req: &AwsRequest, body: &Value) -> Result<AwsResponse, AwsServiceError> {
208        let iterator = require_string(body, "ShardIterator")?;
209        let limit = body["Limit"].as_u64().unwrap_or(1000) as usize;
210
211        let parts: Vec<&str> = iterator.splitn(3, '|').collect();
212        if parts.len() != 3 {
213            return Err(invalid_argument("ShardIterator is invalid"));
214        }
215        let stream_arn = parts[0].to_string();
216        let shard_id = parts[1].to_string();
217        // Exclusive start sequence number (see get_shard_iterator). Index-based
218        // tokens minted by older builds would parse as a number too, but they
219        // are positions; since we now compare by sequence number this is
220        // self-correcting after one GetShardIterator. bug-audit 2026-06-15, 4.4.
221        let after_seq = parts[2].to_string();
222
223        let accounts = self.state.read();
224        let state = accounts
225            .get(&req.account_id)
226            .ok_or_else(|| not_found("Stream", &stream_arn))?;
227        let table = state
228            .tables
229            .values()
230            .find(|t| t.stream_arn.as_deref() == Some(stream_arn.as_str()))
231            .ok_or_else(|| not_found("Stream", &stream_arn))?;
232
233        // Records whose sequence number is strictly greater than the anchor,
234        // in stored (arrival) order. Front-trimming by `records.retain` cannot
235        // make us skip or replay: the anchor moves only by what we actually
236        // returned, never by physical position.
237        let records = table.stream_records.read();
238        let selected: Vec<&crate::state::StreamRecord> = records
239            .iter()
240            .filter(|r| {
241                cmp_seq(&r.dynamodb.sequence_number, &after_seq) == std::cmp::Ordering::Greater
242            })
243            .take(limit)
244            .collect();
245
246        let next_seq = selected
247            .last()
248            .map(|r| r.dynamodb.sequence_number.clone())
249            .unwrap_or(after_seq);
250        let records_json: Vec<Value> = selected
251            .iter()
252            .map(|r| stream_record_to_json(r, table))
253            .collect();
254
255        let next_token = format!("{stream_arn}|{shard_id}|{next_seq}");
256        Ok(AwsResponse::ok_json(json!({
257            "Records": records_json,
258            "NextShardIterator": next_token,
259        })))
260    }
261}
262
263fn stream_record_to_json(r: &crate::state::StreamRecord, table: &DynamoTable) -> Value {
264    let mut dynamodb = json!({
265        "ApproximateCreationDateTime": r.timestamp.timestamp() as f64,
266        "Keys": &r.dynamodb.keys,
267        "SequenceNumber": r.dynamodb.sequence_number,
268        "SizeBytes": r.dynamodb.size_bytes,
269        "StreamViewType": r.dynamodb.stream_view_type,
270    });
271    if let Some(ni) = r.dynamodb.new_image.as_ref() {
272        dynamodb["NewImage"] = json!(ni);
273    }
274    if let Some(oi) = r.dynamodb.old_image.as_ref() {
275        dynamodb["OldImage"] = json!(oi);
276    }
277    json!({
278        "eventID": r.event_id,
279        "eventName": r.event_name,
280        "eventVersion": r.event_version,
281        "eventSource": r.event_source,
282        "awsRegion": r.aws_region,
283        "eventSourceARN": table.stream_arn.clone().unwrap_or_default(),
284        "dynamodb": dynamodb,
285    })
286}
287
/// Extracts the stream label: the text after the last `/` of the ARN, or the
/// whole input when it contains no `/`.
fn stream_label(stream_arn: &str) -> String {
    stream_arn
        .rsplit_once('/')
        .map_or(stream_arn, |(_, label)| label)
        .to_owned()
}
291
/// Orders two DynamoDB stream sequence numbers.
///
/// When both inputs parse as `u128` they are compared by numeric value, so
/// values that differ only in zero padding (or width) compare as equal or in
/// numeric order. If either input fails to parse, the two strings are compared
/// byte-wise instead, which keeps the result deterministic.
fn cmp_seq(a: &str, b: &str) -> std::cmp::Ordering {
    if let (Ok(lhs), Ok(rhs)) = (a.parse::<u128>(), b.parse::<u128>()) {
        return lhs.cmp(&rhs);
    }
    a.cmp(b)
}
303
/// Produces the exclusive anchor that makes `seq` itself the first record
/// returned, as needed for the inclusive `AT_SEQUENCE_NUMBER` iterator type.
///
/// For a numeric `seq` greater than zero this is `seq - 1`, zero-padded to the
/// length of the input so string ordering keeps matching numeric ordering.
/// For `0` or a non-numeric input the anchor is `"0"`.
fn exclusive_before(seq: &str) -> String {
    let predecessor = seq.parse::<u128>().ok().and_then(|n| n.checked_sub(1));
    match predecessor {
        Some(prev) => format!("{prev:0width$}", width = seq.len()),
        None => "0".to_string(),
    }
}
318
319fn require_string(body: &Value, field: &str) -> Result<String, AwsServiceError> {
320    body[field]
321        .as_str()
322        .map(|s| s.to_string())
323        .ok_or_else(|| invalid_argument(&format!("{field} is required")))
324}
325
326fn invalid_argument(msg: &str) -> AwsServiceError {
327    AwsServiceError::aws_error(StatusCode::BAD_REQUEST, "ValidationException", msg)
328}
329
330fn not_found(kind: &str, target: &str) -> AwsServiceError {
331    AwsServiceError::aws_error(
332        StatusCode::BAD_REQUEST,
333        "ResourceNotFoundException",
334        format!("{kind} not found: {target}"),
335    )
336}
337
338pub fn shared(state: SharedDynamoDbState) -> Arc<dyn AwsService> {
339    Arc::new(DynamoDbStreamsService::new(state))
340}
341
342#[cfg(test)]
343mod tests {
344    use super::*;
345    use crate::state::{DynamoDbStreamRecord, DynamoTable, ProvisionedThroughput, StreamRecord};
346    use bytes::Bytes;
347    use chrono::Utc;
348    use http::{HeaderMap, Method};
349    use parking_lot::RwLock;
350    use std::collections::{BTreeMap, HashMap};
351    use std::sync::Arc;
352
353    fn make_state() -> SharedDynamoDbState {
354        Arc::new(RwLock::new(
355            fakecloud_core::multi_account::MultiAccountState::new("123456789012", "us-east-1", ""),
356        ))
357    }
358
359    fn req(action: &str, body: Value) -> AwsRequest {
360        AwsRequest {
361            service: "dynamodbstreams".into(),
362            action: action.into(),
363            region: "us-east-1".into(),
364            account_id: "123456789012".into(),
365            request_id: "r".into(),
366            headers: HeaderMap::new(),
367            query_params: HashMap::new(),
368            body: Bytes::from(serde_json::to_vec(&body).unwrap()),
369            body_stream: parking_lot::Mutex::new(None),
370            path_segments: vec![],
371            raw_path: "/".into(),
372            raw_query: String::new(),
373            method: Method::POST,
374            is_query_protocol: false,
375            access_key_id: None,
376            principal: None,
377        }
378    }
379
380    fn seed_table(state: &SharedDynamoDbState) -> String {
381        let mut accts = state.write();
382        let s = accts.get_or_create("123456789012");
383        let arn =
384            "arn:aws:dynamodb:us-east-1:123456789012:table/widgets/stream/2026-05-03T00:00:00.000"
385                .to_string();
386        let table = DynamoTable {
387            name: "widgets".to_string(),
388            arn: "arn:aws:dynamodb:us-east-1:123456789012:table/widgets".to_string(),
389            table_id: "id".to_string(),
390            key_schema: Vec::new(),
391            attribute_definitions: Vec::new(),
392            provisioned_throughput: ProvisionedThroughput {
393                read_capacity_units: 0,
394                write_capacity_units: 0,
395            },
396            items: Vec::new(),
397            gsi: Vec::new(),
398            lsi: Vec::new(),
399            tags: BTreeMap::new(),
400            created_at: Utc::now(),
401            status: "ACTIVE".to_string(),
402            item_count: 0,
403            size_bytes: 0,
404            billing_mode: "PAY_PER_REQUEST".to_string(),
405            ttl_attribute: None,
406            ttl_enabled: false,
407            resource_policy: None,
408            pitr_enabled: false,
409            kinesis_destinations: Vec::new(),
410            contributor_insights_status: "DISABLED".to_string(),
411            contributor_insights_counters: BTreeMap::new(),
412            stream_enabled: true,
413            stream_view_type: Some("NEW_AND_OLD_IMAGES".to_string()),
414            stream_arn: Some(arn.clone()),
415            stream_records: Arc::new(RwLock::new(Vec::new())),
416            sse_type: None,
417            sse_kms_key_arn: None,
418            deletion_protection_enabled: false,
419            on_demand_throughput: None,
420            table_class: "STANDARD".to_string(),
421        };
422        let rec = StreamRecord {
423            event_id: "e1".into(),
424            event_name: "INSERT".into(),
425            event_version: "1.1".into(),
426            event_source: "aws:dynamodb".into(),
427            aws_region: "us-east-1".into(),
428            event_source_arn: arn.clone(),
429            timestamp: Utc::now(),
430            dynamodb: DynamoDbStreamRecord {
431                keys: HashMap::new(),
432                new_image: Some(HashMap::new()),
433                old_image: None,
434                sequence_number: "1".into(),
435                size_bytes: 16,
436                stream_view_type: "NEW_AND_OLD_IMAGES".into(),
437            },
438        };
439        table.stream_records.write().push(rec);
440        s.tables.insert("widgets".to_string(), table);
441        arn
442    }
443
444    #[tokio::test]
445    async fn list_streams_returns_enabled_streams() {
446        let state = make_state();
447        let arn = seed_table(&state);
448        let svc = DynamoDbStreamsService::new(state);
449        let resp = svc.handle(req("ListStreams", json!({}))).await.unwrap();
450        let body: Value = serde_json::from_slice(resp.body.expect_bytes()).unwrap();
451        let streams = body["Streams"].as_array().unwrap();
452        assert_eq!(streams.len(), 1);
453        assert_eq!(streams[0]["StreamArn"].as_str().unwrap(), arn);
454    }
455
456    #[tokio::test]
457    async fn describe_stream_returns_shard() {
458        let state = make_state();
459        let arn = seed_table(&state);
460        let svc = DynamoDbStreamsService::new(state);
461        let resp = svc
462            .handle(req("DescribeStream", json!({"StreamArn": arn})))
463            .await
464            .unwrap();
465        let body: Value = serde_json::from_slice(resp.body.expect_bytes()).unwrap();
466        let desc = &body["StreamDescription"];
467        assert_eq!(desc["StreamStatus"].as_str().unwrap(), "ENABLED");
468        assert_eq!(desc["Shards"].as_array().unwrap().len(), 1);
469    }
470
471    #[tokio::test]
472    async fn get_records_round_trip_via_shard_iterator() {
473        let state = make_state();
474        let arn = seed_table(&state);
475        let svc = DynamoDbStreamsService::new(state);
476        let it_resp = svc
477            .handle(req(
478                "GetShardIterator",
479                json!({
480                    "StreamArn": arn,
481                    "ShardId": "shardId-00000000000000000000-00000001",
482                    "ShardIteratorType": "TRIM_HORIZON",
483                }),
484            ))
485            .await
486            .unwrap();
487        let it_body: Value = serde_json::from_slice(it_resp.body.expect_bytes()).unwrap();
488        let iterator = it_body["ShardIterator"].as_str().unwrap().to_string();
489        let resp = svc
490            .handle(req("GetRecords", json!({"ShardIterator": iterator})))
491            .await
492            .unwrap();
493        let body: Value = serde_json::from_slice(resp.body.expect_bytes()).unwrap();
494        let recs = body["Records"].as_array().unwrap();
495        assert_eq!(recs.len(), 1);
496        assert_eq!(recs[0]["eventName"].as_str().unwrap(), "INSERT");
497    }
498
499    fn push_record(state: &SharedDynamoDbState, seq: &str, age_hours: i64, event_id: &str) {
500        let mut accts = state.write();
501        let s = accts.get_or_create("123456789012");
502        let table = s.tables.get_mut("widgets").unwrap();
503        let rec = StreamRecord {
504            event_id: event_id.into(),
505            event_name: "INSERT".into(),
506            event_version: "1.1".into(),
507            event_source: "aws:dynamodb".into(),
508            aws_region: "us-east-1".into(),
509            event_source_arn: table.stream_arn.clone().unwrap(),
510            timestamp: Utc::now() - chrono::Duration::hours(age_hours),
511            dynamodb: DynamoDbStreamRecord {
512                keys: HashMap::new(),
513                new_image: Some(HashMap::new()),
514                old_image: None,
515                sequence_number: seq.into(),
516                size_bytes: 16,
517                stream_view_type: "NEW_AND_OLD_IMAGES".into(),
518            },
519        };
520        table.stream_records.write().push(rec);
521    }
522
523    fn trim_front(state: &SharedDynamoDbState, n: usize) {
524        let accts = state.read();
525        let s = accts.get("123456789012").unwrap();
526        let table = s.tables.get("widgets").unwrap();
527        let mut recs = table.stream_records.write();
528        for _ in 0..n {
529            if !recs.is_empty() {
530                recs.remove(0);
531            }
532        }
533    }
534
535    // bug-audit 2026-06-15, 4.4: the iterator is anchored to a sequence number,
536    // not a Vec index. A consumer that has read up to record N must, after the
537    // front of the records Vec is physically trimmed (24h retention), continue
538    // exactly where it left off — no skipped or replayed records.
539    #[tokio::test]
540    async fn iterator_survives_front_trim_without_skip_or_replay() {
541        let state = make_state();
542        let arn = seed_table(&state); // seeds one record seq "1"
543                                      // Replace the seeded record set with a clean, ordered set seq 1..=5.
544        {
545            let accts = state.read();
546            let s = accts.get("123456789012").unwrap();
547            s.tables
548                .get("widgets")
549                .unwrap()
550                .stream_records
551                .write()
552                .clear();
553        }
554        for i in 1..=5u64 {
555            // First two records are aged so a later trim removes them.
556            let age = if i <= 2 { 30 } else { 0 };
557            push_record(&state, &format!("{i:021}"), age, &format!("e{i}"));
558        }
559        let svc = DynamoDbStreamsService::new(state.clone());
560
561        // Start at TRIM_HORIZON, read 3 records (seq 1,2,3).
562        let it_resp = svc
563            .handle(req(
564                "GetShardIterator",
565                json!({
566                    "StreamArn": arn,
567                    "ShardId": "shardId-00000000000000000000-00000001",
568                    "ShardIteratorType": "TRIM_HORIZON",
569                }),
570            ))
571            .await
572            .unwrap();
573        let it: Value = serde_json::from_slice(it_resp.body.expect_bytes()).unwrap();
574        let iterator = it["ShardIterator"].as_str().unwrap().to_string();
575
576        let r1 = svc
577            .handle(req(
578                "GetRecords",
579                json!({"ShardIterator": iterator, "Limit": 3}),
580            ))
581            .await
582            .unwrap();
583        let b1: Value = serde_json::from_slice(r1.body.expect_bytes()).unwrap();
584        let recs1 = b1["Records"].as_array().unwrap();
585        assert_eq!(recs1.len(), 3);
586        assert_eq!(recs1[0]["eventID"].as_str().unwrap(), "e1");
587        assert_eq!(recs1[2]["eventID"].as_str().unwrap(), "e3");
588        let next = b1["NextShardIterator"].as_str().unwrap().to_string();
589
590        // Now retention trims the two aged front records (seq 1,2) off the Vec.
591        // An index-based iterator would now mis-resolve and skip/replay; a
592        // sequence-anchored one continues correctly with seq 4,5.
593        trim_front(&state, 2);
594
595        let r2 = svc
596            .handle(req("GetRecords", json!({"ShardIterator": next})))
597            .await
598            .unwrap();
599        let b2: Value = serde_json::from_slice(r2.body.expect_bytes()).unwrap();
600        let recs2 = b2["Records"].as_array().unwrap();
601        assert_eq!(
602            recs2.len(),
603            2,
604            "must return exactly the un-consumed records after a front trim"
605        );
606        assert_eq!(recs2[0]["eventID"].as_str().unwrap(), "e4");
607        assert_eq!(recs2[1]["eventID"].as_str().unwrap(), "e5");
608    }
609
610    #[tokio::test]
611    async fn describe_stream_unknown_arn_404s() {
612        let state = make_state();
613        let _ = seed_table(&state);
614        let svc = DynamoDbStreamsService::new(state);
615        let err = svc
616            .handle(req(
617                "DescribeStream",
618                json!({"StreamArn": "arn:aws:dynamodb:us-east-1:123456789012:table/missing/stream/x"}),
619            ))
620            .await
621            .err()
622            .expect("expected ResourceNotFound");
623        assert!(format!("{:?}", err).contains("ResourceNotFoundException"));
624    }
625}