vantage-aws 0.5.3

AWS API persistence backend for Vantage framework — incubating
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
//! REST-XML HTTP transport (S3 etc.).
//!
//! Two halves:
//!   - [`build_request`] turns the operation's `"METHOD path?query"`
//!     target plus the resolved conditions into the wire-ready
//!     `(method, path, query_pairs)`. Path placeholders (`{Bucket}`)
//!     pull their value from conditions; remaining conditions become
//!     query-string params.
//!   - [`restxml_call`] signs and sends the request, returning the raw
//!     XML text.
//!
//! Path-style addressing only — `https://{service}.{region}.amazonaws.com/...`.
//! Virtual-host style (`{bucket}.s3.{region}...`) would need DNS-safe
//! bucket names and additional cross-region routing; v0 stays simple
//! and lets the caller set the region for the bucket they're targeting.
//!
//! S3 always requires `x-amz-content-sha256` to be a signed header
//! with the body's hex sha256. We add it unconditionally for every
//! REST-XML service — others accept it fine, S3 errors without it.

use std::time::SystemTime;

use vantage_core::{Result, error};

use crate::account::AwsAccount;
use crate::condition::AwsCondition;
use crate::sign::sign_v4;

/// Hex-encoded SHA256 of the empty body — signed and sent as the
/// `x-amz-content-sha256` value on every request this transport
/// issues. The transport is read-only (GET / HEAD), so the body is
/// always empty and the digest never changes.
const EMPTY_BODY_SHA256: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

/// Parse a `"METHOD path?query"` target and splice the resolved
/// conditions in: `{Placeholder}` segments take their value from a
/// matching `eq("Placeholder", _)` condition, and any leftover
/// conditions are appended to the query string.
///
/// Returns `(method, path, query_pairs)`. The method is upper-cased,
/// the path has every placeholder substituted (and path-encoded), and
/// `query_pairs` carry the static query from the target plus any
/// condition-derived params, in target-then-condition order. Query
/// pairs are returned *unencoded*.
///
/// # Errors
///
/// - the target has no space separating the method from the path;
/// - a condition is `In` with zero or more than one value, or is
///   still `Deferred`;
/// - a `{Placeholder}` is never closed with `}`;
/// - a `{Placeholder}` has no condition with a matching field name.
#[allow(clippy::type_complexity)]
pub(crate) fn build_request(
    target: &str,
    resolved: &[AwsCondition],
) -> Result<(String, String, Vec<(String, String)>)> {
    let (method, rest) = target.split_once(' ').ok_or_else(|| {
        error!(
            "REST target must be \"METHOD path[?query]\" — got",
            target = target
        )
    })?;
    let method = method.trim().to_ascii_uppercase();
    let (path_template, static_query) = rest.split_once('?').unwrap_or((rest, ""));

    // Pull (field, value) pairs from the resolved conditions. By the
    // time we get here Deferred has been materialised, multi-value
    // sets have errored out.
    let mut params: Vec<(String, String)> = Vec::with_capacity(resolved.len());
    for cond in resolved {
        match cond {
            AwsCondition::Eq { field, value } => {
                params.push((field.clone(), cbor_scalar_to_string(value)));
            }
            AwsCondition::In { field, values } => match values.as_slice() {
                [single] => params.push((field.clone(), cbor_scalar_to_string(single))),
                [] => {
                    return Err(error!(
                        "AwsCondition::In with zero values is not representable",
                        field = field.as_str()
                    ));
                }
                _ => {
                    return Err(error!(
                        "AWS REST APIs don't accept multi-value filters; \
                         resolved condition must collapse to one value",
                        field = field.as_str(),
                        count = values.len()
                    ));
                }
            },
            AwsCondition::Deferred { field, .. } => {
                return Err(error!(
                    "Internal: Deferred condition reached REST builder unresolved",
                    field = field.as_str()
                ));
            }
        }
    }

    // Substitute {Placeholder} segments from the params, remembering
    // each name we consumed so it doesn't reappear in the query string.
    let mut path = String::with_capacity(path_template.len());
    let mut consumed: Vec<&str> = Vec::new();
    let mut remaining = path_template;
    while let Some(open) = remaining.find('{') {
        // Literal text before the placeholder is copied verbatim.
        path.push_str(&remaining[..open]);
        let after_open = &remaining[open + 1..];
        // A `{` that is never closed is a malformed target; refuse it
        // rather than treating the rest of the template as a name.
        let close = after_open.find('}').ok_or_else(|| {
            error!(
                "REST path placeholder is missing its closing brace",
                target = target
            )
        })?;
        let name = &after_open[..close];
        let value = params
            .iter()
            .find(|(k, _)| k.as_str() == name)
            .map(|(_, v)| v.as_str())
            .ok_or_else(|| {
                error!(
                    "REST path placeholder has no matching condition",
                    placeholder = name
                )
            })?;
        // Path segments are URL-path-encoded — keep `/` literal so
        // multi-segment placeholders (rare, but Lambda allows them)
        // don't double-encode.
        path.push_str(&path_segment_encode(value));
        consumed.push(name);
        remaining = &after_open[close + 1..];
    }
    // Whatever follows the last placeholder (or the whole template
    // when there were none) is literal.
    path.push_str(remaining);

    // Build the query string: static pairs from the target first, then
    // conditions that weren't consumed by path placeholders. Both come
    // through the same encoder so signing matches the wire bytes.
    let mut query_pairs: Vec<(String, String)> = Vec::new();
    if !static_query.is_empty() {
        query_pairs.extend(static_query.split('&').map(|kv| {
            // A bare `key` (no `=`) becomes `key` with an empty value.
            let (k, v) = kv.split_once('=').unwrap_or((kv, ""));
            (k.to_string(), v.to_string())
        }));
    }
    query_pairs.extend(
        params
            .into_iter()
            .filter(|(k, _)| !consumed.iter().any(|name| *name == k.as_str())),
    );

    Ok((method, path, query_pairs))
}

/// Issue a signed REST-XML request and return the response body as
/// raw text. Empty body — we're read-only in v0.
pub(crate) async fn restxml_call(
    account: &AwsAccount,
    service: &str,
    method: &str,
    path: &str,
    query: &[(String, String)],
) -> Result<String> {
    let region = account.region();
    if region.is_empty() {
        return Err(error!(
            "AWS region is not configured — pass it to AwsAccount::new \
             or set AWS_REGION before issuing a REST-XML request"
        ));
    }
    let host = format!("{service}.{region}.amazonaws.com");
    let url = build_url(&host, path, query);

    let body_bytes: Vec<u8> = Vec::new();

    let signing_headers = [
        ("host".to_string(), host.clone()),
        (
            "x-amz-content-sha256".to_string(),
            EMPTY_BODY_SHA256.to_string(),
        ),
    ];

    let signed = sign_v4(
        account.access_key(),
        account.secret_key(),
        account.session_token(),
        region,
        service,
        method,
        &url,
        &signing_headers,
        &body_bytes,
        SystemTime::now(),
    )?;

    let req_builder = match method {
        "GET" => account.http().get(&url),
        "HEAD" => account.http().head(&url),
        other => {
            return Err(error!(
                "REST-XML transport currently only supports read methods",
                method = other
            ));
        }
    };
    let mut req = req_builder.header("x-amz-content-sha256", EMPTY_BODY_SHA256);
    for h in &signed {
        req = req.header(h.name.as_str(), h.value.as_str());
    }

    let resp = req.send().await.map_err(|e| {
        error!(
            "AWS REST-XML request failed",
            url = url.as_str(),
            method = method,
            detail = e
        )
    })?;

    let status = resp.status();
    let response_text = resp
        .text()
        .await
        .map_err(|e| error!("Failed to read AWS REST-XML response body", detail = e))?;

    if !status.is_success() {
        if let Some(hint) = s3_permanent_redirect_hint(status.as_u16(), &response_text) {
            return Err(error!(hint));
        }
        return Err(error!(
            "AWS REST-XML request returned error status",
            url = url.as_str(),
            status = status.as_u16(),
            body = response_text
        ));
    }

    Ok(response_text)
}

/// Turn an S3 `301 PermanentRedirect` error body into an actionable
/// "wrong region" message.
///
/// S3 answers with `PermanentRedirect` when a bucket-scoped request
/// goes to the wrong region. SigV4 binds the signature to the original
/// host, so transparently following the redirect isn't viable — but
/// the body names the correct endpoint, from which the region can be
/// read and suggested to the user instead of dumping raw XML.
///
/// Returns `None` when the status is not 301, the body is not a
/// `PermanentRedirect` error, the `<Endpoint>` element is missing, or
/// no region can be parsed out of it. A missing `<Bucket>` element is
/// tolerated and rendered as `(unknown)`.
fn s3_permanent_redirect_hint(status: u16, body: &str) -> Option<String> {
    let is_permanent_redirect =
        status == 301 && body.contains("<Code>PermanentRedirect</Code>");
    if !is_permanent_redirect {
        return None;
    }

    // Endpoint and region are mandatory for a useful hint; the bucket
    // name is only cosmetic.
    let endpoint = xml_inner(body, "Endpoint")?;
    let region = parse_s3_endpoint_region(endpoint)?;
    let bucket_label = match xml_inner(body, "Bucket") {
        Some(bucket) => bucket,
        None => "(unknown)",
    };

    let hint = format!(
        "S3 bucket `{bucket_label}` lives in region `{region}`, \
         not the one currently configured. Re-run with `--region {region}` \
         (or set AWS_REGION={region}). Original endpoint: {endpoint}."
    );
    Some(hint)
}

/// Pull the region out of an S3 redirect `<Endpoint>` value. S3 emits
/// the endpoint in one of four shapes (depending on bucket age and
/// region):
///   - `<bucket>.s3.<region>.amazonaws.com` — modern virtual-hosted
///   - `<bucket>.s3-<region>.amazonaws.com` — legacy dash form (still
///     used for some pre-2019 buckets in older regions)
///   - `s3.<region>.amazonaws.com` — path-style, modern
///   - `s3-<region>.amazonaws.com` — path-style, legacy
///
/// `fips.` / `dualstack.` qualifiers between the `s3` marker and the
/// region (e.g. `s3.dualstack.<region>`, `s3-fips.<region>`) are
/// skipped so the bare region name is returned.
///
/// Returns `None` when the host does not end in `.amazonaws.com`, has
/// no recognisable `s3` marker, or what follows the marker is not a
/// single non-empty DNS label (so a bogus "region" is never suggested
/// to the user).
fn parse_s3_endpoint_region(endpoint: &str) -> Option<&str> {
    let stripped = endpoint.strip_suffix(".amazonaws.com")?;

    // Virtual-hosted form: the region follows the *rightmost* `.s3.` /
    // `.s3-` marker. Bucket names may themselves contain an `s3`
    // label, so the marker kinds are compared by position rather than
    // tried in a fixed order.
    let virtual_hosted = [".s3.", ".s3-"]
        .iter()
        .filter_map(|marker| stripped.rfind(*marker).map(|idx| idx + marker.len()))
        .max()
        .map(|start| &stripped[start..]);

    // Path-style form: the host starts with the `s3` label directly.
    let candidate = virtual_hosted.or_else(|| {
        ["s3.", "s3-"]
            .iter()
            .find_map(|marker| stripped.strip_prefix(*marker))
    })?;

    // Drop endpoint qualifiers that sit in front of the region, in the
    // order AWS writes them (`fips.` before `dualstack.`).
    let mut region = candidate;
    for qualifier in ["fips.", "dualstack."].iter() {
        if let Some(rest) = region.strip_prefix(*qualifier) {
            region = rest;
        }
    }

    // A region is exactly one non-empty DNS label.
    if region.is_empty() || region.contains('.') {
        None
    } else {
        Some(region)
    }
}

/// Return the inner text of the *first* `<tag>…</tag>` pair in a flat
/// XML string, or `None` if either the opening tag or a closing tag
/// after it is missing.
///
/// This is plain substring matching, not XML parsing: the opening tag
/// must appear exactly as `<tag>` (no attributes). Good enough for AWS
/// error bodies, which are single-level and don't have nested elements
/// named the same as the wrapper.
fn xml_inner<'a>(xml: &'a str, tag: &str) -> Option<&'a str> {
    let opening = format!("<{tag}>");
    let closing = format!("</{tag}>");
    // Everything after the first opening tag…
    let (_, after_opening) = xml.split_once(opening.as_str())?;
    // …up to the first closing tag that follows it.
    let (inner, _) = after_opening.split_once(closing.as_str())?;
    Some(inner)
}

/// Assemble the full `https://` URL for a request: `host` and `path`
/// are used verbatim, and each query key and value is percent-encoded
/// with [`query_part_encode`] and joined as `k=v` pairs separated by
/// `&`. No `?` is emitted when `query` is empty.
fn build_url(host: &str, path: &str, query: &[(String, String)]) -> String {
    let base = format!("https://{host}{path}");
    if query.is_empty() {
        return base;
    }
    let encoded_pairs: Vec<String> = query
        .iter()
        .map(|(key, value)| {
            format!("{}={}", query_part_encode(key), query_part_encode(value))
        })
        .collect();
    format!("{base}?{}", encoded_pairs.join("&"))
}

/// Percent-encode a value for use inside a URL path.
///
/// ASCII letters, digits, `-`, `_`, `.`, `~` and `/` pass through
/// untouched — `/` stays literal because multi-segment placeholders
/// like `{Key}` may legitimately contain it. Every other byte of the
/// UTF-8 encoding is written as `%XX` with upper-case hex digits.
fn path_segment_encode(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        let keep_literal = matches!(
            byte,
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' | b'/'
        );
        if keep_literal {
            encoded.push(char::from(byte));
            continue;
        }
        // `%` followed by the high and low nibble as upper-case hex.
        encoded.push('%');
        encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
    }
    encoded
}

/// Percent-encode a single query-string key or value using the SigV4
/// alphabet: ASCII letters, digits, `-`, `_`, `.` and `~` are kept
/// as-is, and every other byte of the UTF-8 encoding (including `/`)
/// becomes `%XX` with upper-case hex digits.
fn query_part_encode(s: &str) -> String {
    s.bytes()
        .fold(String::with_capacity(s.len()), |mut encoded, byte| {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                    encoded.push(char::from(byte));
                }
                _ => encoded.push_str(&format!("%{byte:02X}")),
            }
            encoded
        })
}

/// Render a CBOR scalar for use in a path / query string. Matches the
/// shape of `condition::cbor_to_string` — kept local to avoid making
/// that module's helper public.
fn cbor_scalar_to_string(v: &ciborium::Value) -> String {
    match v {
        ciborium::Value::Text(s) => s.clone(),
        ciborium::Value::Integer(i) => {
            let n: i128 = (*i).into();
            n.to_string()
        }
        ciborium::Value::Float(f) => f.to_string(),
        ciborium::Value::Bool(b) => b.to_string(),
        ciborium::Value::Null => String::new(),
        // Compound values shouldn't reach here for REST APIs, but
        // defensively render via JSON so we don't drop data silently.
        other => other
            .deserialized::<serde_json::Value>()
            .map(|v| v.to_string())
            .unwrap_or_default(),
    }
}

// Unit tests for the pure parts of the transport: request building
// from a target + conditions, and the S3 wrong-region hint parsing.
// Nothing here touches the network.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::condition::AwsCondition;
    use ciborium::Value as CborValue;

    /// A `{Bucket}` placeholder is filled from the matching `Eq`
    /// condition, and that condition does not leak into the query.
    #[test]
    fn build_request_substitutes_placeholders_from_conditions() {
        let target = "GET /{Bucket}?list-type=2";
        let conds = [AwsCondition::Eq {
            field: "Bucket".into(),
            value: CborValue::from("my-bucket"),
        }];
        let (m, p, q) = build_request(target, &conds).unwrap();
        assert_eq!(m, "GET");
        assert_eq!(p, "/my-bucket");
        assert_eq!(q, vec![("list-type".to_string(), "2".to_string())]);
    }

    /// Conditions that match no placeholder are appended to the query
    /// after the target's static pairs, with their values unencoded.
    #[test]
    fn build_request_pushes_unmatched_conditions_to_query() {
        let target = "GET /{Bucket}?list-type=2";
        let conds = [
            AwsCondition::Eq {
                field: "Bucket".into(),
                value: CborValue::from("foo"),
            },
            AwsCondition::Eq {
                field: "prefix".into(),
                value: CborValue::from("logs/"),
            },
        ];
        let (_m, p, q) = build_request(target, &conds).unwrap();
        assert_eq!(p, "/foo");
        assert_eq!(
            q,
            vec![
                ("list-type".to_string(), "2".to_string()),
                ("prefix".to_string(), "logs/".to_string()),
            ]
        );
    }

    /// A placeholder with no matching condition is an error that
    /// mentions the placeholder.
    #[test]
    fn build_request_errors_on_missing_placeholder_value() {
        let target = "GET /{Bucket}";
        let err = build_request(target, &[]).unwrap_err();
        assert!(format!("{err}").contains("placeholder"));
    }

    /// A target without a `?query` part yields an empty query list.
    #[test]
    fn build_request_no_query_section_is_fine() {
        let target = "GET /";
        let (m, p, q) = build_request(target, &[]).unwrap();
        assert_eq!(m, "GET");
        assert_eq!(p, "/");
        assert!(q.is_empty());
    }

    /// A `PermanentRedirect` body produces a hint naming both the
    /// bucket and the region to re-run with.
    #[test]
    fn s3_permanent_redirect_hint_names_bucket_and_region() {
        let body = r#"<?xml version="1.0" encoding="UTF-8"?>
<Error><Code>PermanentRedirect</Code><Message>The bucket you are attempting to access must be addressed using the specified endpoint. Please send all future requests to this endpoint.</Message><Endpoint>ba-coruscant-dev-bucket1.s3.eu-west-2.amazonaws.com</Endpoint><Bucket>ba-coruscant-dev-bucket1</Bucket><RequestId>X</RequestId><HostId>Y</HostId></Error>"#;
        let hint = s3_permanent_redirect_hint(301, body).expect("hint should fire");
        assert!(hint.contains("ba-coruscant-dev-bucket1"));
        assert!(hint.contains("eu-west-2"));
        assert!(hint.contains("--region eu-west-2"));
    }

    /// Every endpoint shape listed on `parse_s3_endpoint_region`
    /// resolves to its region; a non-AWS host resolves to `None`.
    #[test]
    fn parse_s3_endpoint_region_handles_all_four_shapes() {
        // virtual-hosted, modern
        assert_eq!(
            parse_s3_endpoint_region("my-bucket.s3.eu-west-2.amazonaws.com"),
            Some("eu-west-2"),
        );
        // virtual-hosted, legacy dash form
        assert_eq!(
            parse_s3_endpoint_region("my-bucket.s3-eu-west-1.amazonaws.com"),
            Some("eu-west-1"),
        );
        // path-style, modern
        assert_eq!(
            parse_s3_endpoint_region("s3.us-east-1.amazonaws.com"),
            Some("us-east-1"),
        );
        // path-style, legacy
        assert_eq!(
            parse_s3_endpoint_region("s3-ap-southeast-2.amazonaws.com"),
            Some("ap-southeast-2"),
        );
        // unrelated host → None
        assert_eq!(parse_s3_endpoint_region("example.com"), None);
    }

    /// No hint for a non-redirect error body, whether the status is
    /// 403 or (mismatched) 301.
    #[test]
    fn s3_permanent_redirect_hint_ignores_unrelated_errors() {
        let body = r#"<Error><Code>AccessDenied</Code><Message>Access Denied</Message></Error>"#;
        assert!(s3_permanent_redirect_hint(403, body).is_none());
        assert!(s3_permanent_redirect_hint(301, body).is_none());
    }
}