Skip to main content

rustack_auth/
canonical.rs

1//! Canonical request construction for AWS Signature Version 4.
2//!
3//! This module implements the canonical request format as specified by AWS:
4//!
5//! ```text
6//! HTTPRequestMethod\n
7//! CanonicalURI\n
8//! CanonicalQueryString\n
9//! CanonicalHeaders\n\n
10//! SignedHeaders\n
11//! HashedPayload
12//! ```
13//!
14//! Each component is normalized according to the AWS specification to ensure
15//! deterministic signature computation.
16
17use std::collections::BTreeMap;
18
19use percent_encoding::{AsciiSet, NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
20
21/// The set of characters that must be percent-encoded in URI path segments.
22///
23/// Per AWS SigV4 spec, all characters except unreserved characters
24/// (A-Z, a-z, 0-9, `-`, `_`, `.`, `~`) must be encoded.
25/// Forward slashes in the path are preserved (not encoded).
26const URI_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
27    .remove(b'-')
28    .remove(b'_')
29    .remove(b'.')
30    .remove(b'~');
31
32/// Build the full canonical request string from its components.
33///
34/// The result is a newline-separated string of:
35/// 1. HTTP method
36/// 2. Canonical URI
37/// 3. Canonical query string
38/// 4. Canonical headers (terminated by an extra newline)
39/// 5. Signed headers
40/// 6. Hashed payload
41///
42/// # Examples
43///
44/// ```
45/// use rustack_auth::canonical::build_canonical_request;
46///
47/// let canonical = build_canonical_request(
48///     "GET",
49///     "/test.txt",
50///     "",
51///     &[("host", "examplebucket.s3.amazonaws.com")],
52///     &["host"],
53///     "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
54/// );
55/// assert!(canonical.starts_with("GET\n/test.txt\n"));
56/// ```
57#[must_use]
58pub fn build_canonical_request(
59    method: &str,
60    uri: &str,
61    query_string: &str,
62    headers: &[(&str, &str)],
63    signed_headers: &[&str],
64    payload_hash: &str,
65) -> String {
66    let canonical_uri = build_canonical_uri(uri);
67    let canonical_query = build_canonical_query_string(query_string);
68    let canonical_headers = build_canonical_headers(headers, signed_headers);
69    let signed_headers_str = build_signed_headers_string(signed_headers);
70
71    #[rustfmt::skip]
72    let result = format!(
73        "{method}\n{canonical_uri}\n{canonical_query}\n{canonical_headers}\n\n{signed_headers_str}\n{payload_hash}"
74    );
75    result
76}
77
78/// Build the canonical URI by URI-encoding each path segment individually.
79///
80/// Forward slashes (`/`) are preserved. Empty paths are normalized to `/`.
81/// Each segment is percent-encoded according to RFC 3986 unreserved characters.
82///
83/// # Examples
84///
85/// ```
86/// use rustack_auth::canonical::build_canonical_uri;
87///
88/// assert_eq!(build_canonical_uri("/test.txt"), "/test.txt");
89/// assert_eq!(build_canonical_uri("/"), "/");
90/// assert_eq!(build_canonical_uri(""), "/");
91/// ```
92#[must_use]
93pub fn build_canonical_uri(path: &str) -> String {
94    if path.is_empty() || path == "/" {
95        return "/".to_owned();
96    }
97
98    let segments: Vec<&str> = path.split('/').collect();
99    let encoded_segments: Vec<String> = segments
100        .iter()
101        .map(|segment| {
102            // Decode first to normalize, then re-encode to produce consistent canonical form.
103            // This prevents double-encoding when the path is already percent-encoded.
104            let decoded = percent_decode_str(segment).decode_utf8_lossy();
105            uri_encode(&decoded)
106        })
107        .collect();
108
109    encoded_segments.join("/")
110}
111
/// Produce the canonical query string by sorting the request's parameters.
///
/// The query is split on `&`; empty pieces are dropped and each remaining piece
/// is split at its first `=` into a key and a value (a piece without `=` gets an
/// empty value). Pairs are ordered by key, then by value, and written back as
/// `key=value` joined with `&`.
///
/// Keys and values are emitted exactly as received, with no decoding or
/// re-encoding. Clients disagree on which characters they escape when signing
/// (AWS SDKs percent-encode `:` and `*`, while minio-java via OkHttp leaves
/// them raw), so the server has to reuse whatever encoding appears in the HTTP
/// request.
///
/// # Examples
///
/// ```
/// use rustack_auth::canonical::build_canonical_query_string;
///
/// assert_eq!(build_canonical_query_string(""), "");
/// assert_eq!(
///     build_canonical_query_string("b=2&a=1"),
///     "a=1&b=2"
/// );
/// ```
#[must_use]
pub fn build_canonical_query_string(query: &str) -> String {
    if query.is_empty() {
        return String::new();
    }

    let mut pairs: Vec<(&str, &str)> = Vec::new();
    for piece in query.split('&') {
        if piece.is_empty() {
            continue;
        }
        let pair = match piece.split_once('=') {
            Some(key_value) => key_value,
            None => (piece, ""),
        };
        pairs.push(pair);
    }

    // Tuple ordering compares keys first and falls back to values on ties.
    pairs.sort_unstable();

    let mut canonical = String::with_capacity(query.len() + pairs.len());
    for (index, (key, value)) in pairs.iter().enumerate() {
        if index != 0 {
            canonical.push('&');
        }
        canonical.push_str(key);
        canonical.push('=');
        canonical.push_str(value);
    }
    canonical
}
152
153/// Build the canonical headers string from the request headers.
154///
155/// Only headers listed in `signed_headers` are included. Header names are lowercased,
156/// values are trimmed of leading/trailing whitespace and consecutive spaces are collapsed
157/// to a single space. Headers are sorted by name.
158///
159/// The result does NOT include a trailing newline; the caller adds that as part of
160/// the canonical request format (the double newline between headers and signed headers).
161///
162/// # Examples
163///
164/// ```
165/// use rustack_auth::canonical::build_canonical_headers;
166///
167/// let headers = vec![
168///     ("Host", "example.com"),
169///     ("X-Amz-Date", "20130524T000000Z"),
170/// ];
171/// let signed = vec!["host", "x-amz-date"];
172/// let result = build_canonical_headers(
173///     &headers.iter().map(|(k, v)| (*k, *v)).collect::<Vec<_>>(),
174///     &signed.iter().map(|s| *s).collect::<Vec<_>>(),
175/// );
176/// assert!(result.contains("host:example.com"));
177/// ```
178#[must_use]
179pub fn build_canonical_headers(headers: &[(&str, &str)], signed_headers: &[&str]) -> String {
180    // Collect headers into a sorted map, keyed by lowercase name.
181    // If multiple headers share the same name, their values are concatenated with commas.
182    let mut header_map: BTreeMap<String, String> = BTreeMap::new();
183    for (name, value) in headers {
184        let lower_name = name.to_lowercase();
185        let trimmed_value = collapse_whitespace(value.trim());
186        header_map
187            .entry(lower_name)
188            .and_modify(|existing| {
189                existing.push(',');
190                existing.push_str(&trimmed_value);
191            })
192            .or_insert(trimmed_value);
193    }
194
195    // Build the canonical headers string using only the signed headers, in sorted order.
196    let mut sorted_signed: Vec<&str> = signed_headers.to_vec();
197    sorted_signed.sort_unstable();
198
199    sorted_signed
200        .iter()
201        .filter_map(|name| header_map.get(*name).map(|value| format!("{name}:{value}")))
202        .collect::<Vec<_>>()
203        .join("\n")
204}
205
/// Join the signed header names into the `;`-separated list used by SigV4.
///
/// The names are sorted lexicographically before joining. They are used exactly
/// as given, so callers are expected to pass them already lowercased.
///
/// # Examples
///
/// ```
/// use rustack_auth::canonical::build_signed_headers_string;
///
/// assert_eq!(
///     build_signed_headers_string(&["x-amz-date", "host"]),
///     "host;x-amz-date"
/// );
/// ```
#[must_use]
pub fn build_signed_headers_string(signed_headers: &[&str]) -> String {
    let mut names = Vec::from(signed_headers);
    names.sort_unstable();

    let mut joined = String::new();
    for (index, name) in names.iter().enumerate() {
        if index > 0 {
            joined.push(';');
        }
        joined.push_str(name);
    }
    joined
}
226
227/// URI-encode a single path segment using the AWS SigV4 encoding rules.
228fn uri_encode(input: &str) -> String {
229    utf8_percent_encode(input, URI_ENCODE_SET).to_string()
230}
231
/// Replace every run of one or more whitespace characters with a single ASCII
/// space.
///
/// Leading and trailing runs are collapsed too, not removed; callers that want
/// them gone trim the input first.
fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    // True while the previous character belonged to a whitespace run.
    let mut in_run = false;
    for ch in s.chars() {
        let is_space = ch.is_whitespace();
        if !is_space {
            collapsed.push(ch);
        } else if !in_run {
            // First whitespace character of a run: emit the one replacement space.
            collapsed.push(' ');
        }
        in_run = is_space;
    }
    collapsed
}
249
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_should_build_canonical_uri_for_simple_path() {
        let canonical = build_canonical_uri("/test.txt");
        assert_eq!(canonical, "/test.txt");
    }

    #[test]
    fn test_should_normalize_empty_path_to_slash() {
        for path in ["", "/"] {
            assert_eq!(build_canonical_uri(path), "/");
        }
    }

    #[test]
    fn test_should_encode_special_characters_in_path() {
        let canonical = build_canonical_uri("/hello world");
        assert_eq!(canonical, "/hello%20world");
    }

    #[test]
    fn test_should_sort_query_parameters() {
        let canonical = build_canonical_query_string("b=2&a=1&c=3");
        assert_eq!(canonical, "a=1&b=2&c=3");
    }

    #[test]
    fn test_should_return_empty_for_empty_query() {
        let canonical = build_canonical_query_string("");
        assert_eq!(canonical, "");
    }

    #[test]
    fn test_should_preserve_raw_query_parameter_values() {
        // The value must come back untouched: no decoding, no re-encoding.
        let query = "key=hello%20world";
        assert_eq!(build_canonical_query_string(query), query);
    }

    #[test]
    fn test_should_build_canonical_headers_sorted_and_lowercased() {
        let request_headers = [
            ("Host", "examplebucket.s3.amazonaws.com"),
            ("Range", "bytes=0-9"),
            (
                "x-amz-content-sha256",
                "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            ),
            ("x-amz-date", "20130524T000000Z"),
        ];
        let signed_names = ["host", "range", "x-amz-content-sha256", "x-amz-date"];

        let canonical = build_canonical_headers(&request_headers, &signed_names);

        let expected_lines = [
            "host:examplebucket.s3.amazonaws.com",
            "range:bytes=0-9",
            "x-amz-content-sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            "x-amz-date:20130524T000000Z",
        ];
        assert_eq!(canonical, expected_lines.join("\n"));
    }

    #[test]
    fn test_should_build_signed_headers_string_sorted() {
        let joined = build_signed_headers_string(&["x-amz-date", "host", "range"]);
        assert_eq!(joined, "host;range;x-amz-date");
    }

    #[test]
    fn test_should_collapse_whitespace_in_header_values() {
        let request_headers = [("Host", "  example.com  "), ("X-Custom", "a   b   c")];
        let signed_names = ["host", "x-custom"];

        let canonical = build_canonical_headers(&request_headers, &signed_names);

        assert_eq!(canonical, "host:example.com\nx-custom:a b c");
    }

    #[test]
    fn test_should_build_canonical_request_matching_aws_example() {
        use sha2::{Digest, Sha256};

        // AWS test vector: GET /test.txt from examplebucket
        let request_headers = [
            ("host", "examplebucket.s3.amazonaws.com"),
            ("range", "bytes=0-9"),
            (
                "x-amz-content-sha256",
                "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            ),
            ("x-amz-date", "20130524T000000Z"),
        ];
        let signed_names = ["host", "range", "x-amz-content-sha256", "x-amz-date"];

        let canonical = build_canonical_request(
            "GET",
            "/test.txt",
            "",
            &request_headers,
            &signed_names,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        );

        // One element per line of the expected request; the empty elements are
        // the empty query string and the blank line after the header block.
        let expected_lines = [
            "GET",
            "/test.txt",
            "",
            "host:examplebucket.s3.amazonaws.com",
            "range:bytes=0-9",
            "x-amz-content-sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
            "x-amz-date:20130524T000000Z",
            "",
            "host;range;x-amz-content-sha256;x-amz-date",
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        ];
        assert_eq!(canonical, expected_lines.join("\n"));

        // Verify the hash of the canonical request matches the AWS test vector
        let digest = hex::encode(Sha256::digest(canonical.as_bytes()));
        assert_eq!(
            digest,
            "7344ae5b7ee6c3e7e6b0fe0640412a37625d1fbfff95c48bbb2dc43964946972"
        );
    }

    #[test]
    fn test_should_handle_presigned_url_query_string() {
        let query = concat!(
            "X-Amz-Algorithm=AWS4-HMAC-SHA256",
            "&X-Amz-Credential=AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request",
            "&X-Amz-Date=20130524T000000Z",
            "&X-Amz-Expires=86400",
            "&X-Amz-SignedHeaders=host",
        );

        let canonical = build_canonical_query_string(query);

        // Parameters are sorted with their raw values preserved; in particular
        // %2F must stay %2F and not be double-encoded to %252F.
        let expected_fragments = [
            "X-Amz-Algorithm=AWS4-HMAC-SHA256",
            "X-Amz-Expires=86400",
            "AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request",
        ];
        for fragment in expected_fragments {
            assert!(canonical.contains(fragment));
        }
    }

    #[test]
    fn test_should_preserve_percent_encoded_query_parameters() {
        // Already percent-encoded values pass through unchanged.
        let canonical = build_canonical_query_string("events=s3%3AObjectCreated%3A%2A&prefix=test");
        assert_eq!(canonical, "events=s3%3AObjectCreated%3A%2A&prefix=test");
    }

    #[test]
    fn test_should_preserve_raw_special_characters_in_query() {
        // Unencoded special characters pass through unchanged as well. This
        // matches minio-java, whose OkHttp-based encoding leaves `:` and `*`
        // raw in query strings.
        let canonical = build_canonical_query_string("events=s3:ObjectCreated:*&prefix=test");
        assert_eq!(canonical, "events=s3:ObjectCreated:*&prefix=test");
    }

    #[test]
    fn test_should_sort_duplicate_query_keys() {
        // Pairs that share a key are ordered by their values.
        let canonical = build_canonical_query_string(
            "events=s3:ObjectCreated:*&events=s3:ObjectAccessed:*&prefix=p",
        );
        assert_eq!(
            canonical,
            "events=s3:ObjectAccessed:*&events=s3:ObjectCreated:*&prefix=p"
        );
    }

    #[test]
    fn test_should_not_double_encode_uri_path() {
        // A path whose space is already percent-encoded stays as it is.
        let from_encoded = build_canonical_uri("/hello%20world");
        assert_eq!(from_encoded, "/hello%20world");

        // The raw spelling of the same path canonicalizes to the same result.
        let from_raw = build_canonical_uri("/hello world");
        assert_eq!(from_raw, from_encoded);
    }
}