common_s3_headers/
aws_format.rs

1//! AWS-specific formatting.
2//!
3//! Any creation of strings goes here.
4//!
5use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, CONTROLS};
6use std::ops::Add;
7use time::{macros::format_description, OffsetDateTime};
8use url::Url;
9
10use crate::aws_math::get_sha256;
11
12const SHORT_DATE: &[time::format_description::FormatItem<'static>] = format_description!("[year][month][day]");
13
14/// Convert a `time::OffsetDateTime` to a short date string. This is used in
15/// the AWS credential scope. It is always UTC, YYYYMMDD, sortable and
16/// lexicographically comparable.
17///
18/// # Examples
19///
20/// ```
21/// use time::OffsetDateTime;
22/// use common_s3_headers::aws_format::to_short_datetime;
23///
24/// let datetime = OffsetDateTime::from_unix_timestamp(0).unwrap();
25/// let result = to_short_datetime(&datetime);
26/// assert_eq!(result, "19700101");
27///
28/// let datetime = OffsetDateTime::from_unix_timestamp(1_000_000_000).unwrap();
29/// let result = to_short_datetime(&datetime);
30/// assert_eq!(result, "20010909");
31/// ```
32///
33pub fn to_short_datetime(datetime: &OffsetDateTime) -> String {
34  datetime
35    .format(SHORT_DATE)
36    .expect("All dates can be represented as short.")
37}
38
39const LONG_DATETIME: &[time::format_description::FormatItem<'static>] =
40  time::macros::format_description!("[year][month][day]T[hour][minute][second]Z");
41
42/// Convert a `time::OffsetDateTime` to a long date string. This is used in
43/// the AWS credential scope. It is always UTC, YYYYMMDD'T'HHMMSS'Z', sortable
44/// and lexicographically comparable.
45///
46/// # Examples
47///
48/// ```
49/// use time::OffsetDateTime;
50/// use common_s3_headers::aws_format::to_long_datetime;
51///
52/// let datetime = OffsetDateTime::from_unix_timestamp(0).unwrap();
53/// let result = to_long_datetime(&datetime);
54/// assert_eq!(result, "19700101T000000Z");
55///
56/// let datetime = OffsetDateTime::from_unix_timestamp(1_000_000_000).unwrap();
57/// let result = to_long_datetime(&datetime);
58/// assert_eq!(result, "20010909T014640Z");
59/// ```
60///
61pub fn to_long_datetime(datetime: &OffsetDateTime) -> String {
62  datetime
63    .format(LONG_DATETIME)
64    .expect("All dates can be represented as long.")
65}
66
67/// The set of characters that are allowed in an AWS fragment.
68///
69/// See https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
70/// See https://perishablepress.com/stop-using-unsafe-characters-in-urls/
71const FRAGMENT: &AsciiSet = &CONTROLS
72  // URL_RESERVED
73  .add(b':')
74  .add(b'?')
75  .add(b'#')
76  .add(b'[')
77  .add(b']')
78  .add(b'@')
79  .add(b'!')
80  .add(b'$')
81  .add(b'&')
82  .add(b'\'')
83  .add(b'(')
84  .add(b')')
85  .add(b'*')
86  .add(b'+')
87  .add(b',')
88  .add(b';')
89  .add(b'=')
90  // URL_UNSAFE
91  .add(b'"')
92  .add(b' ')
93  .add(b'<')
94  .add(b'>')
95  .add(b'%')
96  .add(b'{')
97  .add(b'}')
98  .add(b'|')
99  .add(b'\\')
100  .add(b'^')
101  .add(b'`');
102
103const FRAGMENT_SLASH: &AsciiSet = &FRAGMENT.add(b'/');
104
105/// Encode a URI following the specific requirements of the AWS service.
106pub fn uri_encode(string: &str, encode_slash: bool) -> String {
107  if encode_slash {
108    utf8_percent_encode(string, FRAGMENT_SLASH).to_string()
109  } else {
110    utf8_percent_encode(string, FRAGMENT).to_string()
111  }
112}
113
114/// Generate an AWS scope string. This is used in the AWS authorization header. It is
115/// always YYYYMMDD'T'HHMMSS'Z'/region/service/aws4_request.
116///
117/// # Examples
118///
119/// ```
120/// use time::OffsetDateTime;
121/// use common_s3_headers::aws_format::credential_scope_string;
122///
123/// let datetime = OffsetDateTime::from_unix_timestamp(0).unwrap();
124/// let result = credential_scope_string(&datetime, "us-east-1", "s3");
125/// assert_eq!(result, "19700101/us-east-1/s3/aws4_request");
126/// ```
127///
128pub fn credential_scope_string(datetime: &OffsetDateTime, region: &str, service: &str) -> String {
129  format!("{}/{}/{}/aws4_request", to_short_datetime(datetime), region, service)
130}
131
132/// Generate the AWS authorization header.
133///
134/// # Examples
135///
136/// ```
137/// use time::OffsetDateTime;
138/// use common_s3_headers::aws_format::authorization_header_string;
139///
140/// let datetime = OffsetDateTime::from_unix_timestamp(0).unwrap();
141/// let result = authorization_header_string("access_key", &datetime, "us-east-1", "s3", "signed_headers", "signature");
142/// assert_eq!(
143///  result,
144///  "AWS4-HMAC-SHA256 Credential=access_key/19700101/us-east-1/s3/aws4_request,SignedHeaders=signed_headers,Signature=signature"
145/// );
146/// ```
147///
148pub fn authorization_header_string(
149  access_key: &str,
150  datetime: &OffsetDateTime,
151  region: &str,
152  service: &str,
153  signed_headers: &str,
154  signature: &str,
155) -> String {
156  format!(
157    "AWS4-HMAC-SHA256 Credential={access_key}/{scope},\
158          SignedHeaders={signed_headers},Signature={signature}",
159    access_key = access_key,
160    scope = credential_scope_string(datetime, region, service),
161    signed_headers = signed_headers,
162    signature = signature
163  )
164}
165
166/// Generate the AWS string to sign. This is used in the AWS authorization header.
167///
168/// # Examples
169///
170/// ```
171/// use time::OffsetDateTime;
172/// use common_s3_headers::aws_format::string_to_sign;
173///
174/// let datetime = OffsetDateTime::from_unix_timestamp(0).unwrap();
175/// let result = string_to_sign(&datetime, "us-east-1", "s3", "canonical_request");
176/// assert_eq!(
177///   result,
178///   "AWS4-HMAC-SHA256\n19700101T000000Z\n19700101/us-east-1/s3/aws4_request\n572b1e335109068b81e4def81524c5fe5d0e385143b5656cbf2f7c88e5c1a51e"
179/// );
180/// ```
181///
182/// # See
183///
184/// * https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html#ConstructingTheAuthenticationHeader
185/// * https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
186/// * https://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
187///
188pub fn string_to_sign(datetime: &OffsetDateTime, region: &str, service: &str, canonical_request: &str) -> String {
189  let hashed_canonical_request = get_sha256(canonical_request.as_bytes());
190
191  format!(
192    "AWS4-HMAC-SHA256\n{}\n{}\n{}",
193    to_long_datetime(datetime),
194    credential_scope_string(datetime, region, service),
195    hashed_canonical_request
196  )
197}
198
199/// Generate a canonical URI string from the given URL. This is used in the AWS
200/// canonical request. It is always the path of the URL with percent encoding
201/// applied.
202///
203/// # Examples
204///
205/// ```
206/// use url::Url;
207/// use common_s3_headers::aws_format::canonical_uri_string;
208///
209/// let url = Url::parse("http://localhost/some-url/?okay").unwrap();
210/// let result = canonical_uri_string(&url);
211/// assert_eq!(result, "/some-url/");
212/// ```
213pub fn canonical_uri_string(uri: &Url) -> String {
214  // decode `Url`'s percent-encoding and then reencode it
215  // according to AWS's rules
216  let decoded = percent_decode_str(uri.path()).decode_utf8_lossy();
217  uri_encode(&decoded, false)
218}
219
220/// Generate a canonical query string from the query pairs in the given URL.
221pub fn canonical_query_string(uri: &Url) -> String {
222  let mut keyvalues: Vec<(String, String)> = uri
223    .query_pairs()
224    .map(|(key, value)| (key.to_string(), value.to_string()))
225    .collect();
226  // Note that the sorting happens BEFORE encoding.
227  keyvalues.sort();
228
229  let keyvalues: Vec<String> = keyvalues
230    .iter()
231    .map(|(k, v)| {
232      format!(
233        "{}={}",
234        utf8_percent_encode(k, FRAGMENT_SLASH),
235        utf8_percent_encode(v, FRAGMENT_SLASH)
236      )
237    })
238    .collect();
239  keyvalues.join("&")
240}
241
/// Render every `(key, value)` pair as a single `key<sep>value` string,
/// preserving the input order. Allocates one `String` per pair.
fn to_key_value_strings<S: AsRef<str>, T: AsRef<str>>(headers: &[(S, T)], sep: &str) -> Vec<String> {
  let mut rendered = Vec::with_capacity(headers.len());
  for (key, value) in headers {
    rendered.push(format!("{}{}{}", key.as_ref(), sep, value.as_ref()));
  }
  rendered
}
250
/// Collect the key of every `(key, value)` pair, in input order. The returned
/// string slices borrow from `headers`; only the `Vec` itself is allocated.
///
/// # Examples
///
/// ```
/// use common_s3_headers::aws_format::get_keys;
///
/// let headers = vec![
///  ("x-amz-date", "20130524T000000Z"),
///  ("Range", "bytes=0-9"),
///  ("Host", "examplebucket.s3.amazonaws.com"),
/// ];
/// let result = get_keys(&headers);
/// assert_eq!(result, vec!["x-amz-date", "Range", "Host"]);
/// ```
pub fn get_keys<S: AsRef<str>, T>(headers: &[(S, T)]) -> Vec<&str> {
  let mut keys = Vec::with_capacity(headers.len());
  for (key, _) in headers {
    keys.push(key.as_ref());
  }
  keys
}
269
270/// Generate a canonical request. Assumes headers are already sorted and aws canonical.
271///
272/// NOTE: payload_hash might be "UNSIGNED-PAYLOAD" or sha256() of content, which can be different per request type.
273///
274/// canonical_request = method + '\n' +
275///   canonical_uri + '\n' +
276///   canonical_querystring + '\n' +
277///   canonical_headers + '\n' +
278///   signed_headers + '\n' +
279///   payload_hash
280///
281/// # Examples
282///
283/// ```
284/// use url::Url;
285/// use common_s3_headers::aws_canonical::to_canonical_headers;
286/// use common_s3_headers::aws_format::canonical_request_string;
287///
288/// let url = Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap();
289/// let headers = vec![
290///  ("x-amz-date", "20130524T000000Z"),
291///  ("Range", "bytes=0-9"),
292///  ("Host", "examplebucket.s3.amazonaws.com"),
293///  ("x-amz-content-sha256", "UNSIGNED-PAYLOAD"),
294/// ];
295/// let canonical_headers = to_canonical_headers(&headers);
296/// let result = canonical_request_string("GET", &url, &canonical_headers, "UNSIGNED-PAYLOAD");
297/// assert_eq!(
298///  result,
299///  "GET\n\
300///  /test.txt\n\
301///  \n\
302///  host:examplebucket.s3.amazonaws.com\n\
303///  range:bytes=0-9\n\
304///  x-amz-content-sha256:UNSIGNED-PAYLOAD\n\
305///  x-amz-date:20130524T000000Z\n\
306///  \n\
307///  host;range;x-amz-content-sha256;x-amz-date\n\
308///  UNSIGNED-PAYLOAD"
309/// );
310/// ```
311///
312/// # See
313///
314/// * https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html#ConstructingTheAuthenticationHeader
315/// * https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
316/// * https://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
317/// * https://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
318///
319pub fn canonical_request_string<S: AsRef<str>>(
320  method: &str,
321  url: &Url,
322  canonical_headers: &[(S, &str)],
323  payload_hash: &str,
324) -> String {
325  format!(
326    "{}\n{}\n{}\n{}\n{}\n{}",
327    method,
328    canonical_uri_string(url),
329    canonical_query_string(url),
330    to_key_value_strings(canonical_headers, ":").join("\n").add("\n"),
331    get_keys(canonical_headers).join(";"),
332    payload_hash
333  )
334}
335
336/// Get the security token string. It is always &X-Amz-Security-Token=token with percent encoding.
337///
338/// # Examples
339///
340/// ```
341/// use common_s3_headers::aws_format::security_token_string;
342///
343/// let result = security_token_string("token");
344/// assert_eq!(result, "&X-Amz-Security-Token=token");
345///
346/// let result = security_token_string("token with spaces");
347/// assert_eq!(result, "&X-Amz-Security-Token=token%20with%20spaces");
348///
349/// let result = security_token_string("token/with/slashes");
350/// assert_eq!(result, "&X-Amz-Security-Token=token%2Fwith%2Fslashes")
351/// ```
352pub fn security_token_string(token: &str) -> String {
353  format!("&X-Amz-Security-Token={}", utf8_percent_encode(token, FRAGMENT_SLASH))
354}
355
356/// Get the query params string. It is always ?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=credential&X-Amz-Date=long_date&X-Amz-Expires=expires&X-Amz-SignedHeaders=signed_headers.
357/// The credential is always access_key/credential_scope_string(datetime, region, service) with percent encoding.
358/// The datetime is always to_long_datetime(datetime).
359///
360/// # Examples
361///
362/// ```
363/// use time::OffsetDateTime;
364/// use common_s3_headers::aws_format::query_params_string;
365///
366/// let datetime = OffsetDateTime::from_unix_timestamp(0).unwrap();
367/// let result = query_params_string(&["host", "x-amz-content-sha256", "x-amz-date"], "access_key", &datetime, "region", "service", 123);
368/// assert_eq!(
369///  result,
370///  "?X-Amz-Algorithm=AWS4-HMAC-SHA256\
371///  &X-Amz-Credential=access_key%2F19700101%2Fregion%2Fservice%2Faws4_request\
372///  &X-Amz-Date=19700101T000000Z\
373///  &X-Amz-Expires=123\
374///  &X-Amz-SignedHeaders=host%3Bx-amz-content-sha256%3Bx-amz-date"
375/// );
376/// ```
377pub fn query_params_string(
378  signed_headers: &[&str],
379  access_key: &str,
380  datetime: &OffsetDateTime,
381  region: &str,
382  service: &str,
383  expires: u32,
384) -> String {
385  let signed_headers = signed_headers.join(";");
386  let signed_headers = utf8_percent_encode(&signed_headers, FRAGMENT_SLASH);
387
388  let credentials = format!("{}/{}", access_key, credential_scope_string(datetime, region, service));
389  let credentials = utf8_percent_encode(&credentials, FRAGMENT_SLASH);
390
391  format!(
392    "?X-Amz-Algorithm=AWS4-HMAC-SHA256\
393          &X-Amz-Credential={credentials}\
394          &X-Amz-Date={long_date}\
395          &X-Amz-Expires={expires}\
396          &X-Amz-SignedHeaders={signed_headers}",
397    credentials = credentials,
398    long_date = to_long_datetime(datetime),
399    expires = expires,
400    signed_headers = signed_headers,
401  )
402}
403
#[cfg(test)]
mod tests {
  use super::*;
  use crate::aws_canonical::to_canonical_headers;
  use common_testing::assert;
  use std::str::FromStr;

  /// Shorthand for an `OffsetDateTime` at the given Unix timestamp.
  fn at(unix_seconds: i64) -> OffsetDateTime {
    OffsetDateTime::from_unix_timestamp(unix_seconds).unwrap()
  }

  #[test]
  fn to_short_datetime_works() {
    for &(seconds, expected) in &[(0, "19700101"), (1_000_000_000, "20010909")] {
      assert_eq!(to_short_datetime(&at(seconds)), expected);
    }
  }

  #[test]
  fn to_long_datetime_works() {
    for &(seconds, expected) in &[(0, "19700101T000000Z"), (1_000_000_000, "20010909T014640Z")] {
      assert_eq!(to_long_datetime(&at(seconds)), expected);
    }
  }

  #[test]
  fn uri_encode_works() {
    // (input, encode_slash, expected)
    let cases = [
      ("foo", false, "foo"),
      ("foo", true, "foo"),
      ("foo bar", false, "foo%20bar"),
      ("foo bar", true, "foo%20bar"),
      ("foo/bar", false, "foo/bar"),
      ("foo/bar", true, "foo%2Fbar"),
      ("foo/bar/baz", false, "foo/bar/baz"),
      ("foo/bar/baz", true, "foo%2Fbar%2Fbaz"),
      ("foo/bar/baz/", false, "foo/bar/baz/"),
      ("foo/bar/baz/", true, "foo%2Fbar%2Fbaz%2F"),
    ];

    for &(input, encode_slash, expected) in cases.iter() {
      assert_eq!(uri_encode(input, encode_slash), expected);
    }
  }

  #[test]
  fn canonical_uri_string_when_empty() {
    let url = Url::from_str("http://localhost").unwrap();
    assert::equal(canonical_uri_string(&url), "/");
  }

  #[test]
  fn canonical_uri_string_slash_percent_multiple() {
    let url = Url::parse("http://s3.amazonaws.com/bucket/Folder (xx)%=/Filename (xx)%=").unwrap();
    assert_eq!(
      "/bucket/Folder%20%28xx%29%25%3D/Filename%20%28xx%29%25%3D",
      canonical_uri_string(&url)
    );
  }

  #[test]
  fn canonical_uri_string_when_plain_text() {
    let url = Url::from_str("http://localhost/some-url/?okay").unwrap();
    assert::equal(canonical_uri_string(&url), "/some-url/");
  }

  #[test]
  fn canonical_uri_string_encode() {
    // Repeated slashes must survive parsing untouched; collapsing them
    // would change which object the path refers to.
    let url = Url::parse("http://s3.amazonaws.com/examplebucket///foo//bar//baz").unwrap();
    assert_eq!("/examplebucket///foo//bar//baz", canonical_uri_string(&url));
  }

  #[test]
  fn credential_scope_string_works() {
    let scope = credential_scope_string(&at(0), "us-east-1", "s3");
    assert_eq!(scope, "19700101/us-east-1/s3/aws4_request");
  }

  #[test]
  fn canonical_request_string_works() {
    let url = Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap();
    let headers = vec![
      ("x-amz-date", "20130524T000000Z"),
      ("Range", "bytes=0-9"),
      ("Host", "examplebucket.s3.amazonaws.com"),
      ("x-amz-content-sha256", "UNSIGNED-PAYLOAD"),
    ];
    let canonical_headers = to_canonical_headers(&headers);

    let expected = [
      "GET",
      "/test.txt",
      "",
      "host:examplebucket.s3.amazonaws.com",
      "range:bytes=0-9",
      "x-amz-content-sha256:UNSIGNED-PAYLOAD",
      "x-amz-date:20130524T000000Z",
      "",
      "host;range;x-amz-content-sha256;x-amz-date",
      "UNSIGNED-PAYLOAD",
    ]
    .join("\n");

    let result = canonical_request_string("GET", &url, &canonical_headers, "UNSIGNED-PAYLOAD");
    assert_eq!(result, expected);
  }

  #[test]
  fn test_query_string_encode() {
    // (url, expected canonical query string)
    let cases = [
      (
        "http://s3.amazonaws.com/examplebucket?prefix=somePrefix&marker=someMarker&max-keys=20",
        "marker=someMarker&max-keys=20&prefix=somePrefix",
      ),
      ("http://s3.amazonaws.com/examplebucket?acl", "acl="),
      (
        "http://s3.amazonaws.com/examplebucket?key=with%20space&also+space=with+plus",
        "also%20space=with%20plus&key=with%20space",
      ),
      (
        "http://s3.amazonaws.com/examplebucket?key-with-postfix=something&key=",
        "key=&key-with-postfix=something",
      ),
      (
        "http://s3.amazonaws.com/examplebucket?key=c&key=a&key=b",
        "key=a&key=b&key=c",
      ),
    ];

    for &(raw_url, expected) in cases.iter() {
      let url = Url::parse(raw_url).unwrap();
      assert_eq!(expected, canonical_query_string(&url));
    }
  }

  #[test]
  fn test_uri_encode() {
    let input = r#"~!@#$%^&*()-_=+[]\{}|;:'",.<>? привет 你好"#;
    let expected = "~%21%40%23%24%25%5E%26%2A%28%29-_%3D%2B%5B%5D%5C%7B%7D%7C%3B%3A%27%22%2C.%3C%3E%3F%20%D0%BF%D1%80%D0%B8%D0%B2%D0%B5%D1%82%20%E4%BD%A0%E5%A5%BD";
    assert_eq!(uri_encode(input, true), expected);
  }
}