// rustack_s3_core/utils.rs

//! Shared utilities for the S3 service.
//!
//! Provides ID generation, timestamp helpers, range-header parsing,
//! conditional-request matching, continuation-token encoding, copy-source
//! parsing, and XML escaping functions.

use base64::{Engine, engine::general_purpose::STANDARD as BASE64_STANDARD};
use chrono::Utc;
use rand::RngExt;
use uuid::Uuid;

use crate::error::S3ServiceError;

// ---------------------------------------------------------------------------
// ID generation
// ---------------------------------------------------------------------------

18/// Generate a random version ID suitable for S3 versioned objects.
19///
20/// Produces a URL-safe base64 string of approximately 32 characters.
21///
22/// # Examples
23///
24/// ```
25/// use rustack_s3_core::utils::generate_version_id;
26///
27/// let id = generate_version_id();
28/// assert!(id.len() >= 20);
29/// assert!(id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'));
30/// ```
31#[must_use]
32pub fn generate_version_id() -> String {
33    let mut rng = rand::rng();
34    let mut buf = [0u8; 24];
35    rng.fill(&mut buf);
36    base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(buf)
37}
38
39/// Generate a random upload ID for multipart uploads.
40///
41/// Produces a hex string of approximately 64 characters.
42///
43/// # Examples
44///
45/// ```
46/// use rustack_s3_core::utils::generate_upload_id;
47///
48/// let id = generate_upload_id();
49/// assert!(id.len() >= 32);
50/// assert!(id.chars().all(|c| c.is_ascii_hexdigit()));
51/// ```
52#[must_use]
53pub fn generate_upload_id() -> String {
54    let mut rng = rand::rng();
55    let mut buf = [0u8; 32];
56    rng.fill(&mut buf);
57    hex::encode(buf)
58}
59
60/// Generate a unique request ID (UUID v4 without dashes).
61///
62/// # Examples
63///
64/// ```
65/// use rustack_s3_core::utils::generate_request_id;
66///
67/// let id = generate_request_id();
68/// assert_eq!(id.len(), 32);
69/// assert!(id.chars().all(|c| c.is_ascii_hexdigit()));
70/// ```
71#[must_use]
72pub fn generate_request_id() -> String {
73    Uuid::new_v4().simple().to_string()
74}
75
// ---------------------------------------------------------------------------
// Timestamps
// ---------------------------------------------------------------------------

80/// Return the current UTC time as milliseconds since the Unix epoch.
81///
82/// # Examples
83///
84/// ```
85/// use rustack_s3_core::utils::timestamp_millis;
86///
87/// let ts = timestamp_millis();
88/// assert!(ts > 0);
89/// ```
90#[must_use]
91pub fn timestamp_millis() -> i64 {
92    Utc::now().timestamp_millis()
93}
94
95/// Return the current UTC time formatted as an RFC 3339 string.
96///
97/// # Examples
98///
99/// ```
100/// use rustack_s3_core::utils::timestamp_rfc3339;
101///
102/// let ts = timestamp_rfc3339();
103/// assert!(ts.contains('T'));
104/// ```
105#[must_use]
106pub fn timestamp_rfc3339() -> String {
107    Utc::now().to_rfc3339()
108}
109
// ---------------------------------------------------------------------------
// Range header parsing
// ---------------------------------------------------------------------------

114/// Parse an HTTP `Range` header value and return the inclusive byte range.
115///
116/// Supported formats:
117/// - `bytes=0-499` -- first 500 bytes
118/// - `bytes=-500` -- last 500 bytes
119/// - `bytes=500-` -- from byte 500 to the end
120/// - `bytes=0-` -- the entire content
121///
122/// Returns an inclusive `(start, end)` tuple.
123///
124/// # Errors
125///
126/// Returns [`S3ServiceError::InvalidRange`] if the range header is malformed
127/// or specifies an unsatisfiable range.
128///
129/// # Examples
130///
131/// ```
132/// use rustack_s3_core::utils::parse_range_header;
133///
134/// let (start, end) = parse_range_header("bytes=0-499", 1000).unwrap();
135/// assert_eq!((start, end), (0, 499));
136/// ```
137pub fn parse_range_header(range: &str, content_length: u64) -> Result<(u64, u64), S3ServiceError> {
138    let range = range
139        .strip_prefix("bytes=")
140        .ok_or(S3ServiceError::InvalidRange)?;
141
142    if content_length == 0 {
143        return Err(S3ServiceError::InvalidRange);
144    }
145
146    if let Some(suffix) = range.strip_prefix('-') {
147        // bytes=-N  (last N bytes)
148        let n: u64 = suffix.parse().map_err(|_| S3ServiceError::InvalidRange)?;
149        if n == 0 || n > content_length {
150            return Err(S3ServiceError::InvalidRange);
151        }
152        let start = content_length - n;
153        Ok((start, content_length - 1))
154    } else if let Some(prefix) = range.strip_suffix('-') {
155        // bytes=N-  (from N to end)
156        let start: u64 = prefix.parse().map_err(|_| S3ServiceError::InvalidRange)?;
157        if start >= content_length {
158            return Err(S3ServiceError::InvalidRange);
159        }
160        Ok((start, content_length - 1))
161    } else {
162        // bytes=N-M
163        let parts: Vec<&str> = range.splitn(2, '-').collect();
164        if parts.len() != 2 {
165            return Err(S3ServiceError::InvalidRange);
166        }
167        let start: u64 = parts[0].parse().map_err(|_| S3ServiceError::InvalidRange)?;
168        let end: u64 = parts[1].parse().map_err(|_| S3ServiceError::InvalidRange)?;
169        if start > end || start >= content_length {
170            return Err(S3ServiceError::InvalidRange);
171        }
172        // Clamp end to content_length - 1
173        let end = end.min(content_length - 1);
174        Ok((start, end))
175    }
176}
177
// ---------------------------------------------------------------------------
// Conditional request helpers
// ---------------------------------------------------------------------------

182/// Check whether the given ETag satisfies an `If-Match` condition.
183///
184/// The `if_match` value may be `"*"` (matches any ETag) or a quoted ETag
185/// value.
186///
187/// # Examples
188///
189/// ```
190/// use rustack_s3_core::utils::is_valid_if_match;
191///
192/// assert!(is_valid_if_match("\"abc\"", "*"));
193/// assert!(is_valid_if_match("\"abc\"", "\"abc\""));
194/// assert!(!is_valid_if_match("\"abc\"", "\"xyz\""));
195/// ```
196#[must_use]
197pub fn is_valid_if_match(etag: &str, if_match: &str) -> bool {
198    if if_match == "*" {
199        return true;
200    }
201    normalize_etag(etag) == normalize_etag(if_match)
202}
203
204/// Check whether the given ETag satisfies an `If-None-Match` condition.
205///
206/// Returns `true` if the object should be returned (i.e. the ETag does
207/// *not* match). Returns `false` if the ETags match (meaning a 304 Not
208/// Modified response is appropriate).
209///
210/// # Examples
211///
212/// ```
213/// use rustack_s3_core::utils::is_valid_if_none_match;
214///
215/// assert!(!is_valid_if_none_match("\"abc\"", "*"));
216/// assert!(!is_valid_if_none_match("\"abc\"", "\"abc\""));
217/// assert!(is_valid_if_none_match("\"abc\"", "\"xyz\""));
218/// ```
219#[must_use]
220pub fn is_valid_if_none_match(etag: &str, if_none_match: &str) -> bool {
221    if if_none_match == "*" {
222        return false;
223    }
224    normalize_etag(etag) != normalize_etag(if_none_match)
225}
226
/// Normalize an ETag by stripping surrounding double quotes.
///
/// The quotes are removed only when the value both starts and ends with `"`;
/// a value with a quote on just one side (or no quotes at all) is returned
/// unchanged.
fn normalize_etag(etag: &str) -> &str {
    etag.strip_prefix('"')
        .and_then(|s| s.strip_suffix('"'))
        .unwrap_or(etag)
}

// ---------------------------------------------------------------------------
// Continuation tokens
// ---------------------------------------------------------------------------

238/// Encode an object key as a base64 continuation token.
239///
240/// # Examples
241///
242/// ```
243/// use rustack_s3_core::utils::{encode_continuation_token, decode_continuation_token};
244///
245/// let token = encode_continuation_token("photos/2024/img.jpg");
246/// let key = decode_continuation_token(&token).unwrap();
247/// assert_eq!(key, "photos/2024/img.jpg");
248/// ```
249#[must_use]
250pub fn encode_continuation_token(key: &str) -> String {
251    BASE64_STANDARD.encode(key.as_bytes())
252}
253
254/// Decode a base64 continuation token back to an object key.
255///
256/// # Errors
257///
258/// Returns [`S3ServiceError::InvalidArgument`] if the token is not valid
259/// base64 or does not decode to valid UTF-8.
260pub fn decode_continuation_token(token: &str) -> Result<String, S3ServiceError> {
261    let bytes = BASE64_STANDARD
262        .decode(token)
263        .map_err(|_| S3ServiceError::InvalidArgument {
264            message: "Invalid continuation token".to_owned(),
265        })?;
266    String::from_utf8(bytes).map_err(|_| S3ServiceError::InvalidArgument {
267        message: "Continuation token contains invalid UTF-8".to_owned(),
268    })
269}
270
// ---------------------------------------------------------------------------
// Copy source parsing
// ---------------------------------------------------------------------------

275/// Parse the `x-amz-copy-source` header value into bucket, key, and optional
276/// version ID components.
277///
278/// The copy source header uses the format `/bucket/key` or `bucket/key`, with
279/// an optional `?versionId=<vid>` suffix. Percent-encoded characters in the
280/// key are decoded.
281///
282/// # Errors
283///
284/// Returns [`S3ServiceError::InvalidArgument`] if the copy source string
285/// is empty or malformed.
286///
287/// # Examples
288///
289/// ```
290/// use rustack_s3_core::utils::parse_copy_source;
291///
292/// let (bucket, key, vid) = parse_copy_source("my-bucket/my-key").unwrap();
293/// assert_eq!(bucket, "my-bucket");
294/// assert_eq!(key, "my-key");
295/// assert!(vid.is_none());
296/// ```
297pub fn parse_copy_source(source: &str) -> Result<(String, String, Option<String>), S3ServiceError> {
298    // Strip leading slash if present.
299    let source = source.strip_prefix('/').unwrap_or(source);
300
301    // Split off the versionId query parameter if present.
302    let (path, version_id) = if let Some((p, query)) = source.split_once('?') {
303        let vid = query
304            .split('&')
305            .find_map(|param| param.strip_prefix("versionId="))
306            .map(String::from);
307        (p, vid)
308    } else {
309        (source, None)
310    };
311
312    // Split into bucket and key at the first '/'.
313    let (bucket, key) = path
314        .split_once('/')
315        .ok_or_else(|| S3ServiceError::InvalidArgument {
316            message: "Invalid copy source: must be in the format bucket/key".to_owned(),
317        })?;
318
319    if bucket.is_empty() || key.is_empty() {
320        return Err(S3ServiceError::InvalidArgument {
321            message: "Invalid copy source: bucket and key must not be empty".to_owned(),
322        });
323    }
324
325    // URL-decode the key (copy source keys may be percent-encoded).
326    let decoded_key = percent_encoding::percent_decode_str(key)
327        .decode_utf8()
328        .map_err(|_| S3ServiceError::InvalidArgument {
329            message: "Invalid copy source: key contains invalid UTF-8".to_owned(),
330        })?
331        .into_owned();
332
333    Ok((bucket.to_owned(), decoded_key, version_id))
334}
335
// ---------------------------------------------------------------------------
// XML escaping
// ---------------------------------------------------------------------------

/// Escape a string for safe inclusion in XML content.
///
/// Replaces `&`, `<`, `>`, `"`, and `'` with their XML entity references
/// (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`). Every other character is
/// copied through unchanged.
///
/// # Examples
///
/// ```
/// use rustack_s3_core::utils::xml_escape;
///
/// assert_eq!(xml_escape("a<b>c"), "a&lt;b&gt;c");
/// assert_eq!(xml_escape("x&y"), "x&amp;y");
/// assert_eq!(xml_escape("hello"), "hello");
/// ```
#[must_use]
pub fn xml_escape(s: &str) -> String {
    // The output is at least as long as the input, so reserve that up front.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            _ => out.push(ch),
        }
    }
    out
}

// Unit tests for the helpers in this module, grouped in the same order as the
// sections above.
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // ID generation
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_generate_unique_version_ids() {
        let id1 = generate_version_id();
        let id2 = generate_version_id();
        assert_ne!(id1, id2);
        assert!(id1.len() >= 20);
    }

    #[test]
    fn test_should_generate_unique_upload_ids() {
        let id1 = generate_upload_id();
        let id2 = generate_upload_id();
        assert_ne!(id1, id2);
        assert_eq!(id1.len(), 64);
        assert!(id1.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_should_generate_unique_request_ids() {
        let id1 = generate_request_id();
        let id2 = generate_request_id();
        assert_ne!(id1, id2);
        assert_eq!(id1.len(), 32);
    }

    // -----------------------------------------------------------------------
    // Timestamps
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_return_positive_timestamp_millis() {
        assert!(timestamp_millis() > 0);
    }

    #[test]
    fn test_should_return_rfc3339_timestamp() {
        let ts = timestamp_rfc3339();
        assert!(ts.contains('T'));
        assert!(ts.contains('+') || ts.contains('Z'));
    }

    // -----------------------------------------------------------------------
    // Range parsing
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_parse_range_start_end() {
        let (s, e) = parse_range_header("bytes=0-499", 1000).expect("test parse");
        assert_eq!((s, e), (0, 499));
    }

    #[test]
    fn test_should_parse_range_suffix() {
        let (s, e) = parse_range_header("bytes=-500", 1000).expect("test parse");
        assert_eq!((s, e), (500, 999));
    }

    #[test]
    fn test_should_parse_range_from_offset() {
        let (s, e) = parse_range_header("bytes=500-", 1000).expect("test parse");
        assert_eq!((s, e), (500, 999));
    }

    #[test]
    fn test_should_parse_range_from_zero() {
        let (s, e) = parse_range_header("bytes=0-", 1000).expect("test parse");
        assert_eq!((s, e), (0, 999));
    }

    #[test]
    fn test_should_clamp_range_end_to_content_length() {
        let (s, e) = parse_range_header("bytes=0-9999", 100).expect("test parse");
        assert_eq!((s, e), (0, 99));
    }

    #[test]
    fn test_should_reject_invalid_range_no_prefix() {
        assert!(parse_range_header("0-499", 1000).is_err());
    }

    #[test]
    fn test_should_reject_range_start_beyond_length() {
        assert!(parse_range_header("bytes=1000-", 1000).is_err());
    }

    #[test]
    fn test_should_reject_range_start_greater_than_end() {
        assert!(parse_range_header("bytes=500-100", 1000).is_err());
    }

    #[test]
    fn test_should_reject_range_on_empty_content() {
        assert!(parse_range_header("bytes=0-0", 0).is_err());
    }

    #[test]
    fn test_should_reject_suffix_range_zero() {
        assert!(parse_range_header("bytes=-0", 1000).is_err());
    }

    #[test]
    fn test_should_reject_suffix_range_exceeding_length() {
        assert!(parse_range_header("bytes=-2000", 1000).is_err());
    }

    // -----------------------------------------------------------------------
    // Conditional request matching
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_match_if_match_wildcard() {
        assert!(is_valid_if_match("\"abc\"", "*"));
    }

    #[test]
    fn test_should_match_if_match_same_etag() {
        assert!(is_valid_if_match("\"abc\"", "\"abc\""));
    }

    #[test]
    fn test_should_not_match_if_match_different_etag() {
        assert!(!is_valid_if_match("\"abc\"", "\"xyz\""));
    }

    #[test]
    fn test_should_match_if_match_unquoted() {
        assert!(is_valid_if_match("abc", "abc"));
    }

    #[test]
    fn test_should_not_match_if_none_match_wildcard() {
        assert!(!is_valid_if_none_match("\"abc\"", "*"));
    }

    #[test]
    fn test_should_not_match_if_none_match_same_etag() {
        assert!(!is_valid_if_none_match("\"abc\"", "\"abc\""));
    }

    #[test]
    fn test_should_match_if_none_match_different_etag() {
        assert!(is_valid_if_none_match("\"abc\"", "\"xyz\""));
    }

    // -----------------------------------------------------------------------
    // Continuation tokens
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_roundtrip_continuation_token() {
        let key = "photos/2024/image.jpg";
        let token = encode_continuation_token(key);
        let decoded = decode_continuation_token(&token).expect("test decode");
        assert_eq!(decoded, key);
    }

    #[test]
    fn test_should_roundtrip_empty_continuation_token() {
        let token = encode_continuation_token("");
        let decoded = decode_continuation_token(&token).expect("test decode");
        assert_eq!(decoded, "");
    }

    #[test]
    fn test_should_reject_invalid_continuation_token() {
        assert!(decode_continuation_token("!!!not-base64!!!").is_err());
    }

    // -----------------------------------------------------------------------
    // Copy source parsing
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_parse_copy_source_simple() {
        let (bucket, key, vid) = parse_copy_source("my-bucket/my-key").unwrap();
        assert_eq!(bucket, "my-bucket");
        assert_eq!(key, "my-key");
        assert!(vid.is_none());
    }

    #[test]
    fn test_should_parse_copy_source_with_leading_slash() {
        let (bucket, key, vid) = parse_copy_source("/my-bucket/my-key").unwrap();
        assert_eq!(bucket, "my-bucket");
        assert_eq!(key, "my-key");
        assert!(vid.is_none());
    }

    #[test]
    fn test_should_parse_copy_source_with_version_id() {
        let (bucket, key, vid) = parse_copy_source("/my-bucket/my-key?versionId=abc123").unwrap();
        assert_eq!(bucket, "my-bucket");
        assert_eq!(key, "my-key");
        assert_eq!(vid.as_deref(), Some("abc123"));
    }

    #[test]
    fn test_should_parse_copy_source_with_nested_key() {
        let (bucket, key, vid) = parse_copy_source("bucket/path/to/key").unwrap();
        assert_eq!(bucket, "bucket");
        assert_eq!(key, "path/to/key");
        assert!(vid.is_none());
    }

    #[test]
    fn test_should_parse_copy_source_with_encoded_key() {
        let (bucket, key, vid) = parse_copy_source("bucket/path%20to/key%2B1").unwrap();
        assert_eq!(bucket, "bucket");
        assert_eq!(key, "path to/key+1");
        assert!(vid.is_none());
    }

    #[test]
    fn test_should_reject_copy_source_no_key() {
        assert!(parse_copy_source("bucket-only").is_err());
    }

    #[test]
    fn test_should_reject_copy_source_empty_bucket() {
        assert!(parse_copy_source("/").is_err());
    }

    #[test]
    fn test_should_reject_copy_source_empty_key() {
        assert!(parse_copy_source("bucket/").is_err());
    }

    // -----------------------------------------------------------------------
    // XML escaping
    // -----------------------------------------------------------------------

    #[test]
    fn test_should_escape_ampersand() {
        assert_eq!(xml_escape("a&b"), "a&amp;b");
    }

    #[test]
    fn test_should_escape_angle_brackets() {
        assert_eq!(xml_escape("<tag>"), "&lt;tag&gt;");
    }

    #[test]
    fn test_should_escape_quotes() {
        assert_eq!(xml_escape("he said \"hi\""), "he said &quot;hi&quot;");
    }

    #[test]
    fn test_should_escape_apostrophe() {
        assert_eq!(xml_escape("it's"), "it&apos;s");
    }

    #[test]
    fn test_should_not_escape_plain_text() {
        assert_eq!(xml_escape("hello world"), "hello world");
    }

    #[test]
    fn test_should_handle_empty_string() {
        assert_eq!(xml_escape(""), "");
    }
}