Skip to main content

oximedia_cache/
key_norm.rs

1//! Cache key normalisation.
2//!
3//! [`normalize_cache_key`] converts a URL into a canonical form suitable for
4//! use as a cache key.  The following transformations are applied:
5//!
6//! 1. **Lowercase** — scheme, host, and path are lowercased.
7//! 2. **Trailing slash removal** — a trailing `/` on the path component is
8//!    stripped (unless the path is `/` itself).
//! 3. **Query parameter sorting** — query string parameters are split on `&`,
//!    lowercased, sorted lexicographically, and rejoined.  Empty parameters
//!    and empty query strings are omitted.
11//!
12//! Fragment identifiers (`#...`) are discarded as they are client-side only.
13//!
14//! # Example
15//!
16//! ```
17//! use oximedia_cache::key_norm::normalize_cache_key;
18//!
19//! let key = normalize_cache_key("https://CDN.Example.com/Video/Clip.mp4/?b=2&a=1#frag");
20//! assert_eq!(key, "https://cdn.example.com/video/clip.mp4?a=1&b=2");
21//! ```
22
23#![allow(dead_code)]
24
/// Normalise a URL into a canonical cache key.
///
/// The transformation is purely textual (no URL parser is involved):
///
/// 1. Everything from the first `#` onwards (the fragment) is discarded.
/// 2. The part before the first `?` is lowercased and all trailing `/`
///    characters are removed, except that a root path (`/`, or the single
///    `/` directly after the host in `scheme://host/`) is preserved.
/// 3. Query parameters are split on `&`, lowercased, sorted
///    lexicographically and rejoined; empty parameters are dropped and an
///    empty query string produces no `?` in the output.
///
/// The result is idempotent: normalising an already-normalised key returns
/// it unchanged.
///
/// # Arguments
///
/// * `url` — The raw URL string (any scheme is accepted, and scheme-less
///   inputs such as `/a/b?x=1` are handled as plain paths).
///
/// # Returns
///
/// A normalised `String` suitable for use as a cache key.  An empty input
/// (or an input consisting only of a fragment) yields an empty string.
#[must_use]
pub fn normalize_cache_key(url: &str) -> String {
    // Fragments are never sent to servers, so they must not affect the key.
    let without_fragment = url.split_once('#').map_or(url, |(head, _)| head);

    // Everything before the first '?' is scheme/host/path; the rest is query.
    let (location, query) = match without_fragment.split_once('?') {
        Some((location, query)) => (location, Some(query)),
        None => (without_fragment, None),
    };

    let mut key = location.to_lowercase();

    // Strip *all* trailing slashes (so the result is idempotent) but never
    // shrink past the first slash of the path: that slash is the root path.
    // When there is no path at all (e.g. "https://"), `path_start` equals the
    // string length and nothing is stripped, leaving the "://" intact.
    let shortest_allowed = path_start(&key) + 1;
    while key.len() > shortest_allowed && key.ends_with('/') {
        key.pop();
    }

    // Lowercase, drop empty parameters, and sort for a stable ordering.
    let mut params: Vec<String> = query
        .unwrap_or_default()
        .split('&')
        .filter(|param| !param.is_empty())
        .map(str::to_lowercase)
        .collect();
    if params.is_empty() {
        return key;
    }
    params.sort_unstable();

    key.push('?');
    key.push_str(&params.join("&"));
    key
}

/// Returns the byte offset at which the path component of `location` starts.
///
/// For `scheme://authority/path` this is the index of the first `/` after the
/// authority (or `location.len()` when no path is present).  Inputs without a
/// syntactically valid scheme prefix are treated as a bare path starting at
/// offset 0, so a `://` that merely appears inside a path is not mistaken for
/// a scheme separator.
fn path_start(location: &str) -> usize {
    match location.split_once("://") {
        Some((scheme, rest)) if is_scheme(scheme) => {
            let authority_start = scheme.len() + "://".len();
            rest.find('/')
                .map_or(location.len(), |slash| authority_start + slash)
        }
        _ => 0,
    }
}

/// Returns `true` if `candidate` has the shape of an RFC 3986 scheme:
/// an ASCII letter followed by ASCII letters, digits, `+`, `-` or `.`.
fn is_scheme(candidate: &str) -> bool {
    let mut chars = candidate.chars();
    chars.next().map_or(false, |c| c.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
}
92
93// ---------------------------------------------------------------------------
94// Tests
95// ---------------------------------------------------------------------------
96
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that normalising `input` yields exactly `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn check(input: &str, expected: &str) {
        assert_eq!(normalize_cache_key(input), expected);
    }

    // ── basic normalisation ───────────────────────────────────────────────────

    #[test]
    fn test_lowercase_host_and_path() {
        check(
            "https://CDN.Example.COM/Video/Clip.mp4",
            "https://cdn.example.com/video/clip.mp4",
        );
    }

    #[test]
    fn test_strip_trailing_slash() {
        check("https://example.com/path/", "https://example.com/path");
    }

    #[test]
    fn test_preserve_root_slash() {
        // The lone "/" after the host is the root path and must survive
        // trailing-slash removal.
        check("https://example.com/", "https://example.com/");
    }

    #[test]
    fn test_sort_query_params() {
        check("https://example.com/v?b=2&a=1", "https://example.com/v?a=1&b=2");
    }

    #[test]
    fn test_strip_fragment() {
        check("https://example.com/page#section", "https://example.com/page");
    }

    #[test]
    fn test_fragment_and_query_and_trailing_slash() {
        check(
            "https://CDN.Example.com/Video/Clip.mp4/?b=2&a=1#frag",
            "https://cdn.example.com/video/clip.mp4?a=1&b=2",
        );
    }

    #[test]
    fn test_no_query_no_fragment() {
        check("https://example.com/asset.m4s", "https://example.com/asset.m4s");
    }

    #[test]
    fn test_empty_url_returns_empty() {
        check("", "");
    }

    #[test]
    fn test_empty_query_string_omitted() {
        // A bare trailing '?' carries no parameters, so no '?' may remain.
        let key = normalize_cache_key("https://example.com/path?");
        assert!(!key.contains('?'), "Got: {key}");
    }

    #[test]
    fn test_multiple_query_params_sorted() {
        check("http://cdn.test/v?z=9&m=3&a=1", "http://cdn.test/v?a=1&m=3&z=9");
    }

    #[test]
    fn test_already_normalised_is_idempotent() {
        // Feeding a normalised key back in must not change it.
        let first_pass = normalize_cache_key("https://example.com/path?a=1&b=2");
        let second_pass = normalize_cache_key(&first_pass);
        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn test_path_without_scheme() {
        // Scheme-less inputs are still lowercased and have their params sorted.
        check("/PATH/TO/FILE?Z=1&A=2", "/path/to/file?a=2&z=1");
    }
}