Skip to main content

crates_docs/tools/docs/cache/
key.rs

1//! Cache key generation and validation for document cache
2
3use std::collections::hash_map::DefaultHasher;
4use std::hash::{Hash, Hasher};
5
/// Returns `true` when `b` may appear in a crate name: an ASCII letter,
/// an ASCII digit, `_` or `-`.
#[inline]
fn is_valid_crate_name_char(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-')
}
11
/// Returns `true` when `b` may appear in an item path: an ASCII letter,
/// an ASCII digit, `_`, `-`, or the `:` used by `::` path separators.
#[inline]
fn is_valid_item_path_char(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' | b':')
}
17
18/// Check if crate name is valid (non-empty and all valid chars)
19#[inline]
20fn is_valid_crate_name(name: &str) -> bool {
21    !name.is_empty() && name.bytes().all(is_valid_crate_name_char)
22}
23
24/// Check if item path is valid (non-empty and all valid chars)
25#[inline]
26fn is_valid_item_path(path: &str) -> bool {
27    !path.is_empty() && path.bytes().all(is_valid_item_path_char)
28}
29
30/// Cache key generator for document cache
31pub struct CacheKeyGenerator;
32
33impl CacheKeyGenerator {
34    /// Build a raw crate HTML cache key with normalization.
35    ///
36    /// This key stores the fetched docs.rs HTML artifact shared across
37    /// markdown, text, and html responses for the same crate lookup.
38    ///
39    /// Key format: `crate:{name}:html` or `crate:{name}:{version}:html`
40    #[must_use]
41    pub fn crate_html_cache_key(crate_name: &str, version: Option<&str>) -> String {
42        let base_key = Self::crate_cache_key(crate_name, version);
43        format!("{base_key}:html")
44    }
45
46    /// Build crate cache key with normalization
47    ///
48    /// # Normalization rules
49    ///
50    /// - `crate_name`: lowercase, trimmed
51    ///   (crate names are case-insensitive on crates.io)
52    /// - `version`: lowercase, trimmed
53    /// - Invalid characters in `crate_name` (non-alphanumeric, non-underscore, non-hyphen)
54    ///   will result in a hashed key to prevent injection
55    #[must_use]
56    pub fn crate_cache_key(crate_name: &str, version: Option<&str>) -> String {
57        // Inline normalization to avoid intermediate allocations
58        let normalized_name = crate_name.trim().to_lowercase();
59        let normalized_ver = version.map(|v| v.trim().to_lowercase());
60
61        if !is_valid_crate_name(&normalized_name) {
62            let mut hasher = DefaultHasher::new();
63            normalized_name.hash(&mut hasher);
64            let hash = hasher.finish();
65            return match normalized_ver {
66                Some(ver) => format!("crate:hash:{hash}:{ver}"),
67                None => format!("crate:hash:{hash}"),
68            };
69        }
70
71        match normalized_ver {
72            Some(ver) => format!("crate:{normalized_name}:{ver}"),
73            None => format!("crate:{normalized_name}"),
74        }
75    }
76
77    /// Build search cache key with normalization
78    ///
79    /// # Normalization rules
80    ///
81    /// - query: lowercase, trimmed (search is case-insensitive)
82    /// - sort: lowercase, trimmed
83    #[must_use]
84    pub fn search_cache_key(query: &str, limit: u32, sort: Option<&str>) -> String {
85        let normalized_query = query.trim().to_lowercase();
86        let normalized_sort = sort.unwrap_or("relevance").trim().to_lowercase();
87        format!("search:{normalized_query}:{normalized_sort}:{limit}")
88    }
89
90    /// Build item cache key with normalization
91    ///
92    /// # Normalization rules
93    ///
94    /// - `crate_name`: lowercase, trimmed
95    ///   (crate names are case-insensitive on crates.io)
96    /// - `item_path`: trimmed but case-sensitive (Rust paths are case-sensitive)
97    /// - `version`: lowercase, trimmed
98    #[must_use]
99    pub fn item_cache_key(crate_name: &str, item_path: &str, version: Option<&str>) -> String {
100        let normalized_name = crate_name.trim().to_lowercase();
101        let normalized_path = item_path.trim();
102        let normalized_ver = version.map(|v| v.trim().to_lowercase());
103
104        if !is_valid_crate_name(&normalized_name) || !is_valid_item_path(normalized_path) {
105            let mut hasher = DefaultHasher::new();
106            normalized_name.hash(&mut hasher);
107            normalized_path.hash(&mut hasher);
108            let hash = hasher.finish();
109            return match normalized_ver {
110                Some(ver) => {
111                    format!("item:{normalized_name}:{ver}:hash:{hash}")
112                }
113                None => format!("item:{normalized_name}:hash:{hash}"),
114            };
115        }
116
117        match normalized_ver {
118            Some(ver) => {
119                format!("item:{normalized_name}:{ver}:{normalized_path}")
120            }
121            None => format!("item:{normalized_name}:{normalized_path}"),
122        }
123    }
124
125    /// Build a raw item HTML cache key with normalization.
126    ///
127    /// This key stores the fetched docs.rs search-result HTML artifact shared
128    /// across markdown, text, and html responses for the same item lookup.
129    ///
130    /// Key format: `item:{crate}:{path}:html` or `item:{crate}:{version}:{path}:html`
131    #[must_use]
132    pub fn item_html_cache_key(crate_name: &str, item_path: &str, version: Option<&str>) -> String {
133        let base_key = Self::item_cache_key(crate_name, item_path, version);
134        format!("{base_key}:html")
135    }
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain, already-normalized inputs map onto the documented key layouts.
    #[test]
    fn test_cache_key_generation() {
        // Crate keys, with and without a version, plus the HTML variant.
        let crate_only = CacheKeyGenerator::crate_cache_key("serde", None);
        assert_eq!(crate_only, "crate:serde");
        let crate_versioned = CacheKeyGenerator::crate_cache_key("serde", Some("1.0"));
        assert_eq!(crate_versioned, "crate:serde:1.0");
        let crate_html = CacheKeyGenerator::crate_html_cache_key("serde", Some("1.0"));
        assert_eq!(crate_html, "crate:serde:1.0:html");

        // Search keys: the sort order defaults to `relevance`.
        let search_cases = [
            (None, "search:web framework:relevance:10"),
            (Some("downloads"), "search:web framework:downloads:10"),
        ];
        for (sort, expected) in search_cases {
            let key = CacheKeyGenerator::search_cache_key("web framework", 10, sort);
            assert_eq!(key, expected);
        }

        // Item keys, with and without a version, plus the HTML variant.
        let item_cases = [
            (None, "item:serde:Serialize"),
            (Some("1.0"), "item:serde:1.0:Serialize"),
        ];
        for (version, expected) in item_cases {
            let key = CacheKeyGenerator::item_cache_key("serde", "Serialize", version);
            assert_eq!(key, expected);
        }
        let item_html = CacheKeyGenerator::item_html_cache_key("serde", "Serialize", Some("1.0"));
        assert_eq!(item_html, "item:serde:1.0:Serialize:html");
    }

    /// Crate names, queries and sort orders ignore letter case.
    #[test]
    fn test_cache_key_normalization_case_insensitivity() {
        let canonical = CacheKeyGenerator::crate_cache_key("serde", None);
        for spelling in ["Serde", "SERDE"] {
            assert_eq!(CacheKeyGenerator::crate_cache_key(spelling, None), canonical);
        }

        let tokio_upper = CacheKeyGenerator::crate_cache_key("Tokio", Some("1.0"));
        let tokio_lower = CacheKeyGenerator::crate_cache_key("tokio", Some("1.0"));
        assert_eq!(tokio_upper, tokio_lower);

        let search_mixed =
            CacheKeyGenerator::search_cache_key("Web Framework", 10, Some("Relevance"));
        let search_lower =
            CacheKeyGenerator::search_cache_key("web framework", 10, Some("relevance"));
        assert_eq!(search_mixed, search_lower);

        let item_mixed = CacheKeyGenerator::item_cache_key("Serde", "Serialize", None);
        let item_lower = CacheKeyGenerator::item_cache_key("serde", "Serialize", None);
        assert_eq!(item_mixed, item_lower);
    }

    /// Surrounding whitespace is stripped from every normalized segment.
    #[test]
    fn test_cache_key_normalization_whitespace() {
        let crate_key = CacheKeyGenerator::crate_cache_key("serde", Some(" 1.0 "));
        assert_eq!(crate_key, "crate:serde:1.0");

        let search_key =
            CacheKeyGenerator::search_cache_key("  web framework  ", 10, Some(" downloads "));
        assert_eq!(search_key, "search:web framework:downloads:10");

        let item_key = CacheKeyGenerator::item_cache_key("serde", "  Serialize  ", None);
        assert_eq!(item_key, "item:serde:Serialize");
    }

    /// Versions are lowercased in both crate and item keys.
    #[test]
    fn test_cache_key_normalization_version_case() {
        let crate_key = CacheKeyGenerator::crate_cache_key("serde", Some("1.0-RC1"));
        assert_eq!(crate_key, "crate:serde:1.0-rc1");

        let item_key = CacheKeyGenerator::item_cache_key("serde", "Serialize", Some("V1.0"));
        assert_eq!(item_key, "item:serde:v1.0:Serialize");
    }

    /// Crate names carrying a `:` are hashed; ordinary names stay readable.
    #[test]
    fn test_cache_key_injection_prevention() {
        let hostile_cases = [("serde:malicious", None), ("crate:evil", Some("1.0"))];
        for (hostile_name, version) in hostile_cases {
            let key = CacheKeyGenerator::crate_cache_key(hostile_name, version);
            assert!(key.starts_with("crate:hash:"));
            assert!(!key.contains(hostile_name));
        }

        let benign_cases = [
            ("serde-json", "crate:serde-json"),
            ("my_crate", "crate:my_crate"),
        ];
        for (name, expected) in benign_cases {
            assert_eq!(CacheKeyGenerator::crate_cache_key(name, None), expected);
        }
    }

    /// Item paths differing only by case must produce different keys.
    #[test]
    fn test_item_path_case_sensitivity() {
        let upper = CacheKeyGenerator::item_cache_key("serde", "Serialize", None);
        let lower = CacheKeyGenerator::item_cache_key("serde", "serialize", None);
        assert_ne!(upper, lower);
    }

    /// Empty, blank, non-ASCII and otherwise unusual inputs.
    #[test]
    fn test_cache_key_edge_cases() {
        // Empty and whitespace-only crate names fall back to a hash.
        for blank in ["", "   "] {
            let key = CacheKeyGenerator::crate_cache_key(blank, None);
            assert!(key.starts_with("crate:hash:"));
        }

        // An empty version is kept as an empty trailing segment.
        let empty_version = CacheKeyGenerator::crate_cache_key("serde", Some(""));
        assert_eq!(empty_version, "crate:serde:");

        // Non-ASCII crate names are hashed rather than embedded.
        let unicode_key = CacheKeyGenerator::crate_cache_key("serde测试", None);
        assert!(unicode_key.starts_with("crate:hash:"));
        assert!(!unicode_key.contains("测试"));

        // A newline inside the item path forces the hashed form.
        let newline_path = CacheKeyGenerator::item_cache_key("serde", "Serialize\nmalicious", None);
        assert!(newline_path.contains("hash:"));
        assert!(!newline_path.contains('\n'));

        // Colons are legal inside item paths and are kept verbatim.
        let colon_cases = [
            ("Serialize:extra:colons", "item:serde:Serialize:extra:colons"),
            ("serde::Serialize", "item:serde:serde::Serialize"),
        ];
        for (path, expected) in colon_cases {
            assert_eq!(CacheKeyGenerator::item_cache_key("serde", path, None), expected);
        }

        // An empty item path or an empty crate name both trigger hashing.
        let empty_path = CacheKeyGenerator::item_cache_key("serde", "", None);
        assert!(empty_path.contains("hash:"));

        let empty_crate = CacheKeyGenerator::item_cache_key("", "Crate", None);
        assert!(empty_crate.contains("hash:"));
    }
}
298}