Skip to main content

har/
opaque.rs

1/// True when a string chunk looks like an opaque blob (base64 / percent-encoded /
2/// long high-entropy token) rather than a readable path/identifier. Hex strings
3/// and UUIDs are treated as readable ids and excluded.
4pub fn is_opaque(s: &str) -> bool {
5    let len = s.len();
6    if len < 16 {
7        return false;
8    }
9    if s.bytes().all(|b| b.is_ascii_hexdigit()) {
10        return false; // hex hash/etag -> readable id
11    }
12    if is_uuid(s) {
13        return false; // resource id, not a secret blob
14    }
15
16    // Percent-encoded blob (URL-encoded JSON/config).
17    if s.contains('%') {
18        return true;
19    }
20
21    // Must be within the base64 alphabet to be considered a blob.
22    let base64_alpha = s
23        .bytes()
24        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'/' | b'=' | b'_' | b'-'));
25    if !base64_alpha {
26        return false;
27    }
28    let has_digit = s.bytes().any(|b| b.is_ascii_digit());
29    let has_upper = s.bytes().any(|b| b.is_ascii_uppercase());
30    let has_lower = s.bytes().any(|b| b.is_ascii_lowercase());
31    let has_b64_symbol = s.bytes().any(|b| matches!(b, b'+' | b'/' | b'='));
32
33    // Standard base64 markers never appear in slugs/ids -> strong signal.
34    if len >= 24 && has_b64_symbol && has_digit {
35        return true;
36    }
37    // Symbol-less base64url token: long, mixed case + digit (slugs are lowercase).
38    if len >= 32 && has_digit && has_upper && has_lower {
39        return true;
40    }
41    false
42}
43
/// Reports whether `s` is a UUID in canonical textual form: five groups of
/// 8, 4, 4, 4 and 12 ASCII hex digits (either case) joined by single hyphens.
pub fn is_uuid(s: &str) -> bool {
    // Byte offsets of the four hyphens in the 36-byte canonical layout.
    const HYPHEN_OFFSETS: [usize; 4] = [8, 13, 18, 23];

    let raw = s.as_bytes();
    raw.len() == 36
        && raw.iter().enumerate().all(|(offset, byte)| {
            if HYPHEN_OFFSETS.contains(&offset) {
                *byte == b'-'
            } else {
                byte.is_ascii_hexdigit()
            }
        })
}
56
#[cfg(test)]
mod tests {
    use super::is_opaque;

    /// Asserts that every sample is classified as an opaque blob.
    fn expect_opaque(samples: &[&str]) {
        for sample in samples {
            assert!(is_opaque(sample), "expected opaque: {}", sample);
        }
    }

    /// Asserts that every sample is classified as readable (not opaque).
    fn expect_readable(samples: &[&str]) {
        for sample in samples {
            assert!(!is_opaque(sample), "expected readable: {}", sample);
        }
    }

    #[test]
    fn flags_standard_base64_blob() {
        // jackettio-style standard base64 with padding
        expect_opaque(&["eyJtYXhUb3JyZW50cyI6OCwiZGVicmlkIjp0cnVlfQ=="]);
    }

    #[test]
    fn flags_percent_encoded_blob() {
        expect_opaque(&["%7B%22NexioTorii%22%3A%22eyJ1c2VFbmdsaXNo%22%7D"]);
    }

    #[test]
    fn flags_base64url_token() {
        // JWT-like base64url, no +/=, mixed case + digits
        expect_opaque(&["eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"]);
    }

    #[test]
    fn does_not_flag_readable_segments() {
        expect_readable(&[
            "manifest.json",
            "videoplayback",
            "sync_resolve_account_secret",
            "v1",
            "popular",
        ]);
    }

    #[test]
    fn does_not_flag_lowercase_slug_with_digit() {
        // long, has a digit, but lowercase dashed slug -> readable, must stay
        expect_readable(&["my-very-long-feature-slug-2024-edition"]);
    }

    #[test]
    fn does_not_flag_hex_or_uuid() {
        expect_readable(&[
            "0123456789abcdef0123",                 // hex hash
            "550e8400-e29b-41d4-a716-446655440000", // uuid
        ]);
    }

    #[test]
    fn does_not_flag_short_strings() {
        expect_readable(&[
            "szpwe4fx4ngs8u9q", // 16-char token, below blob threshold
            "abc",
        ]);
    }
}