1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
use ;
use HashSet;
/// Canonicalize a URL according to the Google Web Risk API specification.
///
/// Returns `None` if the URL is invalid (for example, it is empty, is a `mailto` URL, or has a host that is too long).
///
/// # Examples
/// ```
/// assert_eq!(
/// webrisk_hash::canonicalize("http://www.GOOgle.com/"),
/// Some("http://www.google.com/".to_string())
/// );
/// assert_eq!(
/// webrisk_hash::canonicalize("http://3279880203/blah"),
/// Some("http://195.127.0.11/blah".to_string())
/// );
/// ```
/// Generate host-suffix/path-prefix expressions for a canonicalized URL.
///
/// Returns up to 30 combinations of a host suffix and a path prefix
/// (at most 5 host suffixes × 6 path prefixes).
///
/// # Examples
/// ```
/// let exprs = webrisk_hash::suffix_postfix_expressions("http://a.b.c/1/2.html?param=1");
/// assert!(exprs.contains(&"a.b.c/1/2.html?param=1".to_string()));
/// assert!(exprs.contains(&"b.c/".to_string()));
/// ```
/// Compute a truncated SHA-256 hash prefix.
///
/// Returns the most significant `bits / 8` bytes of the SHA-256 digest.
///
/// # Examples
/// ```
/// // FIPS-180-2 Example B1 (32 bits)
/// let out = webrisk_hash::truncated_sha256_prefix("abc", 32);
/// assert_eq!(out, vec![0xba, 0x78, 0x16, 0xbf]);
/// ```
/// Get hash prefixes for all suffix/prefix expressions of a URL.
///
/// Canonicalizes the URL, generates expressions, and returns
/// a set of truncated SHA-256 hash prefixes.
///
/// # Arguments
/// * `url` - The URL to process
/// * `bits` - Hash prefix size in bits (e.g., 32 for 4-byte prefixes, 256 for full hash)
///
/// # Examples
/// ```
/// let prefixes = webrisk_hash::get_prefixes("https://google.com/a/test/index.html?abc123", 32);
/// assert_eq!(prefixes.len(), 5);
/// ```
/// Get a mapping of expressions to their hash prefixes for a URL.
///
/// Like `get_prefixes`, but returns a mapping from each expression to its
/// hash prefix, which is useful for debugging or detailed analysis.
///
/// # Arguments
/// * `url` - The URL to process
/// * `bits` - Hash prefix size in bits (default 256)