1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
pub use *;
/// Percent-encode set that leaves only the unreserved URI characters of RFC-3986 unencoded:
///
/// <https://datatracker.ietf.org/doc/html/rfc3986#section-2.3>
///
/// Characters that are allowed in a URI but do not have a reserved
/// purpose are called unreserved. These include uppercase and lowercase
/// letters, decimal digits, hyphen, period, underscore, and tilde.
///
/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
///
/// The set starts from `NON_ALPHANUMERIC` (which already leaves ALPHA / DIGIT alone) and
/// removes the four unreserved punctuation bytes so they are passed through as-is.
pub const URL_ENCODE_CHARSET: &AsciiSet = &NON_ALPHANUMERIC
    .remove(b'-')
    .remove(b'.')
    .remove(b'_')
    .remove(b'~');
/// Characters to percent-encode for hive values such that they round-trip from bucket storage.
///
/// This is much more relaxed than the RFC-3986 URI spec as bucket storage is more permissive of allowed
/// characters.
///
/// The set starts from `CONTROLS` and additionally encodes the bytes that would be ambiguous
/// inside a hive `key=value` path segment, plus colon and space (see the note below).
pub const HIVE_VALUE_ENCODE_CHARSET: &AsciiSet = &CONTROLS
    .add(b'/') // Exclude path separator
    .add(b'=') // Exclude hive `key=value` separator
    .add(b'%') // Percent itself, so that already-encoded looking input is not decoded twice.
    // Colon and space are supported by object storage, but are encoded to mimic
    // the datetime output format from pyarrow:
    // * i.e. 'date2=2023-01-01 00:00:00.000000' becomes 'date2=2023-01-01%2000%3A00%3A00.000000'
    .add(b':')
    .add(b' ');