rs_urlencoding/
enc.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::io;
4use std::str;
5
/// Display adapter that percent-encodes the wrapped value while it is being formatted,
/// so no intermediate buffer has to be allocated.
///
/// Alphanumerics and `-`, `_`, `.`, `~` are emitted as-is; every other byte is
/// percent-encoded. Assumes UTF-8 encoding.
///
/// ```rust
/// use urlencoding::Encoded;
/// format!("{}", Encoded("hello!"));
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct Encoded<Str>(pub Str);
16
17impl<Str: AsRef<[u8]>> Encoded<Str> {
18    /// Long way of writing `Encoded(data)`
19    ///
20    /// Takes any string-like type or a slice of bytes, either owned or borrowed.
21    #[inline(always)]
22    pub fn new(string: Str) -> Self {
23        Self(string)
24    }
25
26    #[inline(always)]
27    pub fn to_str(&self) -> Cow<str> {
28        encode_binary(self.0.as_ref())
29    }
30
31    /// Perform urlencoding to a string
32    #[inline]
33    #[allow(clippy::inherent_to_string_shadow_display)]
34    pub fn to_string(&self) -> String {
35        self.to_str().into_owned()
36    }
37
38    /// Perform urlencoding into a writer
39    #[inline]
40    pub fn write<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
41        encode_into(self.0.as_ref(), false, |s| writer.write_all(s.as_bytes()))?;
42        Ok(())
43    }
44
45    /// Perform urlencoding into a string
46    #[inline]
47    pub fn append_to(&self, string: &mut String) {
48        append_string(&self.0.as_ref(), string, false);
49    }
50}
51
52impl<'a> Encoded<&'a str> {
53    /// Same as new, but hints a more specific type, so you can avoid errors about `AsRef<[u8]>` not implemented
54    /// on references-to-references.
55    #[inline(always)]
56    pub fn str(string: &'a str) -> Self {
57        Self(string)
58    }
59}
60
61impl<String: AsRef<[u8]>> fmt::Display for Encoded<String> {
62    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63        encode_into(self.0.as_ref(), false, |s| f.write_str(s))?;
64        Ok(())
65    }
66}
67
68/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
69///
70/// Call `.into_owned()` if you need a `String`
71#[inline(always)]
72pub fn encode(data: &str) -> Cow<str> {
73    encode_binary(data.as_bytes())
74}
75
76/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`.
77#[inline]
78pub fn encode_binary(data: &[u8]) -> Cow<str> {
79    // add maybe extra capacity, but try not to exceed allocator's bucket size
80    let mut escaped = String::with_capacity(data.len() | 15);
81    let unmodified = append_string(data, &mut escaped, true);
82    if unmodified {
83        return Cow::Borrowed(unsafe {
84            // encode_into has checked it's ASCII
85            str::from_utf8_unchecked(data)
86        });
87    }
88    Cow::Owned(escaped)
89}
90
91fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool {
92    encode_into(data, may_skip, |s| Ok::<_, std::convert::Infallible>(escaped.push_str(s))).unwrap()
93}
94
95fn encode_into<E>(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result<bool, E> {
96    let mut pushed = false;
97    loop {
98        // Fast path to skip over safe chars at the beginning of the remaining string
99        let ascii_len = data.iter()
100            .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' |  b'-' | b'.' | b'_' | b'~')).count();
101
102        let (safe, rest) = if ascii_len >= data.len() {
103            if !pushed && may_skip_write {
104                return Ok(true);
105            }
106            (data, &[][..]) // redundatnt to optimize out a panic in split_at
107        } else {
108            data.split_at(ascii_len)
109        };
110        pushed = true;
111        if !safe.is_empty() {
112            push_str(unsafe { str::from_utf8_unchecked(safe) })?;
113        }
114        if rest.is_empty() {
115            break;
116        }
117
118        match rest.split_first() {
119            Some((byte, rest)) => {
120                let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
121                push_str(unsafe { str::from_utf8_unchecked(enc) })?;
122                data = rest;
123            }
124            None => break,
125        };
126    }
127    Ok(false)
128}
129
/// Maps a nibble (`0..=15`) to its uppercase ASCII hexadecimal digit.
#[inline]
fn to_hex_digit(digit: u8) -> u8 {
    if digit < 10 {
        b'0' + digit
    } else {
        // Shift so that 10 lands on `A`.
        b'A' - 10 + digit
    }
}
136}