Skip to main content

qubit_codec/
percent_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Percent text codec.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16    Encoder,
17};
18
/// Stateless codec that converts between plain UTF-8 text and its
/// percent-encoded form.
///
/// The type carries no data, so copies are free and all values compare equal.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct PercentCodec;
22
23impl PercentCodec {
24    /// Creates a percent codec.
25    ///
26    /// # Returns
27    /// Percent codec.
28    pub fn new() -> Self {
29        Self
30    }
31
32    /// Encodes text using percent encoding.
33    ///
34    /// # Parameters
35    /// - `text`: UTF-8 text to encode.
36    ///
37    /// # Returns
38    /// Percent-encoded text.
39    pub fn encode(&self, text: &str) -> String {
40        percent_encode_bytes(text.as_bytes(), false)
41    }
42
43    /// Decodes percent-encoded UTF-8 text.
44    ///
45    /// # Parameters
46    /// - `text`: Percent-encoded text.
47    ///
48    /// # Returns
49    /// Decoded UTF-8 text.
50    ///
51    /// # Errors
52    /// Returns [`CodecError`] when a percent escape is malformed or decoded
53    /// bytes are not valid UTF-8.
54    pub fn decode(&self, text: &str) -> CodecResult<String> {
55        String::from_utf8(percent_decode_bytes(text, false)?).map_err(CodecError::from)
56    }
57}
58
59impl Encoder<str> for PercentCodec {
60    type Error = CodecError;
61    type Output = String;
62
63    /// Encodes text using percent encoding.
64    fn encode(&self, input: &str) -> Result<Self::Output, Self::Error> {
65        Ok(PercentCodec::encode(self, input))
66    }
67}
68
69impl Decoder<str> for PercentCodec {
70    type Error = CodecError;
71    type Output = String;
72
73    /// Decodes percent-encoded text.
74    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
75        PercentCodec::decode(self, input)
76    }
77}
78
79/// Percent-encodes UTF-8 bytes.
80///
81/// # Parameters
82/// - `bytes`: Bytes to encode.
83/// - `space_as_plus`: Whether spaces should be encoded as `+`.
84///
85/// # Returns
86/// Encoded text.
87pub(crate) fn percent_encode_bytes(bytes: &[u8], space_as_plus: bool) -> String {
88    let mut output = String::with_capacity(bytes.len());
89    for byte in bytes {
90        if *byte == b' ' && space_as_plus {
91            output.push('+');
92        } else if is_unreserved(*byte) {
93            output.push(*byte as char);
94        } else {
95            output.push('%');
96            output.push(percent_hex_digit(byte >> 4));
97            output.push(percent_hex_digit(byte & 0x0f));
98        }
99    }
100    output
101}
102
103/// Percent-decodes UTF-8 bytes.
104///
105/// # Parameters
106/// - `text`: Text to decode.
107/// - `plus_as_space`: Whether `+` should decode to a space byte.
108///
109/// # Returns
110/// Decoded bytes.
111///
112/// # Errors
113/// Returns [`CodecError::InvalidEscape`] for malformed escapes.
114pub(crate) fn percent_decode_bytes(text: &str, plus_as_space: bool) -> CodecResult<Vec<u8>> {
115    let bytes = text.as_bytes();
116    let mut output = Vec::with_capacity(bytes.len());
117    let mut index = 0;
118    while let Some(&byte) = bytes.get(index) {
119        match byte {
120            b'%' => {
121                let (Some(&high_byte), Some(&low_byte)) =
122                    (bytes.get(index + 1), bytes.get(index + 2))
123                else {
124                    return Err(invalid_percent_escape(index));
125                };
126                let high =
127                    percent_hex_value(high_byte).ok_or_else(|| invalid_percent_escape(index))?;
128                let low =
129                    percent_hex_value(low_byte).ok_or_else(|| invalid_percent_escape(index))?;
130                output.push((high << 4) | low);
131                index += 3;
132            }
133            b'+' if plus_as_space => {
134                output.push(b' ');
135                index += 1;
136            }
137            byte => {
138                output.push(byte);
139                index += 1;
140            }
141        }
142    }
143    Ok(output)
144}
145
146/// Builds a malformed percent escape error.
147///
148/// # Parameters
149/// - `index`: Byte index of the `%` marker in the original input.
150///
151/// # Returns
152/// An invalid escape error for a `%XX` sequence.
153fn invalid_percent_escape(index: usize) -> CodecError {
154    CodecError::InvalidEscape {
155        index,
156        escape: "%".to_owned(),
157        reason: "expected two hexadecimal digits".to_owned(),
158    }
159}
160
/// Reports whether a byte can appear in encoded output without escaping.
///
/// # Parameters
/// - `byte`: Byte to inspect.
///
/// # Returns
/// `true` for RFC 3986 unreserved bytes: ASCII letters, ASCII digits, and the
/// marks `-`, `.`, `_` and `~`.
fn is_unreserved(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~')
}
174
/// Maps an ASCII hexadecimal digit to the nibble it denotes.
///
/// Both lowercase and uppercase letters are accepted.
///
/// # Parameters
/// - `byte`: ASCII byte to inspect.
///
/// # Returns
/// Nibble value, or `None` when `byte` is not hex.
fn percent_hex_value(byte: u8) -> Option<u8> {
    // Choose the amount to subtract so that the first byte of each range lands
    // on its nibble value (`0` for digits, `10` for letters).
    let offset = match byte {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(byte - offset)
}
190
/// Renders a nibble as its uppercase hexadecimal digit.
///
/// # Parameters
/// - `value`: Nibble value.
///
/// # Returns
/// Uppercase hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn percent_hex_digit(value: u8) -> char {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
    // Masking keeps the index within the 16-entry table for any input.
    char::from(UPPER_HEX[usize::from(value & 0x0f)])
}