Skip to main content

qubit_codec_misc/
percent_codec.rs

1// =============================================================================
2//    Copyright (c) 2026 Haixing Hu.
3//
4//    SPDX-License-Identifier: Apache-2.0
5//
6//    Licensed under the Apache License, Version 2.0.
7// =============================================================================
8//! Percent text codec.
9
10use crate::{
11    Codec,
12    MiscCodecError,
13    MiscCodecResult,
14    ValueDecoder,
15    ValueEncoder,
16};
17
/// Uppercase hexadecimal digits in ascending order, so the entry at position
/// `n` is the digit for the nibble value `n` (`0..=15`).
18const UPPER_HEX_DIGITS: [char; 16] = [
19    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E',
20    'F',
21];
22
/// Stateless codec for percent-encoded UTF-8 text.
///
/// The text-level helpers work on whole strings. The low-level
/// [`Codec<Value = u8, Unit = u8>`] implementation works on one byte at a
/// time: each byte becomes either a single unreserved ASCII unit or a
/// three-unit `%XX` escape. Checking that decoded bytes form valid UTF-8 is
/// done only by the owned [`decode`](Self::decode) helper, not by the
/// byte-level codec.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PercentCodec;
30
31impl PercentCodec {
32    /// Creates a percent codec.
33    ///
34    /// # Returns
35    /// Percent codec.
36    #[inline]
37    pub fn new() -> Self {
38        Self
39    }
40
41    /// Encodes text using percent encoding.
42    ///
43    /// # Parameters
44    /// - `text`: UTF-8 text to encode.
45    ///
46    /// # Returns
47    /// Percent-encoded text.
48    #[inline]
49    pub fn encode(&self, text: &str) -> String {
50        percent_encode_bytes(text.as_bytes(), false)
51    }
52
53    /// Decodes percent-encoded UTF-8 text.
54    ///
55    /// # Parameters
56    /// - `text`: Percent-encoded text.
57    ///
58    /// # Returns
59    /// Decoded UTF-8 text.
60    ///
61    /// # Errors
62    /// Returns [`MiscCodecError`] when a percent escape is malformed or decoded
63    /// bytes are not valid UTF-8.
64    #[inline]
65    pub fn decode(&self, text: &str) -> MiscCodecResult<String> {
66        String::from_utf8(percent_decode_bytes(text, false)?)
67            .map_err(MiscCodecError::from)
68    }
69}
70
71impl ValueEncoder<str> for PercentCodec {
72    type Error = MiscCodecError;
73    type Output = String;
74
75    /// Encodes text using percent encoding.
76    #[inline]
77    fn encode(&self, input: &str) -> Result<Self::Output, Self::Error> {
78        Ok(PercentCodec::encode(self, input))
79    }
80}
81
82impl ValueDecoder<str> for PercentCodec {
83    type Error = MiscCodecError;
84    type Output = String;
85
86    /// Decodes percent-encoded text.
87    #[inline]
88    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
89        PercentCodec::decode(self, input)
90    }
91}
92
93unsafe impl Codec for PercentCodec {
94    type Value = u8;
95    type Unit = u8;
96    type DecodeError = MiscCodecError;
97    type EncodeError = MiscCodecError;
98
99    /// Returns the shortest representation length for one byte.
100    #[inline(always)]
101    fn min_units_per_value(&self) -> core::num::NonZeroUsize {
102        core::num::NonZeroUsize::MIN
103    }
104
105    /// Returns the longest `%XX` representation length for one byte.
106    #[inline(always)]
107    fn max_units_per_value(&self) -> core::num::NonZeroUsize {
108        unsafe { core::num::NonZeroUsize::new_unchecked(3) }
109    }
110
111    /// Decodes one raw byte or `%XX` escape.
112    #[inline]
113    unsafe fn decode_unchecked(
114        &self,
115        input: &[u8],
116        index: usize,
117    ) -> Result<(u8, core::num::NonZeroUsize), Self::DecodeError> {
118        debug_assert!(index < input.len());
119
120        let (value, consumed) = percent_decode_byte(input, index, false)?;
121        debug_assert!(consumed > 0);
122        // SAFETY: `percent_decode_byte` returns a non-zero width for every
123        // successful raw byte or escape.
124        let consumed =
125            unsafe { core::num::NonZeroUsize::new_unchecked(consumed) };
126        Ok((value, consumed))
127    }
128
129    /// Encodes one byte using percent encoding.
130    #[inline]
131    unsafe fn encode_unchecked(
132        &self,
133        value: &u8,
134        output: &mut [u8],
135        index: usize,
136    ) -> Result<usize, Self::EncodeError> {
137        debug_assert!(
138            index + if is_unreserved(*value) { 1 } else { 3 } <= output.len()
139        );
140
141        Ok(percent_encode_byte(*value, output, index, false))
142    }
143}
144
145/// Percent-encodes UTF-8 bytes.
146///
147/// # Parameters
148/// - `bytes`: Bytes to encode.
149/// - `space_as_plus`: Whether spaces should be encoded as `+`.
150///
151/// # Returns
152/// Encoded text.
153#[inline]
154pub(crate) fn percent_encode_bytes(
155    bytes: &[u8],
156    space_as_plus: bool,
157) -> String {
158    let mut output = String::with_capacity(bytes.len());
159    for byte in bytes {
160        if *byte == b' ' && space_as_plus {
161            output.push('+');
162        } else if is_unreserved(*byte) {
163            output.push(*byte as char);
164        } else {
165            output.push('%');
166            output.push(percent_hex_digit(byte >> 4));
167            output.push(percent_hex_digit(byte & 0x0f));
168        }
169    }
170    output
171}
172
173/// Percent-decodes UTF-8 bytes.
174///
175/// # Parameters
176/// - `text`: Text to decode.
177/// - `plus_as_space`: Whether `+` should decode to a space byte.
178///
179/// # Returns
180/// Decoded bytes.
181///
182/// # Errors
183/// Returns [`MiscCodecError::InvalidEscape`] for malformed escapes.
184#[inline]
185pub(crate) fn percent_decode_bytes(
186    text: &str,
187    plus_as_space: bool,
188) -> MiscCodecResult<Vec<u8>> {
189    let bytes = text.as_bytes();
190    let mut output = Vec::with_capacity(bytes.len());
191    let mut index = 0;
192    while index < bytes.len() {
193        let (decoded, consumed) =
194            percent_decode_byte(bytes, index, plus_as_space)?;
195        output.push(decoded);
196        index += consumed;
197    }
198    Ok(output)
199}
200
201/// Percent-encodes one byte into `output`.
202///
203/// # Parameters
204/// - `byte`: Byte to encode.
205/// - `output`: Destination unit buffer.
206/// - `index`: Start index in `output`.
207/// - `space_as_plus`: Whether spaces should be encoded as `+`.
208///
209/// # Returns
210/// Number of units written.
211#[inline]
212pub(crate) fn percent_encode_byte(
213    byte: u8,
214    output: &mut [u8],
215    index: usize,
216    space_as_plus: bool,
217) -> usize {
218    if byte == b' ' && space_as_plus {
219        output[index] = b'+';
220        return 1;
221    }
222    if is_unreserved(byte) {
223        output[index] = byte;
224        return 1;
225    }
226    output[index] = b'%';
227    output[index + 1] = percent_hex_digit(byte >> 4) as u8;
228    output[index + 2] = percent_hex_digit(byte & 0x0f) as u8;
229    3
230}
231
232/// Decodes one raw byte or `%XX` escape from `input`.
233///
234/// # Parameters
235/// - `input`: Encoded byte units.
236/// - `index`: Start index in `input`.
237/// - `plus_as_space`: Whether `+` should decode to a space byte.
238///
239/// # Returns
240/// Decoded byte and the number of consumed units.
241///
242/// # Errors
243/// Returns [`MiscCodecError::InvalidEscape`] for malformed `%XX` escapes.
244#[inline]
245pub(crate) fn percent_decode_byte(
246    input: &[u8],
247    index: usize,
248    plus_as_space: bool,
249) -> MiscCodecResult<(u8, usize)> {
250    let available = input.len().saturating_sub(index);
251    if available == 0 {
252        return Err(MiscCodecError::Incomplete {
253            required: 1,
254            available,
255        });
256    }
257    match input[index] {
258        b'%' => {
259            if available < 3 {
260                return Err(MiscCodecError::Incomplete {
261                    required: 3,
262                    available,
263                });
264            }
265            let (Some(&high_byte), Some(&low_byte)) =
266                (input.get(index + 1), input.get(index + 2))
267            else {
268                return Err(invalid_percent_escape(index));
269            };
270            let high = percent_hex_value(high_byte)
271                .ok_or_else(|| invalid_percent_escape(index))?;
272            let low = percent_hex_value(low_byte)
273                .ok_or_else(|| invalid_percent_escape(index))?;
274            Ok(((high << 4) | low, 3))
275        }
276        b'+' if plus_as_space => Ok((b' ', 1)),
277        byte => Ok((byte, 1)),
278    }
279}
280
281/// Builds a malformed percent escape error.
282///
283/// # Parameters
284/// - `index`: Byte index of the `%` marker in the original input.
285///
286/// # Returns
287/// An invalid escape error for a `%XX` sequence.
288fn invalid_percent_escape(index: usize) -> MiscCodecError {
289    MiscCodecError::InvalidEscape {
290        index,
291        escape: "%".to_owned(),
292        reason: "expected two hexadecimal digits".to_owned(),
293    }
294}
295
/// Reports whether a byte can be emitted without escaping.
///
/// # Parameters
/// - `byte`: Byte to inspect.
///
/// # Returns
/// `true` for RFC 3986 unreserved bytes: ASCII letters, ASCII digits, `-`,
/// `.`, `_` and `~`.
#[inline(always)]
fn is_unreserved(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~')
}
310
/// Maps one ASCII hexadecimal digit to the nibble it denotes.
///
/// Both lowercase and uppercase letters are accepted.
///
/// # Parameters
/// - `byte`: ASCII byte to inspect.
///
/// # Returns
/// Nibble value, or `None` when `byte` is not hex.
#[inline(always)]
fn percent_hex_value(byte: u8) -> Option<u8> {
    char::from(byte)
        .to_digit(16)
        // A base-16 digit is always below 16, so the cast cannot truncate.
        .map(|nibble| nibble as u8)
}
327
328/// Converts one nibble to an uppercase hexadecimal digit.
329///
330/// # Parameters
331/// - `value`: Nibble value.
332///
333/// # Returns
334/// Uppercase hexadecimal digit. Values above `0x0f` are masked to their low
335/// nibble.
336#[inline(always)]
337fn percent_hex_digit(value: u8) -> char {
338    UPPER_HEX_DIGITS[(value & 0x0f) as usize]
339}