Skip to main content

qubit_codec/
percent_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Percent text codec.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16    Encoder,
17};
18
/// Stateless codec for percent-encoded UTF-8 text.
///
/// This is a unit struct: it holds no configuration, so every instance is
/// interchangeable and copying it is free.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PercentCodec;
22
23impl PercentCodec {
24    /// Creates a percent codec.
25    ///
26    /// # Returns
27    /// Percent codec.
28    pub fn new() -> Self {
29        Self
30    }
31
32    /// Encodes text using percent encoding.
33    ///
34    /// # Parameters
35    /// - `text`: UTF-8 text to encode.
36    ///
37    /// # Returns
38    /// Percent-encoded text.
39    pub fn encode(&self, text: &str) -> String {
40        percent_encode_bytes(text.as_bytes(), false)
41    }
42
43    /// Decodes percent-encoded UTF-8 text.
44    ///
45    /// # Parameters
46    /// - `text`: Percent-encoded text.
47    ///
48    /// # Returns
49    /// Decoded UTF-8 text.
50    ///
51    /// # Errors
52    /// Returns [`CodecError`] when a percent escape is malformed or decoded
53    /// bytes are not valid UTF-8.
54    pub fn decode(&self, text: &str) -> CodecResult<String> {
55        String::from_utf8(percent_decode_bytes(text, false)?).map_err(CodecError::from)
56    }
57}
58
59impl Encoder<str> for PercentCodec {
60    type Error = CodecError;
61    type Output = String;
62
63    /// Encodes text using percent encoding.
64    fn encode(&self, input: &str) -> Result<Self::Output, Self::Error> {
65        Ok(PercentCodec::encode(self, input))
66    }
67}
68
69impl Decoder<str> for PercentCodec {
70    type Error = CodecError;
71    type Output = String;
72
73    /// Decodes percent-encoded text.
74    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
75        PercentCodec::decode(self, input)
76    }
77}
78
79/// Percent-encodes UTF-8 bytes.
80///
81/// # Parameters
82/// - `bytes`: Bytes to encode.
83/// - `space_as_plus`: Whether spaces should be encoded as `+`.
84///
85/// # Returns
86/// Encoded text.
87pub(crate) fn percent_encode_bytes(bytes: &[u8], space_as_plus: bool) -> String {
88    let mut output = String::with_capacity(bytes.len());
89    for byte in bytes {
90        if *byte == b' ' && space_as_plus {
91            output.push('+');
92        } else if is_unreserved(*byte) {
93            output.push(*byte as char);
94        } else {
95            output.push('%');
96            output.push(percent_hex_digit(byte >> 4));
97            output.push(percent_hex_digit(byte & 0x0f));
98        }
99    }
100    output
101}
102
103/// Percent-decodes UTF-8 bytes.
104///
105/// # Parameters
106/// - `text`: Text to decode.
107/// - `plus_as_space`: Whether `+` should decode to a space byte.
108///
109/// # Returns
110/// Decoded bytes.
111///
112/// # Errors
113/// Returns [`CodecError::InvalidEscape`] for malformed escapes.
114pub(crate) fn percent_decode_bytes(text: &str, plus_as_space: bool) -> CodecResult<Vec<u8>> {
115    let bytes = text.as_bytes();
116    let mut output = Vec::with_capacity(bytes.len());
117    let mut index = 0;
118    while let Some(&byte) = bytes.get(index) {
119        match byte {
120            b'%' => {
121                let (Some(&high_byte), Some(&low_byte)) = (bytes.get(index + 1), bytes.get(index + 2)) else {
122                    return Err(invalid_percent_escape(index));
123                };
124                let high = percent_hex_value(high_byte).ok_or_else(|| invalid_percent_escape(index))?;
125                let low = percent_hex_value(low_byte).ok_or_else(|| invalid_percent_escape(index))?;
126                output.push((high << 4) | low);
127                index += 3;
128            }
129            b'+' if plus_as_space => {
130                output.push(b' ');
131                index += 1;
132            }
133            byte => {
134                output.push(byte);
135                index += 1;
136            }
137        }
138    }
139    Ok(output)
140}
141
142/// Builds a malformed percent escape error.
143///
144/// # Parameters
145/// - `index`: Byte index of the `%` marker in the original input.
146///
147/// # Returns
148/// An invalid escape error for a `%XX` sequence.
149fn invalid_percent_escape(index: usize) -> CodecError {
150    CodecError::InvalidEscape {
151        index,
152        escape: "%".to_owned(),
153        reason: "expected two hexadecimal digits".to_owned(),
154    }
155}
156
/// Reports whether a byte can be emitted without escaping.
///
/// # Parameters
/// - `byte`: Byte to inspect.
///
/// # Returns
/// `true` for RFC 3986 unreserved bytes: ASCII letters, ASCII digits, and
/// the four marks `-`, `.`, `_`, `~`.
fn is_unreserved(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || b"-._~".contains(&byte)
}
170
/// Maps one ASCII hexadecimal digit to the nibble it denotes.
///
/// Both lowercase and uppercase letters are accepted.
///
/// # Parameters
/// - `byte`: ASCII byte to inspect.
///
/// # Returns
/// Nibble value in `0..=15`, or `None` when `byte` is not a hex digit.
fn percent_hex_value(byte: u8) -> Option<u8> {
    if byte.is_ascii_digit() {
        return Some(byte - b'0');
    }
    // Fold `A`-`F` onto `a`-`f` so one range check covers both cases.
    let folded = byte.to_ascii_lowercase();
    (b'a'..=b'f').contains(&folded).then(|| folded - b'a' + 10)
}
186
/// Renders one nibble as an uppercase hexadecimal digit.
///
/// # Parameters
/// - `value`: Nibble value.
///
/// # Returns
/// Uppercase hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn percent_hex_digit(value: u8) -> char {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
    // The mask keeps the index within 0..=15, so the lookup cannot go out of bounds.
    char::from(UPPER_HEX[usize::from(value & 0x0f)])
}
213}