qubit_codec/percent_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Percent text codec.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// Stateless codec that converts between UTF-8 text and its percent-encoded form.
///
/// The type is a zero-sized unit struct, so values are free to copy and compare.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PercentCodec;
22
23impl PercentCodec {
24 /// Creates a percent codec.
25 ///
26 /// # Returns
27 /// Percent codec.
28 pub fn new() -> Self {
29 Self
30 }
31
32 /// Encodes text using percent encoding.
33 ///
34 /// # Parameters
35 /// - `text`: UTF-8 text to encode.
36 ///
37 /// # Returns
38 /// Percent-encoded text.
39 pub fn encode(&self, text: &str) -> String {
40 percent_encode_bytes(text.as_bytes(), false)
41 }
42
43 /// Decodes percent-encoded UTF-8 text.
44 ///
45 /// # Parameters
46 /// - `text`: Percent-encoded text.
47 ///
48 /// # Returns
49 /// Decoded UTF-8 text.
50 ///
51 /// # Errors
52 /// Returns [`CodecError`] when a percent escape is malformed or decoded
53 /// bytes are not valid UTF-8.
54 pub fn decode(&self, text: &str) -> CodecResult<String> {
55 String::from_utf8(percent_decode_bytes(text, false)?).map_err(CodecError::from)
56 }
57}
58
59impl Encoder<str> for PercentCodec {
60 type Error = CodecError;
61 type Output = String;
62
63 /// Encodes text using percent encoding.
64 fn encode(&self, input: &str) -> Result<Self::Output, Self::Error> {
65 Ok(PercentCodec::encode(self, input))
66 }
67}
68
69impl Decoder<str> for PercentCodec {
70 type Error = CodecError;
71 type Output = String;
72
73 /// Decodes percent-encoded text.
74 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
75 PercentCodec::decode(self, input)
76 }
77}
78
79/// Percent-encodes UTF-8 bytes.
80///
81/// # Parameters
82/// - `bytes`: Bytes to encode.
83/// - `space_as_plus`: Whether spaces should be encoded as `+`.
84///
85/// # Returns
86/// Encoded text.
87pub(crate) fn percent_encode_bytes(bytes: &[u8], space_as_plus: bool) -> String {
88 let mut output = String::with_capacity(bytes.len());
89 for byte in bytes {
90 if *byte == b' ' && space_as_plus {
91 output.push('+');
92 } else if is_unreserved(*byte) {
93 output.push(*byte as char);
94 } else {
95 output.push('%');
96 output.push(percent_hex_digit(byte >> 4));
97 output.push(percent_hex_digit(byte & 0x0f));
98 }
99 }
100 output
101}
102
103/// Percent-decodes UTF-8 bytes.
104///
105/// # Parameters
106/// - `text`: Text to decode.
107/// - `plus_as_space`: Whether `+` should decode to a space byte.
108///
109/// # Returns
110/// Decoded bytes.
111///
112/// # Errors
113/// Returns [`CodecError::InvalidEscape`] for malformed escapes.
114pub(crate) fn percent_decode_bytes(text: &str, plus_as_space: bool) -> CodecResult<Vec<u8>> {
115 let bytes = text.as_bytes();
116 let mut output = Vec::with_capacity(bytes.len());
117 let mut index = 0;
118 while let Some(&byte) = bytes.get(index) {
119 match byte {
120 b'%' => {
121 let (Some(&high_byte), Some(&low_byte)) =
122 (bytes.get(index + 1), bytes.get(index + 2))
123 else {
124 return Err(invalid_percent_escape(index));
125 };
126 let high =
127 percent_hex_value(high_byte).ok_or_else(|| invalid_percent_escape(index))?;
128 let low =
129 percent_hex_value(low_byte).ok_or_else(|| invalid_percent_escape(index))?;
130 output.push((high << 4) | low);
131 index += 3;
132 }
133 b'+' if plus_as_space => {
134 output.push(b' ');
135 index += 1;
136 }
137 byte => {
138 output.push(byte);
139 index += 1;
140 }
141 }
142 }
143 Ok(output)
144}
145
146/// Builds a malformed percent escape error.
147///
148/// # Parameters
149/// - `index`: Byte index of the `%` marker in the original input.
150///
151/// # Returns
152/// An invalid escape error for a `%XX` sequence.
153fn invalid_percent_escape(index: usize) -> CodecError {
154 CodecError::InvalidEscape {
155 index,
156 escape: "%".to_owned(),
157 reason: "expected two hexadecimal digits".to_owned(),
158 }
159}
160
/// Reports whether a byte can be emitted without percent-escaping.
///
/// # Parameters
/// - `byte`: Byte to inspect.
///
/// # Returns
/// `true` for RFC 3986 unreserved bytes: ASCII letters, ASCII digits, and
/// `-`, `.`, `_`, `~`.
fn is_unreserved(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~')
}
174
/// Maps an ASCII hexadecimal digit to the nibble it denotes.
///
/// Both lowercase and uppercase letters `a`-`f` are accepted.
///
/// # Parameters
/// - `byte`: ASCII byte to inspect.
///
/// # Returns
/// Nibble value, or `None` when `byte` is not hex.
fn percent_hex_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` yields 0..=15, so narrowing back to `u8` cannot truncate.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}
190
/// Renders a nibble as its uppercase hexadecimal digit.
///
/// # Parameters
/// - `value`: Nibble value.
///
/// # Returns
/// Uppercase hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn percent_hex_digit(value: u8) -> char {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
    // The mask keeps the index within 0..=15, so the lookup is always in bounds.
    char::from(UPPER_HEX[usize::from(value & 0x0f)])
}
217}