pocopine_codec/lib.rs
1//! Shared encoding / codec utilities for the pocopine workspace.
2//!
3//! The point of this crate is that crates never re-implement encoding helpers or
4//! their serde adapters and never reach for `base64` or `percent-encoding`
5//! directly. Add new codecs here rather than inlining them per crate.
6//!
7//! ```
8//! use pocopine_codec::{base64_decode, base64_encode};
9//!
10//! assert_eq!(base64_encode(b"hi"), "aGk=");
11//! assert_eq!(base64_decode("aGk=").unwrap(), b"hi");
12//! ```
13//!
//! Percent-encoding goes through one component encoder, which escapes every
//! byte outside the RFC 3986 "unreserved" set (what URL path segments, query
//! parts, and fragments want), plus a lossy decoder. Pass a custom
//! [`AsciiSet`] to [`percent_encode_set`] when a backend needs a different
//! escape set.
18//!
19//! ```
20//! use pocopine_codec::{percent_decode, percent_encode};
21//!
22//! assert_eq!(percent_encode("a b/c~d"), "a%20b%2Fc~d");
23//! assert_eq!(percent_decode("a%20b+c", true), "a b c");
24//! ```
25//!
26//! For a `Vec<u8>` struct field that should serialize as a base64 string, use the
27//! [`base64_bytes`] serde adapter:
28//!
29//! ```
30//! use serde::{Deserialize, Serialize};
31//!
32//! #[derive(Serialize, Deserialize)]
33//! struct Chunk {
34//! #[serde(with = "pocopine_codec::base64_bytes", default, skip_serializing_if = "Vec::is_empty")]
35//! payload: Vec<u8>,
36//! }
37//! ```
38//!
39//! This crate is `no_std` (it only needs `alloc`).
40
41#![cfg_attr(not(test), no_std)]
42
43extern crate alloc;
44
45use alloc::string::{String, ToString};
46use alloc::vec::Vec;
47
48pub use base64;
49pub use percent_encoding;
50// Re-exported so callers can express a custom percent-encode set
51// (e.g. a backend-specific path-segment set) without depending on
52// `percent-encoding` directly. Pair with [`percent_encode_set`].
53pub use percent_encoding::{AsciiSet, CONTROLS, NON_ALPHANUMERIC};
54
55use base64::engine::general_purpose::STANDARD;
56use base64::Engine as _;
57use percent_encoding::{percent_decode_str, utf8_percent_encode};
58
59/// Encode bytes as a standard (padded) base64 string.
60pub fn base64_encode(bytes: &[u8]) -> String {
61 STANDARD.encode(bytes)
62}
63
64/// Decode a standard (padded) base64 string.
65pub fn base64_decode(encoded: &str) -> Result<Vec<u8>, base64::DecodeError> {
66 STANDARD.decode(encoded)
67}
68
69/// RFC 3986 "component" encode set: everything **except** the unreserved
70/// set `A-Z` / `a-z` / `0-9` / `-` / `.` / `_` / `~` is percent-encoded
71/// (uppercase hex). Non-ASCII bytes are always encoded. This is the
72/// conservative encoder URL path segments, query keys/values, and
73/// fragments want — equivalent to JS `encodeURIComponent` minus its
74/// `!*'()` carve-outs.
75const COMPONENT: &AsciiSet = &NON_ALPHANUMERIC
76 .remove(b'-')
77 .remove(b'.')
78 .remove(b'_')
79 .remove(b'~');
80
81/// Percent-encode `s` with the RFC 3986 component set (see [`COMPONENT`]).
82///
83/// Use this for a single URL path segment, query key/value, or fragment.
84pub fn percent_encode(s: &str) -> String {
85 utf8_percent_encode(s, COMPONENT).to_string()
86}
87
88/// Percent-encode `s` with the component set, appending into `out`.
89///
90/// Allocation-free variant of [`percent_encode`] for callers building a
91/// URL piece by piece.
92pub fn percent_encode_into(out: &mut String, s: &str) {
93 out.extend(utf8_percent_encode(s, COMPONENT));
94}
95
96/// Percent-encode `s` with a caller-supplied [`AsciiSet`].
97///
98/// For the common case prefer [`percent_encode`]. Reach for this only when
99/// a backend needs a different escape set — build one from the re-exported
100/// [`CONTROLS`] / [`NON_ALPHANUMERIC`] + [`AsciiSet::add`]/[`AsciiSet::remove`].
101pub fn percent_encode_set(s: &str, set: &'static AsciiSet) -> String {
102 utf8_percent_encode(s, set).to_string()
103}
104
105/// Percent-decode `s`, lossily (invalid UTF-8 becomes U+FFFD, and a `%`
106/// not followed by two hex digits is passed through verbatim).
107///
108/// When `plus_as_space` is set, `+` is first replaced with a space — the
109/// `application/x-www-form-urlencoded` query semantics. Leave it `false`
110/// for path segments, where `+` is a literal plus.
111pub fn percent_decode(s: &str, plus_as_space: bool) -> String {
112 if plus_as_space && s.contains('+') {
113 let replaced = s.replace('+', " ");
114 percent_decode_str(&replaced)
115 .decode_utf8_lossy()
116 .into_owned()
117 } else {
118 percent_decode_str(s).decode_utf8_lossy().into_owned()
119 }
120}
121
122/// Serde adapter for a `Vec<u8>` field encoded as a base64 string.
123///
124/// Use via `#[serde(with = "pocopine_codec::base64_bytes")]`. Pair with
125/// `#[serde(default, skip_serializing_if = "Vec::is_empty")]` to omit empty values.
126pub mod base64_bytes {
127 use alloc::string::String;
128 use alloc::vec::Vec;
129
130 use serde::{Deserialize, Deserializer, Serializer};
131
132 pub fn serialize<S: Serializer>(bytes: &[u8], serializer: S) -> Result<S::Ok, S::Error> {
133 serializer.serialize_str(&super::base64_encode(bytes))
134 }
135
136 pub fn deserialize<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<u8>, D::Error> {
137 let encoded = String::deserialize(deserializer)?;
138 super::base64_decode(&encoded).map_err(serde::de::Error::custom)
139 }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture for the serde adapter test: a single byte field routed
    /// through `base64_bytes` and omitted from the output when empty.
    #[derive(serde::Serialize, serde::Deserialize, PartialEq, Debug)]
    struct Chunk {
        #[serde(with = "base64_bytes", default, skip_serializing_if = "Vec::is_empty")]
        payload: Vec<u8>,
    }

    #[test]
    fn base64_round_trips() {
        let samples: [&[u8]; 4] = [b"", b"a", b"hello world", &[0, 255, 128, 1]];
        for sample in samples {
            let text = base64_encode(sample);
            let decoded = base64_decode(&text).unwrap();
            assert_eq!(decoded, sample);
        }
    }

    #[test]
    fn known_base64_vectors() {
        let raw: &[u8] = b"hi";
        let text = "aGk=";
        assert_eq!(base64_encode(raw), text);
        assert_eq!(base64_decode(text).unwrap(), raw);
    }

    #[test]
    fn invalid_base64_is_rejected() {
        let outcome = base64_decode("not valid base64!!!");
        assert!(outcome.is_err());
    }

    #[test]
    fn percent_encode_keeps_unreserved_and_escapes_the_rest() {
        let cases = [
            // The unreserved set is left exactly as written.
            ("AZaz09-._~", "AZaz09-._~"),
            // Reserved characters and delimiters turn into uppercase %XX.
            ("a b/c?d#e", "a%20b%2Fc%3Fd%23e"),
            // Non-ASCII text is escaped one UTF-8 byte at a time.
            ("é", "%C3%A9"),
        ];
        for (raw, escaped) in cases {
            assert_eq!(percent_encode(raw), escaped);
        }
    }

    #[test]
    fn percent_encode_into_appends() {
        let mut url = String::from("p/");
        percent_encode_into(&mut url, "a b");
        assert_eq!(url, "p/a%20b");
    }

    #[test]
    fn percent_encode_set_honors_custom_set() {
        // Controls plus the space: `/` and `~` must come through untouched.
        const ESCAPE_SPACES: &AsciiSet = &CONTROLS.add(b' ');
        let spaced = percent_encode_set("a b/c~d", ESCAPE_SPACES);
        assert_eq!(spaced, "a%20b/c~d");

        // The stock NON_ALPHANUMERIC set also escapes unreserved punctuation.
        let strict = percent_encode_set("a-b.c", NON_ALPHANUMERIC);
        assert_eq!(strict, "a%2Db%2Ec");
    }

    #[test]
    fn percent_decode_round_trips_component() {
        for original in ["", "plain", "a b/c?d#e", "é", "100%done"] {
            let escaped = percent_encode(original);
            assert_eq!(percent_decode(&escaped, false), original);
        }
    }

    #[test]
    fn percent_decode_plus_semantics() {
        let wire = "a+b%20c";
        let cases = [
            // Query semantics: `+` stands for a space.
            (true, "a b c"),
            // Path semantics: `+` stays a literal plus.
            (false, "a+b c"),
        ];
        for (plus_as_space, expected) in cases {
            assert_eq!(percent_decode(wire, plus_as_space), expected);
        }
    }

    #[test]
    fn percent_decode_is_lossy_and_passes_through_stray_percent() {
        let cases = [
            // Bytes that are not valid UTF-8 become the replacement character.
            ("%FF", "\u{FFFD}"),
            // A `%` without two hex digits after it is kept as-is.
            ("100%zz", "100%zz"),
        ];
        for (wire, expected) in cases {
            assert_eq!(percent_decode(wire, false), expected);
        }
    }

    #[test]
    fn serde_adapter_round_trips_and_omits_empty() {
        // Non-empty payload: emitted as base64 and read back unchanged.
        let filled = Chunk {
            payload: alloc::vec![1, 2, 3, 4],
        };
        let filled_json = serde_json::to_string(&filled).unwrap();
        assert!(filled_json.contains("AQIDBA=="));
        let reparsed: Chunk = serde_json::from_str(&filled_json).unwrap();
        assert_eq!(reparsed, filled);

        // Empty payload: the field is skipped on output and defaulted on input.
        let blank = Chunk {
            payload: Vec::new(),
        };
        let blank_json = serde_json::to_string(&blank).unwrap();
        assert_eq!(blank_json, "{}");
        let reparsed: Chunk = serde_json::from_str("{}").unwrap();
        assert_eq!(reparsed, blank);
    }
}