coreutils_rs/base64/
core.rs1use std::io::{self, Read, Write};
2
3use base64_simd::AsOut;
4
5const BASE64_ENGINE: &base64_simd::Base64 = &base64_simd::STANDARD;
6
/// Number of input bytes read per iteration by the streaming encoder.
///
/// This is 4 MiB rounded down to a multiple of 3, so a completely filled chunk
/// encodes to whole 4-character groups without `=` padding.
const STREAM_ENCODE_CHUNK: usize = (4 * 1024 * 1024 / 3) * 3;

/// Number of input bytes encoded per iteration by the unwrapped slice encoder.
///
/// This is 4 MiB rounded down to a multiple of 3, so only the final chunk of an
/// input can produce `=` padding.
const NOWRAP_CHUNK: usize = (4 * 1024 * 1024 / 3) * 3;
12
13pub fn encode_to_writer(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
16 if data.is_empty() {
17 return Ok(());
18 }
19
20 if wrap_col == 0 {
21 return encode_no_wrap(data, out);
22 }
23
24 encode_wrapped(data, wrap_col, out)
25}
26
27fn encode_no_wrap(data: &[u8], out: &mut impl Write) -> io::Result<()> {
31 let enc_max = BASE64_ENGINE.encoded_length(NOWRAP_CHUNK);
32 let mut buf = vec![0u8; enc_max];
33
34 for chunk in data.chunks(NOWRAP_CHUNK) {
35 let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
36 let encoded = BASE64_ENGINE.encode(chunk, buf[..enc_len].as_out());
37 out.write_all(encoded)?;
38 }
39 Ok(())
40}
41
42fn encode_wrapped(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
46 let bytes_per_line = wrap_col * 3 / 4;
47
48 let lines_per_chunk = (4 * 1024 * 1024) / bytes_per_line;
51 let chunk_input = lines_per_chunk * bytes_per_line;
52 let chunk_encoded_max = BASE64_ENGINE.encoded_length(chunk_input);
53
54 let mut encode_buf = vec![0u8; chunk_encoded_max];
56 let wrapped_max = (lines_per_chunk + 1) * (wrap_col + 1);
58 let mut wrap_buf = vec![0u8; wrapped_max];
59
60 for chunk in data.chunks(chunk_input) {
61 let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
62 let encoded = BASE64_ENGINE.encode(chunk, encode_buf[..enc_len].as_out());
63
64 let mut rp = 0;
66 let mut wp = 0;
67
68 while rp + wrap_col <= encoded.len() {
69 wrap_buf[wp..wp + wrap_col].copy_from_slice(&encoded[rp..rp + wrap_col]);
70 wp += wrap_col;
71 wrap_buf[wp] = b'\n';
72 wp += 1;
73 rp += wrap_col;
74 }
75
76 if rp < encoded.len() {
77 let remaining = encoded.len() - rp;
78 wrap_buf[wp..wp + remaining].copy_from_slice(&encoded[rp..rp + remaining]);
79 wp += remaining;
80 wrap_buf[wp] = b'\n';
81 wp += 1;
82 }
83
84 out.write_all(&wrap_buf[..wp])?;
86 }
87
88 Ok(())
89}
90
91pub fn decode_to_writer(data: &[u8], ignore_garbage: bool, out: &mut impl Write) -> io::Result<()> {
95 if data.is_empty() {
96 return Ok(());
97 }
98
99 if ignore_garbage {
100 let mut cleaned = strip_non_base64(data);
101 return decode_owned_clean(&mut cleaned, out);
102 }
103
104 decode_stripping_whitespace(data, out)
106}
107
108pub fn decode_owned(
112 data: &mut Vec<u8>,
113 ignore_garbage: bool,
114 out: &mut impl Write,
115) -> io::Result<()> {
116 if data.is_empty() {
117 return Ok(());
118 }
119
120 if ignore_garbage {
121 data.retain(|&b| is_base64_char(b));
122 } else {
123 strip_whitespace_inplace(data);
124 }
125
126 decode_owned_clean(data, out)
127}
128
129fn strip_whitespace_inplace(data: &mut Vec<u8>) {
132 let positions: Vec<usize> = memchr::memchr_iter(b'\n', data.as_slice()).collect();
134
135 if positions.is_empty() {
136 if data.iter().any(|&b| is_whitespace(b)) {
138 data.retain(|&b| !is_whitespace(b));
139 }
140 return;
141 }
142
143 let mut wp = 0;
145 let mut rp = 0;
146
147 for &pos in &positions {
148 if pos > rp {
149 let len = pos - rp;
150 data.copy_within(rp..pos, wp);
151 wp += len;
152 }
153 rp = pos + 1;
154 }
155
156 let data_len = data.len();
157 if rp < data_len {
158 let len = data_len - rp;
159 data.copy_within(rp..data_len, wp);
160 wp += len;
161 }
162
163 data.truncate(wp);
164
165 if data.iter().any(|&b| is_whitespace(b)) {
167 data.retain(|&b| !is_whitespace(b));
168 }
169}
170
171fn decode_stripping_whitespace(data: &[u8], out: &mut impl Write) -> io::Result<()> {
174 if memchr::memchr(b'\n', data).is_none() && !data.iter().any(|&b| is_whitespace(b)) {
176 return decode_borrowed_clean(out, data);
177 }
178
179 let mut clean = Vec::with_capacity(data.len());
181 let mut last = 0;
182 for pos in memchr::memchr_iter(b'\n', data) {
183 if pos > last {
184 clean.extend_from_slice(&data[last..pos]);
185 }
186 last = pos + 1;
187 }
188 if last < data.len() {
189 clean.extend_from_slice(&data[last..]);
190 }
191
192 if clean.iter().any(|&b| is_whitespace(b)) {
194 clean.retain(|&b| !is_whitespace(b));
195 }
196
197 decode_owned_clean(&mut clean, out)
198}
199
200fn decode_owned_clean(data: &mut [u8], out: &mut impl Write) -> io::Result<()> {
202 if data.is_empty() {
203 return Ok(());
204 }
205 match BASE64_ENGINE.decode_inplace(data) {
206 Ok(decoded) => out.write_all(decoded),
207 Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
208 }
209}
210
211fn decode_borrowed_clean(out: &mut impl Write, data: &[u8]) -> io::Result<()> {
213 if data.is_empty() {
214 return Ok(());
215 }
216 match BASE64_ENGINE.decode_to_vec(data) {
217 Ok(decoded) => {
218 out.write_all(&decoded)?;
219 Ok(())
220 }
221 Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
222 }
223}
224
225fn strip_non_base64(data: &[u8]) -> Vec<u8> {
227 data.iter()
228 .copied()
229 .filter(|&b| is_base64_char(b))
230 .collect()
231}
232
/// Returns `true` for bytes of the standard base64 alphabet
/// (`A-Z`, `a-z`, `0-9`, `+`, `/`) and for the padding byte `=`.
#[inline]
fn is_base64_char(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'='
    )
}
238
/// Returns `true` for space, horizontal tab, line feed, vertical tab,
/// form feed and carriage return.
#[inline]
fn is_whitespace(b: u8) -> bool {
    // 0x09..=0x0d covers \t, \n, vertical tab, form feed and \r.
    b == b' ' || (0x09..=0x0d).contains(&b)
}
244
245pub fn encode_stream(
249 reader: &mut impl Read,
250 wrap_col: usize,
251 writer: &mut impl Write,
252) -> io::Result<()> {
253 let mut buf = vec![0u8; STREAM_ENCODE_CHUNK];
254
255 let encode_buf_size = BASE64_ENGINE.encoded_length(STREAM_ENCODE_CHUNK);
256 let mut encode_buf = vec![0u8; encode_buf_size];
257
258 if wrap_col == 0 {
259 loop {
261 let n = read_full(reader, &mut buf)?;
262 if n == 0 {
263 break;
264 }
265 let enc_len = BASE64_ENGINE.encoded_length(n);
266 let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());
267 writer.write_all(encoded)?;
268 }
269 } else {
270 let max_wrapped = encode_buf_size + (encode_buf_size / wrap_col + 2);
273 let mut wrap_buf = vec![0u8; max_wrapped];
274 let mut col = 0usize;
275
276 loop {
277 let n = read_full(reader, &mut buf)?;
278 if n == 0 {
279 break;
280 }
281 let enc_len = BASE64_ENGINE.encoded_length(n);
282 let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());
283
284 let wp = build_wrapped_output(encoded, wrap_col, &mut col, &mut wrap_buf);
286 writer.write_all(&wrap_buf[..wp])?;
287 }
288
289 if col > 0 {
290 writer.write_all(b"\n")?;
291 }
292 }
293
294 Ok(())
295}
296
/// Copies `data` into `wrap_buf`, inserting `\n` whenever the running column
/// reaches `wrap_col`, and returns the number of bytes written.
///
/// `col` is the number of characters already on the current line; it is
/// updated so the next call continues the same line. The caller must pass
/// `wrap_col > 0`, `*col < wrap_col`, and a `wrap_buf` large enough for the
/// data plus the inserted newlines.
#[inline]
fn build_wrapped_output(
    data: &[u8],
    wrap_col: usize,
    col: &mut usize,
    wrap_buf: &mut [u8],
) -> usize {
    let mut written = 0;
    let mut pending = data;

    while !pending.is_empty() {
        let room = wrap_col - *col;

        if pending.len() <= room {
            // Everything left fits on the current line.
            let n = pending.len();
            wrap_buf[written..written + n].copy_from_slice(pending);
            written += n;
            *col += n;
            if *col == wrap_col {
                wrap_buf[written] = b'\n';
                written += 1;
                *col = 0;
            }
            break;
        }

        // Fill the current line, terminate it, and continue with the rest.
        let (line, rest) = pending.split_at(room);
        wrap_buf[written..written + room].copy_from_slice(line);
        written += room;
        wrap_buf[written] = b'\n';
        written += 1;
        *col = 0;
        pending = rest;
    }

    written
}
336
337pub fn decode_stream(
341 reader: &mut impl Read,
342 ignore_garbage: bool,
343 writer: &mut impl Write,
344) -> io::Result<()> {
345 let mut data = Vec::new();
346 reader.read_to_end(&mut data)?;
347
348 if ignore_garbage {
349 data.retain(|&b| is_base64_char(b));
350 } else {
351 strip_whitespace_inplace(&mut data);
352 }
353
354 decode_owned_clean(&mut data, writer)
355}
356
/// Reads from `reader` until `buf` is full or the reader reports end of input,
/// and returns the number of bytes placed in `buf`.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried.
///
/// # Errors
///
/// Returns the first non-`Interrupted` error reported by `reader`.
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;

    while filled < buf.len() {
        let got = match reader.read(&mut buf[filled..]) {
            Ok(n) => n,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        if got == 0 {
            break;
        }
        filled += got;
    }

    Ok(filled)
}