coreutils_rs/base64/
core.rs1use std::io::{self, Read, Write};
2
3use base64_simd::AsOut;
4
5const BASE64_ENGINE: &base64_simd::Base64 = &base64_simd::STANDARD;
6
7const STREAM_ENCODE_CHUNK: usize = 8 * 1024 * 1024 - (8 * 1024 * 1024 % 3);
9
10const NOWRAP_CHUNK: usize = 8 * 1024 * 1024 - (8 * 1024 * 1024 % 3);
12
13pub fn encode_to_writer(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
16 if data.is_empty() {
17 return Ok(());
18 }
19
20 if wrap_col == 0 {
21 return encode_no_wrap(data, out);
22 }
23
24 encode_wrapped(data, wrap_col, out)
25}
26
27fn encode_no_wrap(data: &[u8], out: &mut impl Write) -> io::Result<()> {
31 let enc_max = BASE64_ENGINE.encoded_length(NOWRAP_CHUNK);
32 let mut buf = vec![0u8; enc_max];
33
34 for chunk in data.chunks(NOWRAP_CHUNK) {
35 let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
36 let encoded = BASE64_ENGINE.encode(chunk, buf[..enc_len].as_out());
37 out.write_all(encoded)?;
38 }
39 Ok(())
40}
41
42fn encode_wrapped(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
46 let bytes_per_line = wrap_col * 3 / 4;
47
48 let lines_per_chunk = (8 * 1024 * 1024) / bytes_per_line;
51 let chunk_input = lines_per_chunk * bytes_per_line;
52 let chunk_encoded_max = BASE64_ENGINE.encoded_length(chunk_input);
53
54 let mut encode_buf = vec![0u8; chunk_encoded_max];
56 let wrapped_max = (lines_per_chunk + 1) * (wrap_col + 1);
58 let mut wrap_buf = vec![0u8; wrapped_max];
59
60 let line_out = wrap_col + 1; for chunk in data.chunks(chunk_input) {
63 let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
64 let encoded = BASE64_ENGINE.encode(chunk, encode_buf[..enc_len].as_out());
65
66 let mut rp = 0;
68 let mut wp = 0;
69
70 while rp + 4 * wrap_col <= encoded.len() {
72 unsafe {
73 let src = encoded.as_ptr().add(rp);
74 let dst = wrap_buf.as_mut_ptr().add(wp);
75
76 std::ptr::copy_nonoverlapping(src, dst, wrap_col);
77 *dst.add(wrap_col) = b'\n';
78
79 std::ptr::copy_nonoverlapping(src.add(wrap_col), dst.add(line_out), wrap_col);
80 *dst.add(line_out + wrap_col) = b'\n';
81
82 std::ptr::copy_nonoverlapping(
83 src.add(2 * wrap_col),
84 dst.add(2 * line_out),
85 wrap_col,
86 );
87 *dst.add(2 * line_out + wrap_col) = b'\n';
88
89 std::ptr::copy_nonoverlapping(
90 src.add(3 * wrap_col),
91 dst.add(3 * line_out),
92 wrap_col,
93 );
94 *dst.add(3 * line_out + wrap_col) = b'\n';
95 }
96 rp += 4 * wrap_col;
97 wp += 4 * line_out;
98 }
99
100 while rp + wrap_col <= encoded.len() {
102 wrap_buf[wp..wp + wrap_col].copy_from_slice(&encoded[rp..rp + wrap_col]);
103 wp += wrap_col;
104 wrap_buf[wp] = b'\n';
105 wp += 1;
106 rp += wrap_col;
107 }
108
109 if rp < encoded.len() {
111 let remaining = encoded.len() - rp;
112 wrap_buf[wp..wp + remaining].copy_from_slice(&encoded[rp..rp + remaining]);
113 wp += remaining;
114 wrap_buf[wp] = b'\n';
115 wp += 1;
116 }
117
118 out.write_all(&wrap_buf[..wp])?;
119 }
120
121 Ok(())
122}
123
124pub fn decode_to_writer(data: &[u8], ignore_garbage: bool, out: &mut impl Write) -> io::Result<()> {
128 if data.is_empty() {
129 return Ok(());
130 }
131
132 if ignore_garbage {
133 let mut cleaned = strip_non_base64(data);
134 return decode_owned_clean(&mut cleaned, out);
135 }
136
137 decode_stripping_whitespace(data, out)
139}
140
141pub fn decode_owned(
145 data: &mut Vec<u8>,
146 ignore_garbage: bool,
147 out: &mut impl Write,
148) -> io::Result<()> {
149 if data.is_empty() {
150 return Ok(());
151 }
152
153 if ignore_garbage {
154 data.retain(|&b| is_base64_char(b));
155 } else {
156 strip_whitespace_inplace(data);
157 }
158
159 decode_owned_clean(data, out)
160}
161
162fn strip_whitespace_inplace(data: &mut Vec<u8>) {
165 let positions: Vec<usize> = memchr::memchr_iter(b'\n', data.as_slice()).collect();
167
168 if positions.is_empty() {
169 if data.iter().any(|&b| is_whitespace(b)) {
171 data.retain(|&b| !is_whitespace(b));
172 }
173 return;
174 }
175
176 let mut wp = 0;
178 let mut rp = 0;
179
180 for &pos in &positions {
181 if pos > rp {
182 let len = pos - rp;
183 data.copy_within(rp..pos, wp);
184 wp += len;
185 }
186 rp = pos + 1;
187 }
188
189 let data_len = data.len();
190 if rp < data_len {
191 let len = data_len - rp;
192 data.copy_within(rp..data_len, wp);
193 wp += len;
194 }
195
196 data.truncate(wp);
197
198 if data.iter().any(|&b| is_whitespace(b)) {
200 data.retain(|&b| !is_whitespace(b));
201 }
202}
203
204fn decode_stripping_whitespace(data: &[u8], out: &mut impl Write) -> io::Result<()> {
207 if memchr::memchr(b'\n', data).is_none() && !data.iter().any(|&b| is_whitespace(b)) {
209 return decode_borrowed_clean(out, data);
210 }
211
212 let mut clean = Vec::with_capacity(data.len());
214 let mut last = 0;
215 for pos in memchr::memchr_iter(b'\n', data) {
216 if pos > last {
217 clean.extend_from_slice(&data[last..pos]);
218 }
219 last = pos + 1;
220 }
221 if last < data.len() {
222 clean.extend_from_slice(&data[last..]);
223 }
224
225 if clean.iter().any(|&b| is_whitespace(b)) {
227 clean.retain(|&b| !is_whitespace(b));
228 }
229
230 decode_owned_clean(&mut clean, out)
231}
232
233fn decode_owned_clean(data: &mut [u8], out: &mut impl Write) -> io::Result<()> {
235 if data.is_empty() {
236 return Ok(());
237 }
238 match BASE64_ENGINE.decode_inplace(data) {
239 Ok(decoded) => out.write_all(decoded),
240 Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
241 }
242}
243
244fn decode_borrowed_clean(out: &mut impl Write, data: &[u8]) -> io::Result<()> {
246 if data.is_empty() {
247 return Ok(());
248 }
249 match BASE64_ENGINE.decode_to_vec(data) {
250 Ok(decoded) => {
251 out.write_all(&decoded)?;
252 Ok(())
253 }
254 Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
255 }
256}
257
258fn strip_non_base64(data: &[u8]) -> Vec<u8> {
260 data.iter()
261 .copied()
262 .filter(|&b| is_base64_char(b))
263 .collect()
264}
265
/// Returns `true` for bytes of the standard base64 alphabet (`A-Z`, `a-z`,
/// `0-9`, `+`, `/`) and for the padding character `=`.
#[inline]
fn is_base64_char(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=')
}
271
/// Returns `true` for the ASCII whitespace bytes: space, tab, line feed,
/// vertical tab, form feed and carriage return.
#[inline]
fn is_whitespace(b: u8) -> bool {
    // 0x09..=0x0d is the contiguous run \t, \n, vertical tab, form feed, \r.
    b == b' ' || (b'\t'..=b'\r').contains(&b)
}
277
278pub fn encode_stream(
282 reader: &mut impl Read,
283 wrap_col: usize,
284 writer: &mut impl Write,
285) -> io::Result<()> {
286 let mut buf = vec![0u8; STREAM_ENCODE_CHUNK];
287
288 let encode_buf_size = BASE64_ENGINE.encoded_length(STREAM_ENCODE_CHUNK);
289 let mut encode_buf = vec![0u8; encode_buf_size];
290
291 if wrap_col == 0 {
292 loop {
294 let n = read_full(reader, &mut buf)?;
295 if n == 0 {
296 break;
297 }
298 let enc_len = BASE64_ENGINE.encoded_length(n);
299 let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());
300 writer.write_all(encoded)?;
301 }
302 } else {
303 let max_wrapped = encode_buf_size + (encode_buf_size / wrap_col + 2);
306 let mut wrap_buf = vec![0u8; max_wrapped];
307 let mut col = 0usize;
308
309 loop {
310 let n = read_full(reader, &mut buf)?;
311 if n == 0 {
312 break;
313 }
314 let enc_len = BASE64_ENGINE.encoded_length(n);
315 let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());
316
317 let wp = build_wrapped_output(encoded, wrap_col, &mut col, &mut wrap_buf);
319 writer.write_all(&wrap_buf[..wp])?;
320 }
321
322 if col > 0 {
323 writer.write_all(b"\n")?;
324 }
325 }
326
327 Ok(())
328}
329
/// Copies `data` into `wrap_buf`, inserting a newline each time the running
/// column reaches `wrap_col`, and returns the number of bytes written.
///
/// `col` is the number of characters already on the current output line. It
/// is updated so consecutive calls continue the same line, and it is reset to
/// 0 whenever a newline is emitted. A trailing short line is left open (no
/// newline) for the next call to extend.
///
/// `wrap_col == 0` means "no wrapping": `data` is copied verbatim and `col`
/// is left untouched.
///
/// The caller must keep `*col <= wrap_col` and must supply a `wrap_buf` large
/// enough for `data` plus the inserted newlines.
///
/// # Panics
/// Panics if `wrap_buf` is too small for the output.
#[inline]
fn build_wrapped_output(
    data: &[u8],
    wrap_col: usize,
    col: &mut usize,
    wrap_buf: &mut [u8],
) -> usize {
    if wrap_col == 0 {
        // Without this guard the loop below would never consume any input.
        wrap_buf[..data.len()].copy_from_slice(data);
        return data.len();
    }

    let mut rest = data;
    let mut wp = 0;

    while !rest.is_empty() {
        // Fill what is left of the current line, or take everything if it fits.
        let take = (wrap_col - *col).min(rest.len());
        let (head, tail) = rest.split_at(take);

        wrap_buf[wp..wp + take].copy_from_slice(head);
        wp += take;
        *col += take;

        if *col == wrap_col {
            wrap_buf[wp] = b'\n';
            wp += 1;
            *col = 0;
        }

        rest = tail;
    }

    wp
}
369
370pub fn decode_stream(
374 reader: &mut impl Read,
375 ignore_garbage: bool,
376 writer: &mut impl Write,
377) -> io::Result<()> {
378 let mut data = Vec::new();
379 reader.read_to_end(&mut data)?;
380
381 if ignore_garbage {
382 data.retain(|&b| is_base64_char(b));
383 } else {
384 strip_whitespace_inplace(&mut data);
385 }
386
387 decode_owned_clean(&mut data, writer)
388}
389
/// Reads from `reader` until `buf` is full or the reader reports end of input
/// (a read of 0 bytes), and returns the number of bytes placed in `buf`.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried.
///
/// # Errors
/// Returns any other read error immediately; bytes read before the failure
/// remain in `buf` but their count is not reported.
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let capacity = buf.len();
    let mut filled = 0usize;

    while filled < capacity {
        let got = match reader.read(&mut buf[filled..]) {
            Ok(got) => got,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        if got == 0 {
            break;
        }
        filled += got;
    }

    Ok(filled)
}