use std::io::{self, Read, Write};

use base64_simd::AsOut;
use rayon::prelude::*;

const BASE64_ENGINE: &base64_simd::Base64 = &base64_simd::STANDARD;

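/// Read-buffer size for streaming encode: 12 MiB rounded down to a multiple
/// of 3, so every full chunk encodes without mid-stream padding.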
const STREAM_ENCODE_CHUNK: usize = 12 * 1024 * 1024 - (12 * 1024 * 1024 % 3);

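/// Chunk size for unwrapped in-memory encoding: 2 MiB rounded down to a
/// multiple of 3 so chunk boundaries never introduce padding.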
const NOWRAP_CHUNK: usize = 2 * 1024 * 1024 - (2 * 1024 * 1024 % 3);

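/// Inputs at least this large are split across rayon worker threads.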
const PARALLEL_ENCODE_THRESHOLD: usize = 32 * 1024 * 1024;

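/// Encodes `data` as base64 and writes it to `out`, wrapping the output at
/// `wrap_col` columns. A `wrap_col` of 0 disables wrapping.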
pub fn encode_to_writer(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if wrap_col == 0 {
        return encode_no_wrap(data, out);
    }

    encode_wrapped(data, wrap_col, out)
}

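/// Encodes without line wrapping, in parallel for large inputs.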
fn encode_no_wrap(data: &[u8], out: &mut impl Write) -> io::Result<()> {
    if data.len() >= PARALLEL_ENCODE_THRESHOLD {
        let num_threads = rayon::current_num_threads().max(1);
        // Round each thread's share up to a multiple of 3 so only the final
        // chunk can carry padding.
        let raw_chunk = (data.len() + num_threads - 1) / num_threads;
        let chunk_size = ((raw_chunk + 2) / 3) * 3;

        let encoded_chunks: Vec<Vec<u8>> = data
            .par_chunks(chunk_size)
            .map(|chunk| {
                let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
                let mut buf = vec![0u8; enc_len];
                let encoded = BASE64_ENGINE.encode(chunk, buf[..enc_len].as_out());
                let len = encoded.len();
                buf.truncate(len);
                buf
            })
            .collect();

        for chunk in &encoded_chunks {
            out.write_all(chunk)?;
        }
        return Ok(());
    }

    let actual_chunk = NOWRAP_CHUNK.min(data.len());
    let enc_max = BASE64_ENGINE.encoded_length(actual_chunk);
    let mut buf = vec![0u8; enc_max];

    for chunk in data.chunks(NOWRAP_CHUNK) {
        let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
        let encoded = BASE64_ENGINE.encode(chunk, buf[..enc_len].as_out());
        out.write_all(encoded)?;
    }
    Ok(())
}

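/// Encodes with line wrapping at `wrap_col` columns (`wrap_col` is non-zero here).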
fn encode_wrapped(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
    // Smallest input block that encodes to a whole number of output lines:
    // a multiple of 3 bytes (so chunk boundaries never emit padding) whose
    // encoded length is a multiple of `wrap_col`.
    let gcd = match wrap_col % 4 {
        0 => 4,
        2 => 2,
        _ => 1,
    };
    let block_bytes = 3 * (wrap_col / gcd);

    if data.len() >= PARALLEL_ENCODE_THRESHOLD {
        let num_threads = rayon::current_num_threads().max(1);
        let blocks_per_thread = ((data.len() / block_bytes) + num_threads - 1) / num_threads;
        let chunk_input = (blocks_per_thread * block_bytes).max(block_bytes);

        let wrapped_chunks: Vec<Vec<u8>> = data
            .par_chunks(chunk_input)
            .map(|chunk| {
                let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
                let mut encode_buf = vec![0u8; enc_len];
                let encoded = BASE64_ENGINE.encode(chunk, encode_buf[..enc_len].as_out());

                // One newline per full line plus one for a trailing partial line.
                let line_out = wrap_col + 1;
                let max_lines = (encoded.len() + wrap_col - 1) / wrap_col + 1;
                let mut wrap_buf = vec![0u8; max_lines * line_out];
                let wp = wrap_encoded(encoded, wrap_col, &mut wrap_buf);
                wrap_buf.truncate(wp);
                wrap_buf
            })
            .collect();

        for chunk in &wrapped_chunks {
            out.write_all(chunk)?;
        }
        return Ok(());
    }

    // Serial path: process roughly 2 MiB at a time, rounded to whole blocks so
    // every chunk boundary falls on a line boundary.
    let blocks_per_chunk = (2 * 1024 * 1024) / block_bytes;
    let chunk_input = (blocks_per_chunk * block_bytes).max(block_bytes);
    let effective_chunk = chunk_input.min(data.len());
    let chunk_encoded_max = BASE64_ENGINE.encoded_length(effective_chunk);
    let mut encode_buf = vec![0u8; chunk_encoded_max];
    let wrapped_max = chunk_encoded_max + chunk_encoded_max / wrap_col + 2;
    let mut wrap_buf = vec![0u8; wrapped_max];

    for chunk in data.chunks(chunk_input) {
        let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
        let encoded = BASE64_ENGINE.encode(chunk, encode_buf[..enc_len].as_out());
        let wp = wrap_encoded(encoded, wrap_col, &mut wrap_buf);
        out.write_all(&wrap_buf[..wp])?;
    }

    Ok(())
}

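/// Copies `encoded` into `wrap_buf`, inserting a newline after every
/// `wrap_col` bytes and after a trailing partial line. Returns the number of
/// bytes written; `wrap_buf` must be sized for the data plus its newlines.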
#[inline]
fn wrap_encoded(encoded: &[u8], wrap_col: usize, wrap_buf: &mut [u8]) -> usize {
    let line_out = wrap_col + 1;
    let mut rp = 0;
    let mut wp = 0;

    // Unrolled: copy four full lines per iteration.
    while rp + 4 * wrap_col <= encoded.len() {
        // SAFETY: the loop condition guarantees `4 * wrap_col` readable bytes at
        // `rp`, and `wrap_buf` must have room for four lines of `wrap_col + 1`
        // bytes at `wp` (callers size it for the full wrapped output).
        unsafe {
            let src = encoded.as_ptr().add(rp);
            let dst = wrap_buf.as_mut_ptr().add(wp);

            std::ptr::copy_nonoverlapping(src, dst, wrap_col);
            *dst.add(wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(wrap_col), dst.add(line_out), wrap_col);
            *dst.add(line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(2 * wrap_col), dst.add(2 * line_out), wrap_col);
            *dst.add(2 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(3 * wrap_col), dst.add(3 * line_out), wrap_col);
            *dst.add(3 * line_out + wrap_col) = b'\n';
        }
        rp += 4 * wrap_col;
        wp += 4 * line_out;
    }

    // Remaining full lines, one at a time.
    while rp + wrap_col <= encoded.len() {
        wrap_buf[wp..wp + wrap_col].copy_from_slice(&encoded[rp..rp + wrap_col]);
        wp += wrap_col;
        wrap_buf[wp] = b'\n';
        wp += 1;
        rp += wrap_col;
    }

    // Trailing partial line, still newline-terminated.
    if rp < encoded.len() {
        let remaining = encoded.len() - rp;
        wrap_buf[wp..wp + remaining].copy_from_slice(&encoded[rp..rp + remaining]);
        wp += remaining;
        wrap_buf[wp] = b'\n';
        wp += 1;
    }

    wp
}

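/// Decodes base64 `data` and writes the result to `out`. With `ignore_garbage`,
/// every byte outside the base64 alphabet is dropped first; otherwise only
/// whitespace is stripped.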
pub fn decode_to_writer(data: &[u8], ignore_garbage: bool, out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if ignore_garbage {
        let mut cleaned = strip_non_base64(data);
        return decode_owned_clean(&mut cleaned, out);
    }

    decode_stripping_whitespace(data, out)
}

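/// Like `decode_to_writer`, but takes a mutable buffer so it can be cleaned
/// and decoded in place.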
pub fn decode_owned(
    data: &mut Vec<u8>,
    ignore_garbage: bool,
    out: &mut impl Write,
) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if ignore_garbage {
        data.retain(|&b| is_base64_char(b));
    } else {
        strip_whitespace_inplace(data);
    }

    decode_owned_clean(data, out)
}

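/// Removes whitespace from `data` in place: segments between newlines are
/// compacted with `copy_within`, and a slower `retain` pass runs only if
/// other whitespace remains.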
fn strip_whitespace_inplace(data: &mut Vec<u8>) {
    let positions: Vec<usize> = memchr::memchr_iter(b'\n', data.as_slice()).collect();

    if positions.is_empty() {
        if data.iter().any(|&b| is_whitespace(b)) {
            data.retain(|&b| !is_whitespace(b));
        }
        return;
    }

    let mut wp = 0;
    let mut rp = 0;

    for &pos in &positions {
        if pos > rp {
            let len = pos - rp;
            data.copy_within(rp..pos, wp);
            wp += len;
        }
        rp = pos + 1;
    }

    let data_len = data.len();
    if rp < data_len {
        let len = data_len - rp;
        data.copy_within(rp..data_len, wp);
        wp += len;
    }

    data.truncate(wp);

    if data.iter().any(|&b| is_whitespace(b)) {
        data.retain(|&b| !is_whitespace(b));
    }
}

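/// Decodes borrowed input, stripping it into a scratch buffer only when
/// newlines or other whitespace are present.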
fn decode_stripping_whitespace(data: &[u8], out: &mut impl Write) -> io::Result<()> {
    if memchr::memchr2(b'\n', b'\r', data).is_none()
        && !data.iter().any(|&b| b == b' ' || b == b'\t')
    {
        if data.len() >= PARALLEL_ENCODE_THRESHOLD {
            return decode_parallel(data, out);
        }
        return decode_borrowed_clean(out, data);
    }

    let mut clean = Vec::with_capacity(data.len());
    let mut last = 0;
    for pos in memchr::memchr_iter(b'\n', data) {
        if pos > last {
            clean.extend_from_slice(&data[last..pos]);
        }
        last = pos + 1;
    }
    if last < data.len() {
        clean.extend_from_slice(&data[last..]);
    }

    if clean.iter().any(|&b| is_whitespace(b)) {
        clean.retain(|&b| !is_whitespace(b));
    }

    if clean.len() >= PARALLEL_ENCODE_THRESHOLD {
        return decode_parallel(&clean, out);
    }

    decode_owned_clean(&mut clean, out)
}

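/// Decodes large, already-clean input across rayon worker threads. Chunks are
/// rounded up to a multiple of 4 characters so padding only appears in the
/// final chunk of well-formed input.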
fn decode_parallel(data: &[u8], out: &mut impl Write) -> io::Result<()> {
    let num_threads = rayon::current_num_threads().max(1);
    let raw_chunk = (data.len() + num_threads - 1) / num_threads;
    let chunk_size = ((raw_chunk + 3) / 4) * 4;

    let decoded_chunks: Vec<Result<Vec<u8>, _>> = data
        .par_chunks(chunk_size)
        .map(|chunk| match BASE64_ENGINE.decode_to_vec(chunk) {
            Ok(decoded) => Ok(decoded),
            Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
        })
        .collect();

    for chunk_result in decoded_chunks {
        let chunk = chunk_result?;
        out.write_all(&chunk)?;
    }

    Ok(())
}

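/// Decodes a whitespace-free buffer in place and writes the result.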
fn decode_owned_clean(data: &mut [u8], out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }
    match BASE64_ENGINE.decode_inplace(data) {
        Ok(decoded) => out.write_all(decoded),
        Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
    }
}

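/// Decodes a borrowed, whitespace-free buffer into a fresh allocation and
/// writes the result.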
fn decode_borrowed_clean(out: &mut impl Write, data: &[u8]) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }
    match BASE64_ENGINE.decode_to_vec(data) {
        Ok(decoded) => {
            out.write_all(&decoded)?;
            Ok(())
        }
        Err(_) => Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input")),
    }
}

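/// Returns a copy of `data` containing only bytes from the base64 alphabet.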
fn strip_non_base64(data: &[u8]) -> Vec<u8> {
    data.iter()
        .copied()
        .filter(|&b| is_base64_char(b))
        .collect()
}

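/// True for bytes in the standard base64 alphabet, including `=` padding.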
#[inline]
fn is_base64_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'+' || b == b'/' || b == b'='
}

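/// True for ASCII whitespace, including vertical tab and form feed.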
#[inline]
fn is_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c)
}

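/// Encodes from `reader` to `writer` in fixed-size chunks without buffering
/// the whole input, wrapping at `wrap_col` columns (0 disables wrapping).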
pub fn encode_stream(
    reader: &mut impl Read,
    wrap_col: usize,
    writer: &mut impl Write,
) -> io::Result<()> {
    let mut buf = vec![0u8; STREAM_ENCODE_CHUNK];

    let encode_buf_size = BASE64_ENGINE.encoded_length(STREAM_ENCODE_CHUNK);
    let mut encode_buf = vec![0u8; encode_buf_size];

    if wrap_col == 0 {
        loop {
            let n = read_full(reader, &mut buf)?;
            if n == 0 {
                break;
            }
            let enc_len = BASE64_ENGINE.encoded_length(n);
            let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());
            writer.write_all(encoded)?;
        }
    } else {
        // Encoded chunk plus one newline per line, with slack for a partial line.
        let max_wrapped = encode_buf_size + (encode_buf_size / wrap_col + 2);
        let mut wrap_buf = vec![0u8; max_wrapped];
        let mut col = 0usize;

        loop {
            let n = read_full(reader, &mut buf)?;
            if n == 0 {
                break;
            }
            let enc_len = BASE64_ENGINE.encoded_length(n);
            let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());

            let wp = build_wrapped_output(encoded, wrap_col, &mut col, &mut wrap_buf);
            writer.write_all(&wrap_buf[..wp])?;
        }

        // Terminate a final partial line.
        if col > 0 {
            writer.write_all(b"\n")?;
        }
    }

    Ok(())
}

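/// Wraps `data` into `wrap_buf`, carrying the current output column in `col`
/// across calls so line breaks stay correct between stream chunks. Returns
/// the number of bytes written to `wrap_buf`.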
#[inline]
fn build_wrapped_output(
    data: &[u8],
    wrap_col: usize,
    col: &mut usize,
    wrap_buf: &mut [u8],
) -> usize {
    let mut rp = 0;
    let mut wp = 0;

    while rp < data.len() {
        let space = wrap_col - *col;
        let avail = data.len() - rp;

        if avail <= space {
            wrap_buf[wp..wp + avail].copy_from_slice(&data[rp..rp + avail]);
            wp += avail;
            *col += avail;
            if *col == wrap_col {
                wrap_buf[wp] = b'\n';
                wp += 1;
                *col = 0;
            }
            break;
        } else {
            wrap_buf[wp..wp + space].copy_from_slice(&data[rp..rp + space]);
            wp += space;
            wrap_buf[wp] = b'\n';
            wp += 1;
            rp += space;
            *col = 0;
        }
    }

    wp
}

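/// Decodes from `reader` to `writer` in fixed-size chunks, carrying up to
/// three leftover characters between chunks so decoding always operates on
/// whole 4-character groups.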
pub fn decode_stream(
    reader: &mut impl Read,
    ignore_garbage: bool,
    writer: &mut impl Write,
) -> io::Result<()> {
    const READ_CHUNK: usize = 4 * 1024 * 1024;
    let mut buf = vec![0u8; READ_CHUNK];
    let mut clean = Vec::with_capacity(READ_CHUNK);
    let mut carry: Vec<u8> = Vec::with_capacity(4);

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }

        clean.clear();
        clean.extend_from_slice(&carry);
        carry.clear();

        let chunk = &buf[..n];
        if ignore_garbage {
            clean.extend(chunk.iter().copied().filter(|&b| is_base64_char(b)));
        } else {
            // Strip newlines first; run a full whitespace pass only if other
            // whitespace is present.
            let mut last = 0;
            for pos in memchr::memchr_iter(b'\n', chunk) {
                if pos > last {
                    clean.extend_from_slice(&chunk[last..pos]);
                }
                last = pos + 1;
            }
            if last < n {
                clean.extend_from_slice(&chunk[last..]);
            }
            if clean.iter().any(|&b| is_whitespace(b) && b != b'\n') {
                clean.retain(|&b| !is_whitespace(b));
            }
        }

        // A short read from `read_full` means EOF.
        let is_last = n < READ_CHUNK;

        if is_last {
            decode_owned_clean(&mut clean, writer)?;
        } else {
            // Decode whole 4-character groups; carry the remainder into the
            // next chunk.
            let decode_len = (clean.len() / 4) * 4;
            if decode_len < clean.len() {
                carry.extend_from_slice(&clean[decode_len..]);
            }
            if decode_len > 0 {
                clean.truncate(decode_len);
                decode_owned_clean(&mut clean, writer)?;
            }
        }
    }

    if !carry.is_empty() {
        decode_owned_clean(&mut carry, writer)?;
    }

    Ok(())
}

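/// Reads until `buf` is full or EOF, retrying on `ErrorKind::Interrupted`.
/// Returns the number of bytes read.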
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}