use std::io::{self, Read, Write};

use base64_simd::AsOut;

const BASE64_ENGINE: &base64_simd::Base64 = &base64_simd::STANDARD;

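/// Number of CPUs available for parallel work, falling back to 1 when it
/// cannot be determined.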
#[inline]
fn num_cpus() -> usize {
    std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(1)
}

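/// Chunk size for unwrapped encoding: 8 MiB rounded down to a multiple of 3,
/// so every chunk except the last encodes without padding.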
const NOWRAP_CHUNK: usize = 8 * 1024 * 1024 - (8 * 1024 * 1024 % 3);

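/// Minimum input size before unwrapped encoding is split across threads.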
const PARALLEL_NOWRAP_THRESHOLD: usize = 4 * 1024 * 1024;

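/// Minimum input size before wrapped encoding is split across threads.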
const PARALLEL_WRAPPED_THRESHOLD: usize = 2 * 1024 * 1024;

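/// Minimum (whitespace-stripped) input size before decoding is split across threads.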
const PARALLEL_DECODE_THRESHOLD: usize = 2 * 1024 * 1024;

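/// Encodes `data` as base64 and writes it to `out`, wrapping output lines at
/// `wrap_col` columns; `wrap_col == 0` disables wrapping.
///
/// Minimal usage sketch (the module path `b64` is an assumption for illustration):
///
/// ```ignore
/// let mut out = Vec::new();
/// b64::encode_to_writer(b"hello", 76, &mut out).unwrap();
/// assert_eq!(out, b"aGVsbG8=\n");
/// ```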
pub fn encode_to_writer(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if wrap_col == 0 {
        return encode_no_wrap(data, out);
    }

    encode_wrapped(data, wrap_col, out)
}

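/// Encodes without line wrapping, processing the input in fixed-size chunks
/// that are multiples of 3 bytes so no padding appears mid-stream.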
fn encode_no_wrap(data: &[u8], out: &mut impl Write) -> io::Result<()> {
    if data.len() >= PARALLEL_NOWRAP_THRESHOLD && num_cpus() > 1 {
        return encode_no_wrap_parallel(data, out);
    }

    let enc_len = BASE64_ENGINE.encoded_length(data.len().min(NOWRAP_CHUNK));
    let mut buf: Vec<u8> = Vec::with_capacity(enc_len);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(enc_len);
    }

    for chunk in data.chunks(NOWRAP_CHUNK) {
        let clen = BASE64_ENGINE.encoded_length(chunk.len());
        let encoded = BASE64_ENGINE.encode(chunk, buf[..clen].as_out());
        out.write_all(encoded)?;
    }
    Ok(())
}

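/// Unwrapped encoding across threads: each thread encodes a 3-byte-aligned
/// slice into its own buffer, and the buffers are written out in input order.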
fn encode_no_wrap_parallel(data: &[u8], out: &mut impl Write) -> io::Result<()> {
    let num_threads = num_cpus().max(1);
    let raw_chunk = data.len() / num_threads;
    let chunk_size = ((raw_chunk + 2) / 3) * 3;

    let chunks: Vec<&[u8]> = data.chunks(chunk_size.max(3)).collect();

    let results: Vec<Vec<u8>> = std::thread::scope(|s| {
        let handles: Vec<_> = chunks
            .iter()
            .map(|chunk| {
                s.spawn(|| {
                    let enc_len = BASE64_ENGINE.encoded_length(chunk.len());
                    let mut buf: Vec<u8> = Vec::with_capacity(enc_len);
                    #[allow(clippy::uninit_vec)]
                    unsafe {
                        buf.set_len(enc_len);
                    }
                    #[cfg(target_os = "linux")]
                    if enc_len >= 2 * 1024 * 1024 {
                        unsafe {
                            libc::madvise(
                                buf.as_mut_ptr() as *mut libc::c_void,
                                enc_len,
                                libc::MADV_HUGEPAGE,
                            );
                        }
                    }
                    let _ = BASE64_ENGINE.encode(chunk, buf[..enc_len].as_out());
                    buf
                })
            })
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });

    let slices: Vec<io::IoSlice> = results.iter().map(|r| io::IoSlice::new(r)).collect();
    write_all_vectored(out, &slices)
}

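/// Wrapped encoding entry point: dispatches to the small-column, parallel,
/// scatter, or encode-then-rewrap strategy depending on `wrap_col` and size.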
fn encode_wrapped(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
    let bytes_per_line = wrap_col * 3 / 4;
    if bytes_per_line == 0 {
        return encode_wrapped_small(data, wrap_col, out);
    }

    if data.len() >= PARALLEL_WRAPPED_THRESHOLD && bytes_per_line.is_multiple_of(3) {
        return encode_wrapped_parallel(data, wrap_col, bytes_per_line, out);
    }

    if bytes_per_line.is_multiple_of(3) {
        return encode_wrapped_scatter(data, wrap_col, bytes_per_line, out);
    }

    let enc_max = BASE64_ENGINE.encoded_length(data.len());
    let num_full = enc_max / wrap_col;
    let rem = enc_max % wrap_col;
    let out_len = num_full * (wrap_col + 1) + if rem > 0 { rem + 1 } else { 0 };

    let mut enc_buf: Vec<u8> = Vec::with_capacity(enc_max);
    #[allow(clippy::uninit_vec)]
    unsafe {
        enc_buf.set_len(enc_max);
    }
    let _ = BASE64_ENGINE.encode(data, enc_buf[..enc_max].as_out());

    let mut out_buf: Vec<u8> = Vec::with_capacity(out_len);
    #[allow(clippy::uninit_vec)]
    unsafe {
        out_buf.set_len(out_len);
    }
    let n = fuse_wrap(&enc_buf, wrap_col, &mut out_buf);
    out.write_all(&out_buf[..n])
}

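/// Encodes in cache-friendly groups of lines into a temporary buffer, then
/// scatters each line plus a trailing newline into the output buffer.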
fn encode_wrapped_scatter(
    data: &[u8],
    wrap_col: usize,
    bytes_per_line: usize,
    out: &mut impl Write,
) -> io::Result<()> {
    let enc_len = BASE64_ENGINE.encoded_length(data.len());
    if enc_len == 0 {
        return Ok(());
    }

    let num_full = enc_len / wrap_col;
    let rem = enc_len % wrap_col;
    let out_len = num_full * (wrap_col + 1) + if rem > 0 { rem + 1 } else { 0 };

    let mut buf: Vec<u8> = Vec::with_capacity(out_len);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(out_len);
    }
    #[cfg(target_os = "linux")]
    if out_len >= 2 * 1024 * 1024 {
        unsafe {
            libc::madvise(
                buf.as_mut_ptr() as *mut libc::c_void,
                out_len,
                libc::MADV_HUGEPAGE,
            );
        }
    }

    const GROUP_LINES: usize = 256;
    let group_input = GROUP_LINES * bytes_per_line;
    let temp_size = GROUP_LINES * wrap_col;
    let mut temp: Vec<u8> = Vec::with_capacity(temp_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        temp.set_len(temp_size);
    }

    let line_out = wrap_col + 1;
    let mut wp = 0usize;

    for chunk in data.chunks(group_input) {
        let clen = BASE64_ENGINE.encoded_length(chunk.len());
        let _ = BASE64_ENGINE.encode(chunk, temp[..clen].as_out());

        let lines = clen / wrap_col;
        let chunk_rem = clen % wrap_col;

        let mut i = 0;
        while i + 8 <= lines {
            unsafe {
                let src = temp.as_ptr().add(i * wrap_col);
                let dst = buf.as_mut_ptr().add(wp);
                std::ptr::copy_nonoverlapping(src, dst, wrap_col);
                *dst.add(wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(src.add(wrap_col), dst.add(line_out), wrap_col);
                *dst.add(line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(2 * wrap_col),
                    dst.add(2 * line_out),
                    wrap_col,
                );
                *dst.add(2 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(3 * wrap_col),
                    dst.add(3 * line_out),
                    wrap_col,
                );
                *dst.add(3 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(4 * wrap_col),
                    dst.add(4 * line_out),
                    wrap_col,
                );
                *dst.add(4 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(5 * wrap_col),
                    dst.add(5 * line_out),
                    wrap_col,
                );
                *dst.add(5 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(6 * wrap_col),
                    dst.add(6 * line_out),
                    wrap_col,
                );
                *dst.add(6 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(7 * wrap_col),
                    dst.add(7 * line_out),
                    wrap_col,
                );
                *dst.add(7 * line_out + wrap_col) = b'\n';
            }
            wp += 8 * line_out;
            i += 8;
        }
        while i < lines {
            unsafe {
                std::ptr::copy_nonoverlapping(
                    temp.as_ptr().add(i * wrap_col),
                    buf.as_mut_ptr().add(wp),
                    wrap_col,
                );
                *buf.as_mut_ptr().add(wp + wrap_col) = b'\n';
            }
            wp += line_out;
            i += 1;
        }
        if chunk_rem > 0 {
            unsafe {
                std::ptr::copy_nonoverlapping(
                    temp.as_ptr().add(lines * wrap_col),
                    buf.as_mut_ptr().add(wp),
                    chunk_rem,
                );
                *buf.as_mut_ptr().add(wp + chunk_rem) = b'\n';
            }
            wp += chunk_rem + 1;
        }
    }

    out.write_all(&buf[..wp])
}

#[inline]
#[allow(dead_code)]
fn scatter_lines(
    temp: &[u8],
    buf: &mut [u8],
    line_start: usize,
    count: usize,
    wrap_col: usize,
    line_out: usize,
) {
    unsafe {
        let src = temp.as_ptr();
        let dst = buf.as_mut_ptr();
        for i in 0..count {
            let s_off = i * wrap_col;
            let d_off = (line_start + i) * line_out;
            std::ptr::copy_nonoverlapping(src.add(s_off), dst.add(d_off), wrap_col);
            *dst.add(d_off + wrap_col) = b'\n';
        }
    }
}

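/// Expands an encoded buffer in place from back to front, inserting a newline
/// after every `wrap_col` bytes (currently unused).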
#[inline]
#[allow(dead_code)]
fn expand_backward(ptr: *mut u8, enc_len: usize, out_len: usize, wrap_col: usize) {
    let num_full = enc_len / wrap_col;
    let rem = enc_len % wrap_col;

    unsafe {
        let mut rp = enc_len;
        let mut wp = out_len;

        if rem > 0 {
            wp -= 1;
            *ptr.add(wp) = b'\n';
            wp -= rem;
            rp -= rem;
            if rp != wp {
                std::ptr::copy(ptr.add(rp), ptr.add(wp), rem);
            }
        }

        let mut lines_left = num_full;
        while lines_left >= 8 {
            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);

            lines_left -= 8;
        }

        while lines_left > 0 {
            wp -= 1;
            *ptr.add(wp) = b'\n';
            rp -= wrap_col;
            wp -= wrap_col;
            if rp != wp {
                std::ptr::copy(ptr.add(rp), ptr.add(wp), wrap_col);
            }
            lines_left -= 1;
        }
    }
}

static NEWLINE: [u8; 1] = [b'\n'];

#[inline]
#[allow(dead_code)]
fn write_wrapped_iov(encoded: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
    const MAX_IOV: usize = 1024;

    let num_full_lines = encoded.len() / wrap_col;
    let remainder = encoded.len() % wrap_col;
    let total_iov = num_full_lines * 2 + if remainder > 0 { 2 } else { 0 };

    if total_iov <= MAX_IOV {
        let mut iov: Vec<io::IoSlice> = Vec::with_capacity(total_iov);
        let mut pos = 0;
        for _ in 0..num_full_lines {
            iov.push(io::IoSlice::new(&encoded[pos..pos + wrap_col]));
            iov.push(io::IoSlice::new(&NEWLINE));
            pos += wrap_col;
        }
        if remainder > 0 {
            iov.push(io::IoSlice::new(&encoded[pos..pos + remainder]));
            iov.push(io::IoSlice::new(&NEWLINE));
        }
        return write_all_vectored(out, &iov);
    }

    let line_out = wrap_col + 1;
    const BATCH_LINES: usize = 512;
    let batch_fused_size = BATCH_LINES * line_out;
    let mut fused: Vec<u8> = Vec::with_capacity(batch_fused_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        fused.set_len(batch_fused_size);
    }

    let mut rp = 0;
    let mut lines_done = 0;

    while lines_done + BATCH_LINES <= num_full_lines {
        let n = fuse_wrap(
            &encoded[rp..rp + BATCH_LINES * wrap_col],
            wrap_col,
            &mut fused,
        );
        out.write_all(&fused[..n])?;
        rp += BATCH_LINES * wrap_col;
        lines_done += BATCH_LINES;
    }

    let remaining_lines = num_full_lines - lines_done;
    if remaining_lines > 0 {
        let n = fuse_wrap(
            &encoded[rp..rp + remaining_lines * wrap_col],
            wrap_col,
            &mut fused,
        );
        out.write_all(&fused[..n])?;
        rp += remaining_lines * wrap_col;
    }

    if remainder > 0 {
        out.write_all(&encoded[rp..rp + remainder])?;
        out.write_all(b"\n")?;
    }
    Ok(())
}

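/// Streaming variant of the vectored wrapped write: `col` tracks the current
/// output column across calls so a line can span multiple read chunks.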
#[inline]
fn write_wrapped_iov_streaming(
    encoded: &[u8],
    wrap_col: usize,
    col: &mut usize,
    out: &mut impl Write,
) -> io::Result<()> {
    const MAX_IOV: usize = 1024;
    let mut iov: Vec<io::IoSlice> = Vec::with_capacity(MAX_IOV);
    let mut rp = 0;

    while rp < encoded.len() {
        let space = wrap_col - *col;
        let avail = encoded.len() - rp;

        if avail <= space {
            iov.push(io::IoSlice::new(&encoded[rp..rp + avail]));
            *col += avail;
            if *col == wrap_col {
                iov.push(io::IoSlice::new(&NEWLINE));
                *col = 0;
            }
            break;
        } else {
            iov.push(io::IoSlice::new(&encoded[rp..rp + space]));
            iov.push(io::IoSlice::new(&NEWLINE));
            rp += space;
            *col = 0;
        }

        if iov.len() >= MAX_IOV - 1 {
            write_all_vectored(out, &iov)?;
            iov.clear();
        }
    }

    if !iov.is_empty() {
        write_all_vectored(out, &iov)?;
    }
    Ok(())
}

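/// Wrapped encoding across threads: each chunk is a whole number of output
/// lines, so per-chunk results concatenate without re-wrapping.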
fn encode_wrapped_parallel(
    data: &[u8],
    wrap_col: usize,
    bytes_per_line: usize,
    out: &mut impl Write,
) -> io::Result<()> {
    let num_threads = num_cpus().max(1);
    let lines_per_chunk = ((data.len() / bytes_per_line) / num_threads).max(1);
    let chunk_input = lines_per_chunk * bytes_per_line;

    let chunks: Vec<&[u8]> = data.chunks(chunk_input.max(bytes_per_line)).collect();

    let output_chunks: Vec<Vec<u8>> = std::thread::scope(|s| {
        let handles: Vec<_> = chunks
            .iter()
            .map(|chunk| s.spawn(move || encode_chunk_l1_scatter(chunk, wrap_col, bytes_per_line)))
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });

    let slices: Vec<io::IoSlice> = output_chunks.iter().map(|c| io::IoSlice::new(c)).collect();
    write_all_vectored(out, &slices)
}

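/// Encodes one thread's chunk, producing fully wrapped output (a newline after
/// every `wrap_col` encoded bytes) in a freshly allocated buffer.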
fn encode_chunk_l1_scatter(data: &[u8], wrap_col: usize, bytes_per_line: usize) -> Vec<u8> {
    let enc_len = BASE64_ENGINE.encoded_length(data.len());
    let full_lines = enc_len / wrap_col;
    let remainder = enc_len % wrap_col;
    let out_size = full_lines * (wrap_col + 1) + if remainder > 0 { remainder + 1 } else { 0 };

    let mut output: Vec<u8> = Vec::with_capacity(out_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        output.set_len(out_size);
    }
    #[cfg(target_os = "linux")]
    if out_size >= 2 * 1024 * 1024 {
        unsafe {
            libc::madvise(
                output.as_mut_ptr() as *mut libc::c_void,
                out_size,
                libc::MADV_HUGEPAGE,
            );
        }
    }

    const GROUP_LINES: usize = 256;
    let group_input = GROUP_LINES * bytes_per_line;
    let temp_size = GROUP_LINES * wrap_col;
    let mut temp: Vec<u8> = Vec::with_capacity(temp_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        temp.set_len(temp_size);
    }

    let line_out = wrap_col + 1;
    let mut wp = 0usize;

    for chunk in data.chunks(group_input) {
        let clen = BASE64_ENGINE.encoded_length(chunk.len());
        let _ = BASE64_ENGINE.encode(chunk, temp[..clen].as_out());

        let lines = clen / wrap_col;
        let chunk_rem = clen % wrap_col;

        let mut i = 0;
        while i + 8 <= lines {
            unsafe {
                let src = temp.as_ptr().add(i * wrap_col);
                let dst = output.as_mut_ptr().add(wp);
                std::ptr::copy_nonoverlapping(src, dst, wrap_col);
                *dst.add(wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(src.add(wrap_col), dst.add(line_out), wrap_col);
                *dst.add(line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(2 * wrap_col),
                    dst.add(2 * line_out),
                    wrap_col,
                );
                *dst.add(2 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(3 * wrap_col),
                    dst.add(3 * line_out),
                    wrap_col,
                );
                *dst.add(3 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(4 * wrap_col),
                    dst.add(4 * line_out),
                    wrap_col,
                );
                *dst.add(4 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(5 * wrap_col),
                    dst.add(5 * line_out),
                    wrap_col,
                );
                *dst.add(5 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(6 * wrap_col),
                    dst.add(6 * line_out),
                    wrap_col,
                );
                *dst.add(6 * line_out + wrap_col) = b'\n';
                std::ptr::copy_nonoverlapping(
                    src.add(7 * wrap_col),
                    dst.add(7 * line_out),
                    wrap_col,
                );
                *dst.add(7 * line_out + wrap_col) = b'\n';
            }
            wp += 8 * line_out;
            i += 8;
        }
        while i < lines {
            unsafe {
                std::ptr::copy_nonoverlapping(
                    temp.as_ptr().add(i * wrap_col),
                    output.as_mut_ptr().add(wp),
                    wrap_col,
                );
                *output.as_mut_ptr().add(wp + wrap_col) = b'\n';
            }
            wp += line_out;
            i += 1;
        }
        if chunk_rem > 0 {
            unsafe {
                std::ptr::copy_nonoverlapping(
                    temp.as_ptr().add(lines * wrap_col),
                    output.as_mut_ptr().add(wp),
                    chunk_rem,
                );
                *output.as_mut_ptr().add(wp + chunk_rem) = b'\n';
            }
            wp += chunk_rem + 1;
        }
    }

    unsafe { output.set_len(wp) };
    output
}

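/// Copies `encoded` into `out_buf`, inserting a newline after every
/// `wrap_col` bytes; returns the number of bytes written.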
#[inline]
fn fuse_wrap(encoded: &[u8], wrap_col: usize, out_buf: &mut [u8]) -> usize {
    let line_out = wrap_col + 1;
    let mut rp = 0;
    let mut wp = 0;

    while rp + 8 * wrap_col <= encoded.len() {
        unsafe {
            let src = encoded.as_ptr().add(rp);
            let dst = out_buf.as_mut_ptr().add(wp);

            std::ptr::copy_nonoverlapping(src, dst, wrap_col);
            *dst.add(wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(wrap_col), dst.add(line_out), wrap_col);
            *dst.add(line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(2 * wrap_col), dst.add(2 * line_out), wrap_col);
            *dst.add(2 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(3 * wrap_col), dst.add(3 * line_out), wrap_col);
            *dst.add(3 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(4 * wrap_col), dst.add(4 * line_out), wrap_col);
            *dst.add(4 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(5 * wrap_col), dst.add(5 * line_out), wrap_col);
            *dst.add(5 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(6 * wrap_col), dst.add(6 * line_out), wrap_col);
            *dst.add(6 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(7 * wrap_col), dst.add(7 * line_out), wrap_col);
            *dst.add(7 * line_out + wrap_col) = b'\n';
        }
        rp += 8 * wrap_col;
        wp += 8 * line_out;
    }

    while rp + 4 * wrap_col <= encoded.len() {
        unsafe {
            let src = encoded.as_ptr().add(rp);
            let dst = out_buf.as_mut_ptr().add(wp);

            std::ptr::copy_nonoverlapping(src, dst, wrap_col);
            *dst.add(wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(wrap_col), dst.add(line_out), wrap_col);
            *dst.add(line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(2 * wrap_col), dst.add(2 * line_out), wrap_col);
            *dst.add(2 * line_out + wrap_col) = b'\n';

            std::ptr::copy_nonoverlapping(src.add(3 * wrap_col), dst.add(3 * line_out), wrap_col);
            *dst.add(3 * line_out + wrap_col) = b'\n';
        }
        rp += 4 * wrap_col;
        wp += 4 * line_out;
    }

    while rp + wrap_col <= encoded.len() {
        unsafe {
            std::ptr::copy_nonoverlapping(
                encoded.as_ptr().add(rp),
                out_buf.as_mut_ptr().add(wp),
                wrap_col,
            );
            *out_buf.as_mut_ptr().add(wp + wrap_col) = b'\n';
        }
        rp += wrap_col;
        wp += line_out;
    }

    if rp < encoded.len() {
        let remaining = encoded.len() - rp;
        unsafe {
            std::ptr::copy_nonoverlapping(
                encoded.as_ptr().add(rp),
                out_buf.as_mut_ptr().add(wp),
                remaining,
            );
        }
        wp += remaining;
        out_buf[wp] = b'\n';
        wp += 1;
    }

    wp
}

fn encode_wrapped_small(data: &[u8], wrap_col: usize, out: &mut impl Write) -> io::Result<()> {
    let enc_max = BASE64_ENGINE.encoded_length(data.len());
    let mut buf: Vec<u8> = Vec::with_capacity(enc_max);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(enc_max);
    }
    let encoded = BASE64_ENGINE.encode(data, buf[..enc_max].as_out());

    let wc = wrap_col.max(1);
    for line in encoded.chunks(wc) {
        out.write_all(line)?;
        out.write_all(b"\n")?;
    }
    Ok(())
}

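/// Decodes base64 from a borrowed buffer into `out`; when `ignore_garbage` is
/// set, bytes outside the base64 alphabet are dropped first, otherwise only
/// whitespace is stripped.
///
/// Illustrative sketch (the module path `b64` is an assumption):
///
/// ```ignore
/// let mut out = Vec::new();
/// b64::decode_to_writer(b"aGVsbG8=\n", false, &mut out).unwrap();
/// assert_eq!(out, b"hello");
/// ```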
pub fn decode_to_writer(data: &[u8], ignore_garbage: bool, out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if ignore_garbage {
        let mut cleaned = strip_non_base64(data);
        return decode_clean_slice(&mut cleaned, out);
    }

    if data.len() < 512 * 1024 && data.len() >= 77 {
        if let Some(result) = try_line_decode(data, out) {
            return result;
        }
    }

    decode_stripping_whitespace(data, out)
}

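/// Decodes a mutable (e.g. memory-mapped) buffer in place where possible,
/// compacting whitespace away before calling the in-place decoder.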
pub fn decode_mmap_inplace(
    data: &mut [u8],
    ignore_garbage: bool,
    out: &mut impl Write,
) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if !ignore_garbage && data.len() >= 77 && data.len() < 512 * 1024 {
        if let Some(result) = try_line_decode(data, out) {
            return result;
        }
    }

    if ignore_garbage {
        let ptr = data.as_mut_ptr();
        let len = data.len();
        let mut wp = 0;
        for rp in 0..len {
            let b = unsafe { *ptr.add(rp) };
            if is_base64_char(b) {
                unsafe { *ptr.add(wp) = b };
                wp += 1;
            }
        }
        match BASE64_ENGINE.decode_inplace(&mut data[..wp]) {
            Ok(decoded) => return out.write_all(decoded),
            Err(_) => return decode_error(),
        }
    }

    if data.len() >= 77 {
        if let Some(result) = try_decode_uniform_lines(data, out) {
            return result;
        }
    }

    if memchr::memchr2(b'\n', b'\r', data).is_none() {
        if !data
            .iter()
            .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c)
        {
            match BASE64_ENGINE.decode_inplace(data) {
                Ok(decoded) => return out.write_all(decoded),
                Err(_) => return decode_error(),
            }
        }
        let ptr = data.as_mut_ptr();
        let len = data.len();
        let mut wp = 0;
        for rp in 0..len {
            let b = unsafe { *ptr.add(rp) };
            if NOT_WHITESPACE[b as usize] {
                unsafe { *ptr.add(wp) = b };
                wp += 1;
            }
        }
        match BASE64_ENGINE.decode_inplace(&mut data[..wp]) {
            Ok(decoded) => return out.write_all(decoded),
            Err(_) => return decode_error(),
        }
    }

    let ptr = data.as_mut_ptr();
    let len = data.len();
    let mut wp = 0usize;
    let mut gap_start = 0usize;
    let mut has_rare_ws = false;

    for pos in memchr::memchr2_iter(b'\n', b'\r', data) {
        let gap_len = pos - gap_start;
        if gap_len > 0 {
            if !has_rare_ws {
                has_rare_ws = unsafe {
                    std::slice::from_raw_parts(ptr.add(gap_start), gap_len)
                        .iter()
                        .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c)
                };
            }
            if wp != gap_start {
                unsafe { std::ptr::copy(ptr.add(gap_start), ptr.add(wp), gap_len) };
            }
            wp += gap_len;
        }
        gap_start = pos + 1;
    }
    let tail_len = len - gap_start;
    if tail_len > 0 {
        if !has_rare_ws {
            has_rare_ws = unsafe {
                std::slice::from_raw_parts(ptr.add(gap_start), tail_len)
                    .iter()
                    .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c)
            };
        }
        if wp != gap_start {
            unsafe { std::ptr::copy(ptr.add(gap_start), ptr.add(wp), tail_len) };
        }
        wp += tail_len;
    }

    if has_rare_ws {
        let mut rp = 0;
        let mut cwp = 0;
        while rp < wp {
            let b = unsafe { *ptr.add(rp) };
            if NOT_WHITESPACE[b as usize] {
                unsafe { *ptr.add(cwp) = b };
                cwp += 1;
            }
            rp += 1;
        }
        wp = cwp;
    }

    if wp >= PARALLEL_DECODE_THRESHOLD {
        return decode_borrowed_clean_parallel(out, &data[..wp]);
    }
    match BASE64_ENGINE.decode_inplace(&mut data[..wp]) {
        Ok(decoded) => out.write_all(decoded),
        Err(_) => decode_error(),
    }
}

pub fn decode_owned(
    data: &mut Vec<u8>,
    ignore_garbage: bool,
    out: &mut impl Write,
) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }

    if ignore_garbage {
        data.retain(|&b| is_base64_char(b));
    } else {
        strip_whitespace_inplace(data);
    }

    decode_clean_slice(data, out)
}

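/// Removes ASCII whitespace from `data` in place; the common newline-only
/// case is handled with memchr scans before falling back to a full pass.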
fn strip_whitespace_inplace(data: &mut Vec<u8>) {
    if memchr::memchr2(b'\n', b'\r', data).is_none() {
        if data
            .iter()
            .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c)
        {
            data.retain(|&b| NOT_WHITESPACE[b as usize]);
        }
        return;
    }

    let ptr = data.as_mut_ptr();
    let len = data.len();
    let mut wp = 0usize;
    let mut gap_start = 0usize;
    let mut has_rare_ws = false;

    for pos in memchr::memchr2_iter(b'\n', b'\r', data.as_slice()) {
        let gap_len = pos - gap_start;
        if gap_len > 0 {
            if !has_rare_ws {
                has_rare_ws = data[gap_start..pos]
                    .iter()
                    .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c);
            }
            if wp != gap_start {
                unsafe {
                    std::ptr::copy(ptr.add(gap_start), ptr.add(wp), gap_len);
                }
            }
            wp += gap_len;
        }
        gap_start = pos + 1;
    }
    let tail_len = len - gap_start;
    if tail_len > 0 {
        if !has_rare_ws {
            has_rare_ws = data[gap_start..]
                .iter()
                .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c);
        }
        if wp != gap_start {
            unsafe {
                std::ptr::copy(ptr.add(gap_start), ptr.add(wp), tail_len);
            }
        }
        wp += tail_len;
    }

    data.truncate(wp);

    if has_rare_ws {
        let ptr = data.as_mut_ptr();
        let len = data.len();
        let mut rp = 0;
        let mut cwp = 0;
        while rp < len {
            let b = unsafe { *ptr.add(rp) };
            if NOT_WHITESPACE[b as usize] {
                unsafe { *ptr.add(cwp) = b };
                cwp += 1;
            }
            rp += 1;
        }
        data.truncate(cwp);
    }
}

static NOT_WHITESPACE: [bool; 256] = {
    let mut table = [true; 256];
    table[b' ' as usize] = false;
    table[b'\t' as usize] = false;
    table[b'\n' as usize] = false;
    table[b'\r' as usize] = false;
    table[0x0b] = false;
    table[0x0c] = false;
    table
};

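/// Fast path for input made of uniform-length, newline-terminated lines:
/// decodes line groups without materialising a fully stripped copy of the input.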
fn try_decode_uniform_lines(data: &[u8], out: &mut impl Write) -> Option<io::Result<()>> {
    let first_nl = memchr::memchr(b'\n', data)?;
    let line_len = first_nl;
    if line_len == 0 || line_len % 4 != 0 {
        return None;
    }

    let stride = line_len + 1;

    let check_lines = 4.min(data.len() / stride);
    for i in 1..check_lines {
        let expected_nl = i * stride - 1;
        if expected_nl >= data.len() || data[expected_nl] != b'\n' {
            return None;
        }
    }

    let full_lines = if data.len() >= stride {
        let candidate = data.len() / stride;
        if candidate > 0 && data[candidate * stride - 1] != b'\n' {
            return None;
        }
        candidate
    } else {
        0
    };

    let remainder_start = full_lines * stride;
    let remainder = &data[remainder_start..];
    let rem_clean = if remainder.last() == Some(&b'\n') {
        &remainder[..remainder.len() - 1]
    } else {
        remainder
    };

    let decoded_per_line = line_len * 3 / 4;
    let rem_decoded_size = if rem_clean.is_empty() {
        0
    } else {
        let pad = rem_clean
            .iter()
            .rev()
            .take(2)
            .filter(|&&b| b == b'=')
            .count();
        rem_clean.len() * 3 / 4 - pad
    };
    let total_decoded = full_lines * decoded_per_line + rem_decoded_size;
    let clean_len = full_lines * line_len;

    if clean_len >= PARALLEL_DECODE_THRESHOLD && num_cpus() > 1 {
        let mut output: Vec<u8> = Vec::with_capacity(total_decoded);
        #[allow(clippy::uninit_vec)]
        unsafe {
            output.set_len(total_decoded);
        }

        let out_ptr = output.as_mut_ptr() as usize;
        let src_ptr = data.as_ptr() as usize;
        let num_threads = num_cpus().max(1);
        let lines_per_thread = (full_lines + num_threads - 1) / num_threads;
        let lines_per_sub = (256 * 1024 / line_len).max(1);

        let result: Result<(), io::Error> = std::thread::scope(|s| {
            let handles: Vec<_> = (0..num_threads)
                .map(|t| {
                    s.spawn(move || -> Result<(), io::Error> {
                        let start_line = t * lines_per_thread;
                        if start_line >= full_lines {
                            return Ok(());
                        }
                        let end_line = (start_line + lines_per_thread).min(full_lines);
                        let chunk_lines = end_line - start_line;

                        let sub_buf_size = lines_per_sub.min(chunk_lines) * line_len;
                        let mut local_buf: Vec<u8> = Vec::with_capacity(sub_buf_size);
                        #[allow(clippy::uninit_vec)]
                        unsafe {
                            local_buf.set_len(sub_buf_size);
                        }

                        let src = src_ptr as *const u8;
                        let out_base = out_ptr as *mut u8;
                        let local_dst = local_buf.as_mut_ptr();

                        let mut sub_start = 0usize;
                        while sub_start < chunk_lines {
                            let sub_count = (chunk_lines - sub_start).min(lines_per_sub);
                            let sub_clean = sub_count * line_len;

                            for i in 0..sub_count {
                                unsafe {
                                    std::ptr::copy_nonoverlapping(
                                        src.add((start_line + sub_start + i) * stride),
                                        local_dst.add(i * line_len),
                                        line_len,
                                    );
                                }
                            }

                            let out_offset = (start_line + sub_start) * decoded_per_line;
                            let out_size = sub_count * decoded_per_line;
                            let out_slice = unsafe {
                                std::slice::from_raw_parts_mut(out_base.add(out_offset), out_size)
                            };
                            BASE64_ENGINE
                                .decode(&local_buf[..sub_clean], out_slice.as_out())
                                .map_err(|_| {
                                    io::Error::new(io::ErrorKind::InvalidData, "invalid input")
                                })?;

                            sub_start += sub_count;
                        }
                        Ok(())
                    })
                })
                .collect();
            for h in handles {
                h.join().unwrap()?;
            }
            Ok(())
        });

        if let Err(e) = result {
            return Some(Err(e));
        }

        if !rem_clean.is_empty() {
            let rem_out = &mut output[full_lines * decoded_per_line..total_decoded];
            match BASE64_ENGINE.decode(rem_clean, rem_out.as_out()) {
                Ok(_) => {}
                Err(_) => return Some(decode_error()),
            }
        }

        return Some(out.write_all(&output[..total_decoded]));
    }

    let lines_per_sub = (256 * 1024 / line_len).max(1);
    let sub_buf_size = lines_per_sub * line_len;
    let mut local_buf: Vec<u8> = Vec::with_capacity(sub_buf_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        local_buf.set_len(sub_buf_size);
    }

    let src = data.as_ptr();
    let local_dst = local_buf.as_mut_ptr();

    let mut line_idx = 0usize;
    while line_idx < full_lines {
        let sub_count = (full_lines - line_idx).min(lines_per_sub);
        let sub_clean = sub_count * line_len;

        for i in 0..sub_count {
            unsafe {
                std::ptr::copy_nonoverlapping(
                    src.add((line_idx + i) * stride),
                    local_dst.add(i * line_len),
                    line_len,
                );
            }
        }

        match BASE64_ENGINE.decode_inplace(&mut local_buf[..sub_clean]) {
            Ok(decoded) => {
                if let Err(e) = out.write_all(decoded) {
                    return Some(Err(e));
                }
            }
            Err(_) => return Some(decode_error()),
        }

        line_idx += sub_count;
    }

    if !rem_clean.is_empty() {
        let mut rem_buf = rem_clean.to_vec();
        match BASE64_ENGINE.decode_inplace(&mut rem_buf) {
            Ok(decoded) => {
                if let Err(e) = out.write_all(decoded) {
                    return Some(Err(e));
                }
            }
            Err(_) => return Some(decode_error()),
        }
    }

    Some(Ok(()))
}

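/// General decode path for borrowed input: strips newlines (and, rarely,
/// other whitespace) into a scratch buffer, then decodes it.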
fn decode_stripping_whitespace(data: &[u8], out: &mut impl Write) -> io::Result<()> {
    if data.len() >= 77 {
        if let Some(result) = try_decode_uniform_lines(data, out) {
            return result;
        }
    }

    if memchr::memchr2(b'\n', b'\r', data).is_none() {
        if !data
            .iter()
            .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c)
        {
            return decode_borrowed_clean(out, data);
        }
        let mut cleaned: Vec<u8> = Vec::with_capacity(data.len());
        for &b in data {
            if NOT_WHITESPACE[b as usize] {
                cleaned.push(b);
            }
        }
        return decode_clean_slice(&mut cleaned, out);
    }

    let mut clean: Vec<u8> = Vec::with_capacity(data.len());
    let dst = clean.as_mut_ptr();
    let mut wp = 0usize;
    let mut gap_start = 0usize;
    let mut has_rare_ws = false;

    for pos in memchr::memchr2_iter(b'\n', b'\r', data) {
        let gap_len = pos - gap_start;
        if gap_len > 0 {
            if !has_rare_ws {
                has_rare_ws = data[gap_start..pos]
                    .iter()
                    .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c);
            }
            unsafe {
                std::ptr::copy_nonoverlapping(data.as_ptr().add(gap_start), dst.add(wp), gap_len);
            }
            wp += gap_len;
        }
        gap_start = pos + 1;
    }
    let tail_len = data.len() - gap_start;
    if tail_len > 0 {
        if !has_rare_ws {
            has_rare_ws = data[gap_start..]
                .iter()
                .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c);
        }
        unsafe {
            std::ptr::copy_nonoverlapping(data.as_ptr().add(gap_start), dst.add(wp), tail_len);
        }
        wp += tail_len;
    }
    unsafe {
        clean.set_len(wp);
    }

    if has_rare_ws {
        let ptr = clean.as_mut_ptr();
        let len = clean.len();
        let mut rp = 0;
        let mut cwp = 0;
        while rp < len {
            let b = unsafe { *ptr.add(rp) };
            if NOT_WHITESPACE[b as usize] {
                unsafe { *ptr.add(cwp) = b };
                cwp += 1;
            }
            rp += 1;
        }
        clean.truncate(cwp);
    }

    if clean.len() >= PARALLEL_DECODE_THRESHOLD {
        decode_borrowed_clean_parallel(out, &clean)
    } else {
        decode_clean_slice(&mut clean, out)
    }
}

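/// Line-oriented decode for inputs with uniform line length: each line is
/// decoded directly into its slot of the preallocated output buffer; returns
/// `None` when the input does not have that shape.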
fn try_line_decode(data: &[u8], out: &mut impl Write) -> Option<io::Result<()>> {
    let first_nl = memchr::memchr(b'\n', data)?;
    let line_len = first_nl;
    if line_len == 0 || line_len % 4 != 0 {
        return None;
    }

    let line_stride = line_len + 1;
    let decoded_per_line = line_len * 3 / 4;

    let check_lines = 4.min(data.len() / line_stride);
    for i in 1..check_lines {
        let expected_nl = i * line_stride - 1;
        if expected_nl >= data.len() {
            break;
        }
        if data[expected_nl] != b'\n' {
            return None;
        }
    }

    let full_lines = if data.len() >= line_stride {
        let candidate = data.len() / line_stride;
        if candidate > 0 && data[candidate * line_stride - 1] != b'\n' {
            return None;
        }
        candidate
    } else {
        0
    };

    let remainder_start = full_lines * line_stride;
    let remainder = &data[remainder_start..];

    let remainder_clean_len = if remainder.is_empty() {
        0
    } else {
        let rem = if remainder.last() == Some(&b'\n') {
            &remainder[..remainder.len() - 1]
        } else {
            remainder
        };
        if rem.is_empty() {
            0
        } else {
            let pad = rem.iter().rev().take(2).filter(|&&b| b == b'=').count();
            if rem.len() % 4 != 0 {
                return None;
            }
            rem.len() * 3 / 4 - pad
        }
    };

    let total_decoded = full_lines * decoded_per_line + remainder_clean_len;
    let mut out_buf: Vec<u8> = Vec::with_capacity(total_decoded);
    #[allow(clippy::uninit_vec)]
    unsafe {
        out_buf.set_len(total_decoded);
    }

    let dst = out_buf.as_mut_ptr();

    if data.len() >= PARALLEL_DECODE_THRESHOLD && full_lines >= 64 {
        let out_addr = dst as usize;
        let num_threads = num_cpus().max(1);
        let lines_per_chunk = (full_lines / num_threads).max(1);

        let mut tasks: Vec<(usize, usize)> = Vec::new();
        let mut line_off = 0;
        while line_off < full_lines {
            let end = (line_off + lines_per_chunk).min(full_lines);
            tasks.push((line_off, end));
            line_off = end;
        }

        let decode_result: Result<(), io::Error> = std::thread::scope(|s| {
            let handles: Vec<_> = tasks
                .iter()
                .map(|&(start_line, end_line)| {
                    s.spawn(move || -> Result<(), io::Error> {
                        let out_ptr = out_addr as *mut u8;
                        let mut i = start_line;

                        while i + 4 <= end_line {
                            let in_base = i * line_stride;
                            let ob = i * decoded_per_line;
                            unsafe {
                                let s0 = std::slice::from_raw_parts_mut(
                                    out_ptr.add(ob),
                                    decoded_per_line,
                                );
                                if BASE64_ENGINE
                                    .decode(&data[in_base..in_base + line_len], s0.as_out())
                                    .is_err()
                                {
                                    return Err(io::Error::new(
                                        io::ErrorKind::InvalidData,
                                        "invalid input",
                                    ));
                                }
                                let s1 = std::slice::from_raw_parts_mut(
                                    out_ptr.add(ob + decoded_per_line),
                                    decoded_per_line,
                                );
                                if BASE64_ENGINE
                                    .decode(
                                        &data[in_base + line_stride
                                            ..in_base + line_stride + line_len],
                                        s1.as_out(),
                                    )
                                    .is_err()
                                {
                                    return Err(io::Error::new(
                                        io::ErrorKind::InvalidData,
                                        "invalid input",
                                    ));
                                }
                                let s2 = std::slice::from_raw_parts_mut(
                                    out_ptr.add(ob + 2 * decoded_per_line),
                                    decoded_per_line,
                                );
                                if BASE64_ENGINE
                                    .decode(
                                        &data[in_base + 2 * line_stride
                                            ..in_base + 2 * line_stride + line_len],
                                        s2.as_out(),
                                    )
                                    .is_err()
                                {
                                    return Err(io::Error::new(
                                        io::ErrorKind::InvalidData,
                                        "invalid input",
                                    ));
                                }
                                let s3 = std::slice::from_raw_parts_mut(
                                    out_ptr.add(ob + 3 * decoded_per_line),
                                    decoded_per_line,
                                );
                                if BASE64_ENGINE
                                    .decode(
                                        &data[in_base + 3 * line_stride
                                            ..in_base + 3 * line_stride + line_len],
                                        s3.as_out(),
                                    )
                                    .is_err()
                                {
                                    return Err(io::Error::new(
                                        io::ErrorKind::InvalidData,
                                        "invalid input",
                                    ));
                                }
                            }
                            i += 4;
                        }

                        while i < end_line {
                            let in_start = i * line_stride;
                            let out_off = i * decoded_per_line;
                            let out_slice = unsafe {
                                std::slice::from_raw_parts_mut(
                                    out_ptr.add(out_off),
                                    decoded_per_line,
                                )
                            };
                            if BASE64_ENGINE
                                .decode(&data[in_start..in_start + line_len], out_slice.as_out())
                                .is_err()
                            {
                                return Err(io::Error::new(
                                    io::ErrorKind::InvalidData,
                                    "invalid input",
                                ));
                            }
                            i += 1;
                        }

                        Ok(())
                    })
                })
                .collect();
            for h in handles {
                h.join().unwrap()?;
            }
            Ok(())
        });

        if decode_result.is_err() {
            return Some(decode_error());
        }
    } else {
        let mut i = 0;

        while i + 4 <= full_lines {
            let in_base = i * line_stride;
            let out_base = i * decoded_per_line;
            unsafe {
                let s0 = std::slice::from_raw_parts_mut(dst.add(out_base), decoded_per_line);
                if BASE64_ENGINE
                    .decode(&data[in_base..in_base + line_len], s0.as_out())
                    .is_err()
                {
                    return Some(decode_error());
                }

                let s1 = std::slice::from_raw_parts_mut(
                    dst.add(out_base + decoded_per_line),
                    decoded_per_line,
                );
                if BASE64_ENGINE
                    .decode(
                        &data[in_base + line_stride..in_base + line_stride + line_len],
                        s1.as_out(),
                    )
                    .is_err()
                {
                    return Some(decode_error());
                }

                let s2 = std::slice::from_raw_parts_mut(
                    dst.add(out_base + 2 * decoded_per_line),
                    decoded_per_line,
                );
                if BASE64_ENGINE
                    .decode(
                        &data[in_base + 2 * line_stride..in_base + 2 * line_stride + line_len],
                        s2.as_out(),
                    )
                    .is_err()
                {
                    return Some(decode_error());
                }

                let s3 = std::slice::from_raw_parts_mut(
                    dst.add(out_base + 3 * decoded_per_line),
                    decoded_per_line,
                );
                if BASE64_ENGINE
                    .decode(
                        &data[in_base + 3 * line_stride..in_base + 3 * line_stride + line_len],
                        s3.as_out(),
                    )
                    .is_err()
                {
                    return Some(decode_error());
                }
            }
            i += 4;
        }

        while i < full_lines {
            let in_start = i * line_stride;
            let in_end = in_start + line_len;
            let out_off = i * decoded_per_line;
            let out_slice =
                unsafe { std::slice::from_raw_parts_mut(dst.add(out_off), decoded_per_line) };
            match BASE64_ENGINE.decode(&data[in_start..in_end], out_slice.as_out()) {
                Ok(_) => {}
                Err(_) => return Some(decode_error()),
            }
            i += 1;
        }
    }

    if remainder_clean_len > 0 {
        let rem = if remainder.last() == Some(&b'\n') {
            &remainder[..remainder.len() - 1]
        } else {
            remainder
        };
        let out_off = full_lines * decoded_per_line;
        let out_slice =
            unsafe { std::slice::from_raw_parts_mut(dst.add(out_off), remainder_clean_len) };
        match BASE64_ENGINE.decode(rem, out_slice.as_out()) {
            Ok(_) => {}
            Err(_) => return Some(decode_error()),
        }
    }

    Some(out.write_all(&out_buf[..total_decoded]))
}

fn decode_clean_slice(data: &mut [u8], out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }
    match BASE64_ENGINE.decode_inplace(data) {
        Ok(decoded) => out.write_all(decoded),
        Err(_) => decode_error(),
    }
}

#[cold]
#[inline(never)]
fn decode_error() -> io::Result<()> {
    Err(io::Error::new(io::ErrorKind::InvalidData, "invalid input"))
}

fn decode_borrowed_clean(out: &mut impl Write, data: &[u8]) -> io::Result<()> {
    if data.is_empty() {
        return Ok(());
    }
    if data.len() >= PARALLEL_DECODE_THRESHOLD {
        return decode_borrowed_clean_parallel(out, data);
    }
    let pad = data.iter().rev().take(2).filter(|&&b| b == b'=').count();
    let decoded_size = data.len() * 3 / 4 - pad;
    let mut buf: Vec<u8> = Vec::with_capacity(decoded_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(decoded_size);
    }
    match BASE64_ENGINE.decode(data, buf[..decoded_size].as_out()) {
        Ok(decoded) => {
            out.write_all(decoded)?;
            Ok(())
        }
        Err(_) => decode_error(),
    }
}

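/// Splits clean (whitespace-free) input into 4-byte-aligned chunks and decodes
/// them on multiple threads into one preallocated output buffer.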
fn decode_borrowed_clean_parallel(out: &mut impl Write, data: &[u8]) -> io::Result<()> {
    let num_threads = num_cpus().max(1);
    let raw_chunk = data.len() / num_threads;
    let chunk_size = ((raw_chunk + 3) / 4) * 4;

    let chunks: Vec<&[u8]> = data.chunks(chunk_size.max(4)).collect();

    let mut offsets: Vec<usize> = Vec::with_capacity(chunks.len() + 1);
    offsets.push(0);
    let mut total_decoded = 0usize;
    for (i, chunk) in chunks.iter().enumerate() {
        let decoded_size = if i == chunks.len() - 1 {
            let pad = chunk.iter().rev().take(2).filter(|&&b| b == b'=').count();
            chunk.len() * 3 / 4 - pad
        } else {
            chunk.len() * 3 / 4
        };
        total_decoded += decoded_size;
        offsets.push(total_decoded);
    }

    let mut output_buf: Vec<u8> = Vec::with_capacity(total_decoded);
    #[allow(clippy::uninit_vec)]
    unsafe {
        output_buf.set_len(total_decoded);
    }

    let out_addr = output_buf.as_mut_ptr() as usize;
    let decode_result: Result<(), io::Error> = std::thread::scope(|s| {
        let handles: Vec<_> = chunks
            .iter()
            .enumerate()
            .map(|(i, chunk)| {
                let offset = offsets[i];
                let expected_size = offsets[i + 1] - offset;
                s.spawn(move || -> Result<(), io::Error> {
                    let out_slice = unsafe {
                        std::slice::from_raw_parts_mut(
                            (out_addr as *mut u8).add(offset),
                            expected_size,
                        )
                    };
                    let decoded = BASE64_ENGINE
                        .decode(chunk, out_slice.as_out())
                        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid input"))?;
                    debug_assert_eq!(decoded.len(), expected_size);
                    Ok(())
                })
            })
            .collect();
        for h in handles {
            h.join().unwrap()?;
        }
        Ok(())
    });

    decode_result?;

    out.write_all(&output_buf[..total_decoded])
}

fn strip_non_base64(data: &[u8]) -> Vec<u8> {
    data.iter()
        .copied()
        .filter(|&b| is_base64_char(b))
        .collect()
}

#[inline]
fn is_base64_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'+' || b == b'/' || b == b'='
}

pub fn encode_stream(
    reader: &mut impl Read,
    wrap_col: usize,
    writer: &mut impl Write,
) -> io::Result<()> {
    if wrap_col == 0 {
        return encode_stream_nowrap(reader, writer);
    }
    encode_stream_wrapped(reader, wrap_col, writer)
}

fn encode_stream_nowrap(reader: &mut impl Read, writer: &mut impl Write) -> io::Result<()> {
    const NOWRAP_READ: usize = 24 * 1024 * 1024;

    let mut buf: Vec<u8> = Vec::with_capacity(NOWRAP_READ);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(NOWRAP_READ);
    }
    let encode_buf_size = BASE64_ENGINE.encoded_length(NOWRAP_READ);
    let mut encode_buf: Vec<u8> = Vec::with_capacity(encode_buf_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        encode_buf.set_len(encode_buf_size);
    }

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let enc_len = BASE64_ENGINE.encoded_length(n);
        let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());
        writer.write_all(encoded)?;
    }
    Ok(())
}

fn encode_stream_wrapped(
    reader: &mut impl Read,
    wrap_col: usize,
    writer: &mut impl Write,
) -> io::Result<()> {
    let bytes_per_line = wrap_col * 3 / 4;
    if bytes_per_line > 0 && bytes_per_line.is_multiple_of(3) {
        return encode_stream_wrapped_fused(reader, wrap_col, bytes_per_line, writer);
    }

    const STREAM_READ: usize = 12 * 1024 * 1024;
    let mut buf: Vec<u8> = Vec::with_capacity(STREAM_READ);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(STREAM_READ);
    }
    let encode_buf_size = BASE64_ENGINE.encoded_length(STREAM_READ);
    let mut encode_buf: Vec<u8> = Vec::with_capacity(encode_buf_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        encode_buf.set_len(encode_buf_size);
    }

    let mut col = 0usize;

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }
        let enc_len = BASE64_ENGINE.encoded_length(n);
        let encoded = BASE64_ENGINE.encode(&buf[..n], encode_buf[..enc_len].as_out());

        write_wrapped_iov_streaming(encoded, wrap_col, &mut col, writer)?;
    }

    if col > 0 {
        writer.write_all(b"\n")?;
    }

    Ok(())
}

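/// Streaming wrapped encoding for wrap columns that correspond to a whole
/// number of input bytes per line: each line is encoded straight into the
/// output buffer and followed by its newline.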
fn encode_stream_wrapped_fused(
    reader: &mut impl Read,
    wrap_col: usize,
    bytes_per_line: usize,
    writer: &mut impl Write,
) -> io::Result<()> {
    let lines_per_chunk = (24 * 1024 * 1024) / bytes_per_line;
    let read_size = lines_per_chunk * bytes_per_line;
    let line_out = wrap_col + 1;

    let mut buf: Vec<u8> = Vec::with_capacity(read_size);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(read_size);
    }
    let max_output = lines_per_chunk * line_out + BASE64_ENGINE.encoded_length(bytes_per_line) + 2;
    let mut out_buf: Vec<u8> = Vec::with_capacity(max_output);
    #[allow(clippy::uninit_vec)]
    unsafe {
        out_buf.set_len(max_output);
    }

    loop {
        let n = read_full(reader, &mut buf)?;
        if n == 0 {
            break;
        }

        let full_lines = n / bytes_per_line;
        let remainder = n % bytes_per_line;

        let dst = out_buf.as_mut_ptr();
        let mut line_idx = 0;

        while line_idx + 4 <= full_lines {
            let in_base = line_idx * bytes_per_line;
            let out_base = line_idx * line_out;
            unsafe {
                let s0 = std::slice::from_raw_parts_mut(dst.add(out_base), wrap_col);
                let _ = BASE64_ENGINE.encode(&buf[in_base..in_base + bytes_per_line], s0.as_out());
                *dst.add(out_base + wrap_col) = b'\n';

                let s1 = std::slice::from_raw_parts_mut(dst.add(out_base + line_out), wrap_col);
                let _ = BASE64_ENGINE.encode(
                    &buf[in_base + bytes_per_line..in_base + 2 * bytes_per_line],
                    s1.as_out(),
                );
                *dst.add(out_base + line_out + wrap_col) = b'\n';

                let s2 = std::slice::from_raw_parts_mut(dst.add(out_base + 2 * line_out), wrap_col);
                let _ = BASE64_ENGINE.encode(
                    &buf[in_base + 2 * bytes_per_line..in_base + 3 * bytes_per_line],
                    s2.as_out(),
                );
                *dst.add(out_base + 2 * line_out + wrap_col) = b'\n';

                let s3 = std::slice::from_raw_parts_mut(dst.add(out_base + 3 * line_out), wrap_col);
                let _ = BASE64_ENGINE.encode(
                    &buf[in_base + 3 * bytes_per_line..in_base + 4 * bytes_per_line],
                    s3.as_out(),
                );
                *dst.add(out_base + 3 * line_out + wrap_col) = b'\n';
            }
            line_idx += 4;
        }

        while line_idx < full_lines {
            let in_base = line_idx * bytes_per_line;
            let out_base = line_idx * line_out;
            unsafe {
                let s = std::slice::from_raw_parts_mut(dst.add(out_base), wrap_col);
                let _ = BASE64_ENGINE.encode(&buf[in_base..in_base + bytes_per_line], s.as_out());
                *dst.add(out_base + wrap_col) = b'\n';
            }
            line_idx += 1;
        }

        let mut wp = full_lines * line_out;

        if remainder > 0 {
            let enc_len = BASE64_ENGINE.encoded_length(remainder);
            let line_input = &buf[full_lines * bytes_per_line..n];
            unsafe {
                let s = std::slice::from_raw_parts_mut(dst.add(wp), enc_len);
                let _ = BASE64_ENGINE.encode(line_input, s.as_out());
                *dst.add(wp + enc_len) = b'\n';
            }
            wp += enc_len + 1;
        }

        writer.write_all(&out_buf[..wp])?;
    }

    Ok(())
}

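/// Streaming decode: reads large chunks, strips whitespace in place, decodes
/// whole 4-byte groups, and carries at most 3 leftover bytes to the next read.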
pub fn decode_stream(
    reader: &mut impl Read,
    ignore_garbage: bool,
    writer: &mut impl Write,
) -> io::Result<()> {
    const READ_CHUNK: usize = 32 * 1024 * 1024;
    let mut buf: Vec<u8> = Vec::with_capacity(READ_CHUNK + 4);
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(READ_CHUNK + 4);
    }
    let mut carry = [0u8; 4];
    let mut carry_len = 0usize;

    loop {
        if carry_len > 0 {
            unsafe {
                std::ptr::copy_nonoverlapping(carry.as_ptr(), buf.as_mut_ptr(), carry_len);
            }
        }
        let n = read_full(reader, &mut buf[carry_len..carry_len + READ_CHUNK])?;
        if n == 0 {
            break;
        }
        let total_raw = carry_len + n;

        let clean_len = if ignore_garbage {
            let ptr = buf.as_mut_ptr();
            let mut wp = 0usize;
            for i in 0..total_raw {
                let b = unsafe { *ptr.add(i) };
                if is_base64_char(b) {
                    unsafe { *ptr.add(wp) = b };
                    wp += 1;
                }
            }
            wp
        } else {
            let ptr = buf.as_mut_ptr();
            let data = &buf[..total_raw];
            let mut wp = 0usize;
            let mut gap_start = 0usize;
            let mut has_rare_ws = false;

            for pos in memchr::memchr2_iter(b'\n', b'\r', data) {
                let gap_len = pos - gap_start;
                if gap_len > 0 {
                    if !has_rare_ws {
                        has_rare_ws = data[gap_start..pos]
                            .iter()
                            .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c);
                    }
                    if wp != gap_start {
                        unsafe {
                            std::ptr::copy(ptr.add(gap_start), ptr.add(wp), gap_len);
                        }
                    }
                    wp += gap_len;
                }
                gap_start = pos + 1;
            }
            let tail_len = total_raw - gap_start;
            if tail_len > 0 {
                if !has_rare_ws {
                    has_rare_ws = data[gap_start..total_raw]
                        .iter()
                        .any(|&b| b == b' ' || b == b'\t' || b == 0x0b || b == 0x0c);
                }
                if wp != gap_start {
                    unsafe {
                        std::ptr::copy(ptr.add(gap_start), ptr.add(wp), tail_len);
                    }
                }
                wp += tail_len;
            }

            if has_rare_ws {
                let mut rp = 0;
                let mut cwp = 0;
                while rp < wp {
                    let b = unsafe { *ptr.add(rp) };
                    if NOT_WHITESPACE[b as usize] {
                        unsafe { *ptr.add(cwp) = b };
                        cwp += 1;
                    }
                    rp += 1;
                }
                cwp
            } else {
                wp
            }
        };

        carry_len = 0;
        let is_last = n < READ_CHUNK;

        if is_last {
            decode_clean_slice(&mut buf[..clean_len], writer)?;
        } else {
            let decode_len = (clean_len / 4) * 4;
            let leftover = clean_len - decode_len;
            if leftover > 0 {
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        buf.as_ptr().add(decode_len),
                        carry.as_mut_ptr(),
                        leftover,
                    );
                }
                carry_len = leftover;
            }
            if decode_len > 0 {
                decode_clean_slice(&mut buf[..decode_len], writer)?;
            }
        }
    }

    if carry_len > 0 {
        let mut carry_buf = carry[..carry_len].to_vec();
        decode_clean_slice(&mut carry_buf, writer)?;
    }

    Ok(())
}

#[inline(always)]
fn write_all_vectored(out: &mut impl Write, slices: &[io::IoSlice]) -> io::Result<()> {
    if slices.is_empty() {
        return Ok(());
    }
    let total: usize = slices.iter().map(|s| s.len()).sum();
    let written = out.write_vectored(slices)?;
    if written >= total {
        return Ok(());
    }
    if written == 0 {
        return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
    }
    write_all_vectored_slow(out, slices, written)
}

#[cold]
#[inline(never)]
fn write_all_vectored_slow(
    out: &mut impl Write,
    slices: &[io::IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        let len = slice.len();
        if skip >= len {
            skip -= len;
            continue;
        }
        out.write_all(&slice[skip..])?;
        skip = 0;
    }
    Ok(())
}

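/// Reads until `buf` is full or EOF, retrying on `Interrupted`; returns the
/// number of bytes read.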
#[inline]
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let n = reader.read(buf)?;
    if n == buf.len() || n == 0 {
        return Ok(n);
    }
    let mut total = n;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}