1use std::io::Write;
2
/// Options controlling how input files are merged.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PasteConfig {
    /// Delimiter bytes placed between fields; the list is cycled through
    /// when there are more field boundaries than delimiters. An empty list
    /// means fields are joined with nothing in between.
    pub delimiters: Vec<u8>,
    /// When `true`, each file is pasted onto a single output line (serial
    /// mode) instead of merging corresponding lines across files.
    pub serial: bool,
    /// When `true`, lines are terminated by a NUL byte instead of `\n`.
    pub zero_terminated: bool,
}

impl Default for PasteConfig {
    /// Returns the default configuration: a single TAB delimiter, parallel
    /// mode, newline-terminated lines.
    fn default() -> Self {
        Self {
            delimiters: vec![b'\t'],
            serial: false,
            zero_terminated: false,
        }
    }
}
22
/// Expands a delimiter specification into the list of delimiter bytes.
///
/// The two-byte escapes `\n`, `\t`, `\\` and `\0` become newline, TAB,
/// backslash and NUL respectively. A backslash followed by any other byte,
/// or a backslash at the very end of the input, is kept as a literal
/// backslash (the following byte, if any, is then processed normally).
/// All other bytes are copied through unchanged.
pub fn parse_delimiters(s: &str) -> Vec<u8> {
    let mut out = Vec::with_capacity(s.len());
    let mut rest = s.as_bytes();

    while let Some((&head, tail)) = rest.split_first() {
        // Only a backslash can start an escape; look one byte ahead.
        let escape = if head == b'\\' {
            match tail.first().copied() {
                Some(b'n') => Some(b'\n'),
                Some(b't') => Some(b'\t'),
                Some(b'\\') => Some(b'\\'),
                Some(b'0') => Some(0u8),
                _ => None,
            }
        } else {
            None
        };

        match escape {
            Some(decoded) => {
                out.push(decoded);
                // Consume both the backslash and the escape letter.
                rest = &tail[1..];
            }
            None => {
                out.push(head);
                rest = tail;
            }
        }
    }

    out
}
64
/// Output buffer threshold in bytes (2 MiB): the streaming writers flush
/// their buffer once it holds at least this much data.
const BUF_SIZE: usize = 2 << 20;
67
68#[cfg(unix)]
70pub fn raw_write_all(data: &[u8]) -> std::io::Result<()> {
71 let mut written = 0;
72 while written < data.len() {
73 let ret = unsafe {
74 libc::write(
75 1,
76 data[written..].as_ptr() as *const libc::c_void,
77 (data.len() - written) as _,
78 )
79 };
80 if ret > 0 {
81 written += ret as usize;
82 } else if ret == 0 {
83 return Err(std::io::Error::new(
84 std::io::ErrorKind::WriteZero,
85 "write returned 0",
86 ));
87 } else {
88 let err = std::io::Error::last_os_error();
89 if err.kind() == std::io::ErrorKind::Interrupted {
90 continue;
91 }
92 return Err(err);
93 }
94 }
95 Ok(())
96}
97
/// Writes all of `data` to standard output and flushes it.
///
/// Used on non-Unix targets, where the write goes through the locked
/// `std::io::Stdout` handle.
#[cfg(not(unix))]
pub fn raw_write_all(data: &[u8]) -> std::io::Result<()> {
    let mut out = std::io::stdout().lock();
    out.write_all(data).and_then(|()| out.flush())
}
105
106pub fn paste_parallel_stream(file_data: &[&[u8]], config: &PasteConfig) -> std::io::Result<()> {
110 let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
111 let delims = &config.delimiters;
112 let has_delims = !delims.is_empty();
113 let nfiles = file_data.len();
114
115 if nfiles == 0 || file_data.iter().all(|d| d.is_empty()) {
116 return Ok(());
117 }
118
119 if nfiles == 1 {
121 let data = file_data[0];
122 if data.is_empty() {
123 return Ok(());
124 }
125 if *data.last().unwrap() == terminator {
126 return raw_write_all(data);
127 }
128 raw_write_all(data)?;
129 return raw_write_all(&[terminator]);
130 }
131
132 if nfiles == 2 && delims.len() == 1 {
134 return paste_two_files_streaming(file_data[0], file_data[1], delims[0], terminator);
135 }
136
137 paste_n_files_streaming(file_data, delims, has_delims, nfiles, terminator)
139}
140
141fn paste_two_files_streaming(
146 data_a: &[u8],
147 data_b: &[u8],
148 delim: u8,
149 terminator: u8,
150) -> std::io::Result<()> {
151 if data_a.is_empty() && data_b.is_empty() {
152 return Ok(());
153 }
154
155 let ptr_a = data_a.as_ptr();
156 let ptr_b = data_b.as_ptr();
157 let len_a = data_a.len();
158 let len_b = data_b.len();
159
160 let buf_cap = BUF_SIZE;
161 let mut buf: Vec<u8> = Vec::with_capacity(buf_cap + 65536);
162 let mut pos: usize = 0;
163
164 let mut iter_a = memchr::memchr_iter(terminator, data_a);
166 let mut iter_b = memchr::memchr_iter(terminator, data_b);
167
168 let mut cur_a: usize = 0; let mut cur_b: usize = 0; let mut done_a = len_a == 0;
171 let mut done_b = len_b == 0;
172
173 while !done_a || !done_b {
174 let (a_start, a_len, a_has_line) = if !done_a {
176 match iter_a.next() {
177 Some(nl_pos) => {
178 let start = cur_a;
179 let line_len = nl_pos - cur_a;
180 cur_a = nl_pos + 1;
181 (start, line_len, true)
182 }
183 None => {
184 done_a = true;
186 if cur_a < len_a {
187 let start = cur_a;
188 let line_len = len_a - cur_a;
189 cur_a = len_a;
190 (start, line_len, true)
191 } else {
192 (0, 0, false)
193 }
194 }
195 }
196 } else {
197 (0, 0, false)
198 };
199
200 let (b_start, b_len, b_has_line) = if !done_b {
202 match iter_b.next() {
203 Some(nl_pos) => {
204 let start = cur_b;
205 let line_len = nl_pos - cur_b;
206 cur_b = nl_pos + 1;
207 (start, line_len, true)
208 }
209 None => {
210 done_b = true;
211 if cur_b < len_b {
212 let start = cur_b;
213 let line_len = len_b - cur_b;
214 cur_b = len_b;
215 (start, line_len, true)
216 } else {
217 (0, 0, false)
218 }
219 }
220 }
221 } else {
222 (0, 0, false)
223 };
224
225 if !a_has_line && !b_has_line {
227 break;
228 }
229
230 debug_assert!(a_start + a_len <= len_a, "a out of bounds");
231 debug_assert!(b_start + b_len <= len_b, "b out of bounds");
232 debug_assert!(a_len < isize::MAX as usize && b_len < isize::MAX as usize);
235 debug_assert!(
236 a_len
237 .checked_add(b_len)
238 .and_then(|x| x.checked_add(2))
239 .is_some()
240 );
241 let out_len = a_len + b_len + 2;
242
243 if pos + out_len > buf.capacity() {
245 unsafe { buf.set_len(pos) };
246 raw_write_all(&buf)?;
247 buf.clear();
248 pos = 0;
249 if out_len > buf.capacity() {
250 buf.reserve(out_len);
251 }
252 }
253
254 unsafe {
256 let base = buf.as_mut_ptr();
257 if a_len > 0 {
258 std::ptr::copy_nonoverlapping(ptr_a.add(a_start), base.add(pos), a_len);
259 pos += a_len;
260 }
261 *base.add(pos) = delim;
262 pos += 1;
263 if b_len > 0 {
264 std::ptr::copy_nonoverlapping(ptr_b.add(b_start), base.add(pos), b_len);
265 pos += b_len;
266 }
267 *base.add(pos) = terminator;
268 pos += 1;
269 }
270
271 if pos >= buf_cap {
273 unsafe { buf.set_len(pos) };
274 raw_write_all(&buf)?;
275 buf.clear();
276 pos = 0;
277 }
278 }
279
280 if pos > 0 {
282 unsafe { buf.set_len(pos) };
283 raw_write_all(&buf)?;
284 }
285
286 Ok(())
287}
288
289fn paste_n_files_streaming(
292 file_data: &[&[u8]],
293 delims: &[u8],
294 has_delims: bool,
295 nfiles: usize,
296 terminator: u8,
297) -> std::io::Result<()> {
298 if nfiles > 65536 {
303 return Err(std::io::Error::other("too many files"));
304 }
305
306 let mut cursors: Vec<usize> = vec![0; nfiles];
307 let mut done: Vec<bool> = file_data.iter().map(|d| d.is_empty()).collect();
308 let mut files_remaining = done.iter().filter(|&&d| !d).count();
309
310 let buf_cap = BUF_SIZE;
311 let mut buf: Vec<u8> = Vec::with_capacity(buf_cap + 65536);
312 let mut pos: usize = 0;
313
314 let mut iters: Vec<memchr::Memchr<'_>> = file_data
316 .iter()
317 .map(|d| memchr::memchr_iter(terminator, d))
318 .collect();
319
320 while files_remaining > 0 {
321 debug_assert!(
331 pos < buf_cap,
332 "saved_pos invariant: pos must be < buf_cap at iteration start"
333 );
334 let saved_pos = pos;
335 let mut any_iter_advanced = false;
336
337 for file_idx in 0..nfiles {
338 if file_idx > 0 && has_delims {
340 let d = unsafe { *delims.get_unchecked((file_idx - 1) % delims.len()) };
342 debug_assert!(
347 pos < buf.capacity(),
348 "delimiter flush should be unreachable under nfiles invariant"
349 );
350 if pos >= buf.capacity() {
351 unsafe { buf.set_len(pos) };
352 raw_write_all(&buf)?;
353 buf.clear();
354 pos = 0;
355 }
356 unsafe { *buf.as_mut_ptr().add(pos) = d };
357 pos += 1;
358 }
359
360 if !done[file_idx] {
361 let data = file_data[file_idx];
362 let cur = cursors[file_idx];
363
364 match iters[file_idx].next() {
365 Some(nl_pos) => {
366 let line_len = nl_pos - cur;
367 any_iter_advanced = true;
368 if line_len > 0 {
369 if pos + line_len > buf.capacity() {
370 unsafe { buf.set_len(pos) };
371 raw_write_all(&buf)?;
372 buf.clear();
373 pos = 0;
374 if line_len > buf.capacity() {
375 buf.reserve(line_len + 4096);
376 }
377 }
378 unsafe {
379 std::ptr::copy_nonoverlapping(
380 data.as_ptr().add(cur),
381 buf.as_mut_ptr().add(pos),
382 line_len,
383 );
384 }
385 pos += line_len;
386 }
387 cursors[file_idx] = nl_pos + 1;
388 }
389 None => {
390 let rem = data.len() - cur;
392 if rem > 0 {
393 any_iter_advanced = true;
394 if pos + rem > buf.capacity() {
395 unsafe { buf.set_len(pos) };
396 raw_write_all(&buf)?;
397 buf.clear();
398 pos = 0;
399 if rem > buf.capacity() {
400 buf.reserve(rem + 4096);
401 }
402 }
403 unsafe {
404 std::ptr::copy_nonoverlapping(
405 data.as_ptr().add(cur),
406 buf.as_mut_ptr().add(pos),
407 rem,
408 );
409 }
410 pos += rem;
411 }
412 done[file_idx] = true;
413 files_remaining -= 1;
414 cursors[file_idx] = data.len();
415 }
416 }
417 }
418 }
419
420 if !any_iter_advanced {
421 debug_assert_eq!(files_remaining, 0);
427 pos = saved_pos;
428 break;
429 }
430
431 if pos >= buf.capacity() {
433 unsafe { buf.set_len(pos) };
434 raw_write_all(&buf)?;
435 buf.clear();
436 pos = 0;
437 }
438 unsafe { *buf.as_mut_ptr().add(pos) = terminator };
439 pos += 1;
440
441 if pos >= buf_cap {
443 unsafe { buf.set_len(pos) };
444 raw_write_all(&buf)?;
445 buf.clear();
446 pos = 0;
447 }
448 }
449
450 if pos > 0 {
452 unsafe { buf.set_len(pos) };
453 raw_write_all(&buf)?;
454 }
455
456 Ok(())
457}
458
459pub fn paste_serial_stream(file_data: &[&[u8]], config: &PasteConfig) -> std::io::Result<()> {
462 let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
463 let delims = &config.delimiters;
464 let has_delims = !delims.is_empty();
465
466 if has_delims && delims.len() == 1 {
471 let replacement = delims[0];
472 let needs_replace = replacement != terminator;
473 let mut buf: Vec<u8> = Vec::with_capacity(BUF_SIZE + 4096);
474
475 for data in file_data {
476 if data.is_empty() {
477 buf.push(terminator);
478 if buf.len() >= BUF_SIZE {
479 raw_write_all(&buf)?;
480 buf.clear();
481 }
482 continue;
483 }
484
485 let effective = if data.last() == Some(&terminator) {
487 &data[..data.len() - 1]
488 } else {
489 data
490 };
491
492 let mut cursor = 0usize;
494 while cursor < effective.len() {
495 let chunk_end = (cursor + BUF_SIZE).min(effective.len());
496 let chunk = &effective[cursor..chunk_end];
497 let start = buf.len();
498 buf.extend_from_slice(chunk);
499 if needs_replace {
501 for pos in memchr::memchr_iter(terminator, chunk) {
502 buf[start + pos] = replacement;
503 }
504 }
505 cursor = chunk_end;
506
507 if buf.len() >= BUF_SIZE {
508 raw_write_all(&buf)?;
509 buf.clear();
510 }
511 }
512
513 buf.push(terminator);
514 if buf.len() >= BUF_SIZE {
515 raw_write_all(&buf)?;
516 buf.clear();
517 }
518 }
519
520 if !buf.is_empty() {
521 raw_write_all(&buf)?;
522 }
523 return Ok(());
524 }
525
526 let mut buf: Vec<u8> = Vec::with_capacity(BUF_SIZE + 4096);
527
528 for data in file_data {
529 if data.is_empty() {
530 buf.push(terminator);
531 if buf.len() >= BUF_SIZE {
532 raw_write_all(&buf)?;
533 buf.clear();
534 }
535 continue;
536 }
537
538 let mut cursor = 0usize;
539 let mut line_idx = 0usize;
540 let mut iter = memchr::memchr_iter(terminator, data);
541
542 loop {
543 if line_idx > 0 && has_delims {
545 buf.push(delims[(line_idx - 1) % delims.len()]);
546 }
547
548 match iter.next() {
549 Some(nl_pos) => {
550 let line = &data[cursor..nl_pos];
551 if !line.is_empty() {
552 if buf.len() + line.len() > buf.capacity() {
553 raw_write_all(&buf)?;
554 buf.clear();
555 if line.len() > buf.capacity() {
556 buf.reserve(line.len() + 4096);
557 }
558 }
559 buf.extend_from_slice(line);
560 }
561 cursor = nl_pos + 1;
562 }
563 None => {
564 if cursor < data.len() {
566 let remaining = &data[cursor..];
567 if buf.len() + remaining.len() > buf.capacity() {
568 raw_write_all(&buf)?;
569 buf.clear();
570 if remaining.len() > buf.capacity() {
571 buf.reserve(remaining.len() + 4096);
572 }
573 }
574 buf.extend_from_slice(remaining);
575 }
576 break;
577 }
578 }
579
580 line_idx += 1;
581
582 if buf.len() >= BUF_SIZE {
583 raw_write_all(&buf)?;
584 buf.clear();
585 }
586 }
587
588 buf.push(terminator);
589 if buf.len() >= BUF_SIZE {
590 raw_write_all(&buf)?;
591 buf.clear();
592 }
593 }
594
595 if !buf.is_empty() {
597 raw_write_all(&buf)?;
598 }
599
600 Ok(())
601}
602
603pub fn paste_stream(file_data: &[&[u8]], config: &PasteConfig) -> std::io::Result<()> {
605 if config.serial {
606 paste_serial_stream(file_data, config)
607 } else {
608 paste_parallel_stream(file_data, config)
609 }
610}
611
612#[inline]
615fn presplit_lines(data: &[u8], terminator: u8) -> Vec<(u32, u32)> {
616 if data.is_empty() {
617 return Vec::new();
618 }
619 assert!(
620 data.len() <= u32::MAX as usize,
621 "presplit_lines: data exceeds 4 GiB"
622 );
623 let estimated_lines = data.len() / 40 + 1;
625 let mut offsets = Vec::with_capacity(estimated_lines);
626 let mut start = 0u32;
627 for pos in memchr::memchr_iter(terminator, data) {
628 offsets.push((start, pos as u32));
629 start = pos as u32 + 1;
630 }
631 if data.last() != Some(&terminator) && (start as usize) < data.len() {
632 offsets.push((start, data.len() as u32));
633 }
634 offsets
635}
636
637pub fn paste_parallel_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
642 let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
643 let delims = &config.delimiters;
644
645 if file_data.is_empty() || file_data.iter().all(|d| d.is_empty()) {
646 return Vec::new();
647 }
648
649 let file_lines: Vec<Vec<(u32, u32)>> = file_data
651 .iter()
652 .map(|data| presplit_lines(data, terminator))
653 .collect();
654
655 let max_lines = file_lines.iter().map(|l| l.len()).max().unwrap_or(0);
656 if max_lines == 0 {
657 return Vec::new();
658 }
659
660 let nfiles = file_data.len();
662 let has_delims = !delims.is_empty();
663 let delims_per_line = if has_delims && nfiles > 1 {
664 nfiles - 1
665 } else {
666 0
667 };
668
669 let mut exact_size = max_lines * (delims_per_line + 1); for fl in &file_lines {
671 for &(s, e) in fl.iter() {
672 exact_size += (e - s) as usize;
673 }
674 }
675 let mut output = Vec::with_capacity(exact_size);
678
679 unsafe {
682 let base: *mut u8 = output.as_mut_ptr();
683 let mut pos = 0usize;
684
685 for line_idx in 0..max_lines {
686 for file_idx in 0..nfiles {
687 if file_idx > 0 && has_delims {
688 *base.add(pos) = delims[(file_idx - 1) % delims.len()];
689 pos += 1;
690 }
691 let lines = &file_lines[file_idx];
692 if line_idx < lines.len() {
693 let (s, e) = *lines.get_unchecked(line_idx);
694 let len = (e - s) as usize;
695 if len > 0 {
696 std::ptr::copy_nonoverlapping(
697 file_data.get_unchecked(file_idx).as_ptr().add(s as usize),
698 base.add(pos),
699 len,
700 );
701 pos += len;
702 }
703 }
704 }
705 *base.add(pos) = terminator;
706 pos += 1;
707 }
708
709 assert_eq!(pos, exact_size, "exact_size miscalculated");
710 output.set_len(pos);
711 }
712
713 output
714}
715
716pub fn paste_serial_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
719 let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
720 let delims = &config.delimiters;
721 let has_delims = !delims.is_empty();
722
723 let total_input: usize = file_data.iter().map(|d| d.len()).sum();
724 let mut output = Vec::with_capacity(total_input + file_data.len());
725
726 if has_delims && delims.len() == 1 {
731 let delim = delims[0];
732 let needs_replace = delim != terminator;
733 for data in file_data {
734 if data.is_empty() {
735 output.push(terminator);
736 continue;
737 }
738 let effective = if data.last() == Some(&terminator) {
739 &data[..data.len() - 1]
740 } else {
741 *data
742 };
743 if effective.is_empty() {
744 output.push(terminator);
745 continue;
746 }
747 let start = output.len();
748 output.extend_from_slice(effective);
749 if needs_replace {
750 for pos in memchr::memchr_iter(terminator, effective) {
751 output[start + pos] = delim;
752 }
753 }
754 output.push(terminator);
755 }
756 return output;
757 }
758
759 for data in file_data {
760 if data.is_empty() {
761 output.push(terminator);
762 continue;
763 }
764 let lines = presplit_lines(data, terminator);
765 if lines.is_empty() {
766 output.push(terminator);
767 continue;
768 }
769 let (s, e) = lines[0];
770 output.extend_from_slice(&data[s as usize..e as usize]);
771 for (i, &(s, e)) in lines[1..].iter().enumerate() {
772 if has_delims {
773 output.push(delims[i % delims.len()]);
774 }
775 output.extend_from_slice(&data[s as usize..e as usize]);
776 }
777 output.push(terminator);
778 }
779
780 output
781}
782
783pub fn paste(
785 file_data: &[&[u8]],
786 config: &PasteConfig,
787 out: &mut impl Write,
788) -> std::io::Result<()> {
789 let output = if config.serial {
790 paste_serial_to_vec(file_data, config)
791 } else {
792 paste_parallel_to_vec(file_data, config)
793 };
794 out.write_all(&output)
795}
796
797pub fn paste_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
800 if config.serial {
801 paste_serial_to_vec(file_data, config)
802 } else {
803 paste_parallel_to_vec(file_data, config)
804 }
805}