//! coreutils_rs/paste/core.rs
//!
//! Core implementation of the `paste` utility: delimiter-string parsing,
//! parallel and serial paste in both streaming (raw-fd) and
//! buffer-returning variants.
1use std::io::Write;
2
/// Configuration for the paste command.
pub struct PasteConfig {
    /// Delimiter characters, cycled through columns.
    pub delimiters: Vec<u8>,
    /// Serial mode: paste one file at a time.
    pub serial: bool,
    /// Use NUL as line terminator instead of newline.
    pub zero_terminated: bool,
}

impl Default for PasteConfig {
    /// Defaults mirror plain `paste` with no flags: TAB delimiter,
    /// parallel (column) mode, newline-terminated lines.
    fn default() -> Self {
        PasteConfig {
            delimiters: vec![b'\t'],
            serial: false,
            zero_terminated: false,
        }
    }
}
22
/// Parse delimiter string with escape sequences.
/// Supports: \n (newline), \t (tab), \\ (backslash), \0 (NUL), empty string (no delimiter).
/// Any other backslash sequence — including a trailing backslash — keeps the
/// backslash as a literal byte and leaves the following byte untouched.
pub fn parse_delimiters(s: &str) -> Vec<u8> {
    let mut result = Vec::with_capacity(s.len());
    let mut bytes = s.bytes().peekable();
    while let Some(b) = bytes.next() {
        if b != b'\\' {
            result.push(b);
            continue;
        }
        // Backslash seen: consume the next byte only for a known escape.
        match bytes.peek() {
            Some(b'n') => {
                bytes.next();
                result.push(b'\n');
            }
            Some(b't') => {
                bytes.next();
                result.push(b'\t');
            }
            Some(b'\\') => {
                bytes.next();
                result.push(b'\\');
            }
            Some(b'0') => {
                bytes.next();
                result.push(0);
            }
            // Unknown escape or trailing backslash: emit the backslash literally;
            // the peeked byte (if any) is processed normally on the next pass.
            _ => result.push(b'\\'),
        }
    }
    result
}
64
/// Output buffer size for streaming paste (2 MiB). This is the flush
/// threshold, not the allocation size: the streaming functions allocate
/// extra headroom above it (65536 or 4096 bytes) so small tail writes
/// (delimiters/terminators) landing just below the threshold still fit.
const BUF_SIZE: usize = 2 * 1024 * 1024;
67
68/// Raw write to stdout fd 1. Returns any error encountered.
69#[cfg(unix)]
70pub fn raw_write_all(data: &[u8]) -> std::io::Result<()> {
71    let mut written = 0;
72    while written < data.len() {
73        let ret = unsafe {
74            libc::write(
75                1,
76                data[written..].as_ptr() as *const libc::c_void,
77                (data.len() - written) as _,
78            )
79        };
80        if ret > 0 {
81            written += ret as usize;
82        } else if ret == 0 {
83            return Err(std::io::Error::new(
84                std::io::ErrorKind::WriteZero,
85                "write returned 0",
86            ));
87        } else {
88            let err = std::io::Error::last_os_error();
89            if err.kind() == std::io::ErrorKind::Interrupted {
90                continue;
91            }
92            return Err(err);
93        }
94    }
95    Ok(())
96}
97
/// Portable fallback for non-Unix targets: locked, buffered stdout
/// via the standard library, flushed before returning.
#[cfg(not(unix))]
pub fn raw_write_all(data: &[u8]) -> std::io::Result<()> {
    let mut out = std::io::stdout().lock();
    out.write_all(data)?;
    out.flush()
}
105
106/// Streaming paste for the parallel (normal) mode.
107/// Scans each file line-by-line with memchr on-the-fly — no pre-split offset arrays.
108/// Uses a single 2MB output buffer with raw fd writes.
109pub fn paste_parallel_stream(file_data: &[&[u8]], config: &PasteConfig) -> std::io::Result<()> {
110    let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
111    let delims = &config.delimiters;
112    let has_delims = !delims.is_empty();
113    let nfiles = file_data.len();
114
115    if nfiles == 0 || file_data.iter().all(|d| d.is_empty()) {
116        return Ok(());
117    }
118
119    // Fast path: single file is a passthrough (output == input)
120    if nfiles == 1 {
121        let data = file_data[0];
122        if data.is_empty() {
123            return Ok(());
124        }
125        if *data.last().unwrap() == terminator {
126            return raw_write_all(data);
127        }
128        raw_write_all(data)?;
129        return raw_write_all(&[terminator]);
130    }
131
132    // Fast path: 2 files with single-byte delimiter (the most common case)
133    if nfiles == 2 && delims.len() == 1 {
134        return paste_two_files_streaming(file_data[0], file_data[1], delims[0], terminator);
135    }
136
137    // General N-file streaming paste
138    paste_n_files_streaming(file_data, delims, has_delims, nfiles, terminator)
139}
140
/// Fast path for 2-file paste: uses memchr_iter iterators advanced in lockstep.
/// Zero allocation for line offsets — each iterator maintains its internal SIMD state.
/// memchr_iter amortizes SIMD setup across the entire file scan, avoiding per-line
/// memchr call overhead.
///
/// Each output row has the shape `lineA <delim> lineB <terminator>`; once one
/// file runs out of lines its column is empty, but the delimiter is still
/// written until both files are exhausted.
fn paste_two_files_streaming(
    data_a: &[u8],
    data_b: &[u8],
    delim: u8,
    terminator: u8,
) -> std::io::Result<()> {
    if data_a.is_empty() && data_b.is_empty() {
        return Ok(());
    }

    let ptr_a = data_a.as_ptr();
    let ptr_b = data_b.as_ptr();
    let len_a = data_a.len();
    let len_b = data_b.len();

    // 64 KiB of headroom above the flush threshold: the periodic flush fires
    // at buf_cap, so a row written just below the threshold can spill past it.
    let buf_cap = BUF_SIZE;
    let mut buf: Vec<u8> = Vec::with_capacity(buf_cap + 65536);
    let mut pos: usize = 0;

    // Use memchr_iter to scan both files — no per-line memchr calls, no offset arrays.
    let mut iter_a = memchr::memchr_iter(terminator, data_a);
    let mut iter_b = memchr::memchr_iter(terminator, data_b);

    let mut cur_a: usize = 0; // start of current line in A
    let mut cur_b: usize = 0; // start of current line in B
    let mut done_a = len_a == 0;
    let mut done_b = len_b == 0;

    while !done_a || !done_b {
        // Advance file A iterator to get next line
        let (a_start, a_len, a_has_line) = if !done_a {
            match iter_a.next() {
                Some(nl_pos) => {
                    let start = cur_a;
                    let line_len = nl_pos - cur_a;
                    cur_a = nl_pos + 1;
                    (start, line_len, true)
                }
                None => {
                    // No more newlines — check for trailing data without terminator
                    done_a = true;
                    if cur_a < len_a {
                        let start = cur_a;
                        let line_len = len_a - cur_a;
                        cur_a = len_a;
                        (start, line_len, true)
                    } else {
                        (0, 0, false)
                    }
                }
            }
        } else {
            (0, 0, false)
        };

        // Advance file B iterator to get next line
        let (b_start, b_len, b_has_line) = if !done_b {
            match iter_b.next() {
                Some(nl_pos) => {
                    let start = cur_b;
                    let line_len = nl_pos - cur_b;
                    cur_b = nl_pos + 1;
                    (start, line_len, true)
                }
                None => {
                    done_b = true;
                    if cur_b < len_b {
                        let start = cur_b;
                        let line_len = len_b - cur_b;
                        cur_b = len_b;
                        (start, line_len, true)
                    } else {
                        (0, 0, false)
                    }
                }
            }
        } else {
            (0, 0, false)
        };

        // If neither file produced a line this iteration, we're truly done.
        if !a_has_line && !b_has_line {
            break;
        }

        debug_assert!(a_start + a_len <= len_a, "a out of bounds");
        debug_assert!(b_start + b_len <= len_b, "b out of bounds");
        // On 64-bit: overflow requires a 9 EiB file. On 32-bit: a single
        // line cannot exceed 2 GiB (unmappable in 4 GiB address space).
        debug_assert!(a_len < isize::MAX as usize && b_len < isize::MAX as usize);
        debug_assert!(
            a_len
                .checked_add(b_len)
                .and_then(|x| x.checked_add(2))
                .is_some()
        );
        // Row size: lineA + 1 delimiter byte + lineB + 1 terminator byte.
        let out_len = a_len + b_len + 2;

        // Flush if the upcoming row would not fit in the remaining capacity.
        if pos + out_len > buf.capacity() {
            // SAFETY: the first `pos` bytes were initialized by the raw writes
            // below on earlier iterations.
            unsafe { buf.set_len(pos) };
            raw_write_all(&buf)?;
            buf.clear();
            pos = 0;
            // An oversized row (longer than the whole buffer) gets a one-off grow.
            if out_len > buf.capacity() {
                buf.reserve(out_len);
            }
        }

        // Write: lineA + delim + lineB + terminator
        // SAFETY: the flush above guarantees pos + out_len <= buf.capacity(),
        // and the source ranges are in bounds (debug-asserted above).
        unsafe {
            let base = buf.as_mut_ptr();
            if a_len > 0 {
                std::ptr::copy_nonoverlapping(ptr_a.add(a_start), base.add(pos), a_len);
                pos += a_len;
            }
            *base.add(pos) = delim;
            pos += 1;
            if b_len > 0 {
                std::ptr::copy_nonoverlapping(ptr_b.add(b_start), base.add(pos), b_len);
                pos += b_len;
            }
            *base.add(pos) = terminator;
            pos += 1;
        }

        // Periodic flush once the 2 MiB threshold is crossed
        if pos >= buf_cap {
            // SAFETY: first `pos` bytes initialized by the writes above.
            unsafe { buf.set_len(pos) };
            raw_write_all(&buf)?;
            buf.clear();
            pos = 0;
        }
    }

    // Final flush
    if pos > 0 {
        // SAFETY: first `pos` bytes initialized by the writes above.
        unsafe { buf.set_len(pos) };
        raw_write_all(&buf)?;
    }

    Ok(())
}
288
/// General N-file streaming paste using memchr_iter iterators in lockstep.
/// Each file has its own memchr_iter, cursor position, and done flag.
/// Files that run out of lines early contribute empty columns (their
/// delimiter slots are still written) until every file is exhausted.
fn paste_n_files_streaming(
    file_data: &[&[u8]],
    delims: &[u8],
    has_delims: bool,
    nfiles: usize,
    terminator: u8,
) -> std::io::Result<()> {
    // The saved_pos rewind relies on delimiter-only writes (at most nfiles-1
    // bytes per iteration) never exceeding the 65536-byte capacity headroom.
    // When has_delims is false, no delimiter bytes are written, so the limit
    // does not apply. nfiles files produce nfiles-1 delimiters per row.
    if nfiles > 65536 {
        return Err(std::io::Error::other("too many files"));
    }

    // Per-file scan state: byte cursor, exhausted flag, and active-file count.
    let mut cursors: Vec<usize> = vec![0; nfiles];
    let mut done: Vec<bool> = file_data.iter().map(|d| d.is_empty()).collect();
    let mut files_remaining = done.iter().filter(|&&d| !d).count();

    let buf_cap = BUF_SIZE;
    let mut buf: Vec<u8> = Vec::with_capacity(buf_cap + 65536);
    let mut pos: usize = 0;

    // Create memchr_iter for each file
    let mut iters: Vec<memchr::Memchr<'_>> = file_data
        .iter()
        .map(|d| memchr::memchr_iter(terminator, d))
        .collect();

    while files_remaining > 0 {
        // Save buffer position so we can rewind if no file produces a line.
        // Rewind safety depends on three invariants:
        // (1) Data flushes (line-copy paths) only fire when line_len > 0 or rem > 0,
        //     both of which set any_iter_advanced = true. So if !any_iter_advanced,
        //     no data flush happened and saved_pos was never invalidated.
        // (2) Delimiter flush cannot fire: the nfiles guard + 65536-byte headroom
        //     ensures pos + 1 <= buf.capacity() for delimiter-only writes.
        // (3) After a data flush, pos resets to 0, so the delimiter-flush guard
        //     becomes trivially false again.
        debug_assert!(
            pos < buf_cap,
            "saved_pos invariant: pos must be < buf_cap at iteration start"
        );
        let saved_pos = pos;
        let mut any_iter_advanced = false;

        for file_idx in 0..nfiles {
            // Delimiter before columns 1..N
            if file_idx > 0 && has_delims {
                // SAFETY: has_delims guarantees delims.len() > 0, making modulo index valid
                let d = unsafe { *delims.get_unchecked((file_idx - 1) % delims.len()) };
                // Safety of saved_pos rewind:
                // 1. nfiles <= 65536 (checked above) so delimiter-only writes <= 65535 bytes
                // 2. buf capacity = buf_cap + 65536, so delimiter writes never trigger flush
                // 3. Therefore saved_pos remains valid throughout the iteration
                debug_assert!(
                    pos < buf.capacity(),
                    "delimiter flush should be unreachable under nfiles invariant"
                );
                if pos >= buf.capacity() {
                    // SAFETY: the first `pos` bytes were initialized by earlier writes.
                    unsafe { buf.set_len(pos) };
                    raw_write_all(&buf)?;
                    buf.clear();
                    pos = 0;
                }
                // SAFETY: pos < buf.capacity() (flushed above otherwise), so this
                // single-byte write stays inside the allocation.
                unsafe { *buf.as_mut_ptr().add(pos) = d };
                pos += 1;
            }

            if !done[file_idx] {
                let data = file_data[file_idx];
                let cur = cursors[file_idx];

                match iters[file_idx].next() {
                    Some(nl_pos) => {
                        let line_len = nl_pos - cur;
                        any_iter_advanced = true;
                        if line_len > 0 {
                            if pos + line_len > buf.capacity() {
                                // SAFETY: first `pos` bytes initialized by earlier writes.
                                unsafe { buf.set_len(pos) };
                                raw_write_all(&buf)?;
                                buf.clear();
                                pos = 0;
                                if line_len > buf.capacity() {
                                    buf.reserve(line_len + 4096);
                                }
                            }
                            // SAFETY: pos + line_len <= buf.capacity() after the
                            // flush/reserve above; source range lies within `data`.
                            unsafe {
                                std::ptr::copy_nonoverlapping(
                                    data.as_ptr().add(cur),
                                    buf.as_mut_ptr().add(pos),
                                    line_len,
                                );
                            }
                            pos += line_len;
                        }
                        cursors[file_idx] = nl_pos + 1;
                    }
                    None => {
                        // No more newlines — check for trailing data
                        let rem = data.len() - cur;
                        if rem > 0 {
                            any_iter_advanced = true;
                            if pos + rem > buf.capacity() {
                                // SAFETY: first `pos` bytes initialized by earlier writes.
                                unsafe { buf.set_len(pos) };
                                raw_write_all(&buf)?;
                                buf.clear();
                                pos = 0;
                                if rem > buf.capacity() {
                                    buf.reserve(rem + 4096);
                                }
                            }
                            // SAFETY: pos + rem <= buf.capacity() after the
                            // flush/reserve above; source range lies within `data`.
                            unsafe {
                                std::ptr::copy_nonoverlapping(
                                    data.as_ptr().add(cur),
                                    buf.as_mut_ptr().add(pos),
                                    rem,
                                );
                            }
                            pos += rem;
                        }
                        done[file_idx] = true;
                        files_remaining -= 1;
                        cursors[file_idx] = data.len();
                    }
                }
            }
        }

        if !any_iter_advanced {
            // Invariant: every remaining active file just exhausted with rem == 0,
            // so files_remaining == 0 here. No content was produced this iteration;
            // rewind the delimiters and break without writing a terminator.
            // Rewind is safe: the nfiles guard (above) ensures delimiter-only
            // writes cannot trigger a flush, and saved_pos remains valid.
            debug_assert_eq!(files_remaining, 0);
            pos = saved_pos;
            break;
        }

        // Terminator
        if pos >= buf.capacity() {
            // SAFETY: first `pos` bytes initialized by earlier writes.
            unsafe { buf.set_len(pos) };
            raw_write_all(&buf)?;
            buf.clear();
            pos = 0;
        }
        // SAFETY: pos < buf.capacity() after the flush check just above.
        unsafe { *buf.as_mut_ptr().add(pos) = terminator };
        pos += 1;

        // Flush when buffer is full
        if pos >= buf_cap {
            // SAFETY: first `pos` bytes initialized by earlier writes.
            unsafe { buf.set_len(pos) };
            raw_write_all(&buf)?;
            buf.clear();
            pos = 0;
        }
    }

    // Final flush
    if pos > 0 {
        // SAFETY: first `pos` bytes initialized by earlier writes.
        unsafe { buf.set_len(pos) };
        raw_write_all(&buf)?;
    }

    Ok(())
}
458
459/// Streaming paste for serial mode.
460/// For each file, join all lines with the delimiter list (cycling).
461pub fn paste_serial_stream(file_data: &[&[u8]], config: &PasteConfig) -> std::io::Result<()> {
462    let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
463    let delims = &config.delimiters;
464    let has_delims = !delims.is_empty();
465
466    // Fast path: single-delimiter serial mode — bulk copy with optional scatter replace.
467    // When delimiter != terminator: copy chunks and replace terminators with the delimiter.
468    // When delimiter == terminator (identity): copy chunks as-is, no replacement needed.
469    // Processes in BUF_SIZE chunks to avoid full-file allocation.
470    if has_delims && delims.len() == 1 {
471        let replacement = delims[0];
472        let needs_replace = replacement != terminator;
473        let mut buf: Vec<u8> = Vec::with_capacity(BUF_SIZE + 4096);
474
475        for data in file_data {
476            if data.is_empty() {
477                buf.push(terminator);
478                if buf.len() >= BUF_SIZE {
479                    raw_write_all(&buf)?;
480                    buf.clear();
481                }
482                continue;
483            }
484
485            // Strip trailing terminator — we'll add one at the end.
486            let effective = if data.last() == Some(&terminator) {
487                &data[..data.len() - 1]
488            } else {
489                data
490            };
491
492            // Process in chunks to avoid full-file allocation + page faults.
493            let mut cursor = 0usize;
494            while cursor < effective.len() {
495                let chunk_end = (cursor + BUF_SIZE).min(effective.len());
496                let chunk = &effective[cursor..chunk_end];
497                let start = buf.len();
498                buf.extend_from_slice(chunk);
499                // Replace terminators with delimiter only when they differ.
500                if needs_replace {
501                    for pos in memchr::memchr_iter(terminator, chunk) {
502                        buf[start + pos] = replacement;
503                    }
504                }
505                cursor = chunk_end;
506
507                if buf.len() >= BUF_SIZE {
508                    raw_write_all(&buf)?;
509                    buf.clear();
510                }
511            }
512
513            buf.push(terminator);
514            if buf.len() >= BUF_SIZE {
515                raw_write_all(&buf)?;
516                buf.clear();
517            }
518        }
519
520        if !buf.is_empty() {
521            raw_write_all(&buf)?;
522        }
523        return Ok(());
524    }
525
526    let mut buf: Vec<u8> = Vec::with_capacity(BUF_SIZE + 4096);
527
528    for data in file_data {
529        if data.is_empty() {
530            buf.push(terminator);
531            if buf.len() >= BUF_SIZE {
532                raw_write_all(&buf)?;
533                buf.clear();
534            }
535            continue;
536        }
537
538        let mut cursor = 0usize;
539        let mut line_idx = 0usize;
540        let mut iter = memchr::memchr_iter(terminator, data);
541
542        loop {
543            // Delimiter before lines 1..N
544            if line_idx > 0 && has_delims {
545                buf.push(delims[(line_idx - 1) % delims.len()]);
546            }
547
548            match iter.next() {
549                Some(nl_pos) => {
550                    let line = &data[cursor..nl_pos];
551                    if !line.is_empty() {
552                        if buf.len() + line.len() > buf.capacity() {
553                            raw_write_all(&buf)?;
554                            buf.clear();
555                            if line.len() > buf.capacity() {
556                                buf.reserve(line.len() + 4096);
557                            }
558                        }
559                        buf.extend_from_slice(line);
560                    }
561                    cursor = nl_pos + 1;
562                }
563                None => {
564                    // No more terminators — check for trailing data
565                    if cursor < data.len() {
566                        let remaining = &data[cursor..];
567                        if buf.len() + remaining.len() > buf.capacity() {
568                            raw_write_all(&buf)?;
569                            buf.clear();
570                            if remaining.len() > buf.capacity() {
571                                buf.reserve(remaining.len() + 4096);
572                            }
573                        }
574                        buf.extend_from_slice(remaining);
575                    }
576                    break;
577                }
578            }
579
580            line_idx += 1;
581
582            if buf.len() >= BUF_SIZE {
583                raw_write_all(&buf)?;
584                buf.clear();
585            }
586        }
587
588        buf.push(terminator);
589        if buf.len() >= BUF_SIZE {
590            raw_write_all(&buf)?;
591            buf.clear();
592        }
593    }
594
595    // Final flush
596    if !buf.is_empty() {
597        raw_write_all(&buf)?;
598    }
599
600    Ok(())
601}
602
603/// Streaming paste entry point. Writes directly to stdout using raw fd writes.
604pub fn paste_stream(file_data: &[&[u8]], config: &PasteConfig) -> std::io::Result<()> {
605    if config.serial {
606        paste_serial_stream(file_data, config)
607    } else {
608        paste_parallel_stream(file_data, config)
609    }
610}
611
612/// Pre-split a file into line offset pairs using a single SIMD memchr_iter pass.
613/// Returns a Vec of (start, end) byte offsets — one per line.
614#[inline]
615fn presplit_lines(data: &[u8], terminator: u8) -> Vec<(u32, u32)> {
616    if data.is_empty() {
617        return Vec::new();
618    }
619    assert!(
620        data.len() <= u32::MAX as usize,
621        "presplit_lines: data exceeds 4 GiB"
622    );
623    // Heuristic: assume average line length ~40 bytes to avoid a count pre-scan.
624    let estimated_lines = data.len() / 40 + 1;
625    let mut offsets = Vec::with_capacity(estimated_lines);
626    let mut start = 0u32;
627    for pos in memchr::memchr_iter(terminator, data) {
628        offsets.push((start, pos as u32));
629        start = pos as u32 + 1;
630    }
631    if data.last() != Some(&terminator) && (start as usize) < data.len() {
632        offsets.push((start, data.len() as u32));
633    }
634    offsets
635}
636
/// Paste files in normal (parallel) mode and return the output buffer.
/// Pre-splits files into line offsets (one SIMD pass each), then the main
/// loop uses O(1) array indexing instead of per-line memchr calls.
/// Uses unsafe raw pointer writes to eliminate bounds-check overhead.
///
/// Rows are emitted until the longest file is exhausted; files with fewer
/// lines contribute empty columns but still receive their delimiter slots.
pub fn paste_parallel_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
    let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
    let delims = &config.delimiters;

    if file_data.is_empty() || file_data.iter().all(|d| d.is_empty()) {
        return Vec::new();
    }

    // Pre-split each file into line offsets — single SIMD pass per file.
    let file_lines: Vec<Vec<(u32, u32)>> = file_data
        .iter()
        .map(|data| presplit_lines(data, terminator))
        .collect();

    let max_lines = file_lines.iter().map(|l| l.len()).max().unwrap_or(0);
    if max_lines == 0 {
        return Vec::new();
    }

    // Compute exact output size to avoid reallocation.
    let nfiles = file_data.len();
    let has_delims = !delims.is_empty();
    let delims_per_line = if has_delims && nfiles > 1 {
        nfiles - 1
    } else {
        0
    };

    let mut exact_size = max_lines * (delims_per_line + 1); // delimiters + terminators
    for fl in &file_lines {
        for &(s, e) in fl.iter() {
            exact_size += (e - s) as usize;
        }
    }
    // Empty-file lines contribute nothing but delimiter slots are already counted

    let mut output = Vec::with_capacity(exact_size);

    // SAFETY: We computed exact_size above. All writes go through raw pointers
    // with total bytes written == exact_size. We set_len at the end.
    unsafe {
        let base: *mut u8 = output.as_mut_ptr();
        let mut pos = 0usize;

        for line_idx in 0..max_lines {
            for file_idx in 0..nfiles {
                if file_idx > 0 && has_delims {
                    *base.add(pos) = delims[(file_idx - 1) % delims.len()];
                    pos += 1;
                }
                let lines = &file_lines[file_idx];
                if line_idx < lines.len() {
                    // In bounds: line_idx < lines.len() was checked just above.
                    let (s, e) = *lines.get_unchecked(line_idx);
                    let len = (e - s) as usize;
                    if len > 0 {
                        std::ptr::copy_nonoverlapping(
                            file_data.get_unchecked(file_idx).as_ptr().add(s as usize),
                            base.add(pos),
                            len,
                        );
                        pos += len;
                    }
                }
            }
            *base.add(pos) = terminator;
            pos += 1;
        }

        // Hard check (not debug-only): a sizing mismatch here would make
        // set_len expose uninitialized memory, so fail loudly instead.
        assert_eq!(pos, exact_size, "exact_size miscalculated");
        output.set_len(pos);
    }

    output
}
715
716/// Paste files in serial mode and return the output buffer.
717/// For each file, join all lines with the delimiter list (cycling).
718pub fn paste_serial_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
719    let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
720    let delims = &config.delimiters;
721    let has_delims = !delims.is_empty();
722
723    let total_input: usize = file_data.iter().map(|d| d.len()).sum();
724    let mut output = Vec::with_capacity(total_input + file_data.len());
725
726    // Fast path: single delimiter — bulk copy with optional scatter replace.
727    // When delimiter != terminator: copy the file and replace terminators with the delimiter.
728    // When delimiter == terminator (identity): copy the file as-is, no replacement needed.
729    // Either way, avoids line-by-line presplit + extend_from_slice (~150K calls for 10MB).
730    if has_delims && delims.len() == 1 {
731        let delim = delims[0];
732        let needs_replace = delim != terminator;
733        for data in file_data {
734            if data.is_empty() {
735                output.push(terminator);
736                continue;
737            }
738            let effective = if data.last() == Some(&terminator) {
739                &data[..data.len() - 1]
740            } else {
741                *data
742            };
743            if effective.is_empty() {
744                output.push(terminator);
745                continue;
746            }
747            let start = output.len();
748            output.extend_from_slice(effective);
749            if needs_replace {
750                for pos in memchr::memchr_iter(terminator, effective) {
751                    output[start + pos] = delim;
752                }
753            }
754            output.push(terminator);
755        }
756        return output;
757    }
758
759    for data in file_data {
760        if data.is_empty() {
761            output.push(terminator);
762            continue;
763        }
764        let lines = presplit_lines(data, terminator);
765        if lines.is_empty() {
766            output.push(terminator);
767            continue;
768        }
769        let (s, e) = lines[0];
770        output.extend_from_slice(&data[s as usize..e as usize]);
771        for (i, &(s, e)) in lines[1..].iter().enumerate() {
772            if has_delims {
773                output.push(delims[i % delims.len()]);
774            }
775            output.extend_from_slice(&data[s as usize..e as usize]);
776        }
777        output.push(terminator);
778    }
779
780    output
781}
782
783/// Main paste entry point. Writes directly to the provided writer.
784pub fn paste(
785    file_data: &[&[u8]],
786    config: &PasteConfig,
787    out: &mut impl Write,
788) -> std::io::Result<()> {
789    let output = if config.serial {
790        paste_serial_to_vec(file_data, config)
791    } else {
792        paste_parallel_to_vec(file_data, config)
793    };
794    out.write_all(&output)
795}
796
797/// Build the paste output as a Vec, then return it for the caller to write.
798/// This allows the binary to use raw write() for maximum throughput.
799pub fn paste_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
800    if config.serial {
801        paste_serial_to_vec(file_data, config)
802    } else {
803        paste_parallel_to_vec(file_data, config)
804    }
805}