coreutils_rs/paste/
core.rs1use std::io::Write;
2
/// Options controlling how input files are merged.
///
/// The [`Default`] implementation for this type lives next to it in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PasteConfig {
    /// Delimiter bytes written between joined fields; the list is reused
    /// cyclically when there are more joins than delimiters. An empty list
    /// means fields are concatenated with nothing between them.
    pub delimiters: Vec<u8>,
    /// When `true`, each file is pasted onto its own output line (serial
    /// mode); otherwise corresponding lines of all files are merged.
    pub serial: bool,
    /// When `true`, lines are terminated by a NUL byte instead of `\n`.
    pub zero_terminated: bool,
}
12
13impl Default for PasteConfig {
14 fn default() -> Self {
15 Self {
16 delimiters: vec![b'\t'],
17 serial: false,
18 zero_terminated: false,
19 }
20 }
21}
22
/// Converts a delimiter specification into the list of delimiter bytes.
///
/// The input is processed byte by byte. A backslash followed by one of the
/// recognised characters is collapsed into a single byte:
///
/// * `\n` becomes a newline
/// * `\t` becomes a tab
/// * `\\` becomes a backslash
/// * `\0` becomes a NUL byte
///
/// A backslash followed by any other byte is kept as a literal backslash and
/// the following byte is then handled as ordinary input. A backslash at the
/// very end of the string is also kept literally. An empty input yields an
/// empty vector.
pub fn parse_delimiters(s: &str) -> Vec<u8> {
    let mut parsed = Vec::with_capacity(s.len());
    let mut input = s.bytes().peekable();

    while let Some(byte) = input.next() {
        if byte != b'\\' {
            parsed.push(byte);
            continue;
        }

        // Look ahead without consuming so an unrecognised follower is left in
        // the stream and processed as a plain byte on the next iteration.
        let unescaped = match input.peek().copied() {
            Some(b'n') => Some(b'\n'),
            Some(b't') => Some(b'\t'),
            Some(b'\\') => Some(b'\\'),
            Some(b'0') => Some(0u8),
            _ => None,
        };

        match unescaped {
            Some(value) => {
                parsed.push(value);
                // Consume the escape character that was just translated.
                input.next();
            }
            None => parsed.push(b'\\'),
        }
    }

    parsed
}
64
65#[inline]
68fn build_line_offsets(data: &[u8], terminator: u8) -> Vec<(usize, usize)> {
69 let mut offsets = Vec::new();
70 if data.is_empty() {
71 return offsets;
72 }
73 let count = memchr::memchr_iter(terminator, data).count()
75 + if data.last() != Some(&terminator) {
76 1
77 } else {
78 0
79 };
80 offsets.reserve_exact(count);
81 let mut start = 0;
82 for pos in memchr::memchr_iter(terminator, data) {
83 offsets.push((start, pos));
84 start = pos + 1;
85 }
86 if start < data.len() {
88 offsets.push((start, data.len()));
89 }
90 offsets
91}
92
93pub fn paste_parallel_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
96 let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
97
98 let all_offsets: Vec<Vec<(usize, usize)>> = file_data
100 .iter()
101 .map(|d| build_line_offsets(d, terminator))
102 .collect();
103
104 let max_lines = all_offsets.iter().map(|o| o.len()).max().unwrap_or(0);
105 if max_lines == 0 && file_data.iter().all(|d| d.is_empty()) {
106 return Vec::new();
107 }
108
109 let total_input: usize = file_data.iter().map(|d| d.len()).sum();
111 let delim_overhead = max_lines * file_data.len();
112 let mut output = Vec::with_capacity(total_input + delim_overhead);
113
114 let delims = &config.delimiters;
115
116 for line_idx in 0..max_lines {
117 for (file_idx, (offsets, data)) in all_offsets.iter().zip(file_data.iter()).enumerate() {
118 if file_idx > 0 && !delims.is_empty() {
119 output.push(delims[(file_idx - 1) % delims.len()]);
120 }
121 if line_idx < offsets.len() {
122 let (start, end) = offsets[line_idx];
123 output.extend_from_slice(&data[start..end]);
124 }
125 }
126 output.push(terminator);
127 }
128
129 output
130}
131
132pub fn paste_serial_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
135 let terminator = if config.zero_terminated { 0u8 } else { b'\n' };
136 let delims = &config.delimiters;
137
138 let total_input: usize = file_data.iter().map(|d| d.len()).sum();
140 let mut output = Vec::with_capacity(total_input + file_data.len());
141
142 for data in file_data {
143 let offsets = build_line_offsets(data, terminator);
144 for (i, &(start, end)) in offsets.iter().enumerate() {
145 if i > 0 && !delims.is_empty() {
146 output.push(delims[(i - 1) % delims.len()]);
147 }
148 output.extend_from_slice(&data[start..end]);
149 }
150 output.push(terminator);
151 }
152
153 output
154}
155
156pub fn paste(
158 file_data: &[&[u8]],
159 config: &PasteConfig,
160 out: &mut impl Write,
161) -> std::io::Result<()> {
162 let output = if config.serial {
163 paste_serial_to_vec(file_data, config)
164 } else {
165 paste_parallel_to_vec(file_data, config)
166 };
167 out.write_all(&output)
168}
169
170pub fn paste_to_vec(file_data: &[&[u8]], config: &PasteConfig) -> Vec<u8> {
173 if config.serial {
174 paste_serial_to_vec(file_data, config)
175 } else {
176 paste_parallel_to_vec(file_data, config)
177 }
178}