1use clap::{Arg, ArgAction, Command};
7use std::cell::{OnceCell, RefCell};
8use std::ffi::OsString;
9use std::fs::File;
10use std::io::{BufRead, BufReader, Read, Stdin, Write, stdin, stdout};
11use std::iter::Cycle;
12use std::path::Path;
13use std::rc::Rc;
14use std::slice::Iter;
15use uucore::error::{UResult, USimpleError};
16use uucore::format_usage;
17use uucore::i18n::charmap::mb_char_len;
18use uucore::line_ending::LineEnding;
19use uucore::translate;
20
/// Identifiers of the command-line arguments understood by this utility.
///
/// For the three flag/option arguments the same string doubles as the clap
/// argument id and as the long option name.
mod options {
    /// Positional input file operands.
    pub const FILE: &str = "file";
    /// `-s`: process one input at a time instead of in parallel.
    pub const SERIAL: &str = "serial";
    /// `-d LIST`: delimiters used between merged lines.
    pub const DELIMITER: &str = "delimiters";
    /// `-z`: lines end with NUL instead of newline.
    pub const ZERO_TERMINATED: &str = "zero-terminated";
}
27
28#[uucore::main]
29pub fn uumain(args: impl uucore::Args) -> UResult<()> {
30 let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;
31
32 let serial = matches.get_flag(options::SERIAL);
33 let delimiters = matches.get_one::<OsString>(options::DELIMITER).unwrap();
34 let files = matches
35 .get_many::<OsString>(options::FILE)
36 .unwrap()
37 .cloned()
38 .collect();
39 let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
40
41 paste(files, serial, delimiters, line_ending)
42}
43
44pub fn uu_app() -> Command {
45 Command::new(uucore::util_name())
46 .version(uucore::crate_version!())
47 .help_template(uucore::localized_help_template(uucore::util_name()))
48 .about(translate!("paste-about"))
49 .override_usage(format_usage(&translate!("paste-usage")))
50 .infer_long_args(true)
51 .arg(
52 Arg::new(options::SERIAL)
53 .long(options::SERIAL)
54 .short('s')
55 .help(translate!("paste-help-serial"))
56 .action(ArgAction::SetTrue),
57 )
58 .arg(
59 Arg::new(options::DELIMITER)
60 .long(options::DELIMITER)
61 .short('d')
62 .help(translate!("paste-help-delimiter"))
63 .value_name("LIST")
64 .default_value("\t")
65 .hide_default_value(true)
66 .value_parser(clap::value_parser!(OsString)),
67 )
68 .arg(
69 Arg::new(options::FILE)
70 .value_name("FILE")
71 .action(ArgAction::Append)
72 .default_value("-")
73 .value_hint(clap::ValueHint::FilePath)
74 .value_parser(clap::value_parser!(OsString)),
75 )
76 .arg(
77 Arg::new(options::ZERO_TERMINATED)
78 .long(options::ZERO_TERMINATED)
79 .short('z')
80 .help(translate!("paste-help-zero-terminated"))
81 .action(ArgAction::SetTrue),
82 )
83}
84
85#[allow(clippy::cognitive_complexity)]
86fn paste(
87 filenames: Vec<OsString>,
88 serial: bool,
89 delimiters: &OsString,
90 line_ending: LineEnding,
91) -> UResult<()> {
92 let unescaped_and_encoded_delimiters = parse_delimiters(delimiters)?;
93
94 let stdin_once_cell = OnceCell::<Rc<RefCell<Stdin>>>::new();
95
96 let mut input_source_vec = Vec::with_capacity(filenames.len());
97
98 for filename in filenames {
99 let input_source = if filename == "-" {
100 InputSource::StandardInput(
101 stdin_once_cell
102 .get_or_init(|| Rc::new(RefCell::new(stdin())))
103 .clone(),
104 )
105 } else {
106 let path = Path::new(&filename);
107 let file = File::open(path)?;
108 InputSource::File(BufReader::new(file))
109 };
110
111 input_source_vec.push(input_source);
112 }
113
114 let line_ending_byte = u8::from(line_ending);
115 let input_source_vec_len = input_source_vec.len();
116 let mut stdout = stdout().lock();
117
118 if !serial && input_source_vec_len == 1 {
119 return write_single_input_source(
123 &mut stdout,
124 input_source_vec
125 .pop()
126 .expect("input_source_vec_len was checked to be exactly one"),
127 line_ending_byte,
128 );
129 }
130
131 let line_ending_byte_array_ref = &[line_ending_byte];
132
133 let mut delimiter_state = DelimiterState::new(&unescaped_and_encoded_delimiters);
134
135 let mut output = Vec::new();
136
137 if serial {
138 for input_source in &mut input_source_vec {
139 output.clear();
140
141 loop {
142 if input_source.read_until(line_ending_byte, &mut output)? == 0 {
143 break;
144 }
145 remove_trailing_line_ending_byte(line_ending_byte, &mut output);
146
147 delimiter_state.write_delimiter(&mut output);
148 }
149
150 delimiter_state.remove_trailing_delimiter(&mut output);
151
152 stdout.write_all(&output)?;
153 stdout.write_all(line_ending_byte_array_ref)?;
154 }
155 } else {
156 let mut eof = vec![false; input_source_vec_len];
157
158 loop {
159 output.clear();
160
161 let mut eof_count = 0;
162
163 for (i, input_source) in input_source_vec.iter_mut().enumerate() {
164 if eof[i] {
165 eof_count += 1;
166 } else {
167 match input_source.read_until(line_ending_byte, &mut output)? {
168 0 => {
169 eof[i] = true;
170 eof_count += 1;
171 }
172 _ => {
173 remove_trailing_line_ending_byte(line_ending_byte, &mut output);
174 }
175 }
176 }
177
178 delimiter_state.write_delimiter(&mut output);
179 }
180
181 if eof_count == input_source_vec_len {
182 break;
183 }
184
185 delimiter_state.remove_trailing_delimiter(&mut output);
186
187 stdout.write_all(&output)?;
188 stdout.write_all(line_ending_byte_array_ref)?;
189
190 delimiter_state.reset_to_first_delimiter();
196 }
197 }
198
199 Ok(())
200}
201
202fn write_single_input_source(
203 writer: &mut impl Write,
204 mut input_source: InputSource,
205 line_ending_byte: u8,
206) -> UResult<()> {
207 let mut buffer = [0_u8; 8 * 1024];
208 let mut has_data = false;
209 let mut last_byte = line_ending_byte;
210
211 loop {
212 let bytes_read = input_source.read(&mut buffer)?;
213
214 if bytes_read == 0 {
215 break;
216 }
217
218 has_data = true;
219 last_byte = buffer[bytes_read - 1];
220
221 writer.write_all(&buffer[..bytes_read])?;
222 }
223
224 if has_data && last_byte != line_ending_byte {
225 writer.write_all(&[line_ending_byte])?;
226 }
227
228 Ok(())
229}
230
231fn parse_delimiters(delimiters: &OsString) -> UResult<Box<[Box<[u8]>]>> {
232 let bytes = uucore::os_str_as_bytes(delimiters)?;
233 let mut vec = Vec::<Box<[u8]>>::with_capacity(bytes.len());
234 let mut i = 0;
235
236 while i < bytes.len() {
237 if bytes[i] == b'\\' {
238 i += 1;
239 if i >= bytes.len() {
240 return Err(USimpleError::new(
241 1,
242 translate!("paste-error-delimiter-unescaped-backslash", "delimiters" => delimiters.to_string_lossy()),
243 ));
244 }
245 match bytes[i] {
246 b'0' => vec.push(Box::new([])),
247 b'\\' => vec.push(Box::new([b'\\'])),
248 b'n' => vec.push(Box::new([b'\n'])),
249 b't' => vec.push(Box::new([b'\t'])),
250 b'b' => vec.push(Box::new([b'\x08'])),
251 b'f' => vec.push(Box::new([b'\x0C'])),
252 b'r' => vec.push(Box::new([b'\r'])),
253 b'v' => vec.push(Box::new([b'\x0B'])),
254 _ => {
255 let remaining = &bytes[i..];
257 let len = mb_char_len(remaining).min(remaining.len());
258 vec.push(Box::from(&bytes[i..i + len]));
259 i += len;
260 continue;
261 }
262 }
263 i += 1;
264 } else {
265 let remaining = &bytes[i..];
266 let len = mb_char_len(remaining).min(remaining.len());
267 vec.push(Box::from(&bytes[i..i + len]));
268 i += len;
269 }
270 }
271
272 Ok(vec.into_boxed_slice())
273}
274
/// Drops the final byte of `output` when it equals `line_ending_byte`.
///
/// At most one byte is removed; an empty buffer is left untouched.
fn remove_trailing_line_ending_byte(line_ending_byte: u8, output: &mut Vec<u8>) {
    if output.last().copied() == Some(line_ending_byte) {
        output.truncate(output.len() - 1);
    }
}
282
/// Tracks which delimiter has to be written next.
enum DelimiterState<'a> {
    /// Nothing is ever written between fields.
    NoDelimiters,
    /// The same delimiter is written every time.
    OneDelimiter(&'a [u8]),
    /// The delimiters are used in turn, wrapping around at the end.
    MultipleDelimiters {
        /// The delimiter most recently written (initially the first one).
        current_delimiter: &'a [u8],
        /// The complete list, kept so the cycle can be restarted.
        delimiters: &'a [Box<[u8]>],
        /// Endless iterator yielding the next delimiter to write.
        delimiters_iterator: Cycle<Iter<'a, Box<[u8]>>>,
    },
}

impl<'a> DelimiterState<'a> {
    /// Chooses the state matching the number of delimiters supplied.
    ///
    /// An empty list, or a list holding a single empty delimiter, means that
    /// no delimiter is written at all.
    fn new(unescaped_and_encoded_delimiters: &'a [Box<[u8]>]) -> Self {
        match unescaped_and_encoded_delimiters {
            [] => Self::NoDelimiters,
            [single] if single.is_empty() => Self::NoDelimiters,
            [single] => Self::OneDelimiter(single),
            all @ [first, ..] => Self::MultipleDelimiters {
                current_delimiter: first,
                delimiters: all,
                delimiters_iterator: all.iter().cycle(),
            },
        }
    }

    /// Restarts the delimiter cycle so the next delimiter written is the
    /// first one of the list. Has no effect unless several delimiters exist.
    fn reset_to_first_delimiter(&mut self) {
        match self {
            Self::MultipleDelimiters {
                delimiters,
                delimiters_iterator,
                ..
            } => *delimiters_iterator = delimiters.iter().cycle(),
            Self::NoDelimiters | Self::OneDelimiter(_) => {}
        }
    }

    /// Shortens `output` by the length of the most recently written
    /// delimiter.
    ///
    /// # Panics
    ///
    /// Panics if `output` is non-empty yet shorter than that delimiter.
    fn remove_trailing_delimiter(&mut self, output: &mut Vec<u8>) {
        let trailing_len = match self {
            Self::NoDelimiters => return,
            Self::OneDelimiter(delimiter) => delimiter.len(),
            Self::MultipleDelimiters {
                current_delimiter, ..
            } => current_delimiter.len(),
        };

        // An empty delimiter left nothing behind to remove.
        if trailing_len == 0 {
            return;
        }

        let current_len = output.len();
        match current_len.checked_sub(trailing_len) {
            Some(kept_len) => output.truncate(kept_len),
            // The only legitimate way to be shorter than the delimiter is
            // that nothing was written to `output` at all.
            None => assert_eq!(current_len, 0),
        }
    }

    /// Appends the next delimiter to `output` and remembers it as the
    /// current one.
    fn write_delimiter(&mut self, output: &mut Vec<u8>) {
        match self {
            Self::NoDelimiters => {}
            Self::OneDelimiter(delimiter) => output.extend_from_slice(delimiter),
            Self::MultipleDelimiters {
                current_delimiter,
                delimiters_iterator,
                ..
            } => {
                // Cycling over a non-empty slice never runs out of items.
                let upcoming = delimiters_iterator.next().unwrap();

                output.extend_from_slice(upcoming);
                *current_delimiter = upcoming;
            }
        }
    }
}
378
379enum InputSource {
380 File(BufReader<File>),
381 StandardInput(Rc<RefCell<Stdin>>),
382}
383
384impl InputSource {
385 fn read(&mut self, buf: &mut [u8]) -> UResult<usize> {
386 let us = match self {
387 Self::File(bu) => bu.read(buf)?,
388 Self::StandardInput(rc) => rc
389 .try_borrow()
390 .map_err(|bo| {
391 USimpleError::new(1, translate!("paste-error-stdin-borrow", "error" => bo))
392 })?
393 .lock()
394 .read(buf)?,
395 };
396
397 Ok(us)
398 }
399
400 fn read_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> UResult<usize> {
401 let us = match self {
402 Self::File(bu) => bu.read_until(byte, buf)?,
403 Self::StandardInput(rc) => rc
404 .try_borrow()
405 .map_err(|bo| {
406 USimpleError::new(1, translate!("paste-error-stdin-borrow", "error" => bo))
407 })?
408 .lock()
409 .read_until(byte, buf)?,
410 };
411
412 Ok(us)
413 }
414}