1use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
9use std::ffi::OsString;
10#[cfg(unix)]
11use std::fs::File;
12use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
13use std::num::TryFromIntError;
14#[cfg(unix)]
15use std::os::fd::{AsRawFd, FromRawFd};
16use thiserror::Error;
17use uucore::display::Quotable;
18use uucore::error::{FromIo, UError, UResult};
19use uucore::line_ending::LineEnding;
20use uucore::lines::lines;
21use uucore::{format_usage, help_about, help_usage, show};
22
/// Capacity, in bytes, of the buffered readers/writers created in this file
/// and of the scratch block used when scanning a file backwards.
const BUF_SIZE: usize = 65536;

/// "About" text, produced by `help_about!` from `head.md`.
const ABOUT: &str = help_about!("head.md");
/// Usage text, produced by `help_usage!` from `head.md`.
const USAGE: &str = help_usage!("head.md");
27
/// Identifiers under which each command-line argument is registered with clap
/// in `uu_app` and later looked up in the parsed matches.
mod options {
    pub const BYTES_NAME: &str = "BYTES";
    pub const LINES_NAME: &str = "LINES";
    pub const QUIET_NAME: &str = "QUIET";
    pub const VERBOSE_NAME: &str = "VERBOSE";
    pub const ZERO_NAME: &str = "ZERO";
    pub const FILES_NAME: &str = "FILE";
    pub const PRESUME_INPUT_PIPE: &str = "-PRESUME-INPUT-PIPE";
}
37
38mod parse;
39mod take;
40use take::take_all_but;
41use take::take_lines;
42
/// Errors reported by `head`.
#[derive(Error, Debug)]
enum HeadError {
    /// An I/O error, tagged with the name of the input it is attributed to.
    #[error("error reading {name}: {err}")]
    Io { name: String, err: io::Error },

    /// The obsolete-syntax argument (e.g. `-5`) could not be parsed.
    #[error("parse error: {0}")]
    ParseError(String),

    /// The second command-line argument is not valid UTF-8.
    #[error("bad argument encoding")]
    BadEncoding,

    /// A `u64` count does not fit in `usize` on this platform.
    #[error("{0}: number of -bytes or -lines is too large")]
    NumTooLarge(#[from] TryFromIntError),

    /// An error reported by clap.
    #[error("clap error: {0}")]
    Clap(#[from] clap::Error),

    /// A message produced while turning the parsed matches into `HeadOptions`.
    #[error("{0}")]
    MatchOption(String),
}

impl UError for HeadError {
    /// Every `HeadError` maps to exit code 1.
    fn code(&self) -> i32 {
        1
    }
}

/// Shorthand for results whose error type is `HeadError`.
type HeadResult<T> = Result<T, HeadError>;
72
73pub fn uu_app() -> Command {
74 Command::new(uucore::util_name())
75 .version(crate_version!())
76 .about(ABOUT)
77 .override_usage(format_usage(USAGE))
78 .infer_long_args(true)
79 .arg(
80 Arg::new(options::BYTES_NAME)
81 .short('c')
82 .long("bytes")
83 .value_name("[-]NUM")
84 .help(
85 "\
86 print the first NUM bytes of each file;\n\
87 with the leading '-', print all but the last\n\
88 NUM bytes of each file\
89 ",
90 )
91 .overrides_with_all([options::BYTES_NAME, options::LINES_NAME])
92 .allow_hyphen_values(true),
93 )
94 .arg(
95 Arg::new(options::LINES_NAME)
96 .short('n')
97 .long("lines")
98 .value_name("[-]NUM")
99 .help(
100 "\
101 print the first NUM lines instead of the first 10;\n\
102 with the leading '-', print all but the last\n\
103 NUM lines of each file\
104 ",
105 )
106 .overrides_with_all([options::LINES_NAME, options::BYTES_NAME])
107 .allow_hyphen_values(true),
108 )
109 .arg(
110 Arg::new(options::QUIET_NAME)
111 .short('q')
112 .long("quiet")
113 .visible_alias("silent")
114 .help("never print headers giving file names")
115 .overrides_with_all([options::VERBOSE_NAME, options::QUIET_NAME])
116 .action(ArgAction::SetTrue),
117 )
118 .arg(
119 Arg::new(options::VERBOSE_NAME)
120 .short('v')
121 .long("verbose")
122 .help("always print headers giving file names")
123 .overrides_with_all([options::QUIET_NAME, options::VERBOSE_NAME])
124 .action(ArgAction::SetTrue),
125 )
126 .arg(
127 Arg::new(options::PRESUME_INPUT_PIPE)
128 .long("presume-input-pipe")
129 .alias("-presume-input-pipe")
130 .hide(true)
131 .action(ArgAction::SetTrue),
132 )
133 .arg(
134 Arg::new(options::ZERO_NAME)
135 .short('z')
136 .long("zero-terminated")
137 .help("line delimiter is NUL, not newline")
138 .overrides_with(options::ZERO_NAME)
139 .action(ArgAction::SetTrue),
140 )
141 .arg(
142 Arg::new(options::FILES_NAME)
143 .action(ArgAction::Append)
144 .value_hint(clap::ValueHint::FilePath),
145 )
146}
147
148#[derive(Debug, PartialEq)]
149enum Mode {
150 FirstLines(u64),
151 AllButLastLines(u64),
152 FirstBytes(u64),
153 AllButLastBytes(u64),
154}
155
156impl Default for Mode {
157 fn default() -> Self {
158 Self::FirstLines(10)
159 }
160}
161
162impl Mode {
163 fn from(matches: &ArgMatches) -> Result<Self, String> {
164 if let Some(v) = matches.get_one::<String>(options::BYTES_NAME) {
165 let (n, all_but_last) =
166 parse::parse_num(v).map_err(|err| format!("invalid number of bytes: {err}"))?;
167 if all_but_last {
168 Ok(Self::AllButLastBytes(n))
169 } else {
170 Ok(Self::FirstBytes(n))
171 }
172 } else if let Some(v) = matches.get_one::<String>(options::LINES_NAME) {
173 let (n, all_but_last) =
174 parse::parse_num(v).map_err(|err| format!("invalid number of lines: {err}"))?;
175 if all_but_last {
176 Ok(Self::AllButLastLines(n))
177 } else {
178 Ok(Self::FirstLines(n))
179 }
180 } else {
181 Ok(Self::default())
182 }
183 }
184}
185
186fn arg_iterate<'a>(
187 mut args: impl uucore::Args + 'a,
188) -> HeadResult<Box<dyn Iterator<Item = OsString> + 'a>> {
189 let first = args.next().unwrap();
191 if let Some(second) = args.next() {
192 if let Some(s) = second.to_str() {
193 match parse::parse_obsolete(s) {
194 Some(Ok(iter)) => Ok(Box::new(vec![first].into_iter().chain(iter).chain(args))),
195 Some(Err(e)) => match e {
196 parse::ParseError::Syntax => Err(HeadError::ParseError(format!(
197 "bad argument format: {}",
198 s.quote()
199 ))),
200 parse::ParseError::Overflow => Err(HeadError::ParseError(format!(
201 "invalid argument: {} Value too large for defined datatype",
202 s.quote()
203 ))),
204 },
205 None => Ok(Box::new(vec![first, second].into_iter().chain(args))),
206 }
207 } else {
208 Err(HeadError::BadEncoding)
209 }
210 } else {
211 Ok(Box::new(vec![first].into_iter()))
212 }
213}
214
215#[derive(Debug, PartialEq, Default)]
216struct HeadOptions {
217 pub quiet: bool,
218 pub verbose: bool,
219 pub line_ending: LineEnding,
220 pub presume_input_pipe: bool,
221 pub mode: Mode,
222 pub files: Vec<String>,
223}
224
225impl HeadOptions {
226 pub fn get_from(matches: &clap::ArgMatches) -> Result<Self, String> {
228 let mut options = Self::default();
229
230 options.quiet = matches.get_flag(options::QUIET_NAME);
231 options.verbose = matches.get_flag(options::VERBOSE_NAME);
232 options.line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_NAME));
233 options.presume_input_pipe = matches.get_flag(options::PRESUME_INPUT_PIPE);
234
235 options.mode = Mode::from(matches)?;
236
237 options.files = match matches.get_many::<String>(options::FILES_NAME) {
238 Some(v) => v.cloned().collect(),
239 None => vec!["-".to_owned()],
240 };
241
242 Ok(options)
243 }
244}
245
/// Re-creates `err` with the same kind and a message prefixed by
/// `error writing 'standard output': `.
#[inline]
fn wrap_in_stdout_error(err: io::Error) -> io::Error {
    let message = format!("error writing 'standard output': {err}");
    io::Error::new(err.kind(), message)
}
253
254fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<u64> {
255 let mut reader = input.take(n);
257
258 let stdout = std::io::stdout();
260 let mut stdout = stdout.lock();
261
262 let bytes_written = io::copy(&mut reader, &mut stdout).map_err(wrap_in_stdout_error)?;
263
264 stdout.flush().map_err(wrap_in_stdout_error)?;
268
269 Ok(bytes_written)
270}
271
272fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result<u64> {
273 let mut reader = take_lines(input, n, separator);
275
276 let mut stdout = std::io::stdout();
278
279 let bytes_written = io::copy(&mut reader, &mut stdout).map_err(wrap_in_stdout_error)?;
280
281 stdout.flush().map_err(wrap_in_stdout_error)?;
285
286 Ok(bytes_written)
287}
288
289fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> {
290 match usize::try_from(n) {
291 Ok(value) => Some(value),
292 Err(e) => {
293 show!(HeadError::NumTooLarge(e));
294 None
295 }
296 }
297}
298
299fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<u64> {
300 let mut bytes_written = 0;
301 if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
302 let stdout = std::io::stdout();
303 let stdout = stdout.lock();
304 let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
308 for byte in take_all_but(input.bytes(), n) {
309 writer.write_all(&[byte?]).map_err(wrap_in_stdout_error)?;
310 bytes_written += 1;
311 }
312 writer.flush().map_err(wrap_in_stdout_error)?;
316 }
317 Ok(bytes_written)
318}
319
320fn read_but_last_n_lines(
321 input: impl std::io::BufRead,
322 n: u64,
323 separator: u8,
324) -> std::io::Result<u64> {
325 let mut bytes_written: u64 = 0;
326 if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
327 let stdout = std::io::stdout();
328 let mut stdout = stdout.lock();
329
330 for bytes in take_all_but(lines(input, separator), n) {
331 let bytes = bytes?;
332 bytes_written += u64::try_from(bytes.len()).unwrap();
333
334 stdout.write_all(&bytes).map_err(wrap_in_stdout_error)?;
335 }
336 stdout.flush().map_err(wrap_in_stdout_error)?;
340 }
341 Ok(bytes_written)
342}
343
344fn find_nth_line_from_end<R>(input: &mut R, n: u64, separator: u8) -> std::io::Result<u64>
378where
379 R: Read + Seek,
380{
381 let file_size = input.seek(SeekFrom::End(0))?;
382
383 let mut buffer = [0u8; BUF_SIZE];
384
385 let mut i = 0u64;
386 let mut lines = 0u64;
387
388 loop {
389 let bytes_remaining_to_search = file_size - i;
391 let bytes_to_read_this_loop = bytes_remaining_to_search.min(BUF_SIZE.try_into().unwrap());
392 let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
393 let buffer = &mut buffer[..bytes_to_read_this_loop.try_into().unwrap()];
394
395 input.seek(SeekFrom::Start(read_start_offset))?;
396 input.read_exact(buffer)?;
397 for byte in buffer.iter().rev() {
398 if byte == &separator {
399 lines += 1;
400 }
401 if lines == n + 1 {
403 input.rewind()?;
404 return Ok(file_size - i);
405 }
406 i += 1;
407 }
408 if file_size - i == 0 {
409 input.rewind()?;
410 return Ok(0);
411 }
412 }
413}
414
/// Reports whether `input` supports seeking.
///
/// The current position is queried, then the file is seeked to its end and
/// back to that position; the result is `true` only if all three steps
/// succeed.
fn is_seekable(input: &mut std::fs::File) -> bool {
    let Ok(original_pos) = input.stream_position() else {
        return false;
    };
    let reached_end = input.seek(SeekFrom::End(0)).is_ok();
    reached_end && input.seek(SeekFrom::Start(original_pos)).is_ok()
}
421
422fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
423 let st = input.metadata()?;
424 let seekable = is_seekable(input);
425 let blksize_limit = uucore::fs::sane_blksize::sane_blksize_from_metadata(&st);
426 if !seekable || st.len() <= blksize_limit {
427 head_backwards_without_seek_file(input, options)
428 } else {
429 head_backwards_on_seekable_file(input, options)
430 }
431}
432
433fn head_backwards_without_seek_file(
434 input: &mut std::fs::File,
435 options: &HeadOptions,
436) -> std::io::Result<u64> {
437 let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
438 match options.mode {
439 Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n),
440 Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()),
441 _ => unreachable!(),
442 }
443}
444
445fn head_backwards_on_seekable_file(
446 input: &mut std::fs::File,
447 options: &HeadOptions,
448) -> std::io::Result<u64> {
449 match options.mode {
450 Mode::AllButLastBytes(n) => {
451 let size = input.metadata()?.len();
452 if n >= size {
453 Ok(0)
454 } else {
455 read_n_bytes(
456 &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
457 size - n,
458 )
459 }
460 }
461 Mode::AllButLastLines(n) => {
462 let found = find_nth_line_from_end(input, n, options.line_ending.into())?;
463 read_n_bytes(
464 &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
465 found,
466 )
467 }
468 _ => unreachable!(),
469 }
470}
471
472fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
473 match options.mode {
474 Mode::FirstBytes(n) => {
475 read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n)
476 }
477 Mode::FirstLines(n) => read_n_lines(
478 &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
479 n,
480 options.line_ending.into(),
481 ),
482 Mode::AllButLastBytes(_) | Mode::AllButLastLines(_) => head_backwards_file(input, options),
483 }
484}
485
486#[allow(clippy::cognitive_complexity)]
487fn uu_head(options: &HeadOptions) -> UResult<()> {
488 let mut first = true;
489 for file in &options.files {
490 let res = match (file.as_str(), options.presume_input_pipe) {
491 (_, true) | ("-", false) => {
492 if (options.files.len() > 1 && !options.quiet) || options.verbose {
493 if !first {
494 println!();
495 }
496 println!("==> standard input <==");
497 }
498 let stdin = std::io::stdin();
499
500 #[cfg(unix)]
501 {
502 let stdin_raw_fd = stdin.as_raw_fd();
503 let mut stdin_file = unsafe { File::from_raw_fd(stdin_raw_fd) };
504 let current_pos = stdin_file.stream_position();
505 if let Ok(current_pos) = current_pos {
506 let bytes_read = head_file(&mut stdin_file, options)?;
511 stdin_file.seek(SeekFrom::Start(current_pos + bytes_read))?;
512 } else {
513 let _bytes_read = head_file(&mut stdin_file, options)?;
514 }
515 }
516
517 #[cfg(not(unix))]
518 {
519 let mut stdin = stdin.lock();
520
521 match options.mode {
522 Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n),
523 Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n),
524 Mode::FirstLines(n) => {
525 read_n_lines(&mut stdin, n, options.line_ending.into())
526 }
527 Mode::AllButLastLines(n) => {
528 read_but_last_n_lines(&mut stdin, n, options.line_ending.into())
529 }
530 }?;
531 }
532
533 Ok(())
534 }
535 (name, false) => {
536 let mut file = match std::fs::File::open(name) {
537 Ok(f) => f,
538 Err(err) => {
539 show!(err.map_err_context(|| format!(
540 "cannot open {} for reading",
541 name.quote()
542 )));
543 continue;
544 }
545 };
546 if (options.files.len() > 1 && !options.quiet) || options.verbose {
547 if !first {
548 println!();
549 }
550 println!("==> {name} <==");
551 }
552 head_file(&mut file, options)?;
553 Ok(())
554 }
555 };
556 if let Err(e) = res {
557 let name = if file.as_str() == "-" {
558 "standard input"
559 } else {
560 file
561 };
562 return Err(HeadError::Io {
563 name: name.to_string(),
564 err: e,
565 }
566 .into());
567 }
568 first = false;
569 }
570 Ok(())
575}
576
577#[uucore::main]
578pub fn uumain(args: impl uucore::Args) -> UResult<()> {
579 let matches = uu_app().try_get_matches_from(arg_iterate(args)?)?;
580 let args = match HeadOptions::get_from(&matches) {
581 Ok(o) => o,
582 Err(s) => {
583 return Err(HeadError::MatchOption(s).into());
584 }
585 };
586 uu_head(&args)
587}
588
#[cfg(test)]
mod tests {
    use std::ffi::OsString;
    use std::io::Cursor;

    use super::*;

    /// Parses `args` (whitespace-separated, without the program name) into
    /// `HeadOptions`, going through the obsolete-syntax rewrite first.
    fn options(args: &str) -> Result<HeadOptions, String> {
        let command_line = format!("head {args}");
        let argv = command_line.split_whitespace().map(OsString::from);
        let rewritten = arg_iterate(argv).map_err(|_| String::from("Arg iterate failed"))?;
        let matches = uu_app().get_matches_from(rewritten);
        HeadOptions::get_from(&matches)
    }

    #[test]
    fn test_args_modes() {
        let parsed = options("-n -10M -vz").unwrap();
        assert_eq!(parsed.line_ending, LineEnding::Nul);
        assert!(parsed.verbose);
        assert_eq!(parsed.mode, Mode::AllButLastLines(10 * 1024 * 1024));
    }

    #[test]
    fn test_gnu_compatibility() {
        // The last of several conflicting options is the one that applies.
        let parsed = options("-n 1 -c 1 -n 5 -c kiB -vqvqv").unwrap();
        assert_eq!(parsed.mode, Mode::FirstBytes(1024));
        assert!(parsed.verbose);

        // Obsolete `-NUM[suffix]` syntax.
        assert_eq!(options("-5").unwrap().mode, Mode::FirstLines(5));
        assert_eq!(options("-2b").unwrap().mode, Mode::FirstBytes(1024));
        assert_eq!(options("-5 -c 1").unwrap().mode, Mode::FirstBytes(1));
    }

    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn all_args_test() {
        for quiet_flag in ["--silent", "--quiet", "-q"] {
            assert!(options(quiet_flag).unwrap().quiet);
        }
        for verbose_flag in ["--verbose", "-v"] {
            assert!(options(verbose_flag).unwrap().verbose);
        }
        for zero_flag in ["--zero-terminated", "-z"] {
            assert_eq!(options(zero_flag).unwrap().line_ending, LineEnding::Nul);
        }
        for lines_arg in ["--lines 15", "-n 15"] {
            assert_eq!(options(lines_arg).unwrap().mode, Mode::FirstLines(15));
        }
        for bytes_arg in ["--bytes 15", "-c 15"] {
            assert_eq!(options(bytes_arg).unwrap().mode, Mode::FirstBytes(15));
        }
    }

    #[test]
    fn test_options_errors() {
        assert!(options("-n IsThisTheRealLife?").is_err());
        assert!(options("-c IsThisJustFantasy").is_err());
    }

    #[test]
    fn test_options_correct_defaults() {
        let defaults = HeadOptions::default();

        assert!(!defaults.verbose);
        assert!(!defaults.quiet);
        assert_eq!(defaults.line_ending, LineEnding::Newline);
        assert_eq!(defaults.mode, Mode::FirstLines(10));
        assert!(defaults.files.is_empty());
    }

    /// Runs `arg_iterate` on `src` and joins the resulting arguments with
    /// single spaces.
    fn arg_outputs(src: &str) -> Result<String, ()> {
        let argv = src.split_whitespace().map(OsString::from);
        arg_iterate(argv)
            .map(|args| {
                args.map(|arg| arg.to_str().unwrap().to_owned())
                    .collect::<Vec<_>>()
                    .join(" ")
            })
            .map_err(|_| ())
    }

    #[test]
    fn test_arg_iterate() {
        // Arguments that are not obsolete syntax pass through untouched.
        assert_eq!(
            arg_outputs("head -n -5 -zv"),
            Ok("head -n -5 -zv".to_owned())
        );
        assert_eq!(
            arg_outputs("head -to_be_or_not_to_be,..."),
            Ok("head -to_be_or_not_to_be,...".to_owned())
        );
        // Obsolete syntax is expanded into separate options.
        assert_eq!(
            arg_outputs("head -123qvqvqzc"),
            Ok("head -q -z -c 123".to_owned())
        );
        // Malformed or overflowing obsolete syntax is rejected.
        assert!(arg_outputs("head -123FooBar").is_err());
        assert!(arg_outputs("head -100000000000000000000000000000000000000000").is_err());
        // The program name alone is returned as is.
        assert_eq!(arg_outputs("head"), Ok("head".to_owned()));
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_arg_iterate_bad_encoding() {
        use std::os::unix::ffi::OsStringExt;
        let invalid = OsString::from_vec(vec![b'\x80', b'\x81']);
        let argv = vec![OsString::from("head"), invalid];
        assert!(arg_iterate(argv.into_iter()).is_err());
    }

    #[test]
    fn read_early_exit() {
        let mut empty = std::io::BufReader::new(std::io::Cursor::new(Vec::new()));
        assert!(read_n_bytes(&mut empty, 0).is_ok());
        assert!(read_n_lines(&mut empty, 0, b'\n').is_ok());
    }

    #[test]
    fn test_find_nth_line_from_end() {
        // Build an input of five-byte lines ("aaaa\n") spanning several
        // scan blocks.
        let minimum_buffer_size = BUF_SIZE * 4;
        let mut input_buffer = Vec::new();
        let mut lines_in_input_file: u64 = 0;
        while input_buffer.len() < minimum_buffer_size {
            input_buffer.extend_from_slice(b"aaaa\n");
            lines_in_input_file += 1;
        }

        let input_length = lines_in_input_file * 5;
        assert_eq!(input_length, u64::try_from(input_buffer.len()).unwrap());
        let mut input = Cursor::new(input_buffer);

        // Sample line counts across the whole file.
        let step_size = 511;
        for n in (0..lines_in_input_file).step_by(step_size) {
            assert_eq!(
                find_nth_line_from_end(&mut input, n, b'\n').unwrap(),
                input_length - 5 * n
            );
        }

        // Asking for as many lines as the file has, or more, yields offset 0.
        for extra in [0, 1, 1000] {
            assert_eq!(
                find_nth_line_from_end(&mut input, lines_in_input_file + extra, b'\n').unwrap(),
                0
            );
        }
    }
}