Skip to main content

coreutils_rs/head/
core.rs

1use std::io::{self, Read, Write};
2use std::path::Path;
3
4use memchr::{memchr_iter, memrchr_iter};
5
6use crate::common::io::{FileData, read_file, read_stdin};
7
/// Selects which part of the input `head` emits.
///
/// Derives `PartialEq`/`Eq` so that parsed modes can be compared directly
/// (for example in argument-parsing tests).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum HeadMode {
    /// First N lines (default: 10)
    Lines(u64),
    /// All but last N lines
    LinesFromEnd(u64),
    /// First N bytes
    Bytes(u64),
    /// All but last N bytes
    BytesFromEnd(u64),
}

/// Configuration for head
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HeadConfig {
    /// Which slice of the input to output.
    pub mode: HeadMode,
    /// When `true`, lines are delimited by NUL (`\0`) instead of newline.
    pub zero_terminated: bool,
}

impl Default for HeadConfig {
    /// The default configuration: the first 10 newline-delimited lines.
    fn default() -> Self {
        Self {
            mode: HeadMode::Lines(10),
            zero_terminated: false,
        }
    }
}
36
/// Parse a numeric argument with optional suffix (K, M, G, etc.)
/// Supports: b(512), kB(1000), K(1024), MB(1e6), M(1048576), GB(1e9), G(1<<30),
/// TB, T, PB, P, EB, E, ZB, Z, YB, Y
///
/// Surrounding whitespace is ignored and a leading `+` is accepted.
///
/// # Errors
///
/// Returns `Err` with a human-readable message when:
/// * the input is empty,
/// * there are no digits, or the number is negative (leading `-`),
/// * the suffix is not recognised,
/// * the number multiplied by an in-range suffix does not fit in `u64`.
///
/// A digit string that alone overflows `u64` is clamped to `u64::MAX`, and any
/// non-zero count with a Z/Y-class suffix yields `u64::MAX`.
pub fn parse_size(s: &str) -> Result<u64, String> {
    let s = s.trim();
    if s.is_empty() {
        return Err("empty size".to_string());
    }

    // Split into an optional one-byte ASCII sign, a run of ASCII digits, and the suffix.
    let negative = s.starts_with('-');
    let sign_len = usize::from(negative || s.starts_with('+'));
    let digit_count = s[sign_len..]
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    let num_end = sign_len + digit_count;

    if num_end == 0 {
        return Err(format!("invalid number: '{}'", s));
    }

    let (num_str, suffix) = s.split_at(num_end);

    // A bare sign is not a number. A negative count must be rejected here:
    // `u64::parse` fails on any leading '-', and treating that failure as
    // "overflow" would silently turn "-5" into u64::MAX.
    if digit_count == 0 || negative {
        return Err(format!("invalid number: '{}'", num_str));
    }

    // `num_str` is now `+?[0-9]+`, so the only way parsing can fail is
    // overflow; clamp to u64::MAX like GNU coreutils does for huge counts.
    let num: u64 = num_str.parse().unwrap_or(u64::MAX);

    let multiplier: u64 = match suffix {
        "" => 1,
        "b" => 512,
        "kB" => 1000,
        "k" | "K" | "KiB" => 1024,
        "MB" => 1_000_000,
        "M" | "MiB" => 1_048_576,
        "GB" => 1_000_000_000,
        "G" | "GiB" => 1_073_741_824,
        "TB" => 1_000_000_000_000,
        "T" | "TiB" => 1_099_511_627_776,
        "PB" => 1_000_000_000_000_000,
        "P" | "PiB" => 1_125_899_906_842_624,
        "EB" => 1_000_000_000_000_000_000,
        "E" | "EiB" => 1_152_921_504_606_846_976,
        // ZB/Z/YB/Y multipliers do not fit in u64: any non-zero count saturates.
        "ZB" | "Z" | "ZiB" | "YB" | "Y" | "YiB" => {
            return Ok(if num > 0 { u64::MAX } else { 0 });
        }
        _ => return Err(format!("invalid suffix in '{}'", s)),
    };

    num.checked_mul(multiplier)
        .ok_or_else(|| format!("number too large: '{}'", s))
}
108
109/// Output first N lines from data
110pub fn head_lines(data: &[u8], n: u64, delimiter: u8, out: &mut impl Write) -> io::Result<()> {
111    if n == 0 || data.is_empty() {
112        return Ok(());
113    }
114
115    let mut count = 0u64;
116    for pos in memchr_iter(delimiter, data) {
117        count += 1;
118        if count == n {
119            return out.write_all(&data[..=pos]);
120        }
121    }
122
123    // Fewer than N lines — output everything
124    out.write_all(data)
125}
126
127/// Output all but last N lines from data.
128/// Uses reverse scanning (memrchr_iter) for single-pass O(n) instead of 2-pass.
129pub fn head_lines_from_end(
130    data: &[u8],
131    n: u64,
132    delimiter: u8,
133    out: &mut impl Write,
134) -> io::Result<()> {
135    if n == 0 {
136        return out.write_all(data);
137    }
138    if data.is_empty() {
139        return Ok(());
140    }
141
142    // Scan backward: skip N delimiters (= N lines), then the next delimiter
143    // marks the end of the last line to keep.
144    // If the data does not end with a delimiter, the unterminated last "line"
145    // counts as one line to skip.
146    let mut count = if !data.is_empty() && *data.last().unwrap() != delimiter {
147        1u64
148    } else {
149        0u64
150    };
151    for pos in memrchr_iter(delimiter, data) {
152        count += 1;
153        if count > n {
154            return out.write_all(&data[..=pos]);
155        }
156    }
157
158    // Fewer than N+1 lines → N >= total lines → output nothing
159    Ok(())
160}
161
/// Write at most the first `n` bytes of `data` to `out`.
///
/// `n` is clamped to the buffer length; `out` is not touched when the
/// resulting count is zero.
pub fn head_bytes(data: &[u8], n: u64, out: &mut impl Write) -> io::Result<()> {
    // Compare in u64 so a huge `n` is never narrowed before being clamped.
    let take = if n < data.len() as u64 {
        n as usize
    } else {
        data.len()
    };

    match take {
        0 => Ok(()),
        len => out.write_all(&data[..len]),
    }
}
170
/// Write `data` minus its last `n` bytes to `out`.
///
/// Nothing is written when `n` is at least the length of `data`.
pub fn head_bytes_from_end(data: &[u8], n: u64, out: &mut impl Write) -> io::Result<()> {
    let total = data.len() as u64;
    if n < total {
        // `total - n` is in 1..=data.len(), so the cast back to usize is lossless.
        let keep = (total - n) as usize;
        out.write_all(&data[..keep])
    } else {
        Ok(())
    }
}
182
183/// Raw write(2) to stdout, bypassing all Rust I/O layers.
184/// Avoids stdout.lock(), BufWriter allocation, and Write trait overhead.
185#[cfg(target_os = "linux")]
186fn write_all_raw(mut data: &[u8]) -> io::Result<()> {
187    while !data.is_empty() {
188        let ret = unsafe { libc::write(1, data.as_ptr() as *const libc::c_void, data.len()) };
189        if ret > 0 {
190            data = &data[ret as usize..];
191        } else if ret == 0 {
192            return Err(io::Error::new(io::ErrorKind::WriteZero, "write returned 0"));
193        } else {
194            let err = io::Error::last_os_error();
195            if err.kind() == io::ErrorKind::Interrupted {
196                continue;
197            }
198            return Err(err);
199        }
200    }
201    Ok(())
202}
203
204/// Ultra-fast direct path: single file, positive line count, writes directly
205/// to stdout fd without BufWriter overhead. Uses raw write(2) on Linux;
206/// on other platforms uses a small stack-buffered stdout.
207/// Returns Ok(true) on success, Ok(false) on file error (already printed).
208pub fn head_file_direct(filename: &str, n: u64, delimiter: u8) -> io::Result<bool> {
209    if n == 0 {
210        return Ok(true);
211    }
212
213    let path = Path::new(filename);
214
215    #[cfg(target_os = "linux")]
216    {
217        use std::os::unix::fs::OpenOptionsExt;
218        let file = std::fs::OpenOptions::new()
219            .read(true)
220            .custom_flags(libc::O_NOATIME)
221            .open(path)
222            .or_else(|_| std::fs::File::open(path));
223        let mut file = match file {
224            Ok(f) => f,
225            Err(e) => {
226                eprintln!(
227                    "head: cannot open '{}' for reading: {}",
228                    filename,
229                    crate::common::io_error_msg(&e)
230                );
231                return Ok(false);
232            }
233        };
234
235        // Hint sequential readahead for better throughput on large-N line counts.
236        {
237            use std::os::unix::io::AsRawFd;
238            unsafe {
239                libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
240            }
241        }
242
243        let mut buf = [0u8; 65536];
244        let mut count = 0u64;
245
246        loop {
247            let bytes_read = match file.read(&mut buf) {
248                Ok(0) => break,
249                Ok(n) => n,
250                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
251                Err(e) => return Err(e),
252            };
253
254            let chunk = &buf[..bytes_read];
255
256            for pos in memchr_iter(delimiter, chunk) {
257                count += 1;
258                if count == n {
259                    write_all_raw(&chunk[..=pos])?;
260                    return Ok(true);
261                }
262            }
263
264            write_all_raw(chunk)?;
265        }
266
267        return Ok(true);
268    }
269
270    #[cfg(not(target_os = "linux"))]
271    {
272        let stdout = io::stdout();
273        let mut out = io::BufWriter::with_capacity(8192, stdout.lock());
274        match head_lines_streaming_file(path, n, delimiter, &mut out) {
275            Ok(true) => {
276                out.flush()?;
277                Ok(true)
278            }
279            Ok(false) => Ok(false),
280            Err(e) => {
281                eprintln!(
282                    "head: cannot open '{}' for reading: {}",
283                    filename,
284                    crate::common::io_error_msg(&e)
285                );
286                Ok(false)
287            }
288        }
289    }
290}
291
292/// Use sendfile for zero-copy byte output on Linux.
293/// Falls back to read+write if sendfile fails (e.g., stdout is a terminal).
294#[cfg(target_os = "linux")]
295pub fn sendfile_bytes(path: &Path, n: u64, out_fd: i32) -> io::Result<bool> {
296    use std::os::unix::fs::OpenOptionsExt;
297
298    let file = std::fs::OpenOptions::new()
299        .read(true)
300        .custom_flags(libc::O_NOATIME)
301        .open(path)
302        .or_else(|_| std::fs::File::open(path))?;
303
304    // Hint sequential readahead for sendfile throughput.
305    {
306        use std::os::unix::io::AsRawFd;
307        unsafe {
308            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
309        }
310    }
311
312    let metadata = file.metadata()?;
313    let file_size = metadata.len();
314    let to_send = n.min(file_size) as usize;
315
316    if to_send == 0 {
317        return Ok(true);
318    }
319
320    use std::os::unix::io::AsRawFd;
321    let in_fd = file.as_raw_fd();
322    let mut offset: libc::off_t = 0;
323    let mut remaining = to_send;
324    let total = to_send;
325
326    while remaining > 0 {
327        let chunk = remaining.min(0x7ffff000); // sendfile max per call
328        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
329        if ret > 0 {
330            remaining -= ret as usize;
331        } else if ret == 0 {
332            break;
333        } else {
334            let err = io::Error::last_os_error();
335            if err.kind() == io::ErrorKind::Interrupted {
336                continue;
337            }
338            // sendfile fails with EINVAL for terminal fds; fall back to read+write
339            if err.raw_os_error() == Some(libc::EINVAL) && remaining == total {
340                let mut file = file;
341                let mut buf = [0u8; 65536];
342                let mut left = to_send;
343                while left > 0 {
344                    let to_read = left.min(buf.len());
345                    let nr = match file.read(&mut buf[..to_read]) {
346                        Ok(0) => break,
347                        Ok(nr) => nr,
348                        Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
349                        Err(e) => return Err(e),
350                    };
351                    write_all_raw(&buf[..nr])?;
352                    left -= nr;
353                }
354                return Ok(true);
355            }
356            return Err(err);
357        }
358    }
359
360    Ok(true)
361}
362
363/// Streaming head for positive line count on a regular file.
364/// Reads small chunks from the start, never mmaps the whole file.
365/// This is the critical fast path: `head -n 10` on a 100MB file
366/// reads only a few KB instead of mapping all 100MB.
367fn head_lines_streaming_file(
368    path: &Path,
369    n: u64,
370    delimiter: u8,
371    out: &mut impl Write,
372) -> io::Result<bool> {
373    if n == 0 {
374        return Ok(true);
375    }
376
377    #[cfg(target_os = "linux")]
378    let file = {
379        use std::os::unix::fs::OpenOptionsExt;
380        std::fs::OpenOptions::new()
381            .read(true)
382            .custom_flags(libc::O_NOATIME)
383            .open(path)
384            .or_else(|_| std::fs::File::open(path))?
385    };
386    #[cfg(not(target_os = "linux"))]
387    let file = std::fs::File::open(path)?;
388
389    let mut file = file;
390
391    // Hint sequential readahead for better throughput on large-N line counts.
392    #[cfg(target_os = "linux")]
393    {
394        use std::os::unix::io::AsRawFd;
395        unsafe {
396            libc::posix_fadvise(file.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL);
397        }
398    }
399
400    let mut buf = [0u8; 65536];
401    let mut count = 0u64;
402
403    loop {
404        let bytes_read = match file.read(&mut buf) {
405            Ok(0) => break,
406            Ok(n) => n,
407            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
408            Err(e) => return Err(e),
409        };
410
411        let chunk = &buf[..bytes_read];
412
413        for pos in memchr_iter(delimiter, chunk) {
414            count += 1;
415            if count == n {
416                out.write_all(&chunk[..=pos])?;
417                return Ok(true);
418            }
419        }
420
421        out.write_all(chunk)?;
422    }
423
424    Ok(true)
425}
426
427/// Process a single file/stdin for head
428pub fn head_file(
429    filename: &str,
430    config: &HeadConfig,
431    out: &mut impl Write,
432    tool_name: &str,
433) -> io::Result<bool> {
434    let delimiter = if config.zero_terminated { b'\0' } else { b'\n' };
435
436    if filename != "-" {
437        let path = Path::new(filename);
438
439        // Fast paths that avoid reading/mmapping the whole file
440        match &config.mode {
441            HeadMode::Lines(n) => {
442                // Streaming: read small chunks, stop after N lines
443                match head_lines_streaming_file(path, *n, delimiter, out) {
444                    Ok(true) => return Ok(true),
445                    Err(e) => {
446                        eprintln!(
447                            "{}: cannot open '{}' for reading: {}",
448                            tool_name,
449                            filename,
450                            crate::common::io_error_msg(&e)
451                        );
452                        return Ok(false);
453                    }
454                    _ => {}
455                }
456            }
457            HeadMode::Bytes(n) => {
458                // sendfile: zero-copy, reads only N bytes
459                #[cfg(target_os = "linux")]
460                {
461                    use std::os::unix::io::AsRawFd;
462                    let stdout = io::stdout();
463                    let out_fd = stdout.as_raw_fd();
464                    if let Ok(true) = sendfile_bytes(path, *n, out_fd) {
465                        return Ok(true);
466                    }
467                }
468                // Non-Linux: still avoid full mmap
469                #[cfg(not(target_os = "linux"))]
470                {
471                    if let Ok(true) = head_bytes_streaming_file(path, *n, out) {
472                        return Ok(true);
473                    }
474                }
475            }
476            _ => {
477                // LinesFromEnd and BytesFromEnd need the whole file — use mmap
478            }
479        }
480    }
481
482    // Fast path for stdin with positive line/byte counts — stream without buffering everything.
483    if filename == "-" {
484        match &config.mode {
485            HeadMode::Lines(n) => {
486                return match head_stdin_lines_streaming(*n, delimiter, out) {
487                    Ok(()) => Ok(true),
488                    Err(e) if e.kind() == io::ErrorKind::BrokenPipe => Ok(true),
489                    Err(e) => {
490                        eprintln!(
491                            "{}: standard input: {}",
492                            tool_name,
493                            crate::common::io_error_msg(&e)
494                        );
495                        Ok(false)
496                    }
497                };
498            }
499            HeadMode::Bytes(n) => {
500                return match head_stdin_bytes_streaming(*n, out) {
501                    Ok(()) => Ok(true),
502                    Err(e) if e.kind() == io::ErrorKind::BrokenPipe => Ok(true),
503                    Err(e) => {
504                        eprintln!(
505                            "{}: standard input: {}",
506                            tool_name,
507                            crate::common::io_error_msg(&e)
508                        );
509                        Ok(false)
510                    }
511                };
512            }
513            _ => {} // LinesFromEnd/BytesFromEnd need full buffer
514        }
515    }
516
517    // Slow path: read entire file (needed for -n -N, -c -N, or stdin from-end modes)
518    let data: FileData = if filename == "-" {
519        match read_stdin() {
520            Ok(d) => FileData::Owned(d),
521            Err(e) => {
522                eprintln!(
523                    "{}: standard input: {}",
524                    tool_name,
525                    crate::common::io_error_msg(&e)
526                );
527                return Ok(false);
528            }
529        }
530    } else {
531        match read_file(Path::new(filename)) {
532            Ok(d) => d,
533            Err(e) => {
534                eprintln!(
535                    "{}: cannot open '{}' for reading: {}",
536                    tool_name,
537                    filename,
538                    crate::common::io_error_msg(&e)
539                );
540                return Ok(false);
541            }
542        }
543    };
544
545    match &config.mode {
546        HeadMode::Lines(n) => head_lines(&data, *n, delimiter, out)?,
547        HeadMode::LinesFromEnd(n) => head_lines_from_end(&data, *n, delimiter, out)?,
548        HeadMode::Bytes(n) => head_bytes(&data, *n, out)?,
549        HeadMode::BytesFromEnd(n) => head_bytes_from_end(&data, *n, out)?,
550    }
551
552    Ok(true)
553}
554
/// Streaming head for positive byte count on non-Linux.
///
/// Copies at most `n` bytes from the start of the file at `path` into `out`
/// through a 64 KiB buffer, stopping early at EOF. Returns `Ok(true)` when the
/// copy completes; open, read and write failures are returned as `Err`.
#[cfg(not(target_os = "linux"))]
fn head_bytes_streaming_file(path: &Path, n: u64, out: &mut impl Write) -> io::Result<bool> {
    let mut file = std::fs::File::open(path)?;
    // Keep the counter in u64: narrowing `n` to usize up front would silently
    // truncate counts above usize::MAX on 32-bit targets.
    let mut remaining = n;
    let mut buf = [0u8; 65536];

    while remaining > 0 {
        // Clamp in u64 first; the result is at most buf.len(), so the cast is lossless.
        let to_read = remaining.min(buf.len() as u64) as usize;
        let bytes_read = match file.read(&mut buf[..to_read]) {
            Ok(0) => break,
            Ok(count) => count,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        out.write_all(&buf[..bytes_read])?;
        remaining -= bytes_read as u64;
    }

    Ok(true)
}
576
577/// Process head for stdin streaming (line mode, positive count)
578/// Reads chunks and counts lines, stopping early once count reached.
579pub fn head_stdin_lines_streaming(n: u64, delimiter: u8, out: &mut impl Write) -> io::Result<()> {
580    if n == 0 {
581        return Ok(());
582    }
583
584    let stdin = io::stdin();
585    let mut reader = stdin.lock();
586    let mut buf = [0u8; 262144];
587    let mut count = 0u64;
588
589    loop {
590        let bytes_read = match reader.read(&mut buf) {
591            Ok(0) => break,
592            Ok(n) => n,
593            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
594            Err(e) => return Err(e),
595        };
596
597        let chunk = &buf[..bytes_read];
598
599        // Count delimiters in this chunk
600        for pos in memchr_iter(delimiter, chunk) {
601            count += 1;
602            if count == n {
603                out.write_all(&chunk[..=pos])?;
604                return Ok(());
605            }
606        }
607
608        // Haven't reached N lines yet, output entire chunk
609        out.write_all(chunk)?;
610    }
611
612    Ok(())
613}
614
/// Process head for stdin streaming (byte mode, positive count).
///
/// Locks stdin and copies at most `n` bytes into `out` through a 256 KiB
/// buffer, stopping at EOF or as soon as `n` bytes have been written. With
/// `n == 0` stdin is not touched at all. Read and write failures are returned
/// as `Err`.
fn head_stdin_bytes_streaming(n: u64, out: &mut impl Write) -> io::Result<()> {
    if n == 0 {
        return Ok(());
    }

    let stdin = io::stdin();
    let mut reader = stdin.lock();
    let mut buf = [0u8; 262144];
    let mut remaining = n;

    while remaining > 0 {
        // Clamp in u64 before narrowing. Casting `remaining` to usize first
        // would truncate on 32-bit targets, and a multiple of 2^32 would turn
        // into a zero-length read that looks like EOF.
        let to_read = remaining.min(buf.len() as u64) as usize;
        let bytes_read = match reader.read(&mut buf[..to_read]) {
            Ok(0) => break,
            Ok(count) => count,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        out.write_all(&buf[..bytes_read])?;
        remaining -= bytes_read as u64;
    }

    Ok(())
}