Skip to main content

coreutils_rs/tail/
core.rs

1use std::io::{self, Read, Seek, Write};
2use std::path::Path;
3
4use memchr::{memchr_iter, memrchr_iter};
5
6use crate::common::io::{FileData, read_file, read_stdin};
7
8/// Open a file with O_NOATIME on Linux, falling back if not permitted.
9#[cfg(target_os = "linux")]
10fn open_noatime(path: &Path) -> io::Result<std::fs::File> {
11    use std::os::unix::fs::OpenOptionsExt;
12    std::fs::OpenOptions::new()
13        .read(true)
14        .custom_flags(libc::O_NOATIME)
15        .open(path)
16        .or_else(|_| std::fs::File::open(path))
17}
18
19/// Scan backward from EOF to find the byte offset where the last N delimited
20/// lines begin. Returns 0 when the file has fewer than N lines (output all).
21/// Platform-agnostic — tested on all CI targets.
22fn find_tail_start_byte(
23    reader: &mut (impl Read + Seek),
24    file_size: u64,
25    n: u64,
26    delimiter: u8,
27) -> io::Result<u64> {
28    const CHUNK: u64 = 262144;
29    let mut pos = file_size;
30    let mut count = 0u64;
31    let mut buf = vec![0u8; CHUNK as usize];
32
33    while pos > 0 {
34        let read_start = if pos > CHUNK { pos - CHUNK } else { 0 };
35        let read_len = (pos - read_start) as usize;
36
37        reader.seek(io::SeekFrom::Start(read_start))?;
38        reader.read_exact(&mut buf[..read_len])?;
39
40        // Skip trailing delimiter (don't count the file's final newline)
41        let search_end = if pos == file_size && read_len > 0 && buf[read_len - 1] == delimiter {
42            read_len - 1
43        } else {
44            read_len
45        };
46
47        for rpos in memrchr_iter(delimiter, &buf[..search_end]) {
48            count += 1;
49            if count == n {
50                return Ok(read_start + rpos as u64 + 1);
51            }
52        }
53
54        pos = read_start;
55    }
56
57    Ok(0)
58}
59
/// Selects which part of the input `tail` emits.
#[derive(Debug, Clone)]
pub enum TailMode {
    /// Emit the final N lines (the default configuration uses 10).
    Lines(u64),
    /// Emit everything from line N onward; N is 1-indexed.
    LinesFrom(u64),
    /// Emit the final N bytes.
    Bytes(u64),
    /// Emit everything from byte N onward; N is 1-indexed.
    BytesFrom(u64),
}
72
/// Whether, and in what manner, `tail` keeps watching its input after the
/// initial output.
#[derive(Debug, Clone, PartialEq)]
pub enum FollowMode {
    /// Do not follow.
    None,
    /// Follow mode named "descriptor" (presumably GNU `--follow=descriptor`;
    /// nothing in this file distinguishes it from `Name` — confirm at the CLI layer).
    Descriptor,
    /// Follow mode named "name" (presumably GNU `--follow=name` — confirm at the CLI layer).
    Name,
}
80
81/// Configuration for tail
82#[derive(Clone, Debug)]
83pub struct TailConfig {
84    pub mode: TailMode,
85    pub follow: FollowMode,
86    pub retry: bool,
87    pub pid: Option<u32>,
88    pub sleep_interval: f64,
89    pub max_unchanged_stats: u64,
90    pub zero_terminated: bool,
91}
92
93impl Default for TailConfig {
94    fn default() -> Self {
95        Self {
96            mode: TailMode::Lines(10),
97            follow: FollowMode::None,
98            retry: false,
99            pid: None,
100            sleep_interval: 1.0,
101            max_unchanged_stats: 5,
102            zero_terminated: false,
103        }
104    }
105}
106
107/// Parse a numeric argument with optional suffix, same as head
108pub fn parse_size(s: &str) -> Result<u64, String> {
109    crate::head::parse_size(s)
110}
111
112/// Output last N lines from data using backward SIMD scanning
113pub fn tail_lines(data: &[u8], n: u64, delimiter: u8, out: &mut impl Write) -> io::Result<()> {
114    if n == 0 || data.is_empty() {
115        return Ok(());
116    }
117
118    // Use memrchr for backward scanning - SIMD accelerated
119    let mut count = 0u64;
120
121    // Check if data ends with delimiter - if so, skip the trailing one
122    let search_end = if !data.is_empty() && data[data.len() - 1] == delimiter {
123        data.len() - 1
124    } else {
125        data.len()
126    };
127
128    for pos in memrchr_iter(delimiter, &data[..search_end]) {
129        count += 1;
130        if count == n {
131            return out.write_all(&data[pos + 1..]);
132        }
133    }
134
135    // Fewer than N lines — output everything
136    out.write_all(data)
137}
138
139/// Output from line N onward (1-indexed)
140pub fn tail_lines_from(data: &[u8], n: u64, delimiter: u8, out: &mut impl Write) -> io::Result<()> {
141    if data.is_empty() {
142        return Ok(());
143    }
144
145    if n <= 1 {
146        return out.write_all(data);
147    }
148
149    // Skip first (n-1) lines
150    let skip = n - 1;
151    let mut count = 0u64;
152
153    for pos in memchr_iter(delimiter, data) {
154        count += 1;
155        if count == skip {
156            let start = pos + 1;
157            if start < data.len() {
158                return out.write_all(&data[start..]);
159            }
160            return Ok(());
161        }
162    }
163
164    // Fewer than N lines — output nothing
165    Ok(())
166}
167
/// Writes the final `n` bytes of `data` to `out`.
///
/// If `n` exceeds the length of `data`, all of `data` is written. Nothing is
/// written when `n` is 0 or `data` is empty.
pub fn tail_bytes(data: &[u8], n: u64, out: &mut impl Write) -> io::Result<()> {
    if data.is_empty() || n == 0 {
        return Ok(());
    }

    // Compare in u64 space so the narrowing cast only happens once `n` is
    // known to fit.
    let keep = if n >= data.len() as u64 {
        data.len()
    } else {
        n as usize
    };

    let tail = &data[data.len() - keep..];
    out.write_all(tail)
}
177
/// Writes `data` starting at byte `n` (1-indexed) to `out`.
///
/// `n` of 0 or 1 writes everything. When `n` lies past the end of `data`
/// (or `data` is empty) nothing is written.
///
/// # Errors
/// Propagates any error from `out.write_all`.
pub fn tail_bytes_from(data: &[u8], n: u64, out: &mut impl Write) -> io::Result<()> {
    // Number of leading bytes to drop; `n == 0` behaves like `n == 1`.
    let skip = n.saturating_sub(1);

    // Decide in u64 space. Casting first (`skip as usize`) would truncate on
    // 32-bit targets, so e.g. skip == 2^32 would wrap to 0 and wrongly emit
    // the whole buffer.
    if skip >= data.len() as u64 {
        return Ok(());
    }

    // `skip < data.len()`, so the cast is lossless and the slice is non-empty.
    out.write_all(&data[skip as usize..])
}
195
196/// Use sendfile for zero-copy byte output on Linux (last N bytes)
197#[cfg(target_os = "linux")]
198pub fn sendfile_tail_bytes(path: &Path, n: u64, out_fd: i32) -> io::Result<bool> {
199    let file = open_noatime(path)?;
200
201    let metadata = file.metadata()?;
202    let file_size = metadata.len();
203
204    if file_size == 0 {
205        return Ok(true);
206    }
207
208    let n = n.min(file_size);
209    let start = file_size - n;
210
211    use std::os::unix::io::AsRawFd;
212    let in_fd = file.as_raw_fd();
213    let _ = unsafe {
214        libc::posix_fadvise(
215            in_fd,
216            start as libc::off_t,
217            n as libc::off_t,
218            libc::POSIX_FADV_SEQUENTIAL,
219        )
220    };
221    let mut offset: libc::off_t = start as libc::off_t;
222    let mut remaining = n;
223
224    while remaining > 0 {
225        let chunk = remaining.min(0x7fff_f000) as usize;
226        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
227        if ret > 0 {
228            remaining -= ret as u64;
229        } else if ret == 0 {
230            break;
231        } else {
232            let err = io::Error::last_os_error();
233            if err.kind() == io::ErrorKind::Interrupted {
234                continue;
235            }
236            return Err(err);
237        }
238    }
239
240    Ok(true)
241}
242
243/// Streaming tail -n N via sendfile on Linux. Caller opens the file so that
244/// open errors can be reported as "cannot open" and I/O errors as "error reading".
245#[cfg(target_os = "linux")]
246fn sendfile_tail_lines(
247    file: std::fs::File,
248    file_size: u64,
249    n: u64,
250    delimiter: u8,
251    out_fd: i32,
252) -> io::Result<bool> {
253    use std::os::unix::io::AsRawFd;
254
255    if n == 0 || file_size == 0 {
256        return Ok(true);
257    }
258
259    let in_fd = file.as_raw_fd();
260
261    // Disable forward readahead — we scan backward from EOF
262    let _ = unsafe { libc::posix_fadvise(in_fd, 0, 0, libc::POSIX_FADV_RANDOM) };
263
264    let mut reader = file;
265    let start_byte = find_tail_start_byte(&mut reader, file_size, n, delimiter)?;
266
267    // Enable forward readahead from the output start point
268    let remaining = file_size - start_byte;
269    let _ = unsafe {
270        libc::posix_fadvise(
271            in_fd,
272            start_byte as libc::off_t,
273            remaining as libc::off_t,
274            libc::POSIX_FADV_SEQUENTIAL,
275        )
276    };
277
278    // Zero-copy output via sendfile
279    let mut offset = start_byte as libc::off_t;
280    let mut left = remaining;
281    while left > 0 {
282        let chunk = left.min(0x7fff_f000) as usize;
283        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
284        if ret > 0 {
285            left -= ret as u64;
286        } else if ret == 0 {
287            break;
288        } else {
289            let err = io::Error::last_os_error();
290            if err.kind() == io::ErrorKind::Interrupted {
291                continue;
292            }
293            return Err(err);
294        }
295    }
296
297    Ok(true)
298}
299
/// Streaming `tail -n N` for regular files on non-Linux platforms.
///
/// The caller supplies the opened file and its size. The start of the tail
/// is located by scanning backward from EOF, after which the file is
/// repositioned there and copied to `out` up to end of file. With `n == 0`
/// or an empty file nothing is read or written.
///
/// Always returns `Ok(true)` on success; scan, seek and copy errors are
/// propagated.
#[cfg(not(target_os = "linux"))]
fn tail_lines_streaming_file(
    mut file: std::fs::File,
    file_size: u64,
    n: u64,
    delimiter: u8,
    out: &mut impl Write,
) -> io::Result<bool> {
    let has_output = n != 0 && file_size != 0;

    if has_output {
        let tail_offset = find_tail_start_byte(&mut file, file_size, n, delimiter)?;
        file.seek(io::SeekFrom::Start(tail_offset))?;
        io::copy(&mut file, out)?;
    }

    Ok(true)
}
320
321/// Streaming tail -n +N for regular files: skip N-1 lines from start.
322/// Caller opens the file.
323///
324/// **Precondition**: On Linux, the `n <= 1` path uses `sendfile` which writes
325/// directly to stdout (bypassing `out`). The caller MUST `out.flush()` before
326/// calling this function to avoid interleaved output.
327fn tail_lines_from_streaming_file(
328    file: std::fs::File,
329    n: u64,
330    delimiter: u8,
331    out: &mut impl Write,
332) -> io::Result<bool> {
333    if n <= 1 {
334        // Output entire file via sendfile
335        #[cfg(target_os = "linux")]
336        {
337            use std::os::unix::io::AsRawFd;
338            let in_fd = file.as_raw_fd();
339            let stdout = io::stdout();
340            let out_fd = stdout.as_raw_fd();
341            let file_size = file.metadata()?.len();
342            return sendfile_to_stdout_raw(in_fd, file_size, out_fd);
343        }
344        #[cfg(not(target_os = "linux"))]
345        {
346            let mut reader = io::BufReader::with_capacity(1024 * 1024, file);
347            let mut buf = [0u8; 262144];
348            loop {
349                let n = match reader.read(&mut buf) {
350                    Ok(0) => break,
351                    Ok(n) => n,
352                    Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
353                    Err(e) => return Err(e),
354                };
355                out.write_all(&buf[..n])?;
356            }
357            return Ok(true);
358        }
359    }
360
361    let skip = n - 1;
362    let mut reader = io::BufReader::with_capacity(1024 * 1024, file);
363    let mut buf = [0u8; 262144];
364    let mut count = 0u64;
365    let mut skipping = true;
366
367    loop {
368        let bytes_read = match reader.read(&mut buf) {
369            Ok(0) => break,
370            Ok(n) => n,
371            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
372            Err(e) => return Err(e),
373        };
374
375        let chunk = &buf[..bytes_read];
376
377        if skipping {
378            for pos in memchr_iter(delimiter, chunk) {
379                count += 1;
380                if count == skip {
381                    // Found the start — output rest of this chunk and stop skipping
382                    let start = pos + 1;
383                    if start < chunk.len() {
384                        out.write_all(&chunk[start..])?;
385                    }
386                    skipping = false;
387                    break;
388                }
389            }
390        } else {
391            out.write_all(chunk)?;
392        }
393    }
394
395    Ok(true)
396}
397
398/// Raw sendfile helper
399#[cfg(target_os = "linux")]
400fn sendfile_to_stdout_raw(in_fd: i32, file_size: u64, out_fd: i32) -> io::Result<bool> {
401    let mut offset: libc::off_t = 0;
402    let mut remaining = file_size;
403    while remaining > 0 {
404        let chunk = remaining.min(0x7fff_f000) as usize;
405        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
406        if ret > 0 {
407            remaining -= ret as u64;
408        } else if ret == 0 {
409            break;
410        } else {
411            let err = io::Error::last_os_error();
412            if err.kind() == io::ErrorKind::Interrupted {
413                continue;
414            }
415            return Err(err);
416        }
417    }
418    Ok(true)
419}
420
421/// Process a single file/stdin for tail.
422///
423/// On Linux, the sendfile fast paths bypass `out` and write directly to stdout
424/// (fd 1). Callers MUST ensure `out` wraps stdout when these paths are active.
425/// The `out.flush()` call drains any buffered data before sendfile takes over.
426pub fn tail_file(
427    filename: &str,
428    config: &TailConfig,
429    out: &mut impl Write,
430    tool_name: &str,
431) -> io::Result<bool> {
432    let delimiter = if config.zero_terminated { b'\0' } else { b'\n' };
433
434    if filename != "-" {
435        let path = Path::new(filename);
436
437        match &config.mode {
438            TailMode::Lines(n) => {
439                // Open the file first so open errors get the right message
440                #[cfg(target_os = "linux")]
441                let file = match open_noatime(path) {
442                    Ok(f) => f,
443                    Err(e) => {
444                        eprintln!(
445                            "{}: cannot open '{}' for reading: {}",
446                            tool_name,
447                            filename,
448                            crate::common::io_error_msg(&e)
449                        );
450                        return Ok(false);
451                    }
452                };
453                #[cfg(not(target_os = "linux"))]
454                let file = match std::fs::File::open(path) {
455                    Ok(f) => f,
456                    Err(e) => {
457                        eprintln!(
458                            "{}: cannot open '{}' for reading: {}",
459                            tool_name,
460                            filename,
461                            crate::common::io_error_msg(&e)
462                        );
463                        return Ok(false);
464                    }
465                };
466                let file_size = match file.metadata() {
467                    Ok(m) => m.len(),
468                    Err(e) => {
469                        eprintln!(
470                            "{}: error reading '{}': {}",
471                            tool_name,
472                            filename,
473                            crate::common::io_error_msg(&e)
474                        );
475                        return Ok(false);
476                    }
477                };
478                #[cfg(target_os = "linux")]
479                {
480                    use std::os::unix::io::AsRawFd;
481                    out.flush()?;
482                    let stdout = io::stdout();
483                    let out_fd = stdout.as_raw_fd();
484                    match sendfile_tail_lines(file, file_size, *n, delimiter, out_fd) {
485                        Ok(_) => return Ok(true),
486                        Err(e) => {
487                            eprintln!(
488                                "{}: error reading '{}': {}",
489                                tool_name,
490                                filename,
491                                crate::common::io_error_msg(&e)
492                            );
493                            return Ok(false);
494                        }
495                    }
496                }
497                #[cfg(not(target_os = "linux"))]
498                {
499                    match tail_lines_streaming_file(file, file_size, *n, delimiter, out) {
500                        Ok(_) => return Ok(true),
501                        Err(e) => {
502                            eprintln!(
503                                "{}: error reading '{}': {}",
504                                tool_name,
505                                filename,
506                                crate::common::io_error_msg(&e)
507                            );
508                            return Ok(false);
509                        }
510                    }
511                }
512            }
513            TailMode::LinesFrom(n) => {
514                out.flush()?;
515                #[cfg(target_os = "linux")]
516                let file = match open_noatime(path) {
517                    Ok(f) => f,
518                    Err(e) => {
519                        eprintln!(
520                            "{}: cannot open '{}' for reading: {}",
521                            tool_name,
522                            filename,
523                            crate::common::io_error_msg(&e)
524                        );
525                        return Ok(false);
526                    }
527                };
528                #[cfg(not(target_os = "linux"))]
529                let file = match std::fs::File::open(path) {
530                    Ok(f) => f,
531                    Err(e) => {
532                        eprintln!(
533                            "{}: cannot open '{}' for reading: {}",
534                            tool_name,
535                            filename,
536                            crate::common::io_error_msg(&e)
537                        );
538                        return Ok(false);
539                    }
540                };
541                match tail_lines_from_streaming_file(file, *n, delimiter, out) {
542                    Ok(_) => return Ok(true),
543                    Err(e) => {
544                        eprintln!(
545                            "{}: error reading '{}': {}",
546                            tool_name,
547                            filename,
548                            crate::common::io_error_msg(&e)
549                        );
550                        return Ok(false);
551                    }
552                }
553            }
554            TailMode::Bytes(_n) => {
555                #[cfg(target_os = "linux")]
556                {
557                    use std::os::unix::io::AsRawFd;
558                    out.flush()?;
559                    let stdout = io::stdout();
560                    let out_fd = stdout.as_raw_fd();
561                    match sendfile_tail_bytes(path, *_n, out_fd) {
562                        Ok(true) => return Ok(true),
563                        Ok(false) => {}
564                        Err(e) => {
565                            eprintln!(
566                                "{}: error reading '{}': {}",
567                                tool_name,
568                                filename,
569                                crate::common::io_error_msg(&e)
570                            );
571                            return Ok(false);
572                        }
573                    }
574                }
575            }
576            TailMode::BytesFrom(_n) => {
577                #[cfg(target_os = "linux")]
578                {
579                    use std::os::unix::io::AsRawFd;
580                    out.flush()?;
581                    let stdout = io::stdout();
582                    let out_fd = stdout.as_raw_fd();
583                    match sendfile_tail_bytes_from(path, *_n, out_fd) {
584                        Ok(true) => return Ok(true),
585                        Ok(false) => {}
586                        Err(e) => {
587                            eprintln!(
588                                "{}: error reading '{}': {}",
589                                tool_name,
590                                filename,
591                                crate::common::io_error_msg(&e)
592                            );
593                            return Ok(false);
594                        }
595                    }
596                }
597            }
598        }
599    }
600
601    // Slow path: read entire input (stdin or fallback)
602    let data: FileData = if filename == "-" {
603        match read_stdin() {
604            Ok(d) => FileData::Owned(d),
605            Err(e) => {
606                eprintln!(
607                    "{}: standard input: {}",
608                    tool_name,
609                    crate::common::io_error_msg(&e)
610                );
611                return Ok(false);
612            }
613        }
614    } else {
615        match read_file(Path::new(filename)) {
616            Ok(d) => d,
617            Err(e) => {
618                eprintln!(
619                    "{}: cannot open '{}' for reading: {}",
620                    tool_name,
621                    filename,
622                    crate::common::io_error_msg(&e)
623                );
624                return Ok(false);
625            }
626        }
627    };
628
629    match &config.mode {
630        TailMode::Lines(n) => tail_lines(&data, *n, delimiter, out)?,
631        TailMode::LinesFrom(n) => tail_lines_from(&data, *n, delimiter, out)?,
632        TailMode::Bytes(n) => tail_bytes(&data, *n, out)?,
633        TailMode::BytesFrom(n) => tail_bytes_from(&data, *n, out)?,
634    }
635
636    Ok(true)
637}
638
639/// sendfile from byte N onward (1-indexed)
640#[cfg(target_os = "linux")]
641fn sendfile_tail_bytes_from(path: &Path, n: u64, out_fd: i32) -> io::Result<bool> {
642    let file = open_noatime(path)?;
643
644    let metadata = file.metadata()?;
645    let file_size = metadata.len();
646
647    if file_size == 0 {
648        return Ok(true);
649    }
650
651    let start = if n <= 1 { 0 } else { (n - 1).min(file_size) };
652
653    if start >= file_size {
654        return Ok(true);
655    }
656
657    use std::os::unix::io::AsRawFd;
658    let in_fd = file.as_raw_fd();
659    let output_len = file_size - start;
660    let _ = unsafe {
661        libc::posix_fadvise(
662            in_fd,
663            start as libc::off_t,
664            output_len as libc::off_t,
665            libc::POSIX_FADV_SEQUENTIAL,
666        )
667    };
668    let mut offset: libc::off_t = start as libc::off_t;
669    let mut remaining = output_len;
670
671    while remaining > 0 {
672        let chunk = remaining.min(0x7fff_f000) as usize;
673        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
674        if ret > 0 {
675            remaining -= ret as u64;
676        } else if ret == 0 {
677            break;
678        } else {
679            let err = io::Error::last_os_error();
680            if err.kind() == io::ErrorKind::Interrupted {
681                continue;
682            }
683            return Err(err);
684        }
685    }
686
687    Ok(true)
688}
689
690/// Follow a file for new data (basic implementation)
691#[cfg(target_os = "linux")]
692pub fn follow_file(filename: &str, config: &TailConfig, out: &mut impl Write) -> io::Result<()> {
693    use std::thread;
694    use std::time::Duration;
695
696    let sleep_duration = Duration::from_secs_f64(config.sleep_interval);
697    let path = Path::new(filename);
698
699    let mut last_size = match std::fs::metadata(path) {
700        Ok(m) => m.len(),
701        Err(_) => 0,
702    };
703
704    loop {
705        // Check PID if set
706        if let Some(pid) = config.pid {
707            if unsafe { libc::kill(pid as i32, 0) } != 0 {
708                break;
709            }
710        }
711
712        thread::sleep(sleep_duration);
713
714        let current_size = match std::fs::metadata(path) {
715            Ok(m) => m.len(),
716            Err(_) => {
717                if config.retry {
718                    continue;
719                }
720                break;
721            }
722        };
723
724        if current_size > last_size {
725            // Read new data
726            let file = std::fs::File::open(path)?;
727            use std::os::unix::io::AsRawFd;
728            let in_fd = file.as_raw_fd();
729            let stdout = io::stdout();
730            let out_fd = stdout.as_raw_fd();
731            let mut offset = last_size as libc::off_t;
732            let mut remaining = current_size - last_size; // u64, safe on 32-bit
733
734            while remaining > 0 {
735                let chunk = remaining.min(0x7fff_f000) as usize;
736                let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
737                if ret > 0 {
738                    remaining -= ret as u64;
739                } else if ret == 0 {
740                    break;
741                } else {
742                    let err = io::Error::last_os_error();
743                    if err.kind() == io::ErrorKind::Interrupted {
744                        continue;
745                    }
746                    return Err(err);
747                }
748            }
749            let _ = out.flush();
750            last_size = current_size;
751        } else if current_size < last_size {
752            // File was truncated
753            last_size = current_size;
754        }
755    }
756
757    Ok(())
758}
759
/// Follow a file for new data (basic, polling implementation; non-Linux).
///
/// Every `config.sleep_interval` seconds the size of `filename` is checked.
/// When it has grown, the new bytes are streamed to `out` and `out` is
/// flushed. When it has shrunk, the remembered size is reset so later growth
/// is picked up from the new end.
///
/// The loop ends when the file cannot be stat'ed and `config.retry` is
/// false. NOTE(review): `config.pid` is not consulted on this platform.
///
/// # Errors
/// Returns an error when the file cannot be opened and `config.retry` is
/// false, or when seeking, copying or flushing fails.
///
/// # Panics
/// `Duration::from_secs_f64` panics if `config.sleep_interval` is negative
/// or not finite.
#[cfg(not(target_os = "linux"))]
pub fn follow_file(filename: &str, config: &TailConfig, out: &mut impl Write) -> io::Result<()> {
    use std::io::{Read, Seek};
    use std::thread;
    use std::time::Duration;

    let sleep_duration = Duration::from_secs_f64(config.sleep_interval);
    let path = Path::new(filename);

    // A file that cannot be stat'ed yet is treated as empty.
    let mut last_size = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);

    loop {
        thread::sleep(sleep_duration);

        let current_size = match std::fs::metadata(path) {
            Ok(m) => m.len(),
            Err(_) if config.retry => continue,
            Err(_) => break,
        };

        if current_size > last_size {
            // The file can vanish between the stat and the open; with
            // `retry` that is just another reason to poll again.
            let mut file = match std::fs::File::open(path) {
                Ok(f) => f,
                Err(_) if config.retry => continue,
                Err(e) => return Err(e),
            };
            file.seek(io::SeekFrom::Start(last_size))?;

            // Stream the new bytes instead of allocating a buffer the size
            // of the whole growth. `take` caps the copy at the size seen
            // above, and a file that shrank in the meantime simply yields
            // fewer bytes rather than an `UnexpectedEof` error.
            let mut new_data = file.take(current_size - last_size);
            let copied = io::copy(&mut new_data, &mut *out)?;
            out.flush()?;

            // Advance by what was actually emitted so nothing is skipped.
            last_size += copied;
        } else if current_size < last_size {
            // File was truncated.
            last_size = current_size;
        }
    }

    Ok(())
}