Skip to main content

coreutils_rs/cat/
core.rs

1use std::io::{self, Read, Write};
2use std::path::Path;
3
4use crate::common::io::{read_file, read_stdin};
5
/// Option flags controlling how `cat` transforms its input.
///
/// Every flag defaults to `false`; with all flags off the input is copied
/// through unchanged (see [`CatConfig::is_plain`]).
#[derive(Clone, Debug, Default)]
pub struct CatConfig {
    /// Prefix every output line with its line number.
    pub number: bool,
    /// Prefix only non-blank lines with a line number (takes precedence over `number`).
    pub number_nonblank: bool,
    /// Mark the end of each line with `$`.
    pub show_ends: bool,
    /// Render TAB as `^I`.
    pub show_tabs: bool,
    /// Render control and high-bit bytes in `^X` / `M-x` notation.
    pub show_nonprinting: bool,
    /// Collapse runs of blank lines into a single blank line.
    pub squeeze_blank: bool,
}

impl CatConfig {
    /// Returns `true` when none of the flags is set, i.e. the data can be
    /// copied verbatim without any per-byte processing.
    pub fn is_plain(&self) -> bool {
        // Exhaustive destructuring: adding a field to the struct without
        // considering it here becomes a compile error.
        let Self {
            number,
            number_nonblank,
            show_ends,
            show_tabs,
            show_nonprinting,
            squeeze_blank,
        } = *self;

        !(number
            || number_nonblank
            || show_ends
            || show_tabs
            || show_nonprinting
            || squeeze_blank)
    }
}
28
29/// Use splice for zero-copy file→stdout on Linux (file → pipe)
30#[cfg(target_os = "linux")]
31pub fn splice_file_to_stdout(path: &Path) -> io::Result<bool> {
32    use std::os::unix::fs::OpenOptionsExt;
33    use std::os::unix::io::AsRawFd;
34
35    // Check if stdout is a pipe (splice only works with pipes)
36    let stdout = io::stdout();
37    let out_fd = stdout.as_raw_fd();
38    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
39    if unsafe { libc::fstat(out_fd, &mut stat) } != 0 {
40        return Ok(false);
41    }
42    let stdout_is_pipe = (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO;
43
44    let file = std::fs::OpenOptions::new()
45        .read(true)
46        .custom_flags(libc::O_NOATIME)
47        .open(path)
48        .or_else(|_| std::fs::File::open(path))?;
49
50    let in_fd = file.as_raw_fd();
51    let metadata = file.metadata()?;
52    let file_size = metadata.len() as usize;
53
54    if file_size == 0 {
55        return Ok(true);
56    }
57
58    if stdout_is_pipe {
59        // splice: zero-copy file→pipe
60        let mut remaining = file_size;
61        while remaining > 0 {
62            let chunk = remaining.min(1024 * 1024 * 1024);
63            let ret = unsafe {
64                libc::splice(
65                    in_fd,
66                    std::ptr::null_mut(),
67                    out_fd,
68                    std::ptr::null_mut(),
69                    chunk,
70                    libc::SPLICE_F_MOVE,
71                )
72            };
73            if ret > 0 {
74                remaining -= ret as usize;
75            } else if ret == 0 {
76                break;
77            } else {
78                let err = io::Error::last_os_error();
79                if err.kind() == io::ErrorKind::Interrupted {
80                    continue;
81                }
82                // splice not supported — fall through to sendfile
83                return sendfile_to_stdout(in_fd, file_size, out_fd);
84            }
85        }
86        Ok(true)
87    } else {
88        // sendfile: zero-copy file→socket/file
89        sendfile_to_stdout(in_fd, file_size, out_fd)
90    }
91}
92
93#[cfg(target_os = "linux")]
94fn sendfile_to_stdout(in_fd: i32, file_size: usize, out_fd: i32) -> io::Result<bool> {
95    let mut offset: libc::off_t = 0;
96    let mut remaining = file_size;
97
98    while remaining > 0 {
99        let chunk = remaining.min(0x7ffff000);
100        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
101        if ret > 0 {
102            remaining -= ret as usize;
103        } else if ret == 0 {
104            break;
105        } else {
106            let err = io::Error::last_os_error();
107            if err.kind() == io::ErrorKind::Interrupted {
108                continue;
109            }
110            return Err(err);
111        }
112    }
113
114    Ok(true)
115}
116
117/// Plain cat for a single file — tries splice/sendfile, then falls back to mmap+write
118pub fn cat_plain_file(path: &Path, out: &mut impl Write) -> io::Result<bool> {
119    // Try zero-copy first on Linux
120    #[cfg(target_os = "linux")]
121    {
122        match splice_file_to_stdout(path) {
123            Ok(true) => return Ok(true),
124            Ok(false) => {}
125            Err(_) => {} // fall through
126        }
127    }
128
129    // Fallback: mmap + write
130    let data = read_file(path)?;
131    if !data.is_empty() {
132        out.write_all(&data)?;
133    }
134    Ok(true)
135}
136
137/// Plain cat for stdin — try splice on Linux, otherwise bulk read+write
138pub fn cat_plain_stdin(out: &mut impl Write) -> io::Result<()> {
139    #[cfg(target_os = "linux")]
140    {
141        // Try splice stdin→stdout if both are pipes
142        let stdin_fd = 0i32;
143        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
144        if unsafe { libc::fstat(1, &mut stat) } == 0
145            && (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO
146        {
147            // stdout is a pipe, try splice from stdin
148            loop {
149                let ret = unsafe {
150                    libc::splice(
151                        stdin_fd,
152                        std::ptr::null_mut(),
153                        1,
154                        std::ptr::null_mut(),
155                        1024 * 1024 * 1024,
156                        libc::SPLICE_F_MOVE,
157                    )
158                };
159                if ret > 0 {
160                    continue;
161                } else if ret == 0 {
162                    return Ok(());
163                } else {
164                    let err = io::Error::last_os_error();
165                    if err.kind() == io::ErrorKind::Interrupted {
166                        continue;
167                    }
168                    // splice not supported, fall through to read+write
169                    break;
170                }
171            }
172        }
173    }
174
175    // Fallback: read+write loop
176    let stdin = io::stdin();
177    let mut reader = stdin.lock();
178    let mut buf = [0u8; 131072]; // 128KB buffer
179    loop {
180        let n = match reader.read(&mut buf) {
181            Ok(0) => break,
182            Ok(n) => n,
183            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
184            Err(e) => return Err(e),
185        };
186        out.write_all(&buf[..n])?;
187    }
188    Ok(())
189}
190
/// Build the per-byte lookup tables for `cat -v` style rendering.
///
/// Returns `(table, multi)`, both indexed by the input byte:
/// * `table[b]` is the final character emitted for `b` (the letter after any
///   `^` / `M-` / `M-^` prefix, or the byte itself when it is passed through);
/// * `multi[b]` is `true` when `b` is rendered as more than one output byte.
///
/// TAB is only treated as a control character when `show_tabs` is set;
/// newline is always passed through.
fn _build_nonprinting_table(show_tabs: bool) -> ([u8; 256], [bool; 256]) {
    let mut glyphs = [0u8; 256];
    let mut expands = [false; 256];

    for byte in u8::MIN..=u8::MAX {
        let (glyph, expand) = match byte {
            b'\n' => (b'\n', false),
            b'\t' if show_tabs => (b'I', true),
            b'\t' => (b'\t', false),
            // Remaining control chars: ^@ through ^_
            0x00..=0x1f => (byte + 64, true),
            // Printable ASCII is passed through
            0x20..=0x7e => (byte, false),
            // DEL: ^?
            0x7f => (b'?', true),
            // M-^@ through M-^_
            0x80..=0x9f => (byte - 64, true),
            // M-space through M-~
            0xa0..=0xfe => (byte - 128, true),
            // M-^?
            0xff => (b'?', true),
        };
        let slot = usize::from(byte);
        glyphs[slot] = glyph;
        expands[slot] = expand;
    }

    (glyphs, expands)
}
245
/// Append the `cat -v` rendering of byte `b` to `out`.
///
/// * Newline is always copied as-is; TAB is copied as-is unless `show_tabs`
///   is set, in which case it becomes `^I`.
/// * Other control bytes become `^X`, DEL becomes `^?`.
/// * Bytes with the high bit set get an `M-` prefix followed by the rendering
///   of their low seven bits (so 0x89 is `M-^I` and 0x8a is `M-^J`
///   regardless of `show_tabs`).
#[inline]
fn write_nonprinting(b: u8, show_tabs: bool, out: &mut Vec<u8>) {
    let has_meta_bit = b >= 0x80;

    if has_meta_bit {
        out.extend_from_slice(b"M-");
    } else if b == b'\n' || (b == b'\t' && !show_tabs) {
        // Only plain (7-bit) newline/tab are ever passed through untouched.
        out.push(b);
        return;
    }

    let low = b & 0x7f;
    match low {
        0x00..=0x1f => out.extend_from_slice(&[b'^', low + 64]),
        0x7f => out.extend_from_slice(b"^?"),
        _ => out.push(low),
    }
}
285
/// Fast path for show-nonprinting output without line numbering or squeeze.
///
/// Renders `data` in `^X` / `M-x` notation and writes it to `out`. Runs of
/// printable ASCII (0x20..=0x7e) are copied in bulk; every other byte is
/// translated individually. TAB becomes `^I` only when `show_tabs` is set,
/// and newline becomes `$\n` only when `show_ends` is set.
///
/// Output is staged in an internal buffer that is written out whenever it
/// has reached 256 KiB after a translated byte, and once more at the end.
///
/// # Errors
/// Returns any error reported by `out.write_all`.
fn cat_show_all_fast(
    data: &[u8],
    show_tabs: bool,
    show_ends: bool,
    out: &mut impl Write,
) -> io::Result<()> {
    const FLUSH_THRESHOLD: usize = 256 * 1024;

    // Start at 1.5x the (capped) input size; the Vec grows if the data expands more.
    let window = data.len().min(FLUSH_THRESHOLD);
    let mut staged: Vec<u8> = Vec::with_capacity(window + window / 2);
    let mut rest = data;

    while !rest.is_empty() {
        // Split off the leading run of printable ASCII and copy it verbatim.
        let run_len = rest
            .iter()
            .position(|c| !(32..=126).contains(c))
            .unwrap_or(rest.len());
        let (printable, tail) = rest.split_at(run_len);
        staged.extend_from_slice(printable);

        // The byte that ended the run (if any) needs translating.
        let Some((&special, after)) = tail.split_first() else {
            break;
        };
        rest = after;

        match special {
            b'\n' if show_ends => staged.extend_from_slice(b"$\n"),
            b'\n' => staged.push(b'\n'),
            b'\t' if show_tabs => staged.extend_from_slice(b"^I"),
            b'\t' => staged.push(b'\t'),
            0x7f => staged.extend_from_slice(b"^?"),
            0xff => staged.extend_from_slice(b"M-^?"),
            0x00..=0x1f => staged.extend_from_slice(&[b'^', special + 64]),
            0x80..=0x9f => staged.extend_from_slice(&[b'M', b'-', b'^', special - 64]),
            0xa0..=0xfe => staged.extend_from_slice(&[b'M', b'-', special - 128]),
            // 0x20..=0x7e never gets here: the run scan above consumed it.
            _ => unreachable!(),
        }

        if staged.len() >= FLUSH_THRESHOLD {
            out.write_all(&staged)?;
            staged.clear();
        }
    }

    if !staged.is_empty() {
        out.write_all(&staged)?;
    }
    Ok(())
}
359
360/// Cat with options (numbering, show-ends, show-tabs, show-nonprinting, squeeze)
361pub fn cat_with_options(
362    data: &[u8],
363    config: &CatConfig,
364    line_num: &mut u64,
365    pending_cr: &mut bool,
366    out: &mut impl Write,
367) -> io::Result<()> {
368    if data.is_empty() {
369        return Ok(());
370    }
371
372    // Fast path: show-all without numbering or squeeze
373    if config.show_nonprinting && !config.number && !config.number_nonblank && !config.squeeze_blank
374    {
375        return cat_show_all_fast(data, config.show_tabs, config.show_ends, out);
376    }
377
378    // Pre-allocate output buffer (worst case: every byte expands to 4 chars for M-^X)
379    // In practice, most files are mostly printable, so 1.1x is a good estimate
380    let estimated = data.len() + data.len() / 10 + 1024;
381    let mut buf = Vec::with_capacity(estimated.min(16 * 1024 * 1024));
382
383    let mut prev_blank = false;
384    let mut pos = 0;
385    let mut itoa_buf = itoa::Buffer::new();
386
387    // Handle pending CR from previous file (only relevant for show_ends without show_nonprinting)
388    if *pending_cr {
389        *pending_cr = false;
390        if config.show_ends
391            && !(config.show_nonprinting || config.show_tabs)
392            && !data.is_empty()
393            && data[0] == b'\n'
394        {
395            // CR from previous file + this LF = CRLF line ending → ^M$\n
396            buf.extend_from_slice(b"^M$\n");
397            pos = 1;
398        } else {
399            // CR not followed by LF, emit literally
400            buf.push(b'\r');
401        }
402    }
403
404    while pos < data.len() {
405        // Find end of this line
406        let line_end = memchr::memchr(b'\n', &data[pos..])
407            .map(|p| pos + p + 1)
408            .unwrap_or(data.len());
409
410        let line = &data[pos..line_end];
411        let is_blank = line == b"\n" || line.is_empty();
412
413        // Squeeze blank lines
414        if config.squeeze_blank && is_blank && prev_blank {
415            pos = line_end;
416            continue;
417        }
418        prev_blank = is_blank;
419
420        // Line numbering - use itoa for fast integer formatting
421        if config.number_nonblank {
422            if !is_blank {
423                let s = itoa_buf.format(*line_num);
424                // Right-align in 6-char field
425                let pad = if s.len() < 6 { 6 - s.len() } else { 0 };
426                buf.extend(std::iter::repeat_n(b' ', pad));
427                buf.extend_from_slice(s.as_bytes());
428                buf.push(b'\t');
429                *line_num += 1;
430            }
431        } else if config.number {
432            let s = itoa_buf.format(*line_num);
433            let pad = if s.len() < 6 { 6 - s.len() } else { 0 };
434            buf.extend(std::iter::repeat_n(b' ', pad));
435            buf.extend_from_slice(s.as_bytes());
436            buf.push(b'\t');
437            *line_num += 1;
438        }
439
440        // Process line content
441        if config.show_nonprinting || config.show_tabs {
442            let content_end = if line.last() == Some(&b'\n') {
443                line.len() - 1
444            } else {
445                line.len()
446            };
447
448            for &b in &line[..content_end] {
449                if config.show_nonprinting {
450                    write_nonprinting(b, config.show_tabs, &mut buf);
451                } else if config.show_tabs && b == b'\t' {
452                    buf.extend_from_slice(b"^I");
453                } else {
454                    buf.push(b);
455                }
456            }
457
458            if config.show_ends && line.last() == Some(&b'\n') {
459                buf.push(b'$');
460            }
461            if line.last() == Some(&b'\n') {
462                buf.push(b'\n');
463            }
464        } else {
465            // No character transformation needed
466            if config.show_ends {
467                let has_newline = line.last() == Some(&b'\n');
468                let content_end = if has_newline {
469                    line.len() - 1
470                } else {
471                    line.len()
472                };
473                // GNU cat -E: only \r immediately before \n is shown as ^M.
474                // Other \r bytes are passed through as literal CR (0x0d).
475                let content = &line[..content_end];
476                if has_newline && !content.is_empty() && content[content.len() - 1] == b'\r' {
477                    // Content ends with \r (which is right before \n) → show as ^M$
478                    buf.extend_from_slice(&content[..content.len() - 1]);
479                    buf.extend_from_slice(b"^M");
480                } else if !has_newline && !content.is_empty() && content[content.len() - 1] == b'\r'
481                {
482                    // Trailing CR at end of data without following LF — hold as pending.
483                    // It might pair with next file's LF to form CRLF line ending.
484                    buf.extend_from_slice(&content[..content.len() - 1]);
485                    *pending_cr = true;
486                } else {
487                    buf.extend_from_slice(content);
488                }
489                if has_newline {
490                    buf.push(b'$');
491                    buf.push(b'\n');
492                }
493            } else {
494                buf.extend_from_slice(line);
495            }
496        }
497
498        // Flush buffer periodically to avoid excessive memory use
499        if buf.len() >= 8 * 1024 * 1024 {
500            out.write_all(&buf)?;
501            buf.clear();
502        }
503
504        pos = line_end;
505    }
506
507    if !buf.is_empty() {
508        out.write_all(&buf)?;
509    }
510
511    Ok(())
512}
513
514/// Process a single file for cat
515pub fn cat_file(
516    filename: &str,
517    config: &CatConfig,
518    line_num: &mut u64,
519    pending_cr: &mut bool,
520    out: &mut impl Write,
521    tool_name: &str,
522) -> io::Result<bool> {
523    if filename == "-" {
524        if config.is_plain() {
525            match cat_plain_stdin(out) {
526                Ok(()) => return Ok(true),
527                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
528                    std::process::exit(0);
529                }
530                Err(e) => {
531                    eprintln!(
532                        "{}: standard input: {}",
533                        tool_name,
534                        crate::common::io_error_msg(&e)
535                    );
536                    return Ok(false);
537                }
538            }
539        }
540        match read_stdin() {
541            Ok(data) => {
542                cat_with_options(&data, config, line_num, pending_cr, out)?;
543                Ok(true)
544            }
545            Err(e) => {
546                eprintln!(
547                    "{}: standard input: {}",
548                    tool_name,
549                    crate::common::io_error_msg(&e)
550                );
551                Ok(false)
552            }
553        }
554    } else {
555        let path = Path::new(filename);
556
557        // Check if it's a directory
558        match std::fs::metadata(path) {
559            Ok(meta) if meta.is_dir() => {
560                eprintln!("{}: {}: Is a directory", tool_name, filename);
561                return Ok(false);
562            }
563            _ => {}
564        }
565
566        // GNU cat: detect when input file is the same as stdout (e.g. cat file >> file)
567        #[cfg(unix)]
568        {
569            use std::os::unix::fs::MetadataExt;
570            if let Ok(file_meta) = std::fs::metadata(path) {
571                let mut stdout_stat: libc::stat = unsafe { std::mem::zeroed() };
572                if unsafe { libc::fstat(1, &mut stdout_stat) } == 0
573                    && file_meta.dev() == stdout_stat.st_dev as u64
574                    && file_meta.ino() == stdout_stat.st_ino as u64
575                {
576                    eprintln!("{}: {}: input file is output file", tool_name, filename);
577                    return Ok(false);
578                }
579            }
580        }
581
582        if config.is_plain() {
583            match cat_plain_file(path, out) {
584                Ok(true) => return Ok(true),
585                Ok(false) => {} // fall through
586                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
587                    std::process::exit(0);
588                }
589                Err(e) => {
590                    eprintln!(
591                        "{}: {}: {}",
592                        tool_name,
593                        filename,
594                        crate::common::io_error_msg(&e)
595                    );
596                    return Ok(false);
597                }
598            }
599        }
600
601        match read_file(path) {
602            Ok(data) => {
603                cat_with_options(&data, config, line_num, pending_cr, out)?;
604                Ok(true)
605            }
606            Err(e) => {
607                eprintln!(
608                    "{}: {}: {}",
609                    tool_name,
610                    filename,
611                    crate::common::io_error_msg(&e)
612                );
613                Ok(false)
614            }
615        }
616    }
617}