Skip to main content

coreutils_rs/cat/
core.rs

1use std::io::{self, Read, Write};
2use std::path::Path;
3
4use crate::common::io::{read_file, read_stdin};
5
/// Option flags selecting which transformations `cat` applies to its input.
///
/// With every flag `false` (the `Default` value) the input is copied through unchanged.
#[derive(Clone, Debug, Default)]
pub struct CatConfig {
    /// Prefix every output line with its line number.
    pub number: bool,
    /// Prefix only non-blank lines with a line number; takes precedence over `number`.
    pub number_nonblank: bool,
    /// Emit `$` in front of each newline.
    pub show_ends: bool,
    /// Render tab characters as `^I`.
    pub show_tabs: bool,
    /// Render control and high-bit bytes in `^X` / `M-X` notation.
    pub show_nonprinting: bool,
    /// Drop a blank line when the line before it was also blank.
    pub squeeze_blank: bool,
}

impl CatConfig {
    /// Reports whether the input can be copied verbatim, i.e. no option flag is enabled.
    pub fn is_plain(&self) -> bool {
        let flags = [
            self.number,
            self.number_nonblank,
            self.show_ends,
            self.show_tabs,
            self.show_nonprinting,
            self.squeeze_blank,
        ];
        !flags.contains(&true)
    }
}
28
29/// Use splice for zero-copy file→stdout on Linux (file → pipe)
30#[cfg(target_os = "linux")]
31pub fn splice_file_to_stdout(path: &Path) -> io::Result<bool> {
32    use std::os::unix::fs::OpenOptionsExt;
33    use std::os::unix::io::AsRawFd;
34
35    // Check if stdout is a pipe (splice only works with pipes)
36    let stdout = io::stdout();
37    let out_fd = stdout.as_raw_fd();
38    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
39    if unsafe { libc::fstat(out_fd, &mut stat) } != 0 {
40        return Ok(false);
41    }
42    let stdout_is_pipe = (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO;
43
44    let file = std::fs::OpenOptions::new()
45        .read(true)
46        .custom_flags(libc::O_NOATIME)
47        .open(path)
48        .or_else(|_| std::fs::File::open(path))?;
49
50    let in_fd = file.as_raw_fd();
51    let metadata = file.metadata()?;
52    let file_size = metadata.len() as usize;
53
54    if file_size == 0 {
55        return Ok(true);
56    }
57
58    if stdout_is_pipe {
59        // splice: zero-copy file→pipe
60        let mut remaining = file_size;
61        while remaining > 0 {
62            let chunk = remaining.min(1024 * 1024 * 1024);
63            let ret = unsafe {
64                libc::splice(
65                    in_fd,
66                    std::ptr::null_mut(),
67                    out_fd,
68                    std::ptr::null_mut(),
69                    chunk,
70                    libc::SPLICE_F_MOVE,
71                )
72            };
73            if ret > 0 {
74                remaining -= ret as usize;
75            } else if ret == 0 {
76                break;
77            } else {
78                let err = io::Error::last_os_error();
79                if err.kind() == io::ErrorKind::Interrupted {
80                    continue;
81                }
82                // splice not supported — fall through to sendfile
83                return sendfile_to_stdout(in_fd, file_size, out_fd);
84            }
85        }
86        Ok(true)
87    } else {
88        // sendfile: zero-copy file→socket/file
89        sendfile_to_stdout(in_fd, file_size, out_fd)
90    }
91}
92
93#[cfg(target_os = "linux")]
94fn sendfile_to_stdout(in_fd: i32, file_size: usize, out_fd: i32) -> io::Result<bool> {
95    let mut offset: libc::off_t = 0;
96    let mut remaining = file_size;
97
98    while remaining > 0 {
99        let chunk = remaining.min(0x7ffff000);
100        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
101        if ret > 0 {
102            remaining -= ret as usize;
103        } else if ret == 0 {
104            break;
105        } else {
106            let err = io::Error::last_os_error();
107            if err.kind() == io::ErrorKind::Interrupted {
108                continue;
109            }
110            return Err(err);
111        }
112    }
113
114    Ok(true)
115}
116
117/// Plain cat for a single file — tries splice/sendfile, then falls back to mmap+write
118pub fn cat_plain_file(path: &Path, out: &mut impl Write) -> io::Result<bool> {
119    // Try zero-copy first on Linux
120    #[cfg(target_os = "linux")]
121    {
122        match splice_file_to_stdout(path) {
123            Ok(true) => return Ok(true),
124            Ok(false) => {}
125            Err(_) => {} // fall through
126        }
127    }
128
129    // Fallback: mmap + write
130    let data = read_file(path)?;
131    if !data.is_empty() {
132        out.write_all(&data)?;
133    }
134    Ok(true)
135}
136
137/// Plain cat for stdin — try splice on Linux, otherwise bulk read+write
138pub fn cat_plain_stdin(out: &mut impl Write) -> io::Result<()> {
139    #[cfg(target_os = "linux")]
140    {
141        // Try splice stdin→stdout if both are pipes
142        let stdin_fd = 0i32;
143        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
144        if unsafe { libc::fstat(1, &mut stat) } == 0
145            && (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO
146        {
147            // stdout is a pipe, try splice from stdin
148            loop {
149                let ret = unsafe {
150                    libc::splice(
151                        stdin_fd,
152                        std::ptr::null_mut(),
153                        1,
154                        std::ptr::null_mut(),
155                        1024 * 1024 * 1024,
156                        libc::SPLICE_F_MOVE,
157                    )
158                };
159                if ret > 0 {
160                    continue;
161                } else if ret == 0 {
162                    return Ok(());
163                } else {
164                    let err = io::Error::last_os_error();
165                    if err.kind() == io::ErrorKind::Interrupted {
166                        continue;
167                    }
168                    // splice not supported, fall through to read+write
169                    break;
170                }
171            }
172        }
173    }
174
175    // Fallback: read+write loop
176    let stdin = io::stdin();
177    let mut reader = stdin.lock();
178    let mut buf = [0u8; 131072]; // 128KB buffer
179    loop {
180        let n = match reader.read(&mut buf) {
181            Ok(0) => break,
182            Ok(n) => n,
183            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
184            Err(e) => return Err(e),
185        };
186        out.write_all(&buf[..n])?;
187    }
188    Ok(())
189}
190
/// Builds two 256-entry tables describing how each byte is rendered in non-printing
/// notation.
///
/// The first table holds, for every byte, the final character of its rendering (the
/// byte itself when it is passed through). The second table is `true` for every byte
/// whose rendering is longer than one byte. Tab is only treated as expanding when
/// `show_tabs` is set; newline is never expanded.
fn _build_nonprinting_table(show_tabs: bool) -> ([u8; 256], [bool; 256]) {
    let mut last_char = [0u8; 256];
    let mut expands = [false; 256];

    for byte in 0..=u8::MAX {
        let (glyph, is_multi) = match byte {
            b'\n' => (b'\n', false),
            b'\t' if show_tabs => (b'I', true),
            b'\t' => (b'\t', false),
            // Control chars: ^@ through ^_
            0..=8 | 11..=31 => (byte + 64, true),
            // Printable ASCII is passed through.
            32..=126 => (byte, false),
            // DEL (^?) and 0xFF (M-^?) both end in '?'.
            127 | 255 => (b'?', true),
            // M-^@ through M-^_
            128..=159 => (byte - 128 + 64, true),
            // M-space through M-~
            160..=254 => (byte - 128, true),
        };
        let slot = usize::from(byte);
        last_char[slot] = glyph;
        expands[slot] = is_multi;
    }

    (last_char, expands)
}
245
/// Appends the `cat -v` style rendering of `b` to `out`.
///
/// Newline and printable ASCII are copied as-is; tab is copied as-is unless `show_tabs`
/// is set, in which case it becomes `^I`. Other control bytes become `^X`, DEL becomes
/// `^?`, and bytes with the high bit set get an `M-` prefix in front of the rendering
/// of their low seven bits.
#[inline]
fn write_nonprinting(b: u8, show_tabs: bool, out: &mut Vec<u8>) {
    match b {
        b'\n' => out.push(b'\n'),
        b'\t' if show_tabs => out.extend_from_slice(b"^I"),
        b'\t' => out.push(b'\t'),
        0x20..=0x7e => out.push(b),
        0x7f => out.extend_from_slice(b"^?"),
        0xff => out.extend_from_slice(b"M-^?"),
        // Remaining C0 controls: caret plus the byte shifted into '@'..='_'.
        0x00..=0x08 | 0x0b..=0x1f => out.extend_from_slice(&[b'^', b + 64]),
        // High-bit controls: "M-^" plus the shifted low seven bits.
        0x80..=0x9f => out.extend_from_slice(&[b'M', b'-', b'^', b - 128 + 64]),
        // High-bit printables: "M-" plus the low seven bits.
        0xa0..=0xfe => out.extend_from_slice(&[b'M', b'-', b - 128]),
    }
}
285
/// Fast path for non-printing display without line numbering or squeeze.
///
/// Scans `data` for runs of printable ASCII (32..=126), which are copied in bulk, and
/// rewrites every other byte: newline optionally gains a leading `$` (`show_ends`), tab
/// optionally becomes `^I` (`show_tabs`), and the remaining bytes use `^X` / `M-X`
/// notation.
///
/// Output is staged in an internal buffer that is flushed to `out` once it reaches
/// `BUF_SIZE`. A printable run that is itself at least `BUF_SIZE` long is written to
/// `out` directly from `data`, so the staging buffer stays small even for input with
/// very long lines.
///
/// # Errors
/// Returns the first error reported by `out`.
fn cat_show_all_fast(
    data: &[u8],
    show_tabs: bool,
    show_ends: bool,
    out: &mut impl Write,
) -> io::Result<()> {
    // Staging buffer is flushed once it holds this much.
    const BUF_SIZE: usize = 256 * 1024;
    // Initial capacity only; the vector grows if the data expands more than 1.5x.
    let cap = data.len().min(BUF_SIZE) + data.len().min(BUF_SIZE) / 2;
    let mut buf = Vec::with_capacity(cap);
    let mut pos = 0;

    while pos < data.len() {
        // Length of the printable-ASCII run starting at `pos`.
        let run_len = data[pos..]
            .iter()
            .position(|b| !(b' '..=b'~').contains(b))
            .unwrap_or(data.len() - pos);

        if run_len > 0 {
            let run = &data[pos..pos + run_len];
            if run_len >= BUF_SIZE {
                // Too large to be worth staging: emit what is pending, then write the
                // run straight from the input so the buffer does not grow with it.
                if !buf.is_empty() {
                    out.write_all(&buf)?;
                    buf.clear();
                }
                out.write_all(run)?;
            } else {
                buf.extend_from_slice(run);
            }
            pos += run_len;
        }

        // The byte that ended the run, if the input is not exhausted.
        let Some(&b) = data.get(pos) else { break };
        pos += 1;

        match b {
            b'\n' if show_ends => buf.extend_from_slice(b"$\n"),
            b'\n' => buf.push(b'\n'),
            b'\t' if show_tabs => buf.extend_from_slice(b"^I"),
            b'\t' => buf.push(b'\t'),
            0..=8 | 11..=31 => buf.extend_from_slice(&[b'^', b + 64]),
            127 => buf.extend_from_slice(b"^?"),
            128..=159 => buf.extend_from_slice(&[b'M', b'-', b'^', b - 128 + 64]),
            160..=254 => buf.extend_from_slice(&[b'M', b'-', b - 128]),
            255 => buf.extend_from_slice(b"M-^?"),
            32..=126 => unreachable!("printable bytes are consumed by the run scan"),
        }

        // Flush when buffer is large enough
        if buf.len() >= BUF_SIZE {
            out.write_all(&buf)?;
            buf.clear();
        }
    }

    if !buf.is_empty() {
        out.write_all(&buf)?;
    }
    Ok(())
}
359
360/// Cat with options (numbering, show-ends, show-tabs, show-nonprinting, squeeze)
361pub fn cat_with_options(
362    data: &[u8],
363    config: &CatConfig,
364    line_num: &mut u64,
365    out: &mut impl Write,
366) -> io::Result<()> {
367    if data.is_empty() {
368        return Ok(());
369    }
370
371    // Fast path: show-all without numbering or squeeze
372    if config.show_nonprinting && !config.number && !config.number_nonblank && !config.squeeze_blank
373    {
374        return cat_show_all_fast(data, config.show_tabs, config.show_ends, out);
375    }
376
377    // Pre-allocate output buffer (worst case: every byte expands to 4 chars for M-^X)
378    // In practice, most files are mostly printable, so 1.1x is a good estimate
379    let estimated = data.len() + data.len() / 10 + 1024;
380    let mut buf = Vec::with_capacity(estimated.min(16 * 1024 * 1024));
381
382    let mut prev_blank = false;
383    let mut pos = 0;
384    let mut itoa_buf = itoa::Buffer::new();
385
386    while pos < data.len() {
387        // Find end of this line
388        let line_end = memchr::memchr(b'\n', &data[pos..])
389            .map(|p| pos + p + 1)
390            .unwrap_or(data.len());
391
392        let line = &data[pos..line_end];
393        let is_blank = line == b"\n" || line.is_empty();
394
395        // Squeeze blank lines
396        if config.squeeze_blank && is_blank && prev_blank {
397            pos = line_end;
398            continue;
399        }
400        prev_blank = is_blank;
401
402        // Line numbering - use itoa for fast integer formatting
403        if config.number_nonblank {
404            if !is_blank {
405                let s = itoa_buf.format(*line_num);
406                // Right-align in 6-char field
407                let pad = if s.len() < 6 { 6 - s.len() } else { 0 };
408                buf.extend(std::iter::repeat_n(b' ', pad));
409                buf.extend_from_slice(s.as_bytes());
410                buf.push(b'\t');
411                *line_num += 1;
412            }
413        } else if config.number {
414            let s = itoa_buf.format(*line_num);
415            let pad = if s.len() < 6 { 6 - s.len() } else { 0 };
416            buf.extend(std::iter::repeat_n(b' ', pad));
417            buf.extend_from_slice(s.as_bytes());
418            buf.push(b'\t');
419            *line_num += 1;
420        }
421
422        // Process line content
423        if config.show_nonprinting || config.show_tabs {
424            let content_end = if line.last() == Some(&b'\n') {
425                line.len() - 1
426            } else {
427                line.len()
428            };
429
430            for &b in &line[..content_end] {
431                if config.show_nonprinting {
432                    write_nonprinting(b, config.show_tabs, &mut buf);
433                } else if config.show_tabs && b == b'\t' {
434                    buf.extend_from_slice(b"^I");
435                } else {
436                    buf.push(b);
437                }
438            }
439
440            if config.show_ends && line.last() == Some(&b'\n') {
441                buf.push(b'$');
442            }
443            if line.last() == Some(&b'\n') {
444                buf.push(b'\n');
445            }
446        } else {
447            // No character transformation needed
448            if config.show_ends {
449                let has_newline = line.last() == Some(&b'\n');
450                let content_end = if has_newline {
451                    line.len() - 1
452                } else {
453                    line.len()
454                };
455                // GNU cat -E: CR immediately before LF is shown as ^M
456                if has_newline && content_end > 0 && line[content_end - 1] == b'\r' {
457                    buf.extend_from_slice(&line[..content_end - 1]);
458                    buf.extend_from_slice(b"^M$\n");
459                } else {
460                    buf.extend_from_slice(&line[..content_end]);
461                    if has_newline {
462                        buf.push(b'$');
463                        buf.push(b'\n');
464                    }
465                }
466            } else {
467                buf.extend_from_slice(line);
468            }
469        }
470
471        // Flush buffer periodically to avoid excessive memory use
472        if buf.len() >= 8 * 1024 * 1024 {
473            out.write_all(&buf)?;
474            buf.clear();
475        }
476
477        pos = line_end;
478    }
479
480    if !buf.is_empty() {
481        out.write_all(&buf)?;
482    }
483
484    Ok(())
485}
486
487/// Process a single file for cat
488pub fn cat_file(
489    filename: &str,
490    config: &CatConfig,
491    line_num: &mut u64,
492    out: &mut impl Write,
493    tool_name: &str,
494) -> io::Result<bool> {
495    if filename == "-" {
496        if config.is_plain() {
497            match cat_plain_stdin(out) {
498                Ok(()) => return Ok(true),
499                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
500                    std::process::exit(0);
501                }
502                Err(e) => {
503                    eprintln!(
504                        "{}: standard input: {}",
505                        tool_name,
506                        crate::common::io_error_msg(&e)
507                    );
508                    return Ok(false);
509                }
510            }
511        }
512        match read_stdin() {
513            Ok(data) => {
514                cat_with_options(&data, config, line_num, out)?;
515                Ok(true)
516            }
517            Err(e) => {
518                eprintln!(
519                    "{}: standard input: {}",
520                    tool_name,
521                    crate::common::io_error_msg(&e)
522                );
523                Ok(false)
524            }
525        }
526    } else {
527        let path = Path::new(filename);
528
529        // Check if it's a directory
530        match std::fs::metadata(path) {
531            Ok(meta) if meta.is_dir() => {
532                eprintln!("{}: {}: Is a directory", tool_name, filename);
533                return Ok(false);
534            }
535            _ => {}
536        }
537
538        if config.is_plain() {
539            match cat_plain_file(path, out) {
540                Ok(true) => return Ok(true),
541                Ok(false) => {} // fall through
542                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
543                    std::process::exit(0);
544                }
545                Err(e) => {
546                    eprintln!(
547                        "{}: {}: {}",
548                        tool_name,
549                        filename,
550                        crate::common::io_error_msg(&e)
551                    );
552                    return Ok(false);
553                }
554            }
555        }
556
557        match read_file(path) {
558            Ok(data) => {
559                cat_with_options(&data, config, line_num, out)?;
560                Ok(true)
561            }
562            Err(e) => {
563                eprintln!(
564                    "{}: {}: {}",
565                    tool_name,
566                    filename,
567                    crate::common::io_error_msg(&e)
568                );
569                Ok(false)
570            }
571        }
572    }
573}