Skip to main content

coreutils_rs/cat/
core.rs

1use std::io::{self, Read, Write};
2use std::path::Path;
3
4use crate::common::io::{read_file, read_stdin};
5
/// Option flags that control how `cat` renders its input.
#[derive(Clone, Debug, Default)]
pub struct CatConfig {
    /// Prefix every output line with its line number.
    pub number: bool,
    /// Prefix only non-blank lines with a line number; wins over `number`.
    pub number_nonblank: bool,
    /// Print `$` before each line's terminating newline.
    pub show_ends: bool,
    /// Print tab characters as `^I`.
    pub show_tabs: bool,
    /// Print control and high-bit bytes in `^X` / `M-X` notation.
    pub show_nonprinting: bool,
    /// Collapse runs of consecutive blank lines into a single blank line.
    pub squeeze_blank: bool,
}

impl CatConfig {
    /// Reports whether the input can be copied through untouched,
    /// i.e. none of the rendering options is switched on.
    pub fn is_plain(&self) -> bool {
        // Exhaustive destructuring: adding a field to the struct forces this
        // function to be revisited.
        let Self {
            number,
            number_nonblank,
            show_ends,
            show_tabs,
            show_nonprinting,
            squeeze_blank,
        } = *self;

        let any_option_set = number
            || number_nonblank
            || show_ends
            || show_tabs
            || show_nonprinting
            || squeeze_blank;
        !any_option_set
    }
}
28
29/// Use splice for zero-copy file→stdout on Linux (file → pipe)
30#[cfg(target_os = "linux")]
31pub fn splice_file_to_stdout(path: &Path) -> io::Result<bool> {
32    use std::os::unix::fs::OpenOptionsExt;
33    use std::os::unix::io::AsRawFd;
34
35    // Check if stdout is a pipe (splice only works with pipes)
36    let stdout = io::stdout();
37    let out_fd = stdout.as_raw_fd();
38    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
39    if unsafe { libc::fstat(out_fd, &mut stat) } != 0 {
40        return Ok(false);
41    }
42    let stdout_is_pipe = (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO;
43
44    let file = std::fs::OpenOptions::new()
45        .read(true)
46        .custom_flags(libc::O_NOATIME)
47        .open(path)
48        .or_else(|_| std::fs::File::open(path))?;
49
50    let in_fd = file.as_raw_fd();
51    let metadata = file.metadata()?;
52    let file_size = metadata.len() as usize;
53
54    if file_size == 0 {
55        return Ok(true);
56    }
57
58    if stdout_is_pipe {
59        // splice: zero-copy file→pipe
60        let mut remaining = file_size;
61        while remaining > 0 {
62            let chunk = remaining.min(1024 * 1024 * 1024);
63            let ret = unsafe {
64                libc::splice(
65                    in_fd,
66                    std::ptr::null_mut(),
67                    out_fd,
68                    std::ptr::null_mut(),
69                    chunk,
70                    libc::SPLICE_F_MOVE,
71                )
72            };
73            if ret > 0 {
74                remaining -= ret as usize;
75            } else if ret == 0 {
76                break;
77            } else {
78                let err = io::Error::last_os_error();
79                if err.kind() == io::ErrorKind::Interrupted {
80                    continue;
81                }
82                // splice not supported — fall through to sendfile
83                return sendfile_to_stdout(in_fd, file_size, out_fd);
84            }
85        }
86        Ok(true)
87    } else {
88        // sendfile: zero-copy file→socket/file
89        sendfile_to_stdout(in_fd, file_size, out_fd)
90    }
91}
92
93#[cfg(target_os = "linux")]
94fn sendfile_to_stdout(in_fd: i32, file_size: usize, out_fd: i32) -> io::Result<bool> {
95    let mut offset: libc::off_t = 0;
96    let mut remaining = file_size;
97
98    while remaining > 0 {
99        let chunk = remaining.min(0x7ffff000);
100        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
101        if ret > 0 {
102            remaining -= ret as usize;
103        } else if ret == 0 {
104            break;
105        } else {
106            let err = io::Error::last_os_error();
107            if err.kind() == io::ErrorKind::Interrupted {
108                continue;
109            }
110            return Err(err);
111        }
112    }
113
114    Ok(true)
115}
116
117/// Plain cat for a single file — tries splice/sendfile, then falls back to mmap+write
118pub fn cat_plain_file(path: &Path, out: &mut impl Write) -> io::Result<bool> {
119    // Try zero-copy first on Linux
120    #[cfg(target_os = "linux")]
121    {
122        match splice_file_to_stdout(path) {
123            Ok(true) => return Ok(true),
124            Ok(false) => {}
125            Err(_) => {} // fall through
126        }
127    }
128
129    // Fallback: mmap + write
130    let data = read_file(path)?;
131    if !data.is_empty() {
132        out.write_all(&data)?;
133    }
134    Ok(true)
135}
136
137/// Plain cat for stdin — try splice on Linux, otherwise bulk read+write
138pub fn cat_plain_stdin(out: &mut impl Write) -> io::Result<()> {
139    #[cfg(target_os = "linux")]
140    {
141        // Try splice stdin→stdout if both are pipes
142        let stdin_fd = 0i32;
143        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
144        if unsafe { libc::fstat(1, &mut stat) } == 0
145            && (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO
146        {
147            // stdout is a pipe, try splice from stdin
148            loop {
149                let ret = unsafe {
150                    libc::splice(
151                        stdin_fd,
152                        std::ptr::null_mut(),
153                        1,
154                        std::ptr::null_mut(),
155                        1024 * 1024 * 1024,
156                        libc::SPLICE_F_MOVE,
157                    )
158                };
159                if ret > 0 {
160                    continue;
161                } else if ret == 0 {
162                    return Ok(());
163                } else {
164                    let err = io::Error::last_os_error();
165                    if err.kind() == io::ErrorKind::Interrupted {
166                        continue;
167                    }
168                    // splice not supported, fall through to read+write
169                    break;
170                }
171            }
172        }
173    }
174
175    // Fallback: read+write loop
176    let stdin = io::stdin();
177    let mut reader = stdin.lock();
178    let mut buf = [0u8; 131072]; // 128KB buffer
179    loop {
180        let n = match reader.read(&mut buf) {
181            Ok(0) => break,
182            Ok(n) => n,
183            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
184            Err(e) => return Err(e),
185        };
186        out.write_all(&buf[..n])?;
187    }
188    Ok(())
189}
190
/// Builds two 256-entry tables describing `cat -v` style rendering.
///
/// For every byte value the first table holds the final character of that
/// byte's rendering, and the second table is `true` when the byte is shown
/// as more than one output byte (a `^` and/or `M-` prefix is implied).
/// Tabs are only treated as expanding when `show_tabs` is set; newlines are
/// always passed through.
fn _build_nonprinting_table(show_tabs: bool) -> ([u8; 256], [bool; 256]) {
    let mut glyphs = [0u8; 256];
    let mut expands = [false; 256];

    for (slot, byte) in (0u8..=255).enumerate() {
        let (glyph, wide) = match byte {
            b'\n' => (b'\n', false),
            b'\t' if show_tabs => (b'I', true),
            b'\t' => (b'\t', false),
            // Remaining control chars: ^@ through ^_
            0..=31 => (byte + 64, true),
            32..=126 => (byte, false),
            // DEL: ^?
            127 => (b'?', true),
            // M-^@ through M-^_
            128..=159 => (byte - 64, true),
            // M-space through M-~
            160..=254 => (byte - 128, true),
            // M-^?
            255 => (b'?', true),
        };
        glyphs[slot] = glyph;
        expands[slot] = wide;
    }

    (glyphs, expands)
}
245
/// Appends the `cat -v` rendering of `b` to `out`.
///
/// Newlines are copied as-is, and so are tabs unless `show_tabs` is set.
/// Bytes with the high bit set get an `M-` prefix followed by the rendering
/// of their low seven bits; control characters and DEL use caret notation.
#[inline]
fn write_nonprinting(b: u8, show_tabs: bool, out: &mut Vec<u8>) {
    // The only bytes that may pass through untouched despite being controls.
    let literal_tab = b == b'\t' && !show_tabs;
    if b == b'\n' || literal_tab {
        out.push(b);
        return;
    }

    // Strip the high bit (emitting the meta prefix) so that one match below
    // covers both halves of the byte range.
    let low = if b >= 0x80 {
        out.extend_from_slice(b"M-");
        b & 0x7f
    } else {
        b
    };

    match low {
        0x7f => out.extend_from_slice(b"^?"),
        0..=31 => out.extend_from_slice(&[b'^', low + 64]),
        _ => out.push(low),
    }
}
285
/// Fast path for show-nonprinting output without line numbering or squeeze.
///
/// Runs of printable ASCII (32..=126) are bulk-copied into an internal
/// buffer; every other byte is rendered individually: newline (with a `$`
/// prefix when `show_ends` is set), tab (as `^I` when `show_tabs` is set),
/// and caret / `M-` notation for control and high-bit bytes.
///
/// The internal buffer is written to `out` whenever it reaches `BUF_SIZE`.
/// Long printable runs are copied in pieces so that the buffer stays bounded
/// even when the input contains no special bytes at all.
///
/// # Errors
/// Returns any error produced while writing to `out`.
fn cat_show_all_fast(
    data: &[u8],
    show_tabs: bool,
    show_ends: bool,
    out: &mut impl Write,
) -> io::Result<()> {
    // Internal buffer — flush every 256KB to keep memory bounded
    const BUF_SIZE: usize = 256 * 1024;
    // Longest rendering of a single input byte ("M-^X").
    const MAX_EXPANSION: usize = 4;

    let cap = (data.len() + data.len() / 2).min(BUF_SIZE + MAX_EXPANSION);
    let mut buf: Vec<u8> = Vec::with_capacity(cap);
    let mut rest = data;

    // Invariant at the top of each iteration: buf.len() < BUF_SIZE.
    while !rest.is_empty() {
        // Length of the leading run of printable ASCII.
        let run_len = rest
            .iter()
            .position(|b| !(32..=126).contains(b))
            .unwrap_or(rest.len());
        let (mut run, after_run) = rest.split_at(run_len);

        // Copy the run in pieces that never push the buffer past BUF_SIZE.
        while !run.is_empty() {
            let room = BUF_SIZE - buf.len();
            let take = room.min(run.len());
            buf.extend_from_slice(&run[..take]);
            run = &run[take..];
            if buf.len() >= BUF_SIZE {
                out.write_all(&buf)?;
                buf.clear();
            }
        }

        // Handle the special byte that ended the run, if any.
        let Some((&b, tail)) = after_run.split_first() else {
            break;
        };
        rest = tail;

        match b {
            b'\n' if show_ends => buf.extend_from_slice(b"$\n"),
            b'\t' if show_tabs => buf.extend_from_slice(b"^I"),
            b'\n' | b'\t' => buf.push(b),
            // Control chars: ^@ through ^_
            0..=31 => buf.extend_from_slice(&[b'^', b + 64]),
            127 => buf.extend_from_slice(b"^?"),
            // M-^@ through M-^_
            128..=159 => {
                buf.extend_from_slice(b"M-^");
                buf.push(b - 64);
            }
            // M-space through M-~
            160..=254 => {
                buf.extend_from_slice(b"M-");
                buf.push(b - 128);
            }
            255 => buf.extend_from_slice(b"M-^?"),
            32..=126 => unreachable!("printable bytes are consumed by the run scan"),
        }

        // Flush when buffer is large enough
        if buf.len() >= BUF_SIZE {
            out.write_all(&buf)?;
            buf.clear();
        }
    }

    if !buf.is_empty() {
        out.write_all(&buf)?;
    }
    Ok(())
}
359
360/// Cat with options (numbering, show-ends, show-tabs, show-nonprinting, squeeze)
361pub fn cat_with_options(
362    data: &[u8],
363    config: &CatConfig,
364    line_num: &mut u64,
365    out: &mut impl Write,
366) -> io::Result<()> {
367    if data.is_empty() {
368        return Ok(());
369    }
370
371    // Fast path: show-all without numbering or squeeze
372    if config.show_nonprinting && !config.number && !config.number_nonblank && !config.squeeze_blank
373    {
374        return cat_show_all_fast(data, config.show_tabs, config.show_ends, out);
375    }
376
377    // Pre-allocate output buffer (worst case: every byte expands to 4 chars for M-^X)
378    // In practice, most files are mostly printable, so 1.1x is a good estimate
379    let estimated = data.len() + data.len() / 10 + 1024;
380    let mut buf = Vec::with_capacity(estimated.min(16 * 1024 * 1024));
381
382    let mut prev_blank = false;
383    let mut pos = 0;
384    let mut itoa_buf = itoa::Buffer::new();
385
386    while pos < data.len() {
387        // Find end of this line
388        let line_end = memchr::memchr(b'\n', &data[pos..])
389            .map(|p| pos + p + 1)
390            .unwrap_or(data.len());
391
392        let line = &data[pos..line_end];
393        let is_blank = line == b"\n" || line.is_empty();
394
395        // Squeeze blank lines
396        if config.squeeze_blank && is_blank && prev_blank {
397            pos = line_end;
398            continue;
399        }
400        prev_blank = is_blank;
401
402        // Line numbering - use itoa for fast integer formatting
403        if config.number_nonblank {
404            if !is_blank {
405                let s = itoa_buf.format(*line_num);
406                // Right-align in 6-char field
407                let pad = if s.len() < 6 { 6 - s.len() } else { 0 };
408                buf.extend(std::iter::repeat_n(b' ', pad));
409                buf.extend_from_slice(s.as_bytes());
410                buf.push(b'\t');
411                *line_num += 1;
412            }
413        } else if config.number {
414            let s = itoa_buf.format(*line_num);
415            let pad = if s.len() < 6 { 6 - s.len() } else { 0 };
416            buf.extend(std::iter::repeat_n(b' ', pad));
417            buf.extend_from_slice(s.as_bytes());
418            buf.push(b'\t');
419            *line_num += 1;
420        }
421
422        // Process line content
423        if config.show_nonprinting || config.show_tabs {
424            let content_end = if line.last() == Some(&b'\n') {
425                line.len() - 1
426            } else {
427                line.len()
428            };
429
430            for &b in &line[..content_end] {
431                if config.show_nonprinting {
432                    write_nonprinting(b, config.show_tabs, &mut buf);
433                } else if config.show_tabs && b == b'\t' {
434                    buf.extend_from_slice(b"^I");
435                } else {
436                    buf.push(b);
437                }
438            }
439
440            if config.show_ends && line.last() == Some(&b'\n') {
441                buf.push(b'$');
442            }
443            if line.last() == Some(&b'\n') {
444                buf.push(b'\n');
445            }
446        } else {
447            // No character transformation needed
448            if config.show_ends {
449                let content_end = if line.last() == Some(&b'\n') {
450                    line.len() - 1
451                } else {
452                    line.len()
453                };
454                buf.extend_from_slice(&line[..content_end]);
455                if line.last() == Some(&b'\n') {
456                    buf.push(b'$');
457                    buf.push(b'\n');
458                }
459            } else {
460                buf.extend_from_slice(line);
461            }
462        }
463
464        // Flush buffer periodically to avoid excessive memory use
465        if buf.len() >= 8 * 1024 * 1024 {
466            out.write_all(&buf)?;
467            buf.clear();
468        }
469
470        pos = line_end;
471    }
472
473    if !buf.is_empty() {
474        out.write_all(&buf)?;
475    }
476
477    Ok(())
478}
479
480/// Process a single file for cat
481pub fn cat_file(
482    filename: &str,
483    config: &CatConfig,
484    line_num: &mut u64,
485    out: &mut impl Write,
486    tool_name: &str,
487) -> io::Result<bool> {
488    if filename == "-" {
489        if config.is_plain() {
490            match cat_plain_stdin(out) {
491                Ok(()) => return Ok(true),
492                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
493                    std::process::exit(0);
494                }
495                Err(e) => {
496                    eprintln!(
497                        "{}: standard input: {}",
498                        tool_name,
499                        crate::common::io_error_msg(&e)
500                    );
501                    return Ok(false);
502                }
503            }
504        }
505        match read_stdin() {
506            Ok(data) => {
507                cat_with_options(&data, config, line_num, out)?;
508                Ok(true)
509            }
510            Err(e) => {
511                eprintln!(
512                    "{}: standard input: {}",
513                    tool_name,
514                    crate::common::io_error_msg(&e)
515                );
516                Ok(false)
517            }
518        }
519    } else {
520        let path = Path::new(filename);
521
522        // Check if it's a directory
523        match std::fs::metadata(path) {
524            Ok(meta) if meta.is_dir() => {
525                eprintln!("{}: {}: Is a directory", tool_name, filename);
526                return Ok(false);
527            }
528            _ => {}
529        }
530
531        if config.is_plain() {
532            match cat_plain_file(path, out) {
533                Ok(true) => return Ok(true),
534                Ok(false) => {} // fall through
535                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
536                    std::process::exit(0);
537                }
538                Err(e) => {
539                    eprintln!(
540                        "{}: {}: {}",
541                        tool_name,
542                        filename,
543                        crate::common::io_error_msg(&e)
544                    );
545                    return Ok(false);
546                }
547            }
548        }
549
550        match read_file(path) {
551            Ok(data) => {
552                cat_with_options(&data, config, line_num, out)?;
553                Ok(true)
554            }
555            Err(e) => {
556                eprintln!(
557                    "{}: {}: {}",
558                    tool_name,
559                    filename,
560                    crate::common::io_error_msg(&e)
561                );
562                Ok(false)
563            }
564        }
565    }
566}