Skip to main content

coreutils_rs/cat/
core.rs

1use std::io::{self, Read, Write};
2use std::path::Path;
3
4use crate::common::io::{read_file, read_stdin};
5
/// Option flags that select how `cat` transforms its input.
///
/// With every flag cleared (the `Default`), input is copied through verbatim.
#[derive(Clone, Debug, Default)]
pub struct CatConfig {
    /// Prefix every output line with its line number.
    pub number: bool,
    /// Prefix only non-blank lines with a line number (takes precedence over `number`).
    pub number_nonblank: bool,
    /// Emit `$` immediately before each newline.
    pub show_ends: bool,
    /// Render TAB as `^I`.
    pub show_tabs: bool,
    /// Render control and high-bit bytes in `^X` / `M-X` notation.
    pub show_nonprinting: bool,
    /// Collapse runs of blank lines into a single blank line.
    pub squeeze_blank: bool,
}

impl CatConfig {
    /// Reports whether the input can be copied through untouched,
    /// i.e. none of the transformation flags is set.
    pub fn is_plain(&self) -> bool {
        let any_transform = self.number
            || self.number_nonblank
            || self.show_ends
            || self.show_tabs
            || self.show_nonprinting
            || self.squeeze_blank;
        !any_transform
    }
}
28
29/// Use splice for zero-copy file→stdout on Linux (file → pipe)
30#[cfg(target_os = "linux")]
31pub fn splice_file_to_stdout(path: &Path) -> io::Result<bool> {
32    use std::os::unix::fs::OpenOptionsExt;
33    use std::os::unix::io::AsRawFd;
34
35    // Check if stdout is a pipe (splice only works with pipes)
36    let stdout = io::stdout();
37    let out_fd = stdout.as_raw_fd();
38    let mut stat: libc::stat = unsafe { std::mem::zeroed() };
39    if unsafe { libc::fstat(out_fd, &mut stat) } != 0 {
40        return Ok(false);
41    }
42    let stdout_is_pipe = (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO;
43
44    let file = std::fs::OpenOptions::new()
45        .read(true)
46        .custom_flags(libc::O_NOATIME)
47        .open(path)
48        .or_else(|_| std::fs::File::open(path))?;
49
50    let in_fd = file.as_raw_fd();
51    let metadata = file.metadata()?;
52    let file_size = metadata.len() as usize;
53
54    if file_size == 0 {
55        return Ok(true);
56    }
57
58    if stdout_is_pipe {
59        // splice: zero-copy file→pipe
60        let mut remaining = file_size;
61        while remaining > 0 {
62            let chunk = remaining.min(1024 * 1024 * 1024);
63            let ret = unsafe {
64                libc::splice(
65                    in_fd,
66                    std::ptr::null_mut(),
67                    out_fd,
68                    std::ptr::null_mut(),
69                    chunk,
70                    libc::SPLICE_F_MOVE,
71                )
72            };
73            if ret > 0 {
74                remaining -= ret as usize;
75            } else if ret == 0 {
76                break;
77            } else {
78                let err = io::Error::last_os_error();
79                if err.kind() == io::ErrorKind::Interrupted {
80                    continue;
81                }
82                // splice not supported — fall through to sendfile
83                return sendfile_to_stdout(in_fd, file_size, out_fd);
84            }
85        }
86        Ok(true)
87    } else {
88        // sendfile: zero-copy file→socket/file
89        sendfile_to_stdout(in_fd, file_size, out_fd)
90    }
91}
92
93#[cfg(target_os = "linux")]
94fn sendfile_to_stdout(in_fd: i32, file_size: usize, out_fd: i32) -> io::Result<bool> {
95    let mut offset: libc::off_t = 0;
96    let mut remaining = file_size;
97
98    while remaining > 0 {
99        let chunk = remaining.min(0x7ffff000);
100        let ret = unsafe { libc::sendfile(out_fd, in_fd, &mut offset, chunk) };
101        if ret > 0 {
102            remaining -= ret as usize;
103        } else if ret == 0 {
104            break;
105        } else {
106            let err = io::Error::last_os_error();
107            if err.kind() == io::ErrorKind::Interrupted {
108                continue;
109            }
110            return Err(err);
111        }
112    }
113
114    Ok(true)
115}
116
117/// Plain cat for a single file — tries splice/sendfile, then falls back to mmap+write
118pub fn cat_plain_file(path: &Path, out: &mut impl Write) -> io::Result<bool> {
119    // Try zero-copy first on Linux
120    #[cfg(target_os = "linux")]
121    {
122        match splice_file_to_stdout(path) {
123            Ok(true) => return Ok(true),
124            Ok(false) => {}
125            Err(_) => {} // fall through
126        }
127    }
128
129    // Fallback: mmap + write
130    let data = read_file(path)?;
131    if !data.is_empty() {
132        out.write_all(&data)?;
133    }
134    Ok(true)
135}
136
137/// Plain cat for stdin — try splice on Linux, otherwise bulk read+write
138pub fn cat_plain_stdin(out: &mut impl Write) -> io::Result<()> {
139    #[cfg(target_os = "linux")]
140    {
141        // Try splice stdin→stdout if both are pipes
142        let stdin_fd = 0i32;
143        let mut stat: libc::stat = unsafe { std::mem::zeroed() };
144        if unsafe { libc::fstat(1, &mut stat) } == 0
145            && (stat.st_mode & libc::S_IFMT) == libc::S_IFIFO
146        {
147            // stdout is a pipe, try splice from stdin
148            loop {
149                let ret = unsafe {
150                    libc::splice(
151                        stdin_fd,
152                        std::ptr::null_mut(),
153                        1,
154                        std::ptr::null_mut(),
155                        1024 * 1024 * 1024,
156                        libc::SPLICE_F_MOVE,
157                    )
158                };
159                if ret > 0 {
160                    continue;
161                } else if ret == 0 {
162                    return Ok(());
163                } else {
164                    let err = io::Error::last_os_error();
165                    if err.kind() == io::ErrorKind::Interrupted {
166                        continue;
167                    }
168                    // splice not supported, fall through to read+write
169                    break;
170                }
171            }
172        }
173    }
174
175    // Fallback: read+write loop
176    let stdin = io::stdin();
177    let mut reader = stdin.lock();
178    let mut buf = [0u8; 131072]; // 128KB buffer
179    loop {
180        let n = match reader.read(&mut buf) {
181            Ok(0) => break,
182            Ok(n) => n,
183            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
184            Err(e) => return Err(e),
185        };
186        out.write_all(&buf[..n])?;
187    }
188    Ok(())
189}
190
/// Build the 256-entry lookup tables used for non-printing character display.
///
/// Returns `(table, multi)`: `table[b]` is the final output byte for input
/// byte `b`, and `multi[b]` is true when `b` is rendered as more than one
/// byte (a `^` and/or `M-` prefix precedes `table[b]`).
///
/// Newline always maps to itself; TAB maps to itself unless `show_tabs` is
/// set, in which case it is treated as the control character `^I`.
fn _build_nonprinting_table(show_tabs: bool) -> ([u8; 256], [bool; 256]) {
    let mut glyphs = [0u8; 256];
    let mut expands = [false; 256];

    let slots = glyphs.iter_mut().zip(expands.iter_mut());
    for (byte, (glyph, wide)) in (0u8..=255).zip(slots) {
        let (mapped, is_wide) = match byte {
            b'\n' => (b'\n', false),
            b'\t' if show_tabs => (b'I', true),
            b'\t' => (b'\t', false),
            // Remaining control chars: ^@ through ^_
            0x00..=0x1f => (byte + 64, true),
            // Printable ASCII passes through
            0x20..=0x7e => (byte, false),
            // DEL: ^?
            0x7f => (b'?', true),
            // M-^@ through M-^_
            0x80..=0x9f => (byte - 64, true),
            // M-space through M-~
            0xa0..=0xfe => (byte - 128, true),
            // M-^?
            0xff => (b'?', true),
        };
        *glyph = mapped;
        *wide = is_wide;
    }

    (glyphs, expands)
}
245
/// Append byte `b` to `out` using `cat -v` notation.
///
/// Newline and printable ASCII are copied as-is. TAB is copied as-is unless
/// `show_tabs` is set, in which case it becomes `^I`. Other control bytes
/// become `^X`, DEL becomes `^?`, and bytes with the high bit set get an `M-`
/// prefix followed by the notation for the byte with that bit cleared.
#[inline]
fn write_nonprinting(b: u8, show_tabs: bool, out: &mut Vec<u8>) {
    match b {
        b'\n' => out.push(b),
        b'\t' if show_tabs => out.extend_from_slice(b"^I"),
        b'\t' => out.push(b),
        // Printable ASCII
        0x20..=0x7e => out.push(b),
        // Remaining control chars: ^@ through ^_
        0x00..=0x1f => out.extend_from_slice(&[b'^', b + 64]),
        // DEL
        0x7f => out.extend_from_slice(b"^?"),
        // M-^@ through M-^_
        0x80..=0x9f => out.extend_from_slice(&[b'M', b'-', b'^', b - 64]),
        // M-space through M-~
        0xa0..=0xfe => out.extend_from_slice(&[b'M', b'-', b - 128]),
        // M-^?
        0xff => out.extend_from_slice(b"M-^?"),
    }
}
285
286/// Cat with options (numbering, show-ends, show-tabs, show-nonprinting, squeeze)
287pub fn cat_with_options(
288    data: &[u8],
289    config: &CatConfig,
290    line_num: &mut u64,
291    out: &mut impl Write,
292) -> io::Result<()> {
293    if data.is_empty() {
294        return Ok(());
295    }
296
297    // Pre-allocate output buffer (worst case: every byte expands to 4 chars for M-^X)
298    // In practice, most files are mostly printable, so 1.1x is a good estimate
299    let estimated = data.len() + data.len() / 10 + 1024;
300    let mut buf = Vec::with_capacity(estimated.min(16 * 1024 * 1024));
301
302    let mut prev_blank = false;
303    let mut pos = 0;
304
305    while pos < data.len() {
306        // Find end of this line
307        let line_end = memchr::memchr(b'\n', &data[pos..])
308            .map(|p| pos + p + 1)
309            .unwrap_or(data.len());
310
311        let line = &data[pos..line_end];
312        let is_blank = line == b"\n" || line.is_empty();
313
314        // Squeeze blank lines
315        if config.squeeze_blank && is_blank && prev_blank {
316            pos = line_end;
317            continue;
318        }
319        prev_blank = is_blank;
320
321        // Line numbering
322        if config.number_nonblank {
323            if !is_blank {
324                let _ = write!(buf, "{:6}\t", line_num);
325                *line_num += 1;
326            }
327        } else if config.number {
328            let _ = write!(buf, "{:6}\t", line_num);
329            *line_num += 1;
330        }
331
332        // Process line content
333        if config.show_nonprinting || config.show_tabs {
334            let content_end = if line.last() == Some(&b'\n') {
335                line.len() - 1
336            } else {
337                line.len()
338            };
339
340            for &b in &line[..content_end] {
341                if config.show_nonprinting {
342                    write_nonprinting(b, config.show_tabs, &mut buf);
343                } else if config.show_tabs && b == b'\t' {
344                    buf.extend_from_slice(b"^I");
345                } else {
346                    buf.push(b);
347                }
348            }
349
350            if config.show_ends && line.last() == Some(&b'\n') {
351                buf.push(b'$');
352            }
353            if line.last() == Some(&b'\n') {
354                buf.push(b'\n');
355            }
356        } else {
357            // No character transformation needed
358            if config.show_ends {
359                let content_end = if line.last() == Some(&b'\n') {
360                    line.len() - 1
361                } else {
362                    line.len()
363                };
364                buf.extend_from_slice(&line[..content_end]);
365                if line.last() == Some(&b'\n') {
366                    buf.push(b'$');
367                    buf.push(b'\n');
368                }
369            } else {
370                buf.extend_from_slice(line);
371            }
372        }
373
374        // Flush buffer periodically to avoid excessive memory use
375        if buf.len() >= 8 * 1024 * 1024 {
376            out.write_all(&buf)?;
377            buf.clear();
378        }
379
380        pos = line_end;
381    }
382
383    if !buf.is_empty() {
384        out.write_all(&buf)?;
385    }
386
387    Ok(())
388}
389
390/// Process a single file for cat
391pub fn cat_file(
392    filename: &str,
393    config: &CatConfig,
394    line_num: &mut u64,
395    out: &mut impl Write,
396    tool_name: &str,
397) -> io::Result<bool> {
398    if filename == "-" {
399        if config.is_plain() {
400            match cat_plain_stdin(out) {
401                Ok(()) => return Ok(true),
402                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
403                    std::process::exit(0);
404                }
405                Err(e) => {
406                    eprintln!(
407                        "{}: standard input: {}",
408                        tool_name,
409                        crate::common::io_error_msg(&e)
410                    );
411                    return Ok(false);
412                }
413            }
414        }
415        match read_stdin() {
416            Ok(data) => {
417                cat_with_options(&data, config, line_num, out)?;
418                Ok(true)
419            }
420            Err(e) => {
421                eprintln!(
422                    "{}: standard input: {}",
423                    tool_name,
424                    crate::common::io_error_msg(&e)
425                );
426                Ok(false)
427            }
428        }
429    } else {
430        let path = Path::new(filename);
431
432        // Check if it's a directory
433        match std::fs::metadata(path) {
434            Ok(meta) if meta.is_dir() => {
435                eprintln!("{}: {}: Is a directory", tool_name, filename);
436                return Ok(false);
437            }
438            _ => {}
439        }
440
441        if config.is_plain() {
442            match cat_plain_file(path, out) {
443                Ok(true) => return Ok(true),
444                Ok(false) => {} // fall through
445                Err(e) if e.kind() == io::ErrorKind::BrokenPipe => {
446                    std::process::exit(0);
447                }
448                Err(e) => {
449                    eprintln!(
450                        "{}: {}: {}",
451                        tool_name,
452                        filename,
453                        crate::common::io_error_msg(&e)
454                    );
455                    return Ok(false);
456                }
457            }
458        }
459
460        match read_file(path) {
461            Ok(data) => {
462                cat_with_options(&data, config, line_num, out)?;
463                Ok(true)
464            }
465            Err(e) => {
466                eprintln!(
467                    "{}: {}: {}",
468                    tool_name,
469                    filename,
470                    crate::common::io_error_msg(&e)
471                );
472                Ok(false)
473            }
474        }
475    }
476}