Skip to main content

browser_cat/
reader.rs

//! Multi-source streaming reader with format sniffing.
//!
//! Ported from bcat's `lib/bcat/reader.rb` by Ryan Tomayko.
4use std::io::{self, Read};
5use std::process::{Command, Stdio};
6
/// Size in bytes (16 KiB) of the buffer used for each raw `read()` call.
const CHUNK: usize = 16 * 1024;
8
9// ── Input format ─────────────────────────────────────────────────────────────
10
/// Format of the input stream, either forced by the caller or sniffed from
/// the first chunk of data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// HTML; what [`sniff`] reports when the first non-whitespace byte is `<`.
    Html,
    /// Plain text; what [`sniff`] reports for any other input, including
    /// empty or all-whitespace chunks.
    Text,
}
16
17// ── Source ────────────────────────────────────────────────────────────────────
18
/// A single input source (stdin, a file path, or a command to run).
#[derive(Debug, Clone)]
pub enum Source {
    /// Read from the process's standard input.
    Stdin,
    /// Read from the file at this path (opened with `std::fs::File::open`).
    File(String),
    /// Spawn a child process and read its stdout. The first element is the
    /// program; the remaining elements are passed to it as arguments.
    Command(Vec<String>),
}
26
27// ── Reader ────────────────────────────────────────────────────────────────────
28
29/// ARGF-style multi-source streaming reader.
30///
31/// Reads from stdin, one or more files, or a child process's stdout.
32/// Uses raw `read()` calls (not line-buffered) to enable true streaming.
33pub struct Reader {
34    sources: Vec<Source>,
35    /// Explicit format override; if `None`, sniff from the first chunk.
36    forced_format: Option<Format>,
37    /// The format detected (or overridden) after the first chunk.
38    detected_format: Option<Format>,
39}
40
41impl Reader {
42    pub fn new(sources: Vec<Source>, forced_format: Option<Format>) -> Self {
43        Self {
44            sources,
45            forced_format,
46            detected_format: None,
47        }
48    }
49
50    /// Convenience: build sources from CLI args (empty args → stdin).
51    /// In command mode (`-c`) treat args as a shell command.
52    pub fn from_args(args: &[String], command_mode: bool) -> Self {
53        let sources = if command_mode {
54            vec![Source::Command(args.to_vec())]
55        } else if args.is_empty() {
56            vec![Source::Stdin]
57        } else {
58            args.iter()
59                .map(|a| {
60                    if a == "-" {
61                        Source::Stdin
62                    } else {
63                        Source::File(a.clone())
64                    }
65                })
66                .collect()
67        };
68        Self::new(sources, None)
69    }
70
71    /// The detected (or forced) input format. Only valid after at least one
72    /// call to [`Reader::read_chunks`] or [`Reader::sniff`].
73    pub fn format(&self) -> Option<Format> {
74        self.forced_format.or(self.detected_format)
75    }
76
77    /// Iterate over all chunks from all sources, calling `f` for each.
78    /// The first chunk triggers format sniffing if no format was forced.
79    pub fn read_chunks<F>(&mut self, mut f: F) -> io::Result<()>
80    where
81        F: FnMut(&[u8], Format),
82    {
83        let sources = std::mem::take(&mut self.sources);
84        for source in &sources {
85            self.read_source(source, &mut f)?;
86        }
87        self.sources = sources;
88        Ok(())
89    }
90
91    fn read_source<F>(&mut self, source: &Source, f: &mut F) -> io::Result<()>
92    where
93        F: FnMut(&[u8], Format),
94    {
95        match source {
96            Source::Stdin => self.read_reader(&mut io::stdin(), f),
97            Source::File(path) => {
98                let mut file = std::fs::File::open(path)?;
99                self.read_reader(&mut file, f)
100            }
101            Source::Command(args) => {
102                let (prog, rest) = args.split_first().expect("empty command");
103                let mut child = Command::new(prog)
104                    .args(rest)
105                    .stdout(Stdio::piped())
106                    .spawn()?;
107                let mut stdout = child.stdout.take().expect("no stdout");
108                self.read_reader(&mut stdout, f)?;
109                child.wait()?;
110                Ok(())
111            }
112        }
113    }
114
115    fn read_reader<R: Read, F>(&mut self, reader: &mut R, f: &mut F) -> io::Result<()>
116    where
117        F: FnMut(&[u8], Format),
118    {
119        let mut buf = vec![0u8; CHUNK];
120        loop {
121            let n = reader.read(&mut buf)?;
122            if n == 0 {
123                break;
124            }
125            let chunk = &buf[..n];
126            let fmt = self.ensure_format(chunk);
127            f(chunk, fmt);
128        }
129        Ok(())
130    }
131
132    /// Determine and cache the format, peeking at `chunk` if needed.
133    fn ensure_format(&mut self, chunk: &[u8]) -> Format {
134        if let Some(fmt) = self.forced_format.or(self.detected_format) {
135            return fmt;
136        }
137        let detected = sniff(chunk);
138        self.detected_format = Some(detected);
139        detected
140    }
141}
142
143/// Peek at the first chunk to determine whether it looks like HTML.
144/// Returns `Format::Html` if the first non-whitespace byte is `<`.
145pub fn sniff(chunk: &[u8]) -> Format {
146    let first_nonws = chunk.iter().find(|&&b| !b.is_ascii_whitespace());
147    if first_nonws == Some(&b'<') {
148        Format::Html
149    } else {
150        Format::Text
151    }
152}
153
154// ── TeeFilter ─────────────────────────────────────────────────────────────────
155
/// Pass-through tee: copies every chunk it is handed to a wrapped writer.
/// Used in `btee` mode.
pub struct TeeFilter<W: io::Write> {
    /// Destination that receives a copy of each chunk.
    out: W,
}

impl<W> TeeFilter<W>
where
    W: io::Write,
{
    /// Wrap `out` as the tee destination.
    pub fn new(out: W) -> Self {
        TeeFilter { out }
    }

    /// Copy `chunk` to the tee destination, then hand the same slice back.
    ///
    /// The copy is best-effort: a write error is discarded so a broken tee
    /// destination never interrupts the main stream.
    pub fn filter<'a>(&mut self, chunk: &'a [u8]) -> &'a [u8] {
        if let Err(_ignored) = self.out.write_all(chunk) {
            // Deliberately ignored (best-effort tee).
        }
        chunk
    }
}
173
174// ── Tests ─────────────────────────────────────────────────────────────────────
175
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Feed `data` through `Reader::read_reader` from an in-memory cursor and
    /// collect every chunk together with the format reported for it.
    fn drain(reader: &mut Reader, data: &[u8]) -> (Vec<Vec<u8>>, Vec<Format>) {
        let mut chunks: Vec<Vec<u8>> = Vec::new();
        let mut formats: Vec<Format> = Vec::new();
        let mut cursor = Cursor::new(data);
        reader
            .read_reader(&mut cursor, &mut |chunk: &[u8], fmt: Format| {
                chunks.push(chunk.to_vec());
                formats.push(fmt);
            })
            .unwrap();
        (chunks, formats)
    }

    #[test]
    fn sniff_html() {
        assert_eq!(sniff(b"<html>"), Format::Html);
        assert_eq!(sniff(b"  \n<p>"), Format::Html);
        assert_eq!(sniff(b"<!DOCTYPE"), Format::Html);
    }

    #[test]
    fn sniff_text() {
        assert_eq!(sniff(b"hello"), Format::Text);
        assert_eq!(sniff(b"  plain text"), Format::Text);
        assert_eq!(sniff(b""), Format::Text);
    }

    // Drives the real `Reader` read loop rather than a hand-rolled copy of it.
    #[test]
    fn reader_stdin_like_cursor() {
        let data = b"hello world";
        let mut reader = Reader::new(Vec::new(), None);
        assert_eq!(reader.format(), None);

        let (chunks, formats) = drain(&mut reader, data);

        assert_eq!(chunks.concat(), data);
        assert_eq!(formats, vec![Format::Text]);
        assert_eq!(reader.format(), Some(Format::Text));
    }

    #[test]
    fn reader_splits_large_input_into_chunks() {
        let data = vec![b'x'; CHUNK * 2 + 5];
        let mut reader = Reader::new(Vec::new(), None);

        let (chunks, formats) = drain(&mut reader, &data);

        let sizes: Vec<usize> = chunks.iter().map(Vec::len).collect();
        assert_eq!(sizes, vec![CHUNK, CHUNK, 5]);
        assert_eq!(chunks.concat(), data);
        assert!(formats.iter().all(|&fmt| fmt == Format::Text));
    }

    #[test]
    fn reader_sniffs_only_the_first_chunk() {
        let mut reader = Reader::new(Vec::new(), None);
        let (_, first) = drain(&mut reader, b"<p>markup</p>");
        // The format detected on the first chunk sticks for later input.
        let (_, second) = drain(&mut reader, b"plain text");
        assert_eq!(first, vec![Format::Html]);
        assert_eq!(second, vec![Format::Html]);
    }

    #[test]
    fn reader_forced_format_overrides_sniffing() {
        let mut reader = Reader::new(Vec::new(), Some(Format::Text));
        let (_, formats) = drain(&mut reader, b"<html>");
        assert_eq!(formats, vec![Format::Text]);
        assert_eq!(reader.format(), Some(Format::Text));
    }

    #[test]
    fn reader_missing_file_is_error() {
        let missing = "/nonexistent-dir-for-reader-test/definitely-missing.txt".to_string();
        let mut reader = Reader::new(vec![Source::File(missing)], None);
        assert!(reader.read_chunks(|_, _| {}).is_err());
    }

    #[test]
    fn reader_from_args_empty_is_stdin() {
        let r = Reader::from_args(&[], false);
        assert_eq!(r.sources.len(), 1);
        assert!(matches!(r.sources[0], Source::Stdin));
    }

    #[test]
    fn reader_from_args_dash_is_stdin() {
        let r = Reader::from_args(&["-".to_string()], false);
        assert!(matches!(r.sources[0], Source::Stdin));
    }

    #[test]
    fn reader_from_args_paths_are_files() {
        let r = Reader::from_args(&["a.txt".to_string(), "-".to_string()], false);
        assert!(matches!(&r.sources[0], Source::File(path) if path == "a.txt"));
        assert!(matches!(r.sources[1], Source::Stdin));
    }

    #[test]
    fn reader_from_args_command_mode() {
        let r = Reader::from_args(&["echo".to_string(), "hi".to_string()], true);
        assert_eq!(r.sources.len(), 1);
        assert!(matches!(&r.sources[0], Source::Command(args) if args[0] == "echo"));
    }

    #[test]
    fn tee_filter_passthrough() {
        let mut captured = Vec::new();
        let mut tee = TeeFilter::new(&mut captured);
        let data = b"test data";
        let out = tee.filter(data);
        assert_eq!(out, data);
        assert_eq!(captured, data);
    }
}