1use std::io::{self, Read};
5use std::process::{Command, Stdio};
6
/// Size in bytes of the scratch buffer used for each `read` call (16 KiB).
const CHUNK: usize = 16 * 1024;

/// Content format attached to every chunk handed to the reader callback.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Format {
    /// Input whose first non-whitespace byte is `<`.
    Html,
    /// Anything that is not classified as HTML.
    Text,
}
16
/// One place a reader can pull bytes from.
#[derive(Clone, Debug)]
pub enum Source {
    /// The process's standard input.
    Stdin,
    /// A file, identified by its path.
    File(String),
    /// A child process whose standard output is read; the first element is
    /// the program and the remaining elements are its arguments.
    Command(Vec<String>),
}
26
27pub struct Reader {
34 sources: Vec<Source>,
35 forced_format: Option<Format>,
37 detected_format: Option<Format>,
39}
40
41impl Reader {
42 pub fn new(sources: Vec<Source>, forced_format: Option<Format>) -> Self {
43 Self {
44 sources,
45 forced_format,
46 detected_format: None,
47 }
48 }
49
50 pub fn from_args(args: &[String], command_mode: bool) -> Self {
53 let sources = if command_mode {
54 vec![Source::Command(args.to_vec())]
55 } else if args.is_empty() {
56 vec![Source::Stdin]
57 } else {
58 args.iter()
59 .map(|a| {
60 if a == "-" {
61 Source::Stdin
62 } else {
63 Source::File(a.clone())
64 }
65 })
66 .collect()
67 };
68 Self::new(sources, None)
69 }
70
71 pub fn format(&self) -> Option<Format> {
74 self.forced_format.or(self.detected_format)
75 }
76
77 pub fn read_chunks<F>(&mut self, mut f: F) -> io::Result<()>
80 where
81 F: FnMut(&[u8], Format),
82 {
83 let sources = std::mem::take(&mut self.sources);
84 for source in &sources {
85 self.read_source(source, &mut f)?;
86 }
87 self.sources = sources;
88 Ok(())
89 }
90
91 fn read_source<F>(&mut self, source: &Source, f: &mut F) -> io::Result<()>
92 where
93 F: FnMut(&[u8], Format),
94 {
95 match source {
96 Source::Stdin => self.read_reader(&mut io::stdin(), f),
97 Source::File(path) => {
98 let mut file = std::fs::File::open(path)?;
99 self.read_reader(&mut file, f)
100 }
101 Source::Command(args) => {
102 let (prog, rest) = args.split_first().expect("empty command");
103 let mut child = Command::new(prog)
104 .args(rest)
105 .stdout(Stdio::piped())
106 .spawn()?;
107 let mut stdout = child.stdout.take().expect("no stdout");
108 self.read_reader(&mut stdout, f)?;
109 child.wait()?;
110 Ok(())
111 }
112 }
113 }
114
115 fn read_reader<R: Read, F>(&mut self, reader: &mut R, f: &mut F) -> io::Result<()>
116 where
117 F: FnMut(&[u8], Format),
118 {
119 let mut buf = vec![0u8; CHUNK];
120 loop {
121 let n = reader.read(&mut buf)?;
122 if n == 0 {
123 break;
124 }
125 let chunk = &buf[..n];
126 let fmt = self.ensure_format(chunk);
127 f(chunk, fmt);
128 }
129 Ok(())
130 }
131
132 fn ensure_format(&mut self, chunk: &[u8]) -> Format {
134 if let Some(fmt) = self.forced_format.or(self.detected_format) {
135 return fmt;
136 }
137 let detected = sniff(chunk);
138 self.detected_format = Some(detected);
139 detected
140 }
141}
142
143pub fn sniff(chunk: &[u8]) -> Format {
146 let first_nonws = chunk.iter().find(|&&b| !b.is_ascii_whitespace());
147 if first_nonws == Some(&b'<') {
148 Format::Html
149 } else {
150 Format::Text
151 }
152}
153
/// Pass-through filter that copies every chunk it sees into a writer.
pub struct TeeFilter<W>
where
    W: io::Write,
{
    /// Destination that receives a copy of each chunk.
    out: W,
}
161
162impl<W: io::Write> TeeFilter<W> {
163 pub fn new(out: W) -> Self {
164 Self { out }
165 }
166
167 pub fn filter<'a>(&mut self, chunk: &'a [u8]) -> &'a [u8] {
169 let _ = self.out.write_all(chunk); chunk
171 }
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    #[test]
    fn sniff_html() {
        assert_eq!(sniff(b"<html>"), Format::Html);
        assert_eq!(sniff(b" \n<p>"), Format::Html);
        assert_eq!(sniff(b"<!DOCTYPE"), Format::Html);
    }

    #[test]
    fn sniff_text() {
        assert_eq!(sniff(b"hello"), Format::Text);
        assert_eq!(sniff(b" plain text"), Format::Text);
        assert_eq!(sniff(b""), Format::Text);
    }

    // Drives the real read loop with an in-memory reader standing in for
    // stdin, instead of re-implementing the loop inside the test.
    #[test]
    fn reader_stdin_like_cursor() {
        let data = b"hello world";
        let mut cursor = Cursor::new(data);
        let mut chunks: Vec<Vec<u8>> = Vec::new();
        let mut fmt_seen = None;

        let mut reader = Reader::new(Vec::new(), None);
        let mut sink = |chunk: &[u8], fmt: Format| {
            if fmt_seen.is_none() {
                fmt_seen = Some(fmt);
            }
            chunks.push(chunk.to_vec());
        };
        reader.read_reader(&mut cursor, &mut sink).unwrap();

        assert_eq!(fmt_seen, Some(Format::Text));
        assert_eq!(chunks.concat(), data);
        assert_eq!(reader.format(), Some(Format::Text));
    }

    #[test]
    fn reader_splits_input_larger_than_chunk() {
        let data = vec![b'<'; CHUNK + 1];
        let mut reader = Reader::new(Vec::new(), None);
        let mut lens = Vec::new();
        let mut sink = |chunk: &[u8], fmt: Format| {
            assert_eq!(fmt, Format::Html);
            lens.push(chunk.len());
        };
        reader.read_reader(&mut Cursor::new(data), &mut sink).unwrap();
        assert_eq!(lens, vec![CHUNK, 1]);
    }

    #[test]
    fn reader_forced_format_wins_over_sniffing() {
        let mut reader = Reader::new(Vec::new(), Some(Format::Html));
        let mut seen = Vec::new();
        let mut sink = |_: &[u8], fmt: Format| seen.push(fmt);
        reader
            .read_reader(&mut Cursor::new(&b"plain"[..]), &mut sink)
            .unwrap();
        assert_eq!(seen, vec![Format::Html]);
        assert_eq!(reader.format(), Some(Format::Html));
    }

    #[test]
    fn reader_from_args_empty_is_stdin() {
        let r = Reader::from_args(&[], false);
        assert!(matches!(r.sources[0], Source::Stdin));
    }

    #[test]
    fn reader_from_args_dash_is_stdin() {
        let r = Reader::from_args(&["-".to_string()], false);
        assert!(matches!(r.sources[0], Source::Stdin));
    }

    #[test]
    fn reader_from_args_files() {
        let r = Reader::from_args(&["a.txt".to_string(), "-".to_string()], false);
        assert_eq!(r.sources.len(), 2);
        assert!(matches!(&r.sources[0], Source::File(p) if p == "a.txt"));
        assert!(matches!(r.sources[1], Source::Stdin));
    }

    #[test]
    fn reader_from_args_command_mode() {
        let r = Reader::from_args(&["echo".to_string(), "hi".to_string()], true);
        assert!(matches!(&r.sources[0], Source::Command(args) if args[0] == "echo"));
    }

    #[test]
    fn tee_filter_passthrough() {
        let mut captured = Vec::new();
        let mut tee = TeeFilter::new(&mut captured);
        let data = b"test data";
        let out = tee.filter(data);
        assert_eq!(out, data);
        assert_eq!(captured, data);
    }
}