1use crate::set::LaterOperand;
8use anyhow::{Context, Result};
9use bstr::io::BufReadExt;
10use encoding_rs_io::{DecodeReaderBytes, DecodeReaderBytesBuilder};
11use std::{
12 fs,
13 fs::File,
14 io::{self, Read},
15 ops::FnMut,
16 path::{Path, PathBuf},
17};
18
/// Returns `true` when `path` is exactly `-`, the operand that stands for
/// standard input rather than a file on disk.
fn use_stdin(path: &Path) -> bool {
    // A path that is not valid Unicode can never be the single character "-",
    // so `to_str` returning `None` correctly yields `false`.
    matches!(path.to_str(), Some("-"))
}
23#[must_use]
26pub fn first_and_rest(files: &[PathBuf]) -> Option<(Result<Vec<u8>>, Remaining)> {
27 fn all_of_stdin() -> Result<Vec<u8>> {
28 let mut buffer = Vec::new();
29 io::stdin().read_to_end(&mut buffer).context("Can't read file: <stdin>")?;
30 Ok(decode_if_utf16(buffer))
31 }
32
33 match files {
34 [] => None,
35 [first, rest @ ..] => {
36 let first_operand = if use_stdin(first) {
37 all_of_stdin()
38 } else {
39 fs::read(first)
40 .with_context(|| format!("Can't read file: {}", first.display()))
41 .map(decode_if_utf16)
42 };
43 let rest = rest.to_vec();
44 Some((first_operand, Remaining::from(rest)))
45 }
46 }
47}
48
49fn decode_if_utf16(candidate: Vec<u8>) -> Vec<u8> {
52 if let Some((enc, _)) = encoding_rs::Encoding::for_bom(&candidate) {
59 if [encoding_rs::UTF_16LE, encoding_rs::UTF_16BE].contains(&enc) {
60 let (translated, _had_malformed_sequences) =
61 enc.decode_without_bom_handling(&candidate);
62 return translated.into_owned().into_bytes();
63 }
64 }
65 candidate
66}
67
68pub struct Remaining {
72 files: std::vec::IntoIter<PathBuf>,
73}
74
75impl From<Vec<PathBuf>> for Remaining {
76 fn from(files: Vec<PathBuf>) -> Self {
77 Remaining { files: files.into_iter() }
78 }
79}
80
81impl Iterator for Remaining {
82 type Item = Result<NextOperand>;
83 fn next(&mut self) -> Option<Self::Item> {
84 self.files.next().map(|path| reader_for(&path))
85 }
86}
87
88impl ExactSizeIterator for Remaining {
89 fn len(&self) -> usize {
90 self.files.len()
91 }
92}
93
/// One operand after the first: the item type produced by `Remaining`.
pub struct NextOperand {
    /// Printable form of the operand's path (`<stdin>` for standard input),
    /// used when building error messages.
    path_display: String,
    /// Buffered source that the operand's lines are read from.
    reader: Box<dyn io::BufRead>,
}
101
102#[allow(trivial_casts)]
107fn reader_for(path: &Path) -> Result<NextOperand> {
108 fn decoder<R: Read>(f: R) -> DecodeReaderBytes<R, Vec<u8>> {
109 DecodeReaderBytesBuilder::new()
110 .bom_sniffing(true)
111 .strip_bom(true)
112 .utf8_passthru(true)
113 .build(f)
114 }
115 let (path_display, reader) = if use_stdin(path) {
116 let path_display = "<stdin>".to_string();
117 let reader = decoder(io::stdin().lock());
118 (path_display, Box::new(io::BufReader::new(reader)) as Box<dyn io::BufRead>)
119 } else {
120 let path_display = format!("{}", path.display());
121 let reader =
122 decoder(File::open(path).with_context(|| format!("Can't open file: {path_display}"))?);
123 (path_display, Box::new(io::BufReader::new(reader)) as Box<dyn io::BufRead>)
124 };
125 Ok(NextOperand { path_display, reader })
126}
127impl LaterOperand for NextOperand {
128 fn for_byte_line(self, mut for_each_line: impl FnMut(&[u8])) -> Result<()> {
130 let NextOperand { mut reader, path_display } = self;
131 reader
132 .for_byte_line(|line| {
133 for_each_line(line);
134 Ok(true)
135 })
136 .with_context(|| format!("Error reading file: {path_display}"))?;
137 Ok(())
138 }
139}
140
// Test-only code: pedantic lints are not worth enforcing on fixtures.
#[allow(clippy::pedantic)]
#[cfg(test)]
mod test {
    use super::*;

    /// U+FEFF; its UTF-8 encoding is what a decoded UTF-16 BOM turns into.
    const UTF8_BOM: &str = "\u{FEFF}";

    /// Prefixes `expected` with a BOM, matching the output of `decode_if_utf16`,
    /// which keeps the byte order mark when translating.
    fn abominate(expected: &str) -> String {
        UTF8_BOM.to_string() + expected
    }

    /// Encodes `source` as UTF-16LE, preceded by the little-endian BOM.
    ///
    /// Built from real UTF-16 code units so the fixture is valid for any text,
    /// not only ASCII.
    fn to_utf_16le(source: &str) -> Vec<u8> {
        let mut result = b"\xff\xfe".to_vec();
        result.extend(source.encode_utf16().flat_map(u16::to_le_bytes));
        result
    }

    /// Encodes `source` as UTF-16BE, preceded by the big-endian BOM.
    fn to_utf_16be(source: &str) -> Vec<u8> {
        let mut result = b"\xfe\xff".to_vec();
        result.extend(source.encode_utf16().flat_map(u16::to_be_bytes));
        result
    }

    #[test]
    fn utf_16le_is_translated_to_utf8() {
        let expected = "The cute red crab\n jumps over the lazy blue gopher\n";
        assert_eq!(decode_if_utf16(to_utf_16le(expected)), abominate(expected).as_bytes());
    }

    #[test]
    fn utf_16be_is_translated_to_utf8() {
        let expected = "The cute red crab\n jumps over the lazy blue gopher\n";
        assert_eq!(decode_if_utf16(to_utf_16be(expected)), abominate(expected).as_bytes());
    }

    #[test]
    fn non_ascii_utf_16_is_translated_to_utf8() {
        // Covers two-byte, three-byte and surrogate-pair characters.
        let expected = "na\u{EF}ve caf\u{E9} \u{2014} \u{1F980}\n";
        assert_eq!(decode_if_utf16(to_utf_16le(expected)), abominate(expected).as_bytes());
        assert_eq!(decode_if_utf16(to_utf_16be(expected)), abominate(expected).as_bytes());
    }
}