use crate::set::LaterOperand;
use anyhow::{Context, Result};
use bstr::io::BufReadExt;
use encoding_rs_io::{DecodeReaderBytes, DecodeReaderBytesBuilder};
use std::{
fs,
fs::File,
io::{self, Read},
ops::FnMut,
path::{Path, PathBuf},
};
/// Returns `true` when `path` is spelled exactly `-`, the operand that stands
/// for standard input instead of a file on disk.
fn use_stdin(path: &Path) -> bool {
    // Compare the raw OS string directly; no lossy UTF-8 conversion is needed.
    path.as_os_str() == "-"
}
#[must_use]
pub fn first_and_rest(files: &[PathBuf]) -> Option<(Result<Vec<u8>>, Remaining)> {
fn all_of_stdin() -> Result<Vec<u8>> {
let mut buffer = Vec::new();
io::stdin().read_to_end(&mut buffer).context("Can't read file: <stdin>")?;
Ok(decode_if_utf16(buffer))
}
match files {
[] => None,
[first, rest @ ..] => {
let first_operand = if use_stdin(first) {
all_of_stdin()
} else {
fs::read(first)
.with_context(|| format!("Can't read file: {}", first.display()))
.map(decode_if_utf16)
};
let rest = rest.to_vec();
Some((first_operand, Remaining::from(rest)))
}
}
}
fn decode_if_utf16(candidate: Vec<u8>) -> Vec<u8> {
if let Some((enc, _)) = encoding_rs::Encoding::for_bom(&candidate) {
if [encoding_rs::UTF_16LE, encoding_rs::UTF_16BE].contains(&enc) {
let (translated, _had_malformed_sequences) =
enc.decode_without_bom_handling(&candidate);
return translated.into_owned().into_bytes();
}
}
candidate
}
/// Iterator over the operands that follow the first one.
///
/// Holds only the paths; each operand is opened when the iterator is advanced
/// (its `Iterator::next` calls `reader_for` on the next path).
pub struct Remaining {
// Paths not yet yielded, in their original order.
files: std::vec::IntoIter<PathBuf>,
}
impl From<Vec<PathBuf>> for Remaining {
fn from(files: Vec<PathBuf>) -> Self {
Remaining { files: files.into_iter() }
}
}
impl Iterator for Remaining {
    type Item = Result<NextOperand>;

    /// Takes the next path, if any, and tries to open it with `reader_for`.
    /// A failure to open is yielded as an `Err` item; iteration itself only
    /// ends when the paths run out.
    fn next(&mut self) -> Option<Self::Item> {
        self.files.next().map(|path| reader_for(&path))
    }

    /// Reports the exact number of operands left.
    ///
    /// `Remaining` implements `ExactSizeIterator`, whose contract requires
    /// `size_hint` to return the exact size; the default `(0, None)` would
    /// violate it. Every path produces exactly one item, so the inner
    /// iterator's hint is the right answer.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.files.size_hint()
    }
}
impl ExactSizeIterator for Remaining {
    /// Number of operands that have not been yielded yet.
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.files)
    }
}
/// An opened operand, ready to be read line by line.
pub struct NextOperand {
// Name used when reporting read errors: `<stdin>` or the displayed path.
path_display: String,
// Buffered reader over the operand; `reader_for` wraps the underlying
// source in a BOM-sniffing decoder before buffering it.
reader: Box<dyn io::BufRead>,
}
/// Opens `path` for line-oriented reading.
///
/// The path `-` selects a locked standard input, displayed as `<stdin>`;
/// anything else is opened as a file. Either source is wrapped in a decoder
/// configured for BOM sniffing, BOM stripping and UTF-8 pass-through, and then
/// in a `BufReader`.
///
/// # Errors
///
/// Returns an error naming the file when it cannot be opened.
fn reader_for(path: &Path) -> Result<NextOperand> {
    fn decoder<R: Read>(f: R) -> DecodeReaderBytes<R, Vec<u8>> {
        let mut builder = DecodeReaderBytesBuilder::new();
        builder.bom_sniffing(true).strip_bom(true).utf8_passthru(true);
        builder.build(f)
    }

    /// Decodes and buffers `source`, erasing its concrete type.
    fn buffered<R: Read + 'static>(source: R) -> Box<dyn io::BufRead> {
        Box::new(io::BufReader::new(decoder(source)))
    }

    if use_stdin(path) {
        return Ok(NextOperand {
            path_display: "<stdin>".to_string(),
            reader: buffered(io::stdin().lock()),
        });
    }

    let path_display = path.display().to_string();
    let file = File::open(path).with_context(|| format!("Can't open file: {path_display}"))?;
    Ok(NextOperand { path_display, reader: buffered(file) })
}
impl LaterOperand for NextOperand {
    /// Consumes the operand, handing every line produced by the reader's
    /// `for_byte_line` to `for_each_line`.
    ///
    /// The callback cannot stop the iteration early: the inner closure always
    /// answers `Ok(true)`.
    ///
    /// # Errors
    ///
    /// A read failure is returned with the operand's display name attached.
    fn for_byte_line(self, mut for_each_line: impl FnMut(&[u8])) -> Result<()> {
        let Self { path_display, mut reader } = self;
        let outcome = reader.for_byte_line(|bytes| {
            for_each_line(bytes);
            Ok(true)
        });
        outcome.with_context(|| format!("Error reading file: {path_display}"))
    }
}
#[allow(clippy::pedantic)]
#[cfg(test)]
mod test {
    use super::*;

    /// U+FEFF; its UTF-8 encoding is the three-byte UTF-8 byte-order mark.
    const UTF8_BOM: &str = "\u{FEFF}";

    /// Prefixes `expected` with the UTF-8 BOM, which `decode_if_utf16` keeps
    /// when it transcodes UTF-16 input.
    fn abominate(expected: &str) -> String {
        [UTF8_BOM, expected].concat()
    }

    /// Encodes `source` as UTF-16LE preceded by the little-endian BOM.
    ///
    /// Goes through `str::encode_utf16` so that non-ASCII text (including
    /// characters needing surrogate pairs) yields valid UTF-16, not just
    /// ASCII bytes padded with zeros.
    fn to_utf_16le(source: &str) -> Vec<u8> {
        let mut result = b"\xff\xfe".to_vec();
        result.extend(source.encode_utf16().flat_map(u16::to_le_bytes));
        result
    }

    /// Encodes `source` as UTF-16BE preceded by the big-endian BOM.
    fn to_utf_16be(source: &str) -> Vec<u8> {
        let mut result = b"\xfe\xff".to_vec();
        result.extend(source.encode_utf16().flat_map(u16::to_be_bytes));
        result
    }

    #[test]
    fn utf_16le_is_translated_to_utf8() {
        let expected = "The cute red crab\n jumps over the lazy blue gopher\n";
        assert_eq!(decode_if_utf16(to_utf_16le(expected)), abominate(expected).as_bytes());
    }

    #[test]
    fn utf_16be_is_translated_to_utf8() {
        let expected = "The cute red crab\n jumps over the lazy blue gopher\n";
        assert_eq!(decode_if_utf16(to_utf_16be(expected)), abominate(expected).as_bytes());
    }

    #[test]
    fn non_ascii_utf_16_is_translated_to_utf8() {
        // Mixes two-byte, three-byte and surrogate-pair characters.
        let expected = "na\u{EF}ve caf\u{E9} \u{2013} \u{1F980}\n";
        assert_eq!(decode_if_utf16(to_utf_16le(expected)), abominate(expected).as_bytes());
        assert_eq!(decode_if_utf16(to_utf_16be(expected)), abominate(expected).as_bytes());
    }
}