use crate::comp::{CompMaker, Compare};
use crate::prelude::*;
use anyhow;
use flate2::read::MultiGzDecoder;
use fs_err as fs;
use regex::Regex;
use std::cmp;
use std::error;
use std::fmt::Write as _;
use std::io;
use std::ops::{Deref, DerefMut};
use std::str;
use std::sync::LazyLock;
/// Calls `init()` on `AggMaker`, `CompMaker`, `MatchMaker`, `GenMaker` and
/// `TransMaker`, in that order.
///
/// # Errors
/// Returns the first error produced by any of those calls; later calls are skipped.
fn do_init() -> Result<()> {
crate::agg::AggMaker::init()?;
CompMaker::init()?;
crate::matcher::MatchMaker::init()?;
crate::textgen::GenMaker::init()?;
crate::trans::TransMaker::init()?;
Ok(())
}
/// Lazily computed outcome of [`do_init`].
///
/// A failure is stored as the `Debug` rendering of the error so that it can be
/// reported again on every later call to [`init`].
static INIT: LazyLock<core::result::Result<(), String>> =
    LazyLock::new(|| do_init().map_err(|failure| format!("{failure:?}")));
/// Runs the one-time initialisation, or reports the result of the first run.
///
/// # Errors
/// Returns the stored failure text if the first initialisation failed.
pub fn init() -> Result<()> {
    if let Err(message) = &*INIT {
        return err!(message);
    }
    Ok(())
}
/// Shorthand for `Err(anyhow!(...))`: forwards all of its tokens to `anyhow!`
/// and wraps the resulting error in `Err`.
#[macro_export]
macro_rules! err {
($($x:tt)*) => {Err(anyhow!($($x)*))}
}
// Re-exported so the macro can be named by path from this module.
pub use err;
/// Error values specific to this crate.
#[derive(Debug)]
#[non_exhaustive]
pub enum CdxError {
/// A free-form error; displayed as the contained text.
Error(String),
/// Displayed as "ColumnSet.lookup() must be called before ColumnSet.select()".
NeedLookup,
/// Recognised by [`silent`].
Silent,
/// Recognised by [`suppress`]; displayed as "Not an error.".
NoError,
}
/// The error type used throughout: an alias for `anyhow::Error`.
pub type Error = anyhow::Error;
/// A `Result` whose error type is [`Error`].
pub type Result<T> = core::result::Result<T, Error>;
// Lets `CdxError` be wrapped in, and downcast from, `Error`.
impl error::Error for CdxError {}
pub fn cdx_err<T>(err: CdxError) -> Result<T> {
Err(Error::new(err))
}
/// Reports whether `err` is a broken-pipe I/O error or [`CdxError::NoError`].
///
/// An `io::Error` of any other kind yields `false` without consulting `CdxError`.
#[must_use]
pub fn suppress(err: &Error) -> bool {
    match err.downcast_ref::<io::Error>() {
        Some(io_failure) => io_failure.kind() == io::ErrorKind::BrokenPipe,
        None => matches!(err.downcast_ref::<CdxError>(), Some(CdxError::NoError)),
    }
}
/// Reports whether `err` is [`CdxError::Silent`].
#[must_use]
pub fn silent(err: &Error) -> bool {
    err.downcast_ref::<CdxError>()
        .is_some_and(|kind| matches!(kind, CdxError::Silent))
}
impl fmt::Display for CdxError {
    /// Writes the user-facing text for each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = match self {
            Self::Error(s) => s.as_str(),
            Self::NeedLookup => "ColumnSet.lookup() must be called before ColumnSet.select()",
            Self::Silent => "Silent",
            Self::NoError => "Not an error.",
        };
        f.write_str(text)
    }
}
/// A three-valued answer: yes, no, or maybe.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Tri {
/// Parsed from "yes", "true", "1" or "on".
Yes,
/// Parsed from "no", "false", "0" or "off".
No,
/// Parsed from "maybe" or "sometimes".
Maybe,
}
impl Tri {
    /// Parses a `Tri` from text, ignoring ASCII case.
    ///
    /// # Errors
    /// Fails when `x` is none of the accepted words.
    pub fn new(x: &str) -> Result<Self> {
        const YES_WORDS: [&str; 4] = ["yes", "true", "1", "on"];
        const NO_WORDS: [&str; 4] = ["no", "false", "0", "off"];
        const MAYBE_WORDS: [&str; 2] = ["maybe", "sometimes"];
        let is_one_of = |words: &[&str]| words.iter().any(|word| x.eq_ignore_ascii_case(word));

        if is_one_of(&YES_WORDS) {
            return Ok(Self::Yes);
        }
        if is_one_of(&NO_WORDS) {
            return Ok(Self::No);
        }
        if is_one_of(&MAYBE_WORDS) {
            return Ok(Self::Maybe);
        }
        err!("Tri value must be yes, no or maybe '{}'", x)
    }

    /// `Yes` when `x` is true, otherwise `Maybe`.
    #[must_use]
    pub const fn yes_if(x: bool) -> Self {
        match x {
            true => Self::Yes,
            false => Self::Maybe,
        }
    }

    /// `No` when `x` is true, otherwise `Maybe`.
    #[must_use]
    pub const fn no_if(x: bool) -> Self {
        match x {
            true => Self::No,
            false => Self::Maybe,
        }
    }
}
/// How the first line of a text file is treated; selected by the first
/// character of a text-format spec.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum HeadMode {
/// `y`: the first line is always read as the header.
Yes,
/// `n`: no header line is read.
No,
/// `m`: the first line is read as a header only when it starts with " CDX".
Maybe,
/// `s`: a first line starting with " CDX" is read and then discarded.
Skip,
/// `x`: the header reader treats this like `Yes`.
/// NOTE(review): any further meaning of this mode is not visible here — confirm.
Cdx,
}
impl HeadMode {
    /// True for the `Cdx` and `Maybe` modes, false for every other mode.
    #[must_use]
    pub fn has_cdx(self) -> bool {
        matches!(self, Self::Cdx | Self::Maybe)
    }
}
/// How column delimiters inside field data are handled; selected by the third
/// character of a text-format spec.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum QuoteMode {
/// `p`: no quoting; on output, delimiter bytes in the data are replaced.
Plain,
/// `q`: double-quote quoting.
Quote,
/// `b`: backslash escaping.
Backslash,
}
/// Maps an auto-escape letter to the byte it stands for.
///
/// `t`, `s`, `r` and `n` become tab, space, carriage return and newline.
/// Any other character is converted with `as u8`, which keeps only its low
/// eight bits.
#[must_use]
pub const fn auto_escape(ch: char) -> u8 {
    match ch {
        't' => b'\t',
        's' => b' ',
        'r' => b'\r',
        'n' => b'\n',
        other => other as u8,
    }
}
/// Maps an auto-escape letter to the character it stands for.
///
/// `t`, `s`, `r` and `n` become tab, space, carriage return and newline;
/// every other character is returned unchanged.
#[must_use]
pub const fn auto_escape_char(ch: char) -> char {
    match ch {
        't' => '\t',
        's' => ' ',
        'r' => '\r',
        'n' => '\n',
        other => other,
    }
}
/// Help text printed by `TextFileMode::text_help`, describing the
/// `--text-in` / `--text-out` format spec.
///
/// NOTE(review): the spec parser also accepts `x` as a header mode, which this
/// text does not mention, and the auto-escape list writes `/r` and `/n` where
/// `\r` and `\n` are presumably meant — confirm before changing the text.
const TEXT_HELP_STRING: &str = r#"
# Input Text Files #
All tools have a --text-in option, that takes a string of up to five characters
which defines how text files are parsed.
The first character is the header mode, which can be
m - maybe (default) : Use the CDX header if it exists, otherwise don't.
y - yes : A CDX header is required
n - no : A CDX header is forbidden
s - skip : A CDX header, if present, is ignored.
The second character is the column delimiter, auto-escaped
default is 't' for 'tab'
The third character is the quoting mode
p - plain (default) : no quoting is done. It is impossible to have a data field
containing the column delimiter.
q - quote : The column delimiter is ignored when between double quotes : '"'
Within a quoted sting, '""' is used to mean '"'
b - backslash : All backslashes are combined with the following character in the usual way.
The fourth character is the replacement character, used when the quote mode is 'plain'.
Any occurrences of the column delimiter are replaced with this character.
default is space.
The fifth character is the line delimiter, default is newline (\n)
Omitted trailing characters accept the default, so 'y' is the same as 'ytpsn'
# Output Text Files #
All tools have a --text-out option that controls how text files are written.
Its parameter is treated exactly like --text-in, except that the default values
are those passed to --text-in.
If --text-in specified "maybe" as the header mode, then the output default is
'yes' if an input header was present and 'no' otherwise.
# Auto Escaped #
Auto escaping make it easier to specify special characters,
mostly by implying a leading backslash.
Auto escaping maps letters to other characters :
t is tab
s is space
r is return (/r)
n is newline (/n)
other lowercase ascii is reserved
otherwise the character is unchanged.
"#;
/// The parsed form of a text-format spec: how text files are read and written.
#[derive(Debug, Copy, Clone)]
pub struct TextFileMode {
/// How the first line of the file is treated.
pub head_mode: HeadMode,
/// How delimiters inside field data are handled.
pub col_mode: QuoteMode,
/// Column delimiter byte.
pub delim: u8,
/// Line delimiter byte.
pub line_break: u8,
/// Replacement byte passed to the plain-mode writer.
pub repl: u8,
}
impl Default for TextFileMode {
/// Header mode `Maybe`, plain quoting, tab-delimited columns,
/// newline-delimited lines, space as the replacement byte.
fn default() -> Self {
Self {
head_mode: HeadMode::Maybe,
col_mode: QuoteMode::Plain,
delim: b'\t',
line_break: b'\n',
repl: b' ',
}
}
}
impl TextFileMode {
pub fn new(spec: &str) -> Result<Self> {
Self::new_with(spec, &Self::default())
}
pub fn new_with(spec: &str, dflt: &Self) -> Result<Self> {
let mut spec = spec;
let mut head_mode = dflt.head_mode;
let mut col_mode = dflt.col_mode;
let mut delim = dflt.delim;
let mut line_break = dflt.line_break;
let mut repl = dflt.repl;
if !spec.is_empty() {
let ch = spec.take_first();
if ch == 'y' {
head_mode = HeadMode::Yes;
} else if ch == 'n' {
head_mode = HeadMode::No;
} else if ch == 's' {
head_mode = HeadMode::Skip;
} else if ch == 'm' {
head_mode = HeadMode::Maybe;
} else if ch == 'x' {
head_mode = HeadMode::Cdx;
} else {
return err!(
"First char of text-fmt spec must be y (yes), n (no), m (maybe) s (skip) or x (cdx) not '{ch}'"
);
}
}
if !spec.is_empty() {
let ch = spec.take_first();
delim = auto_escape(ch);
}
if !spec.is_empty() {
let ch = spec.take_first();
if ch == 'p' {
col_mode = QuoteMode::Plain;
} else if ch == 'q' {
col_mode = QuoteMode::Quote;
} else if ch == 'b' {
col_mode = QuoteMode::Backslash;
} else {
return err!(
"Fourth char of text-fmt spec must be p (plain), q (quote) or b (backslash) not '{ch}'"
);
}
}
if !spec.is_empty() {
let ch = spec.take_first();
repl = auto_escape(ch);
}
if !spec.is_empty() {
let ch = spec.take_first();
line_break = auto_escape(ch);
}
Ok(Self { head_mode, col_mode, delim, line_break, repl })
}
pub fn text_help() {
println!("{TEXT_HELP_STRING}");
}
pub fn write(&self, w: &mut dyn Write, buf: &[u8]) -> Result<()> {
match self.col_mode {
QuoteMode::Plain => write_plain(w, buf, self.delim, self.line_break, self.repl),
QuoteMode::Backslash => write_backslash(w, buf, self.delim, self.line_break),
QuoteMode::Quote => write_quotes(w, buf, self.delim, self.line_break),
}
}
pub fn split(&self, line: &mut TextLine) {
match self.col_mode {
QuoteMode::Plain => split_plain(&mut line.parts, &line.line, self.delim),
QuoteMode::Backslash => {
line.orig.clear();
line.orig.extend_from_slice(&line.line);
split_backslash(&mut line.parts, &line.orig, &mut line.line, self.delim);
}
QuoteMode::Quote => {
line.orig.clear();
line.orig.extend_from_slice(&line.line);
split_quotes(&mut line.parts, &line.orig, &mut line.line, self.delim);
}
}
}
pub fn split_head(&self, line: &mut StringLine) {
line.split(self.delim);
if line.line.starts_with(" CDX") {
line.parts.remove(0);
}
}
pub fn ensure_eof(&self, line: &mut Vec<u8>) -> Result<bool> {
if line[line.len() - 1] != self.line_break {
line.push(self.line_break);
}
Ok(false)
}
pub fn read_string<T: BufRead>(&self, f: &mut T, line: &mut String) -> Result<bool> {
let mut x = Vec::new();
if self.read_line(f, &mut x)? {
return Ok(true);
}
*line = String::from_utf8(x)?;
Ok(false)
}
pub fn read_header<T: BufRead>(&self, f: &mut T, line: &mut String) -> Result<bool> {
line.clear();
let start = f.fill_buf()?;
if start.is_empty() {
return Ok(true);
}
match self.head_mode {
HeadMode::Yes => self.read_string(f, line),
HeadMode::Cdx => self.read_string(f, line),
HeadMode::No => Ok(false),
HeadMode::Maybe => {
if start.starts_with(b" CDX") {
self.read_string(f, line)
} else {
Ok(false)
}
}
HeadMode::Skip => {
if start.starts_with(b" CDX") {
self.read_string(f, line)?;
line.clear();
}
let start = f.fill_buf()?;
Ok(start.is_empty())
}
}
}
pub fn read_line<T: BufRead>(&self, f: &mut T, line: &mut Vec<u8>) -> Result<bool> {
line.clear();
let sz = f.read_until(self.line_break, line)?;
if sz == 0 {
return Ok(true);
}
if self.col_mode != QuoteMode::Quote {
return self.ensure_eof(line);
}
let mut within = false;
let mut skip = 0;
loop {
for ch in line.iter().skip(skip) {
if *ch == b'"' {
within = !within;
}
}
skip = line.len();
if !within {
return self.ensure_eof(line);
}
let sz = f.read_until(self.line_break, line)?;
if sz == 0 {
return self.ensure_eof(line);
}
}
}
}
/// A byte range (`begin` inclusive, `end` exclusive) into some buffer that is
/// supplied separately, stored as two `u32` offsets.
#[derive(Debug, Clone, Copy, Default)]
pub struct FakeSlice {
// zero-based offset of the first byte
begin: u32,
// zero-based offset one past the last byte
end: u32,
}
impl FakeSlice {
    /// Narrows a parsed position to `u32`, rejecting values that do not fit
    /// instead of silently truncating them.
    fn narrow(value: usize, spec: &str) -> Result<u32> {
        u32::try_from(value).or_else(|_| err!("Invalid range, number is too large '{}'", spec))
    }

    /// Parses a one-based position `N` or an inclusive one-based range `A-B`.
    ///
    /// `N` becomes the single byte at zero-based offset `N - 1`; `A-B` becomes
    /// the bytes from offset `A - 1` up to, but not including, offset `B`.
    ///
    /// # Errors
    /// Fails when a number cannot be parsed, is zero, does not fit in `u32`,
    /// or when `A` is greater than `B`.
    pub fn new(spec: &str) -> Result<Self> {
        if let Some((a, b)) = spec.split_once('-') {
            let begin = Self::narrow(a.to_usize_whole(spec.as_bytes(), "range")?, spec)?;
            let end = Self::narrow(b.to_usize_whole(spec.as_bytes(), "range")?, spec)?;
            if begin == 0 || end == 0 {
                err!("Invalid range, both number must be greater than zero")
            } else if begin > end {
                err!("Invalid range, begin is greater than end")
            } else {
                Ok(Self { begin: begin - 1, end })
            }
        } else {
            let num = Self::narrow(spec.to_usize_whole(spec.as_bytes(), "position")?, spec)?;
            if num == 0 {
                err!("Invalid offset, must be greater than zero")
            } else {
                Ok(Self { begin: num - 1, end: num })
            }
        }
    }

    /// Returns the bytes of `data` covered by this range.
    ///
    /// # Panics
    /// Panics when the range reaches past the end of `data`.
    #[must_use]
    pub fn get<'a>(&self, data: &'a [u8]) -> &'a [u8] {
        &data[self.begin as usize..self.end as usize]
    }

    /// Like [`FakeSlice::get`], but clamps both ends to `data.len()`.
    #[must_use]
    pub fn get_safe<'a>(&self, data: &'a [u8]) -> &'a [u8] {
        let limit = data.len();
        &data[cmp::min(self.begin as usize, limit)..cmp::min(self.end as usize, limit)]
    }

    /// Number of bytes covered by the range.
    #[must_use]
    pub const fn len(&self) -> usize {
        (self.end - self.begin) as usize
    }

    /// True when the range covers no bytes.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.begin == self.end
    }
}
/// Writes `buf` to `w` with every occurrence of `tab` or `eol` replaced by `repl`.
///
/// Unchanged bytes are written as contiguous runs rather than one
/// `write_all` call per byte.
///
/// # Errors
/// Propagates any error from `w`.
fn write_plain(w: &mut dyn Write, buf: &[u8], tab: u8, eol: u8, repl: u8) -> Result<()> {
    let mut rest = buf;
    while let Some(pos) = rest.iter().position(|&ch| ch == tab || ch == eol) {
        w.write_all(&rest[..pos])?;
        w.write_all(&[repl])?;
        rest = &rest[pos + 1..];
    }
    w.write_all(rest)?;
    Ok(())
}
/// Writes `buf` to `w`, putting a backslash in front of every `tab`, `eol`
/// and backslash byte; the escaped byte itself is mapped through `enslash`.
///
/// # Errors
/// Propagates any error from `w`.
fn write_backslash(w: &mut dyn Write, buf: &[u8], tab: u8, eol: u8) -> Result<()> {
    for &byte in buf {
        let needs_escape = byte == b'\\' || byte == tab || byte == eol;
        if !needs_escape {
            w.write_all(&[byte])?;
            continue;
        }
        w.write_all(b"\\")?;
        w.write_all(&[enslash(byte)])?;
    }
    Ok(())
}
/// Writes `buf` to `w` in quote mode.
///
/// Every `"` in the data is written as `""`. When the data contains `tab` or
/// `eol`, the whole field is additionally wrapped in a pair of double quotes.
/// Both the opening and the closing quote are written, so the quotes on the
/// output line stay balanced.
///
/// # Errors
/// Propagates any error from `w`.
fn write_quotes(w: &mut dyn Write, buf: &[u8], tab: u8, eol: u8) -> Result<()> {
    let needs_quotes = buf.iter().any(|&ch| ch == tab || ch == eol);
    if needs_quotes {
        w.write_all(b"\"")?;
    }
    // Each run ends right after a quote (if it contains one), so doubling the
    // quote is just a matter of writing one more after the run.
    for run in buf.split_inclusive(|&ch| ch == b'"') {
        w.write_all(run)?;
        if run.last() == Some(&b'"') {
            w.write_all(b"\"")?;
        }
    }
    if needs_quotes {
        w.write_all(b"\"")?;
    }
    Ok(())
}
/// Splits `line` on `delim`, storing one range per column in `parts`.
///
/// `parts` is cleared first. A trailing `\n` is left out of the last column.
/// No final column is recorded when the input is empty or ends directly
/// after a delimiter.
pub fn split_plain(parts: &mut Vec<FakeSlice>, line: &[u8], delim: u8) {
    parts.clear();
    let mut start: u32 = 0;
    for (offset, &byte) in line.iter().enumerate() {
        if byte == delim {
            let here = offset as u32;
            parts.push(FakeSlice { begin: start, end: here });
            start = here + 1;
        }
    }
    let total = line.len() as u32;
    if start == total {
        return;
    }
    let stop = if line.last() == Some(&b'\n') { total - 1 } else { total };
    parts.push(FakeSlice { begin: start, end: stop });
}
/// Decodes the byte that followed a backslash: `n`, `r` and `t` become
/// newline, carriage return and tab; any other byte is returned as is.
#[must_use]
pub const fn unslash(ch: u8) -> u8 {
    if ch == b'n' {
        b'\n'
    } else if ch == b'r' {
        b'\r'
    } else if ch == b't' {
        b'\t'
    } else {
        ch
    }
}
/// Picks the byte written after a backslash: newline, carriage return and
/// tab become `n`, `r` and `t`; any other byte is returned as is.
#[must_use]
pub const fn enslash(ch: u8) -> u8 {
    if ch == b'\n' {
        b'n'
    } else if ch == b'\r' {
        b'r'
    } else if ch == b'\t' {
        b't'
    } else {
        ch
    }
}
/// Decodes a backslash-escaped `line` into `tmp` and records the column
/// ranges, which index into `tmp`, in `parts`.
///
/// Trailing `\n` bytes are dropped before decoding. A backslash is removed
/// and the byte after it is decoded with `unslash` and never treated as a
/// delimiter. A backslash that is the last byte of the line is dropped. No
/// final column is recorded when the decoded text is empty or ends directly
/// after a delimiter.
pub fn split_backslash(parts: &mut Vec<FakeSlice>, line: &[u8], tmp: &mut Vec<u8>, delim: u8) {
    parts.clear();
    tmp.clear();
    let mut body = line;
    while let [head @ .., b'\n'] = body {
        body = head;
    }
    let mut begin: u32 = 0;
    let mut bytes = body.iter().copied();
    while let Some(byte) = bytes.next() {
        if byte == b'\\' {
            if let Some(escaped) = bytes.next() {
                tmp.push(unslash(escaped));
            }
            continue;
        }
        if byte == delim {
            // The delimiter is about to be stored at this offset of `tmp`.
            let end = tmp.len() as u32;
            parts.push(FakeSlice { begin, end });
            begin = end + 1;
        }
        tmp.push(byte);
    }
    let end = tmp.len() as u32;
    if begin != end {
        parts.push(FakeSlice { begin, end });
    }
}
/// Decodes a quote-mode `line` into `tmp` and records the column ranges,
/// which index into `tmp`, in `parts`.
///
/// Trailing `\n` bytes are dropped before decoding. Two adjacent double
/// quotes (`""`) always decode to one literal `"`. A lone `"` opens or closes
/// a quoted region and is not copied to `tmp`. Delimiters inside a quoted
/// region are ordinary data and do not start a new column. No final column
/// is recorded when the decoded text is empty or ends directly after a
/// delimiter.
pub fn split_quotes(parts: &mut Vec<FakeSlice>, line: &[u8], tmp: &mut Vec<u8>, delim: u8) {
    parts.clear();
    tmp.clear();
    let mut body = line;
    while let [head @ .., b'\n'] = body {
        body = head;
    }
    let mut begin: u32 = 0;
    let mut in_quote = false;
    let mut bytes = body.iter().copied().peekable();
    while let Some(ch) = bytes.next() {
        if ch == b'"' {
            if bytes.peek() == Some(&b'"') {
                // Doubled quote: one literal quote, the region state is unchanged.
                bytes.next();
                tmp.push(b'"');
            } else {
                in_quote = !in_quote;
            }
            continue;
        }
        if ch == delim && !in_quote {
            // The delimiter is about to be stored at this offset of `tmp`.
            let end = tmp.len() as u32;
            parts.push(FakeSlice { begin, end });
            begin = end + 1;
        }
        tmp.push(ch);
    }
    let end = tmp.len() as u32;
    if begin != end {
        parts.push(FakeSlice { begin, end });
    }
}
/// One line of input as bytes, together with its column ranges.
#[derive(Debug, Clone, Default)]
pub struct TextLine {
// the bytes that `parts` indexes into
pub(crate) line: Vec<u8>,
// one range per column
pub(crate) parts: Vec<FakeSlice>,
// filled by `TextFileMode::split` with the raw bytes in the quote and backslash modes
pub(crate) orig: Vec<u8>,
}
/// One line of input as a `String`, together with its column ranges.
#[derive(Debug, Clone, Default)]
pub struct StringLine {
/// The text that `parts` indexes into.
pub line: String,
/// One range per column.
pub parts: Vec<FakeSlice>,
/// Alternative text returned by `line()` when it is not empty.
pub orig: String,
}
impl fmt::Display for TextLine {
    /// Writes every column followed by a single space.
    ///
    /// Columns are arbitrary bytes; invalid UTF-8 is shown with U+FFFD
    /// replacement characters instead of causing a panic.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for column in self {
            write!(f, "{} ", String::from_utf8_lossy(column))?;
        }
        Ok(())
    }
}
impl fmt::Display for StringLine {
    /// Writes every column followed by a single space.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.iter().try_for_each(|column| write!(f, "{column} "))
    }
}
impl std::ops::Index<usize> for TextLine {
type Output = [u8];
/// Returns column `index` by delegating to `TextLine::get`.
fn index(&self, index: usize) -> &Self::Output {
self.get(index)
}
}
impl std::ops::Index<usize> for StringLine {
type Output = str;
/// Returns column `index` by delegating to `StringLine::get`.
fn index(&self, index: usize) -> &Self::Output {
self.get(index)
}
}
/// Writes `buf` to `w`, followed by a newline unless `buf` already ends with
/// one. Nothing is written for an empty `buf`.
///
/// # Errors
/// Propagates any error from `w`.
pub fn write_all_nl(w: &mut impl Write, buf: &[u8]) -> Result<()> {
    let Some(&last) = buf.last() else {
        return Ok(());
    };
    w.write_all(buf)?;
    if last != b'\n' {
        w.write_all(b"\n")?;
    }
    Ok(())
}
impl TextLine {
    /// Mutable access to the column ranges.
    pub const fn parts(&mut self) -> &mut Vec<FakeSlice> {
        &mut self.parts
    }

    /// The saved raw line (`orig`) when one is present, otherwise the current bytes.
    #[must_use]
    pub fn line(&self) -> &[u8] {
        if self.orig.is_empty() { &self.line } else { &self.orig }
    }

    /// The current bytes without their last byte.
    ///
    /// An empty line yields an empty slice rather than panicking.
    #[must_use]
    pub fn line_nl(&self) -> &[u8] {
        &self.line[..self.line.len().saturating_sub(1)]
    }

    /// Mutable access to the current bytes.
    pub const fn raw(&mut self) -> &mut Vec<u8> {
        &mut self.line
    }

    /// Makes `self` a copy of `x`, reusing the existing allocations.
    ///
    /// `orig` is copied too, so that `line()` cannot return bytes left over
    /// from whatever `self` held before.
    pub fn assign(&mut self, x: &Self) {
        self.line.clear();
        self.line.extend_from_slice(&x.line);
        self.parts.clear();
        self.parts.extend_from_slice(&x.parts);
        self.orig.clear();
        self.orig.extend_from_slice(&x.orig);
    }

    /// Creates an empty line.
    #[must_use]
    pub const fn new() -> Self {
        Self { line: Vec::new(), parts: Vec::new(), orig: Vec::new() }
    }

    /// Iterates over the columns.
    #[must_use]
    pub const fn iter(&self) -> TextLineIter<'_> {
        TextLineIter { line: self, index: 0 }
    }

    /// Empties the line: bytes, column ranges and the saved raw line.
    pub fn clear(&mut self) {
        self.parts.clear();
        self.line.clear();
        // A stale `orig` would otherwise be what `line()` returns.
        self.orig.clear();
    }

    /// Number of columns.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.parts.len()
    }

    /// Number of bytes in the current line.
    #[must_use]
    pub const fn strlen(&self) -> usize {
        self.line.len()
    }

    /// True when there are no columns.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.parts.is_empty()
    }

    /// Column `index`, or an empty slice when there is no such column.
    #[must_use]
    pub fn get(&self, index: usize) -> &[u8] {
        match self.parts.get(index) {
            Some(part) => &self.line[part.begin as usize..part.end as usize],
            None => &self.line[0..0],
        }
    }

    /// Reads one `\n`-terminated line from `f`, adding the newline if the
    /// input ended without one. Returns `Ok(true)` at end of input.
    ///
    /// # Errors
    /// Propagates read errors.
    pub fn read<T: BufRead>(&mut self, f: &mut T) -> Result<bool> {
        self.clear();
        if f.read_until(b'\n', &mut self.line)? == 0 {
            return Ok(true);
        }
        if self.line.last() != Some(&b'\n') {
            self.line.push(b'\n');
        }
        Ok(false)
    }

    /// Splits the line on `text.delim` using the plain splitter.
    // NOTE(review): the quoting mode in `text` is not consulted here — confirm
    // that callers wanting quote/backslash decoding use `TextFileMode::split`.
    pub fn split(&mut self, text: &TextFileMode) {
        split_plain(&mut self.parts, &self.line, text.delim);
    }

    /// All columns collected into a vector.
    #[must_use]
    pub fn vec(&self) -> Vec<&[u8]> {
        self.iter().collect()
    }
}
impl StringLine {
    /// Creates an empty line.
    #[must_use]
    pub const fn new() -> Self {
        Self { line: String::new(), parts: Vec::new(), orig: String::new() }
    }

    /// Iterates over the columns.
    #[must_use]
    pub const fn iter(&self) -> StringLineIter<'_> {
        StringLineIter { line: self, index: 0 }
    }

    /// Replaces the text with a synthetic header: " CDX" followed by
    /// `delim` + `c1`, `delim` + `c2`, ... for `num_cols` columns, and a newline.
    ///
    /// The column ranges are not recomputed. The delimiter byte is appended
    /// as the character with the same code point, so a byte of 0x80 or above
    /// no longer panics.
    pub fn fake(&mut self, num_cols: usize, delim: u8) {
        let delim = char::from(delim);
        self.line.clear();
        self.line.push_str(" CDX");
        for i in 1..=num_cols {
            self.line.push(delim);
            self.line.push('c');
            self.line.push_str(&i.to_string());
        }
        self.line.push('\n');
    }

    /// Empties the text and the column ranges.
    fn clear(&mut self) {
        self.parts.clear();
        self.line.clear();
    }

    /// Number of columns.
    #[must_use]
    pub const fn len(&self) -> usize {
        self.parts.len()
    }

    /// Number of bytes in the text.
    #[must_use]
    pub const fn strlen(&self) -> usize {
        self.line.len()
    }

    /// True when there are no columns.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.parts.is_empty()
    }

    /// Column `index`, or an empty string when there is no such column.
    #[must_use]
    pub fn get(&self, index: usize) -> &str {
        match self.parts.get(index) {
            Some(part) => &self.line[part.begin as usize..part.end as usize],
            None => &self.line[0..0],
        }
    }

    /// Reads one line from `f`, adding the newline if the input ended without
    /// one. Returns `Ok(true)` at end of input.
    ///
    /// # Errors
    /// Propagates read errors, including invalid UTF-8.
    pub fn read<T: BufRead>(&mut self, f: &mut T) -> Result<bool> {
        self.clear();
        if f.read_line(&mut self.line)? == 0 {
            return Ok(true);
        }
        if !self.line.ends_with('\n') {
            self.line.push('\n');
        }
        Ok(false)
    }

    /// Splits the text on `delim` using the plain splitter.
    pub fn split(&mut self, delim: u8) {
        split_plain(&mut self.parts, self.line.as_bytes(), delim);
    }

    /// All columns collected into a vector.
    #[must_use]
    pub fn vec(&self) -> Vec<&str> {
        self.iter().collect()
    }

    /// `orig` when it is not empty, otherwise the current text.
    #[must_use]
    pub fn line(&self) -> &str {
        if self.orig.is_empty() { &self.line } else { &self.orig }
    }
}
impl<'a> IntoIterator for &'a TextLine {
type Item = &'a [u8];
type IntoIter = TextLineIter<'a>;
fn into_iter(self) -> Self::IntoIter {
TextLineIter { line: self, index: 0 }
}
}
impl<'a> IntoIterator for &'a StringLine {
type Item = &'a str;
type IntoIter = StringLineIter<'a>;
fn into_iter(self) -> Self::IntoIter {
StringLineIter { line: self, index: 0 }
}
}
/// Iterator over the columns of a `TextLine`.
#[derive(Debug, Clone)]
pub struct TextLineIter<'a> {
// the line being iterated
line: &'a TextLine,
// index of the next column to yield
index: usize,
}
/// Iterator over the columns of a `StringLine`.
#[derive(Debug, Clone)]
pub struct StringLineIter<'a> {
// the line being iterated
line: &'a StringLine,
// index of the next column to yield
index: usize,
}
impl<'a> Iterator for TextLineIter<'a> {
    type Item = &'a [u8];

    /// Yields the next column, or `None` once every column has been returned.
    fn next(&mut self) -> Option<Self::Item> {
        let at = self.index;
        if at < self.line.len() {
            self.index = at + 1;
            Some(&self.line[at])
        } else {
            None
        }
    }
}

impl<'a> Iterator for StringLineIter<'a> {
    type Item = &'a str;

    /// Yields the next column, or `None` once every column has been returned.
    fn next(&mut self) -> Option<Self::Item> {
        let at = self.index;
        if at < self.line.parts.len() {
            self.index = at + 1;
            Some(&self.line[at])
        } else {
            None
        }
    }
}
/// Blocking reader over an S3 object (only built with the `s3` feature).
#[cfg(feature = "s3")]
struct S3Reader {
// runtime used to block on the SDK's futures
rt: tokio::runtime::Runtime,
// the `get_object` response whose body is read
f: aws_sdk_s3::operation::get_object::GetObjectOutput,
// part of the last body chunk that did not fit into the caller's buffer
left: Option<bytes::Bytes>,
}
#[cfg(feature = "s3")]
impl S3Reader {
    /// Starts a `get_object` request for `key` in `bucket` on a fresh
    /// current-thread runtime and blocks until the response arrives.
    ///
    /// # Errors
    /// Fails when the runtime cannot be built or the request fails.
    fn new(bucket: &str, key: &str) -> Result<Self> {
        let rt = tokio::runtime::Builder::new_current_thread().enable_all().build()?;
        let shared_config =
            rt.block_on(aws_config::load_defaults(aws_config::BehaviorVersion::latest()));
        let client = aws_sdk_s3::Client::new(&shared_config);
        let f = rt.block_on(client.get_object().bucket(bucket).key(key).send())?;
        Ok(Self { rt, f, left: None })
    }

    /// Opens an object named as `s3://bucket/key`.
    ///
    /// # Errors
    /// Fails when `spec` lacks the `s3://` prefix or has no `/` after the
    /// bucket name, and for any failure of [`S3Reader::new`].
    fn new_path(spec: &str) -> Result<Self> {
        let Some(name) = spec.strip_prefix("s3://") else {
            return err!("Not an S3 file '{}'", spec);
        };
        match name.split_once('/') {
            Some((bucket, key)) => Self::new(bucket, key),
            None => err!("Not an S3 file spec '{}'", spec),
        }
    }
}
#[cfg(feature = "s3")]
impl Read for S3Reader {
    /// Copies bytes from the object body into `buf`.
    ///
    /// Bytes left over from a previous chunk are handed out first. Otherwise
    /// the next non-empty chunk is fetched. An empty chunk is skipped rather
    /// than being reported as `Ok(0)`, which callers would take for end of
    /// file. Returns `Ok(0)` only at the end of the body or for an empty `buf`.
    ///
    /// # Errors
    /// Returns an `io::Error` carrying the text of the underlying stream error.
    fn read(&mut self, buf: &mut [u8]) -> std::result::Result<usize, io::Error> {
        if buf.is_empty() {
            return Ok(0);
        }
        let chunk = loop {
            if let Some(pending) = self.left.take() {
                if !pending.is_empty() {
                    break pending;
                }
            }
            match self.rt.block_on(self.f.body.try_next()) {
                Ok(Some(bytes)) => self.left = Some(bytes),
                Ok(None) => return Ok(0),
                Err(e) => return Err(io::Error::other(format!("S3 read failed: {e}"))),
            }
        };
        let len = cmp::min(chunk.len(), buf.len());
        buf[..len].copy_from_slice(&chunk[..len]);
        if len < chunk.len() {
            // Keep what did not fit for the next call.
            self.left = Some(chunk.slice(len..));
        }
        Ok(len)
    }
}
/// A buffered input stream paired with the name it was created with.
pub struct Infile(
/// The buffered reader.
pub io::BufReader<Box<dyn Read>>,
/// The name given at construction.
pub String,
);
impl Infile {
/// Wraps `f` and stores a copy of the name `n`.
#[must_use]
pub fn new(f: io::BufReader<Box<dyn Read>>, n: &str) -> Self {
Self(f, n.to_string())
}
}
impl Default for Infile {
/// An always-empty input with an empty name.
fn default() -> Self {
Self::new(io::BufReader::new(Box::new(io::empty())), "")
}
}
impl fmt::Debug for Infile {
/// Shows only the name, not the reader.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Infile : {}", self.1)
}
}
// The wrapper dereferences to the inner reader so it can be used as one directly.
impl Deref for Infile {
type Target = io::BufReader<Box<dyn Read>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for Infile {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl AsRef<io::BufReader<Box<dyn Read>>> for Infile {
fn as_ref(&self) -> &io::BufReader<Box<dyn Read>> {
&self.0
}
}
impl AsMut<io::BufReader<Box<dyn Read>>> for Infile {
fn as_mut(&mut self) -> &mut io::BufReader<Box<dyn Read>> {
&mut self.0
}
}
/// A buffered output stream (field 0) paired with the name it was created with (field 1).
pub struct Outfile(pub io::BufWriter<Box<dyn Write>>, pub String);
impl Outfile {
/// Wraps `f` and stores a copy of the name `n`.
#[must_use]
pub fn new(f: io::BufWriter<Box<dyn Write>>, n: &str) -> Self {
Self(f, n.to_string())
}
}
impl Default for Outfile {
/// An output that discards everything, with an empty name.
fn default() -> Self {
Self::new(io::BufWriter::new(Box::new(io::sink())), "")
}
}
impl fmt::Debug for Outfile {
/// Shows only the name, not the writer.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Outfile : {}", self.1)
}
}
// The wrapper dereferences to the inner writer so it can be used as one directly.
impl Deref for Outfile {
type Target = io::BufWriter<Box<dyn Write>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for Outfile {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl AsRef<io::BufWriter<Box<dyn Write>>> for Outfile {
fn as_ref(&self) -> &io::BufWriter<Box<dyn Write>> {
&self.0
}
}
impl AsMut<io::BufWriter<Box<dyn Write>>> for Outfile {
fn as_mut(&mut self) -> &mut io::BufWriter<Box<dyn Write>> {
&mut self.0
}
}
/// Opens `name` for buffered writing.
///
/// `"-"` is standard output and `"--"` is standard error. Any other name is
/// a file that is created if missing and truncated if it already exists, so
/// that bytes from a previous, longer file cannot survive past the end of
/// the new output.
///
/// # Errors
/// Fails when the file cannot be opened.
pub fn get_writer(name: &str) -> Result<Outfile> {
    let inner: Box<dyn Write> = {
        if name == "-" {
            Box::new(io::stdout())
        } else if name == "--" {
            Box::new(io::stderr())
        } else {
            Box::new(fs::OpenOptions::new().write(true).create(true).truncate(true).open(name)?)
        }
    };
    Ok(Outfile::new(io::BufWriter::new(inner), name))
}
/// Decodes backslash escapes in `data`.
///
/// `\n`, `\t` and `\s` become newline, tab and space. A backslash before any
/// other byte yields that byte. A backslash at the very end is kept.
fn unescape_vec(data: &[u8]) -> Vec<u8> {
    let mut decoded: Vec<u8> = Vec::with_capacity(data.len());
    let mut bytes = data.iter().copied();
    while let Some(byte) = bytes.next() {
        if byte != b'\\' {
            decoded.push(byte);
            continue;
        }
        let replacement = match bytes.next() {
            Some(b'n') => b'\n',
            Some(b't') => b'\t',
            Some(b's') => b' ',
            Some(other) => other,
            None => b'\\',
        };
        decoded.push(replacement);
    }
    decoded
}
/// Opens the S3 object named by `name` as a boxed reader (`s3` feature enabled).
#[cfg(feature = "s3")]
fn new_s3_reader(name: &str) -> Result<Box<dyn Read>> {
Ok(Box::new(S3Reader::new_path(name)?))
}
/// Without the `s3` feature, always fails with an error naming `name`.
#[cfg(not(feature = "s3"))]
fn new_s3_reader(name: &str) -> Result<Box<dyn Read>> {
err!("S3 feature not enabled, cannot read '{}'", name)
}
/// Opens `name` for buffered reading.
///
/// `"-"` is standard input, `s3://...` goes through `new_s3_reader`, and a
/// name starting with `<<` is inline data decoded by `unescape_vec`. Anything
/// else is a file path. Input starting with the bytes `1f 8b 08` is wrapped
/// in a gzip decoder, and a leading `ef bb bf` (UTF-8 byte-order mark) is
/// then skipped.
///
/// # Errors
/// Fails when the source cannot be opened or its first bytes cannot be read.
pub fn get_reader(name: &str) -> Result<Infile> {
    const GZIP_MAGIC: [u8; 3] = [0x1f, 0x8b, 0x08];
    const UTF8_BOM: [u8; 3] = [0xef, 0xbb, 0xbf];

    let source: Box<dyn Read> = {
        if name == "-" {
            Box::new(io::stdin())
        } else if name.starts_with("s3://") {
            new_s3_reader(name)?
        } else if let Some(inline) = name.strip_prefix("<<") {
            Box::new(io::Cursor::new(unescape_vec(inline.as_bytes())))
        } else {
            Box::new(fs::File::open(name)?)
        }
    };
    let mut reader = io::BufReader::new(source);
    if reader.fill_buf()?.starts_with(&GZIP_MAGIC) {
        reader = io::BufReader::new(Box::new(MultiGzDecoder::new(reader)));
    }
    if reader.fill_buf()?.starts_with(&UTF8_BOM) {
        reader.consume(UTF8_BOM.len());
    }
    Ok(Infile::new(reader, name))
}
/// Per-file reading state: the header plus a few status flags.
#[derive(Debug, Default)]
pub struct InfileContext {
// the header line and its column names
header: StringLine,
// set once reading a line reports end of input
is_done: bool,
// set when the header reader reports empty input
is_empty: bool,
// set when a header line was read from the file
has_header: bool,
// the text format used for reading
text: TextFileMode,
}
/// Builds a tab-separated header from the first line of a file.
///
/// When `line` starts with " CDX", the header is everything after the first
/// five bytes (the marker plus one delimiter byte), converted lossily to
/// UTF-8. A line that is shorter than that yields an empty header instead of
/// panicking. Otherwise the header is `c1`, `c2`, ... with one name per
/// tab-separated field of `line`.
#[must_use]
pub fn make_header(line: &[u8]) -> StringLine {
    let mut s = StringLine::new();
    if is_cdx(line) {
        let names = line.get(5..).unwrap_or_default();
        s.line = String::from_utf8_lossy(names).into_owned();
    } else {
        let num_cols = line.split(|ch| *ch == b'\t').count();
        s.line = (1..=num_cols).map(|x| format!("c{x}")).collect::<Vec<_>>().join("\t");
    }
    s.split(b'\t');
    s
}
impl InfileContext {
    /// Creates the state for a file that has not been opened yet: marked as
    /// done and empty, without a header.
    #[allow(clippy::trivially_copy_pass_by_ref)]
    const fn new(text_in: &TextFileMode) -> Self {
        Self {
            header: StringLine::new(),
            is_done: true,
            is_empty: true,
            has_header: false,
            text: *text_in,
        }
    }

    /// Reads the header (if any) and the first data line of `file`.
    ///
    /// The first data line is stored in `line` and split into columns. When
    /// the file has no header, a synthetic one (`c1`, `c2`, ...) with one name
    /// per column of the first line is created, joined with the configured
    /// column delimiter so that splitting it yields one name per column.
    ///
    /// # Errors
    /// Propagates read errors and UTF-8 errors from the header line.
    fn read_header(&mut self, file: &mut impl BufRead, line: &mut TextLine) -> Result<()> {
        self.is_empty = self.text.read_header(file, &mut self.header.line)?;
        if self.is_empty {
            // Nothing from a previously opened file may survive an empty one.
            self.header.parts.clear();
            self.has_header = false;
            self.is_done = true;
            line.clear();
            return Ok(());
        }
        self.has_header = !self.header.line.is_empty();
        if self.has_header {
            self.text.split_head(&mut self.header);
        }
        self.is_done = self.text.read_line(file, &mut line.line)?;
        if self.is_done {
            return Ok(());
        }
        self.text.split(line);
        if !self.has_header {
            // NOTE(review): a delimiter byte of 0x80 or above becomes a
            // two-byte character here and will not be matched by the byte
            // split below — confirm whether such delimiters must be supported.
            let delim = char::from(self.text.delim);
            let mut head_str = String::new();
            for i in 1..=line.len() {
                if i > 1 {
                    head_str.push(delim);
                }
                head_str.push('c');
                head_str.push_str(&i.to_string());
            }
            head_str.push('\n');
            self.header.line = head_str;
            self.header.split(self.text.delim);
        }
        Ok(())
    }
}
/// Position information about the file being read.
#[derive(Debug, Default, Clone)]
pub struct FileLocData {
// name the file was opened with
name: String,
// running byte count maintained by `Reader`
bytes: usize,
// running line count maintained by `Reader`
line: usize,
}
/// Which piece of `FileLocData` a location column reports.
#[derive(Debug, Copy, Clone)]
enum FileLocItem {
// the byte count
Bytes,
// the line count
Line,
// the file name; 0 means the whole name, otherwise the value is passed to `tail_path_u8`
Name(usize),
}
impl FileLocItem {
    /// Parses `bytes`, `line`, `name` or `name.N` (ASCII case is ignored).
    ///
    /// # Errors
    /// Fails for any other text, or when `N` is not a valid number.
    fn new(spec: &str) -> Result<Self> {
        if spec.eq_ignore_ascii_case("bytes") {
            return Ok(Self::Bytes);
        }
        if spec.eq_ignore_ascii_case("line") {
            return Ok(Self::Line);
        }
        if spec.eq_ignore_ascii_case("name") {
            return Ok(Self::Name(0));
        }
        match spec.split_once('.') {
            Some((a, b)) if a.eq_ignore_ascii_case("name") => {
                Ok(Self::Name(b.to_usize_whole(spec.as_bytes(), "File location")?))
            }
            _ => err!("File Loc must be once of Bytes, Line, Name : '{}'", spec),
        }
    }

    /// The column name used when the spec does not supply one.
    const fn dflt_name(&self) -> &'static str {
        match self {
            Self::Bytes => "bytes",
            Self::Line => "line",
            Self::Name(_) => "filename",
        }
    }

    /// Writes the selected piece of `loc` to `data`.
    ///
    /// # Errors
    /// Propagates write errors; numeric items no longer panic on a failed write.
    fn write_data(&mut self, data: &mut impl Write, loc: &FileLocData) -> Result<()> {
        match self {
            Self::Bytes => write!(data, "{}", loc.bytes)?,
            Self::Line => write!(data, "{}", loc.line)?,
            Self::Name(n) => {
                if *n == 0 {
                    data.write_all(loc.name.as_bytes())?;
                } else {
                    data.write_all(loc.name.tail_path_u8(*n, b'/').as_bytes())?;
                }
            }
        }
        Ok(())
    }
}
/// One file-location output column: a column name plus the item it reports.
#[derive(Debug, Clone)]
struct FileLoc {
// name of the output column
col_name: String,
// which piece of location data is written
item: FileLocItem,
}
impl FileLoc {
    /// Parses `column_name:item` or just `item`; in the second form the
    /// column takes the item's default name.
    ///
    /// # Errors
    /// Fails when the item part is not a valid `FileLocItem` spec.
    fn new(spec: &str) -> Result<Self> {
        match spec.split_once(':') {
            Some((name, what)) => {
                let item = FileLocItem::new(what)?;
                Ok(Self { col_name: name.to_string(), item })
            }
            None => {
                let item = FileLocItem::new(spec)?;
                let col_name = item.dflt_name().to_string();
                Ok(Self { col_name, item })
            }
        }
    }

    /// Writes this column's value for `loc` to `data`.
    fn write_data(&mut self, data: &mut impl Write, loc: &FileLocData) -> Result<()> {
        let Self { item, .. } = self;
        item.write_data(data, loc)
    }
}
/// An ordered list of file-location output columns.
#[derive(Default, Debug, Clone)]
pub struct FileLocList {
// the columns, in the order they were added
v: Vec<FileLoc>,
}
impl FileLocList {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub const fn is_empty(&self) -> bool {
self.v.is_empty()
}
pub fn push(&mut self, spec: &str) -> Result<()> {
for x in spec.split(',') {
self.v.push(FileLoc::new(x)?);
}
Ok(())
}
pub fn write_data(
&mut self,
data: &mut impl Write,
delim: u8,
loc: &FileLocData,
) -> Result<()> {
for x in &mut self.v {
x.write_data(data, loc)?;
data.write_all(&[delim])?;
}
Ok(())
}
pub fn write_names(&mut self, data: &mut String, delim: u8) {
for x in &mut self.v {
data.push_str(&x.col_name);
data.push(delim as char);
}
}
pub fn add(&self, header: &mut ColumnHeader) -> Result<()> {
for x in &self.v {
header.push(&x.col_name)?;
}
Ok(())
}
}
/// Reads a text file line by line, keeping the most recent lines available.
pub struct Reader {
// the open input
file: Infile,
// ring of `lookback + 1` line buffers
lines: Vec<TextLine>,
// header and status flags for the current file
cont: InfileContext,
// whether `get_line` splits each line into columns
do_split: bool,
// index into `lines` of the current line
curr: usize,
// position information for the current file
loc: FileLocData,
}
impl fmt::Debug for Reader {
/// Prints just the type name.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Reader")
}
}
impl Default for Reader {
/// A reader with no file open, using the default text format.
fn default() -> Self {
Self::new(&TextFileMode::default())
}
}
impl Reader {
#[must_use]
pub const fn loc(&self) -> &FileLocData {
&self.loc
}
#[must_use]
pub fn new(text_in: &TextFileMode) -> Self {
Self::new_with(1, text_in)
}
pub const fn do_split(&mut self, val: bool) {
self.do_split = val;
}
#[must_use]
pub fn new_with(lookback: usize, text_in: &TextFileMode) -> Self {
let mut lines: Vec<TextLine> = Vec::new();
lines.resize(lookback + 1, TextLine::new());
Self {
file: Infile::default(),
lines,
cont: InfileContext::new(text_in),
do_split: true,
curr: 0,
loc: FileLocData::default(),
}
}
pub fn new_open2(name: &str) -> Result<Self> {
Self::new_open_with(name, 1, &TextFileMode::default())
}
pub fn new_open(name: &str, text_in: &TextFileMode) -> Result<Self> {
Self::new_open_with(name, 1, text_in)
}
pub fn new_open_with(name: &str, lookback: usize, text_in: &TextFileMode) -> Result<Self> {
let mut lines: Vec<TextLine> = Vec::new();
lines.resize(lookback + 1, TextLine::new());
let mut tmp = Self {
file: get_reader(name)?,
lines,
cont: InfileContext::new(text_in),
do_split: true,
curr: 0,
loc: FileLocData::default(),
};
tmp.cont.read_header(&mut *tmp.file, &mut tmp.lines[0])?;
tmp.loc.name = name.to_string();
tmp.loc.line = 1;
tmp.loc.bytes = if tmp.has_header() { tmp.header().line.len() } else { 0 };
Ok(tmp)
}
#[must_use]
pub fn curr_nl(&self) -> &[u8] {
let line = self.curr_line();
&line.line[0..line.line.len() - 1]
}
#[must_use]
pub fn prev_nl(&self, n: usize) -> &[u8] {
let line = self.prev_line(n);
&line.line[0..line.line.len() - 1]
}
#[must_use]
pub const fn delim(&self) -> u8 {
self.cont.text.delim
}
#[must_use]
pub fn names(&self) -> Vec<&str> {
self.cont.header.vec()
}
pub fn write(&self, w: &mut impl Write) -> Result<()> {
w.write_all(&self.curr_line().line)?;
Ok(())
}
pub fn open(&mut self, name: &str) -> Result<()> {
self.file = get_reader(name)?;
self.cont.read_header(&mut *self.file, &mut self.lines[0])
}
#[must_use]
pub const fn header_line(&self) -> &String {
&self.cont.header.line
}
#[must_use]
pub const fn is_empty(&self) -> bool {
self.cont.is_empty
}
#[must_use]
pub const fn is_done(&self) -> bool {
self.cont.is_done
}
#[must_use]
pub const fn line_number(&self) -> usize {
self.loc.line
}
const fn incr(&mut self) {
self.loc.line += 1;
self.curr += 1;
if self.curr >= self.lines.len() {
self.curr = 0;
}
}
pub fn get_line(&mut self) -> Result<bool> {
self.loc.bytes += self.curr().line.len();
self.incr();
if self.cont.text.read_line(&mut *self.file, &mut self.lines[self.curr].line)? {
self.cont.is_done = true;
} else if self.do_split {
self.cont.text.split(&mut self.lines[self.curr]);
}
Ok(self.cont.is_done)
}
#[must_use]
pub fn curr_line(&self) -> &TextLine {
&self.lines[self.curr]
}
pub fn curr_mut(&mut self) -> &mut TextLine {
&mut self.lines[self.curr]
}
#[must_use]
pub fn curr(&self) -> &TextLine {
&self.lines[self.curr]
}
#[must_use]
pub fn prev_line(&self, lookback: usize) -> &TextLine {
if lookback <= self.curr {
&self.lines[self.curr - lookback]
} else {
&self.lines[self.curr + self.lines.len() - lookback]
}
}
pub fn write_curr(&self, w: &mut impl Write) -> Result<()> {
w.write_all(&self.curr_line().line)?;
Ok(())
}
pub fn write_prev(&self, w: &mut impl Write, lookback: usize) -> Result<()> {
w.write_all(&self.prev_line(lookback).line)?;
Ok(())
}
pub fn write_header(&self, w: &mut impl Write) -> Result<()> {
w.write_all(self.cont.header.line.as_bytes())?;
Ok(())
}
#[must_use]
pub const fn header(&self) -> &StringLine {
&self.cont.header
}
#[must_use]
pub const fn has_header(&self) -> bool {
self.cont.has_header
}
}
/// Writes every piece of `data` to standard error, followed by a newline.
///
/// # Panics
/// Panics when writing to standard error fails.
pub fn prerr(data: &[&[u8]]) {
    let mut stderr = io::stderr();
    for piece in data {
        stderr.write_all(piece).unwrap();
    }
    stderr.write_all(b"\n").unwrap();
}
/// Writes every piece of `data` to standard error, without a trailing newline.
///
/// # Panics
/// Panics when writing to standard error fails.
pub fn prerr_n(data: &[&[u8]]) {
    let mut stderr = io::stderr();
    for piece in data {
        stderr.write_all(piece).unwrap();
    }
}
/// How input headers are checked; parsed from text, ignoring ASCII case.
// NOTE(review): the exact meaning of each mode is decided by `HeaderChecker`,
// which is only partly visible here — confirm before documenting the variants.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub enum HeaderMode {
/// The default mode.
#[default]
Match,
Require,
Strip,
None,
Trust,
/// With this mode, `is_valid_cdx` reports every line as not being a header.
Ignore,
}
impl FromStr for HeaderMode {
type Err = Error;
fn from_str(spec: &str) -> Result<Self> {
if spec.eq_ignore_ascii_case("match") {
Ok(Self::Match)
} else if spec.eq_ignore_ascii_case("require") {
Ok(Self::Require)
} else if spec.eq_ignore_ascii_case("strip") {
Ok(Self::Strip)
} else if spec.eq_ignore_ascii_case("none") {
Ok(Self::None)
} else if spec.eq_ignore_ascii_case("trust") {
Ok(Self::Trust)
} else if spec.eq_ignore_ascii_case("ignore") {
Ok(Self::Ignore)
} else {
err!("Input Header Mode must be one of Match, Require, Strip, None or Trust : {}", spec)
}
}
}
/// Checks the headers of a sequence of input files against a `HeaderMode`.
#[derive(Debug, Default, Clone)]
pub struct HeaderChecker {
/// The checking mode.
pub mode: HeaderMode,
/// NOTE(review): presumably the header of the first file seen — confirm.
pub head: Vec<u8>,
/// Set once at least one file has been checked.
pub saw_one: bool,
}
/// True when `data` begins with the four bytes " CDX" (a space, then `CDX`).
#[must_use]
pub fn is_cdx(data: &[u8]) -> bool {
    matches!(data, [b' ', b'C', b'D', b'X', ..])
}
/// Decides whether `data_in` is a CDX header line and, for most modes, validates it.
///
/// Returns `Ok(false)` when `mode` is `Ignore` or the line does not start with
/// ` CDX`. For `Strip` and `None` a matching prefix is enough and `Ok(true)` is
/// returned without further checks. Otherwise byte 4 is taken as the column
/// delimiter and every delimiter-separated column name after it is validated.
///
/// # Errors
/// Fails when the header is too short, the delimiter is a newline, an ASCII
/// alphanumeric or a non-ASCII byte, the names are not valid UTF-8, or a column
/// name is empty, does not start with an alphabetic character, or contains
/// anything other than alphanumerics and underscores. `fname` is only used in
/// the error messages.
fn is_valid_cdx(data_in: &[u8], mode: HeaderMode, fname: &str) -> Result<bool> {
    if mode == HeaderMode::Ignore || !data_in.starts_with(b" CDX") {
        return Ok(false);
    }
    if matches!(mode, HeaderMode::Strip | HeaderMode::None) {
        return Ok(true);
    }
    // Drop a single trailing newline, if present, before looking at the content.
    let body = match data_in.split_last() {
        Some((&b'\n', rest)) => rest,
        _ => data_in,
    };
    if body.len() < 6 {
        return err!("File {} has an oddly truncated header line", fname);
    }
    let delim_byte = body[4];
    let bad_delim =
        delim_byte == b'\n' || delim_byte.is_ascii_alphanumeric() || delim_byte > 127;
    if bad_delim {
        return err!("Header for file {} has an invalid column delimiter", fname);
    }
    let names = str::from_utf8(&body[5..])?;
    // The delimiter is ASCII at this point, so the byte maps directly to a char.
    let delim = char::from(delim_byte);
    for name in names.split(delim) {
        if name.is_empty() {
            return err!("File {} has an empty column name", fname);
        }
        if !name.first().is_alphabetic() {
            return err!(
                "Header for file {} has column name {} which does not start with an alphabetic character.",
                fname,
                name
            );
        }
        if name.chars().any(|ch| !ch.is_alphanumeric() && ch != '_') {
            return err!(
                "Header for file {} has column name {} which contains something other than alphanumeric and underscore.",
                fname,
                name
            );
        }
    }
    Ok(true)
}
impl HeaderChecker {
    /// Creates a checker with the default mode and no file seen yet.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }
    /// Creates a checker that uses `mode`, with no file seen yet.
    #[must_use]
    pub fn from_mode(mode: HeaderMode) -> Self {
        Self { mode, ..Self::default() }
    }
    /// Runs `check` on the header of `file`, or on a placeholder non-header
    /// line when `file` has no header.
    ///
    /// Returns `true` only when `file` has a header and no file had been
    /// checked before this call. The boolean returned by `check` itself is
    /// discarded; only its errors are propagated.
    ///
    /// # Errors
    /// Propagates any error from `check`.
    pub fn check_file(&mut self, file: &Reader, fname: &str) -> Result<bool> {
        let first = !self.saw_one;
        if file.has_header() {
            self.check(file.header().line.as_bytes(), fname)?;
        } else {
            self.check(b"fake", fname)?;
        }
        Ok(file.has_header() && first)
    }
    /// Checks the first line of a file against the configured `HeaderMode`.
    ///
    /// For the first file seen, the header (if any) is recorded in `self.head`
    /// and `Ok(true)` is returned, except in `Strip` mode where a header yields
    /// `Ok(false)`. For later files the result depends on the mode: `Match`
    /// compares against the recorded header, `Require` and `None` demand
    /// presence or absence, and `Strip` and `Trust` return `Ok(!cdx)`.
    /// An empty `first_line` always yields `Ok(false)`.
    /// NOTE(review): the boolean appears to mean "the caller should keep this
    /// first line" — confirm against callers.
    ///
    /// # Errors
    /// Fails when the header is invalid (see `is_valid_cdx`), lacks a trailing
    /// newline, or violates the mode: missing when required, present when
    /// forbidden, or different from the first file's header.
    pub fn check(&mut self, first_line: &[u8], fname: &str) -> Result<bool> {
        let cdx = is_valid_cdx(first_line, self.mode, fname)?;
        if first_line.is_empty() {
            return Ok(false);
        }
        if cdx && !first_line.ends_with(b"\n") {
            return err!("Malformed Header line in file {}", &fname);
        }
        if !self.saw_one {
            self.saw_one = true;
            // `None` forbids headers. This test used to be keyed on `Strip`,
            // which made the `Strip` branch below unreachable and let a
            // forbidden header through on the first file.
            if cdx && self.mode == HeaderMode::None {
                return err!("Found CDX header in {} where one was forbidden", fname);
            } else if !cdx && self.mode == HeaderMode::Require {
                return err!("Found no CDX header in {} where one was required", fname);
            }
            if cdx && self.mode == HeaderMode::Strip {
                return Ok(false);
            }
            if cdx {
                self.head = first_line.to_vec();
            }
            return Ok(true);
        }
        match self.mode {
            HeaderMode::Match => {
                if cdx {
                    if self.head.is_empty() {
                        return err!("CDX Header found in {}, but first file had none.", fname);
                    }
                    if first_line != self.head {
                        return err!(
                            "Header Mismatch. First was {}, File {} has {}",
                            String::from_utf8_lossy(&self.head),
                            fname,
                            String::from_utf8_lossy(first_line)
                        );
                    }
                    Ok(false)
                } else {
                    if !self.head.is_empty() {
                        return err!(
                            "No CDX Header found in {}, but first file had one.",
                            fname
                        );
                    }
                    Ok(true)
                }
            }
            HeaderMode::Require => {
                if !cdx {
                    return err!("No CDX Header found in {} where one was required.", fname);
                }
                Ok(false)
            }
            HeaderMode::Strip => Ok(!cdx),
            HeaderMode::Ignore => Ok(true),
            HeaderMode::None => {
                if cdx {
                    return err!("CDX Header found in {} where one was forbidden.", fname);
                }
                Ok(true)
            }
            HeaderMode::Trust => Ok(!cdx),
        }
    }
}
/// Copies everything from `r` to `w` through a 16 KiB stack buffer.
///
/// Reads until `r.read` reports zero bytes, writing each chunk with
/// `write_all`.
///
/// # Errors
/// Returns the first read or write error encountered.
pub fn copy(mut r: impl Read, mut w: impl Write) -> Result<()> {
    let mut chunk = [0u8; 16 * 1024];
    loop {
        match r.read(&mut chunk)? {
            0 => break Ok(()),
            filled => w.write_all(&chunk[..filled])?,
        }
    }
}
/// A comparison operator, parsed from names such as `LT` or symbols such as `<`.
#[derive(Debug, Copy, Clone, Default)]
pub enum CompareOp {
/// Less than (the default); satisfied by `Ordering::Less`.
#[default]
LT,
/// Greater than; satisfied by `Ordering::Greater`.
GT,
/// Less than or equal; satisfied by anything but `Ordering::Greater`.
LE,
/// Greater than or equal; satisfied by anything but `Ordering::Less`.
GE,
/// Equal; satisfied by `Ordering::Equal`.
EQ,
/// Not equal; satisfied by anything but `Ordering::Equal`.
NE,
}
impl FromStr for CompareOp {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
if s.eq_ignore_ascii_case("lt") {
Ok(Self::LT)
} else if s.eq_ignore_ascii_case("gt") {
Ok(Self::GT)
} else if s.eq_ignore_ascii_case("le") {
Ok(Self::LE)
} else if s.eq_ignore_ascii_case("ge") {
Ok(Self::GE)
} else if s.eq_ignore_ascii_case("eq") {
Ok(Self::EQ)
} else if s.eq_ignore_ascii_case("ne") {
Ok(Self::NE)
} else if s.eq_ignore_ascii_case("<") {
Ok(Self::LT)
} else if s.eq_ignore_ascii_case(">") {
Ok(Self::GT)
} else if s.eq_ignore_ascii_case("<=") {
Ok(Self::LE)
} else if s.eq_ignore_ascii_case(">=") {
Ok(Self::GE)
} else if s.eq_ignore_ascii_case("==") {
Ok(Self::EQ)
} else if s.eq_ignore_ascii_case("!=") {
Ok(Self::NE)
} else {
err!("Invalid CompareOp, should be one of LT,<,GT,>,LE,<=,GE,>=,EQ,==,NE,!= : '{}'", s)
}
}
}
impl CompareOp {
    /// Returns the operator that gives the same answer once the two operands
    /// are swapped: `LT` and `GT` trade places, as do `LE` and `GE`; `EQ` and
    /// `NE` are unchanged. This is not logical negation.
    #[must_use]
    pub const fn invert(&self) -> Self {
        match self {
            Self::LT => Self::GT,
            Self::GT => Self::LT,
            Self::LE => Self::GE,
            Self::GE => Self::LE,
            Self::EQ => Self::EQ,
            Self::NE => Self::NE,
        }
    }
    /// Like `line_ok`, but evaluated on the inverted operator, and reporting
    /// a failing line on stderr.
    ///
    /// On failure the message contains `line_num`, the raw line, this
    /// operator and the value held by `comp`.
    /// NOTE(review): this path applies `invert()` whereas `line_ok` does not —
    /// confirm that the difference is intended.
    pub fn line_ok_verbose(
        &self,
        line: &TextLine,
        comp: &mut LineCompList,
        line_num: usize,
    ) -> bool {
        let passed = self.invert().line_ok(line, comp);
        if !passed {
            eprint!("Line {line_num} : ");
            prerr_n(&[&line.line]);
            eprint!("should have been {self:?} ");
            prerr_n(&[comp.get_value()]);
            eprintln!(" but wasn't");
        }
        passed
    }
    /// Tests the ordering produced by `comp.comp_self_cols(line)` against this operator.
    pub fn line_ok(&self, line: &TextLine, comp: &mut LineCompList) -> bool {
        self.ord_ok(comp.comp_self_cols(line))
    }
    /// Reports whether the ordering `o` satisfies this operator.
    #[must_use]
    pub fn ord_ok(&self, o: Ordering) -> bool {
        match self {
            Self::LT => o.is_lt(),
            Self::GT => o.is_gt(),
            Self::LE => o.is_le(),
            Self::GE => o.is_ge(),
            Self::EQ => o.is_eq(),
            Self::NE => o.is_ne(),
        }
    }
}
/// Borrowed pieces of a range specification: one mandatory operator/value
/// pair and an optional second pair, all slices of the parsed input.
struct RangeSpec<'a> {
/// Text of the first operator (e.g. `<` or `LT`).
op1: &'a str,
/// Text of the value compared by `op1`.
val1: &'a str,
/// Text of the second operator, when the spec has two bounds.
op2: Option<&'a str>,
/// Text of the value compared by `op2`, when the spec has two bounds.
val2: Option<&'a str>,
}
impl<'a> RangeSpec<'a> {
    /// Parses a complete range spec.
    ///
    /// Accepted forms, tried in this order:
    /// `<op><val><op><val>`, `<op><val>` (symbolic operators), then
    /// `OP,val,OP,val` and `OP,val` (upper-case two-letter names).
    ///
    /// # Errors
    /// Returns an error when `spec` matches none of the forms.
    fn new(spec: &'a str) -> Result<Self> {
        // The value after a symbolic operator must not start with `=`. The
        // alternation lists `<` and `>` before `<=` and `>=`, so with a bare
        // `(.+)` the input `<=5` was split into `<` and `=5`. `new_trail`
        // already guards its values this way.
        static RE1: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new("^(<|>|<=|>=|==|!=)([^<>!=]+)(<|>|<=|>=|==|!=)([^=].*)$").unwrap()
        });
        static RE2: LazyLock<Regex> =
            LazyLock::new(|| Regex::new("^(<|>|<=|>=|==|!=)([^=].*)$").unwrap());
        static RE3: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new("^(LT|GT|LE|GE|EQ|NE),([^,]+),(LT|GT|LE|GE|EQ|NE),(.+)$").unwrap()
        });
        static RE4: LazyLock<Regex> =
            LazyLock::new(|| Regex::new("^(LT|GT|LE|GE|EQ|NE),(.+)$").unwrap());
        if let Some(caps) = RE1.captures(spec) {
            Ok(Self {
                op1: caps.get(1).unwrap().as_str(),
                val1: caps.get(2).unwrap().as_str(),
                op2: Some(caps.get(3).unwrap().as_str()),
                val2: Some(caps.get(4).unwrap().as_str()),
            })
        } else if let Some(caps) = RE2.captures(spec) {
            Ok(Self {
                op1: caps.get(1).unwrap().as_str(),
                val1: caps.get(2).unwrap().as_str(),
                op2: None,
                val2: None,
            })
        } else if let Some(caps) = RE3.captures(spec) {
            Ok(Self {
                op1: caps.get(1).unwrap().as_str(),
                val1: caps.get(2).unwrap().as_str(),
                op2: Some(caps.get(3).unwrap().as_str()),
                val2: Some(caps.get(4).unwrap().as_str()),
            })
        } else if let Some(caps) = RE4.captures(spec) {
            Ok(Self {
                op1: caps.get(1).unwrap().as_str(),
                val1: caps.get(2).unwrap().as_str(),
                op2: None,
                val2: None,
            })
        } else {
            err!("Not valid range spec '{}'", spec)
        }
    }
    /// Parses a range spec that sits at the end of `spec`, either at the very
    /// start or after a comma.
    ///
    /// Returns the parsed range together with the byte offset at which the
    /// match begins (the position of the separating comma, or zero), so the
    /// caller can slice off whatever precedes the range.
    ///
    /// # Errors
    /// Returns an error when no trailing range spec is found.
    fn new_trail(spec: &'a str) -> Result<(Self, usize)> {
        static RE1: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new("(^|,)(<|>|<=|>=|==|!=)([^<>!=]+)(<|>|<=|>=|==|!=)([^=].*)$").unwrap()
        });
        static RE2: LazyLock<Regex> =
            LazyLock::new(|| Regex::new("(^|,)(<|>|<=|>=|==|!=)([^=].*)$").unwrap());
        static RE3: LazyLock<Regex> = LazyLock::new(|| {
            Regex::new("(^|,)(LT|GT|LE|GE|EQ|NE),([^,]+),(LT|GT|LE|GE|EQ|NE),(.+)$").unwrap()
        });
        static RE4: LazyLock<Regex> =
            LazyLock::new(|| Regex::new("(^|,)(LT|GT|LE|GE|EQ|NE),(.+)$").unwrap());
        if let Some(caps) = RE1.captures(spec) {
            Ok((
                Self {
                    op1: caps.get(2).unwrap().as_str(),
                    val1: caps.get(3).unwrap().as_str(),
                    op2: Some(caps.get(4).unwrap().as_str()),
                    val2: Some(caps.get(5).unwrap().as_str()),
                },
                caps.get(0).unwrap().start(),
            ))
        } else if let Some(caps) = RE2.captures(spec) {
            Ok((
                Self {
                    op1: caps.get(2).unwrap().as_str(),
                    val1: caps.get(3).unwrap().as_str(),
                    op2: None,
                    val2: None,
                },
                caps.get(0).unwrap().start(),
            ))
        } else if let Some(caps) = RE3.captures(spec) {
            Ok((
                Self {
                    op1: caps.get(2).unwrap().as_str(),
                    val1: caps.get(3).unwrap().as_str(),
                    op2: Some(caps.get(4).unwrap().as_str()),
                    val2: Some(caps.get(5).unwrap().as_str()),
                },
                caps.get(0).unwrap().start(),
            ))
        } else if let Some(caps) = RE4.captures(spec) {
            Ok((
                Self {
                    op1: caps.get(2).unwrap().as_str(),
                    val1: caps.get(3).unwrap().as_str(),
                    op2: None,
                    val2: None,
                },
                caps.get(0).unwrap().start(),
            ))
        } else {
            err!("Not valid trailing range spec '{}'", spec)
        }
    }
}
/// A one- or two-sided range test applied to whole lines through a `LineCompList`.
#[derive(Debug, Default)]
pub struct CheckLine {
/// First comparison operator.
op: CompareOp,
/// Value text for the first comparison; handed to `LineCompList::set` with `,` as delimiter.
val: String,
/// Optional second comparison operator.
op2: Option<CompareOp>,
/// Value text for the second comparison, used when `op2` is present.
val2: Option<String>,
}
impl CheckLine {
    /// Builds a checker from a range spec such as `<5`, `>=a<z` or `GE,a,LT,z`.
    ///
    /// # Errors
    /// Fails when `spec` is not a valid range spec or names an unknown operator.
    pub fn new(spec: &str) -> Result<Self> {
        let mut s = Self::default();
        s.set(spec)?;
        Ok(s)
    }
    /// Copies the operators and values of an already parsed range into `self`.
    fn set_with(&mut self, r: &RangeSpec<'_>) -> Result<()> {
        self.op = r.op1.parse::<CompareOp>()?;
        r.val1.clone_into(&mut self.val);
        self.op2 = r.op2.map(|text| text.parse::<CompareOp>()).transpose()?;
        // The second value comes from `val2`. It used to be filled from
        // `op2`, so the second bound was compared against the operator text.
        self.val2 = r.val2.map(|text| text.to_owned());
        Ok(())
    }
    /// Replaces the current range with the one described by `spec`.
    ///
    /// # Errors
    /// Fails when `spec` is not a valid range spec or names an unknown operator.
    pub fn set(&mut self, spec: &str) -> Result<()> {
        self.set_with(&RangeSpec::new(spec)?)
    }
    /// Tests `line` against the first bound, reporting a failure on stderr
    /// via `CompareOp::line_ok_verbose`.
    ///
    /// NOTE(review): only the first operator/value pair is evaluated here; the
    /// optional second pair is ignored, unlike in `line_ok` — confirm intent.
    ///
    /// # Errors
    /// Propagates any error from `comp.set`.
    pub fn line_ok_verbose(
        &self,
        line: &TextLine,
        comp: &mut LineCompList,
        line_num: usize,
    ) -> Result<bool> {
        comp.set(self.val.as_bytes(), b',')?;
        Ok(self.op.line_ok_verbose(line, comp, line_num))
    }
    /// Tests `line` against the first bound and, when present, the second.
    ///
    /// `comp` is loaded with each bound's value before the corresponding
    /// comparison; the second bound is only evaluated when the first passes.
    ///
    /// # Errors
    /// Propagates any error from `comp.set`.
    pub fn line_ok(&self, line: &TextLine, comp: &mut LineCompList) -> Result<bool> {
        comp.set(self.val.as_bytes(), b',')?;
        if !self.op.line_ok(line, comp) {
            return Ok(false);
        }
        match (self.op2, self.val2.as_deref()) {
            (Some(second), Some(value)) => {
                comp.set(value.as_bytes(), b',')?;
                Ok(second.line_ok(line, comp))
            }
            // `set_with` always stores the second operator and value together.
            _ => Ok(true),
        }
    }
}
/// A one- or two-sided range test applied to byte buffers through boxed `Compare` objects.
pub struct CheckBuff {
/// First comparison operator.
op: CompareOp,
/// Comparator loaded with the first bound's value.
val: Box<dyn Compare>,
/// Optional second comparison operator.
op2: Option<CompareOp>,
/// Comparator loaded with the second bound's value, when there is one.
val2: Option<Box<dyn Compare>>,
}
/// Minimal `Debug` output: prints only the type name, without any field.
impl fmt::Debug for CheckBuff {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("CheckBuff")
    }
}
impl CheckBuff {
    /// Builds a checker from `spec`, which ends with a range spec.
    ///
    /// Everything before the trailing range (as located by
    /// `RangeSpec::new_trail`) is used as the comparator spec.
    ///
    /// # Errors
    /// Fails when no trailing range is found, an operator is unknown, or the
    /// comparator cannot be created.
    pub fn new(spec: &str) -> Result<Self> {
        let (range, split_at) = RangeSpec::new_trail(spec)?;
        let comp_spec = &spec[..split_at];
        Self::new_with(comp_spec, &range)
    }
    /// Builds a checker from a comparator spec and an already parsed range;
    /// one comparator is created per bound and loaded with that bound's value.
    fn new_with(comp_spec: &str, r: &RangeSpec<'_>) -> Result<Self> {
        let op = r.op1.parse::<CompareOp>()?;
        let mut val = CompMaker::make_comp_box(comp_spec)?;
        val.set(r.val1.as_bytes());
        let op2 = match r.op2 {
            Some(text) => Some(text.parse::<CompareOp>()?),
            None => None,
        };
        let val2 = match r.val2 {
            Some(text) => {
                let mut second = CompMaker::make_comp_box(comp_spec)?;
                second.set(text.as_bytes());
                Some(second)
            }
            None => None,
        };
        Ok(Self { op, val, op2, val2 })
    }
    /// Reports whether `right` satisfies the first bound and, when present,
    /// the second.
    ///
    /// Each comparator's `comp_self(right)` result is tested against the
    /// inverted operator; the second bound is only evaluated when the first
    /// passes.
    #[must_use]
    pub fn buff_ok(&self, right: &[u8]) -> bool {
        let first_ok = self.op.invert().ord_ok(self.val.comp_self(right));
        first_ok
            && self.op2.map_or(true, |second| {
                let ordering = self.val2.as_ref().unwrap().comp_self(right);
                second.invert().ord_ok(ordering)
            })
    }
}
/// Returns the closing delimiter that pairs with the opening delimiter `ch`.
///
/// Known pairs are `()`, `{}`, `[]` and `<>`.
///
/// # Errors
/// Returns an error when `ch` is not one of the four opening delimiters.
pub fn closer(ch: u8) -> Result<u8> {
    const PAIRS: [(u8, u8); 4] = [(b'(', b')'), (b'{', b'}'), (b'[', b']'), (b'<', b'>')];
    match PAIRS.iter().find(|(open, _)| *open == ch) {
        Some(&(_, close)) => Ok(close),
        None => err!("I don't know how to close a '{}'", ch as char),
    }
}
/// Finds the end of the delimited group that starts at the first byte of `spec`.
///
/// The first byte is the opening delimiter; nested occurrences of the same
/// pair are balanced. Returns the byte length of the group, including both
/// delimiters.
///
/// # Errors
/// Fails when `spec` is empty, its first byte is not a known opening
/// delimiter (see `closer`), or the group is never closed.
pub fn find_close(spec: &str) -> Result<usize> {
    let bytes = spec.as_bytes();
    let Some(&open) = bytes.first() else {
        return err!("Can't find_close an empty string");
    };
    let close = closer(open)?;
    let mut depth = 0;
    for (pos, &byte) in bytes.iter().enumerate() {
        if byte == open {
            depth += 1;
        } else if byte == close {
            depth -= 1;
        }
        // Back at depth zero: the group that started at byte 0 has just closed.
        if depth == 0 {
            return Ok(pos + 1);
        }
    }
    err!("Had no closing delimiter '{}'", spec)
}
/// Returns `x` without any trailing `\n` or `\r` bytes.
///
/// All trailing line-ending bytes are removed, not just one; bytes elsewhere
/// in the slice are left alone.
#[must_use]
pub fn chomp(mut x: &[u8]) -> &[u8] {
    while let Some((&tail, rest)) = x.split_last() {
        if tail != b'\n' && tail != b'\r' {
            break;
        }
        x = rest;
    }
    x
}
// Unit tests for line reading and delimiter matching.
#[cfg(test)]
mod tests {
use super::*;
// Checks `TextFileMode::read_line` under the default column mode, then
// `QuoteMode::Quote`, then `QuoteMode::Backslash`. The same `line` buffer is
// reused by every call, so the order of the statements matters.
#[test]
fn read_line() -> Result<()> {
let mut mode = TextFileMode::default();
let data1 = b"aaa\tbbb\n";
// Same as data1 but without the trailing newline.
let data2 = b"aaa\tbbb";
// Contains a newline between two double quotes.
let data3 = b"aaa\tbb\"\n\"b\n";
// data3 cut off right after its first newline.
let data4 = b"aaa\tbb\"\n";
let mut line = Vec::new();
mode.read_line(&mut &data1[..], &mut line)?;
assert_eq!(data1, &line[..]);
// Input without a trailing newline is expected to come back with one.
mode.read_line(&mut &data2[..], &mut line)?;
assert_eq!(data1, &line[..]);
// Default mode: the line is expected to end at the first newline.
mode.read_line(&mut &data3[..], &mut line)?;
assert_eq!(data4, &line[..]);
// Quote mode: the whole of data3 is expected back as a single line.
mode.col_mode = QuoteMode::Quote;
mode.read_line(&mut &data3[..], &mut line)?;
assert_eq!(data3, &line[..]);
// Backslash mode: the line is again expected to end at the first newline.
mode.col_mode = QuoteMode::Backslash;
mode.read_line(&mut &data3[..], &mut line)?;
assert_eq!(data4, &line[..]);
Ok(())
}
// Checks `closer` and `find_close` on simple, adjacent, nested and unterminated groups.
#[test]
fn markers() {
let c = closer(b'(');
assert!(c.is_ok());
assert_eq!(c.unwrap(), b')');
let c = find_close("(abc)");
assert!(c.is_ok());
assert_eq!(c.unwrap(), 5);
// Only the first of two adjacent groups is measured.
let c = find_close("[abc][def]");
assert!(c.is_ok());
assert_eq!(c.unwrap(), 5);
// Nested groups are balanced before the outer one closes.
let c = find_close("{abc{def}}");
assert!(c.is_ok());
assert_eq!(c.unwrap(), 10);
// The outer group is never closed, so this must fail.
let c = find_close("{abc{def}ghi");
assert!(c.is_err());
}
}