use crate::matcher::MatchMaker;
use crate::prelude::*;
use crate::util::find_close;
use std::collections::HashSet;
use std::str;
#[derive(Debug, Copy, Clone)]
pub enum DupColHandling {
Fail,
Allow,
Numeric,
}
impl DupColHandling {
pub fn new(spec: &str) -> Result<Self> {
if spec.to_ascii_lowercase() == "fail" {
Ok(Self::Fail)
} else if spec.to_ascii_lowercase() == "allow" {
Ok(Self::Allow)
} else if spec.to_ascii_lowercase() == "numeric" {
Ok(Self::Numeric)
} else {
err!("Duplicate Column Mode must be one of : Fail, Allow, Numeric")
}
}
}
impl Default for DupColHandling {
fn default() -> Self {
Self::Fail
}
}
#[derive(Default, Clone, Debug)]
struct NameMap {
from: String,
to: String,
}
impl NameMap {
fn new(spec: &str) -> Result<Self> {
if let Some((a, b)) = spec.split_once('.') {
Ok(Self {
from: a.to_string(),
to: b.to_string(),
})
} else {
err!("Format for rename is 'old.new' not '{spec}'")
}
}
}
impl fmt::Display for NameMap {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}.{}", self.from, self.to)
}
}
#[derive(Debug, Default, Clone)]
pub struct ColumnHeader {
cols: Vec<String>,
dups: DupColHandling,
rename: Vec<NameMap>,
sloppy: bool,
}
pub fn is_valid_column_name(name: &str) -> bool {
get_col_name(name) == name.len()
}
pub fn validate_column_name(name: &str) -> Result<()> {
if !is_valid_column_name(name) {
err!("Invalid column name {}", name)
} else {
Ok(())
}
}
pub fn get_col(data: &[u8], col: usize, delim: u8) -> &[u8] {
for (n, s) in data.split(|ch| *ch == delim).enumerate() {
if n == col {
return if !s.is_empty() && s.last().unwrap() == &b'\n' {
&s[0..s.len() - 1]
} else {
s
};
}
}
&data[0..0]
}
impl ColumnHeader {
pub fn new() -> Self {
Self::default()
}
pub fn set_handling(&mut self, dups: DupColHandling) {
self.dups = dups;
}
pub fn clear(&mut self) {
self.cols.clear();
}
pub fn rename(&mut self, spec: &str) -> Result<()> {
if !spec.is_empty() {
for x in spec.split(',') {
self.rename.push(NameMap::new(x)?);
}
}
Ok(())
}
pub fn rename_sloppy(&mut self) {
self.sloppy = true;
}
pub fn fieldnames(&self) -> Vec<&str> {
let mut v: Vec<&str> = Vec::with_capacity(self.cols.len());
for x in &self.cols {
v.push(x);
}
v
}
pub fn push_all(&mut self, cols: &StringLine) -> Result<()> {
for x in cols.into_iter() {
self.push(x)?;
}
Ok(())
}
pub fn push_all_unchecked(&mut self, cols: &StringLine) {
for x in cols.into_iter() {
self.push_unchecked(x);
}
}
pub fn contains(&self, name: &str) -> bool {
for x in &self.cols {
if x == name {
return true;
}
}
false
}
pub fn push_unchecked(&mut self, name: &str) {
self.cols.push(name.to_string());
}
pub fn check_rename(&self) -> Result<()> {
if !self.sloppy & !self.rename.is_empty() {
let mut s = self.rename[0].to_string();
for x in self.rename.iter().skip(1) {
s.push(',');
s.push_str(&x.to_string());
}
err!("Unused rename : '{}'", s)
} else {
Ok(())
}
}
pub fn push(&mut self, name: &str) -> Result<()> {
validate_column_name(name)?;
if self.contains(name) {
let mut key: Option<usize> = None;
for (i, x) in self.rename.iter().enumerate() {
if x.from == name {
key = Some(i);
break;
}
}
if let Some(k) = key {
if self.contains(&self.rename[k].to) {
return err!(
"Applied rename of {} to {}, but {} was already used",
self.rename[k].from,
self.rename[k].to,
self.rename[k].to
);
}
self.cols.push(self.rename[k].to.clone());
self.rename.remove(k);
return Ok(());
}
match self.dups {
DupColHandling::Fail => return err!("Duplicate column name {}", name),
DupColHandling::Allow => {
self.cols.push(name.to_string());
eprintln!("Warning : creating duplicate column name {}", name);
}
DupColHandling::Numeric => {
for x in 1..10000 {
let new_name = format!("{name}{}", x);
if !self.contains(&new_name) {
self.cols.push(new_name);
break;
}
}
}
}
} else {
self.cols.push(name.to_string());
}
Ok(())
}
pub fn get_head(&self, text: &TextFileMode) -> String {
let mut res = String::with_capacity(self.get_size() + 6);
if text.cdx {
res.push_str(" CDX");
res.push(text.delim as char);
}
self.add_head(&mut res, text);
res.push('\n');
res
}
pub fn get_head_short(&self, text: &TextFileMode) -> String {
let mut res = String::with_capacity(self.get_size());
self.add_head(&mut res, text);
res
}
fn get_size(&self) -> usize {
self.cols.iter().map(|v| v.len()).sum::<usize>() + self.cols.len() - 1
}
fn add_head(&self, res: &mut String, text: &TextFileMode) {
if self.cols.is_empty() {
return;
}
res.push_str(&self.cols[0]);
for x in self.cols.iter().skip(1) {
res.push(text.delim as char);
res.push_str(x);
}
}
}
#[derive(Debug, Default, Clone)]
pub struct ColumnSet {
pos: Vec<ColSetPart>,
neg: Vec<ColSetPart>,
columns: Vec<OutCol>,
did_lookup: bool,
trans: Vec<TransList>,
}
#[derive(Debug, Clone, Default)]
struct ColSetPart {
val: String,
trans: Option<usize>,
}
impl ColSetPart {
fn new(spec: &str, trans: Option<usize>) -> Self {
Self {
val: spec.to_string(),
trans,
}
}
}
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OutCol {
pub num: usize,
pub name: String,
trans: Option<usize>,
}
impl OutCol {
pub fn new(num: usize, name: &str) -> Self {
Self {
num,
name: name.to_string(),
trans: None,
}
}
pub fn new_trans(num: usize, name: &str, trans: usize) -> Self {
Self {
num,
name: name.to_string(),
trans: Some(trans),
}
}
pub const fn from_num(num: usize) -> Self {
Self {
num,
name: String::new(),
trans: None,
}
}
pub const fn from_num_trans(num: usize, trans: usize) -> Self {
Self {
num,
name: String::new(),
trans: Some(trans),
}
}
}
#[derive(Debug, Default, Clone)]
pub struct ScopedValue {
cols: ColumnSet,
value: String,
}
impl ScopedValue {
pub fn new(spec: &str, del: char) -> Result<Self> {
let mut s = Self::default();
if spec.is_empty() {
return Ok(s);
}
let mut spec = spec;
let ch = spec.first();
if ch == del {
spec.skip_first();
if spec.is_empty() {
s.value.push(del);
return Ok(s);
}
let ch = spec.first();
if ch == del {
s.value.push(del);
}
}
while !spec.is_empty() {
let ch = spec.take_first();
if ch == del {
s.cols.add_yes(spec)?;
return Ok(s);
}
s.value.push(ch);
}
Ok(s)
}
pub fn new2(value: &str, cols: &str) -> Result<Self> {
let mut s = Self::default();
s.cols.add_yes(cols)?;
s.value = value.to_string();
Ok(s)
}
pub fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.cols.lookup(fieldnames)
}
}
#[derive(Debug, Default, Clone)]
pub struct ScopedValues {
default: String,
data: Vec<ScopedValue>,
has_value: Vec<bool>,
strings: Vec<String>,
ints: Vec<isize>,
floats: Vec<f64>,
}
impl ScopedValues {
pub fn new() -> Self {
Self::default()
}
pub fn set_default(&mut self, d: &str) {
self.default = d.to_string();
}
pub fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.has_value.clear();
self.has_value.resize(fieldnames.len(), false);
self.strings.clear();
self.strings.resize(fieldnames.len(), self.default.clone());
for x in &mut self.data {
x.lookup(fieldnames)?;
for i in 0..x.cols.columns.len() {
let j = x.cols.columns[i].num;
self.strings[j] = x.value.clone();
self.has_value[j] = true;
}
}
Ok(())
}
pub fn make_ints(&mut self) -> Result<()> {
self.ints.clear();
for i in 0..self.strings.len() {
self.ints[i] = self
.get(i)
.to_isize_whole(self.get(i).as_bytes(), "number")?;
}
Ok(())
}
pub fn make_floats(&mut self) -> Result<()> {
self.floats.clear();
for i in 0..self.strings.len() {
self.floats[i] = self.get(i).to_f64_whole(self.get(i).as_bytes(), "float")?;
}
Ok(())
}
pub fn add(&mut self, spec: &str, del: char) -> Result<()> {
self.data.push(ScopedValue::new(spec, del)?);
Ok(())
}
pub fn add2(&mut self, value: &str, cols: &str) -> Result<()> {
self.data.push(ScopedValue::new2(value, cols)?);
Ok(())
}
pub fn get(&self, col: usize) -> &str {
if col < self.strings.len() {
&self.strings[col]
} else {
self.strings.last().unwrap()
}
}
pub fn get_int(&self, col: usize) -> isize {
if col < self.ints.len() {
self.ints[col]
} else {
*self.ints.last().unwrap()
}
}
pub fn get_float(&self, col: usize) -> f64 {
if col < self.floats.len() {
self.floats[col]
} else {
*self.floats.last().unwrap()
}
}
pub fn has_value(&self, col: usize) -> bool {
if col < self.has_value.len() {
self.has_value[col]
} else {
false
}
}
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
}
pub trait ColumnFun {
fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()>;
fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()>;
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()>;
}
#[derive(Debug, Default)]
pub struct ColumnExpr {
name: String,
expr: Expr,
}
impl ColumnExpr {
pub fn new(spec: &str) -> Result<Self> {
if let Some((a, b)) = spec.split_once(':') {
Ok(Self {
name: a.to_string(),
expr: Expr::new(b)?,
})
} else {
Ok(Self {
name: String::new(),
expr: Expr::new(spec)?,
})
}
}
}
impl ColumnFun for ColumnExpr {
fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
w.push(&self.name)
}
fn write(&mut self, w: &mut dyn Write, line: &TextLine, _text: &TextFileMode) -> Result<()> {
write!(w, "{}", self.expr.eval(line))?;
Ok(())
}
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.expr.lookup(fieldnames)
}
}
#[derive(Debug, Default)]
pub struct ColumnSingle {
col: NamedCol,
new_name: String,
}
impl ColumnSingle {
pub fn with_named_col(col: &NamedCol) -> Self {
Self {
col: col.clone(),
new_name: String::new(),
}
}
pub fn with_name(col: &str) -> Result<Self> {
Ok(Self {
col: NamedCol::new_from(col)?,
new_name: String::new(),
})
}
}
impl ColumnFun for ColumnSingle {
fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
if self.new_name.is_empty() {
w.push(&head[self.col.num])
} else {
w.push(&self.new_name)
}
}
fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()> {
text.write(w, &line[self.col.num])?;
Ok(())
}
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.col.lookup(fieldnames)
}
}
#[derive(Debug, Default, Copy, Clone)]
pub struct ColumnWhole;
impl ColumnFun for ColumnWhole {
fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
w.push_all(head)
}
fn write(&mut self, w: &mut dyn Write, line: &TextLine, _text: &TextFileMode) -> Result<()> {
w.write_all(&line.line()[0..line.line().len() - 1])?;
Ok(())
}
fn lookup(&mut self, _fieldnames: &[&str]) -> Result<()> {
Ok(())
}
}
#[derive(Debug, Default, Clone)]
pub struct ColumnCount {
num: isize,
name: String,
}
impl ColumnCount {
pub fn new(num: isize, name: &str) -> Self {
Self {
num,
name: name.to_string(),
}
}
}
impl ColumnFun for ColumnCount {
fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
w.push(&self.name)
}
fn write(&mut self, w: &mut dyn Write, _line: &TextLine, _text: &TextFileMode) -> Result<()> {
w.write_all(self.num.to_string().as_bytes())?;
self.num += 1;
Ok(())
}
fn lookup(&mut self, _fieldnames: &[&str]) -> Result<()> {
Ok(())
}
}
#[derive(Debug, Default, Clone)]
pub struct ColumnLiteral {
value: Vec<u8>,
name: String,
}
impl ColumnLiteral {
pub fn new(value: &[u8], name: &str) -> Self {
Self {
value: value.to_vec(),
name: name.to_string(),
}
}
}
impl ColumnFun for ColumnLiteral {
fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
w.push(&self.name)
}
fn write(&mut self, w: &mut dyn Write, _line: &TextLine, text: &TextFileMode) -> Result<()> {
text.write(w, &self.value)?;
Ok(())
}
fn lookup(&mut self, _fieldnames: &[&str]) -> Result<()> {
Ok(())
}
}
impl fmt::Debug for dyn ColumnFun {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "ColumnFun")
}
}
impl ColumnSet {
pub fn new() -> Self {
Self::default()
}
pub fn is_empty(&self) -> bool {
self.pos.is_empty() && self.neg.is_empty()
}
pub fn from_spec(spec: &str) -> Result<Self> {
let mut s = Self::default();
s.add_yes(spec)?;
Ok(s)
}
pub fn add_yes(&mut self, spec: &str) -> Result<()> {
self.add(spec, false)
}
pub fn add_no(&mut self, spec: &str) -> Result<()> {
self.add(spec, true)
}
pub fn add_one(&mut self, s: &str, negate: bool, trans: Option<usize>) {
if let Some(stripped) = s.strip_prefix('~') {
let st = ColSetPart::new(stripped, trans);
if negate {
self.pos.push(st);
} else {
self.neg.push(st);
}
} else {
let st = ColSetPart::new(s, trans);
if negate {
self.neg.push(st);
} else {
self.pos.push(st);
}
}
}
fn find_trans(spec: &str) -> Option<(&str, &str)> {
let mut last_was_plus = false;
for (i, x) in spec.char_indices() {
if last_was_plus {
if x.is_alphabetic() {
return Some((&spec[0..i - 1], &spec[i..]));
} else {
last_was_plus = false;
}
} else if x == '+' {
last_was_plus = true;
}
}
None
}
pub fn add(&mut self, spec: &str, negate: bool) -> Result<()> {
let mut spc = spec;
let trans = if let Some((a, b)) = Self::find_trans(spec) {
spc = a;
self.trans.push(TransList::new(b)?);
Some(self.trans.len() - 1)
} else {
None
};
loop {
if spc.first() == '(' {
let pos = find_close(spc)?;
self.add_one(&spc[..pos], negate, trans);
spc = &spc[pos..];
if spc.is_empty() {
break;
}
} else if let Some((a, b)) = spc.split_once(',') {
self.add_one(a, negate, trans);
spc = b;
} else {
self.add_one(spc, negate, trans);
break;
}
}
Ok(())
}
pub fn lookup_col(fieldnames: &[&str], colname: &str) -> Result<usize> {
for f in fieldnames.iter().enumerate() {
if f.1 == &colname {
return Ok(f.0);
}
}
err!("{} not found", colname)
}
pub fn single(fieldnames: &[&str], colname: &str) -> Result<usize> {
if let Some(stripped) = colname.strip_prefix('+') {
let n = stripped.to_usize_whole(colname.as_bytes(), "column")?;
let len = fieldnames.len();
if n > len {
err!("Column {} out of bounds", colname)
} else {
Ok(len - n)
}
} else {
let ch = colname.first();
if ch.is_ascii_digit() && ch != '0' {
let n = colname.to_usize_whole(colname.as_bytes(), "column")?;
if n < 1 {
err!("Column {} out of bounds", colname)
} else {
Ok(n - 1)
}
} else {
Self::lookup_col(fieldnames, colname)
}
}
}
fn match_range(fieldnames: &[&str], rng: &str) -> Result<Vec<usize>> {
let mut ret = Vec::new();
let c = MatchMaker::make(rng)?;
for f in fieldnames.iter().enumerate() {
if c.smatch(f.1) {
ret.push(f.0);
}
}
Ok(ret)
}
fn to_outcols(data: &[usize], name: &str) -> Vec<OutCol> {
let mut ret = Vec::with_capacity(data.len());
for x in data {
ret.push(OutCol::new(*x, name));
}
ret
}
pub fn ranges(fieldnames: &[&str], rng: &str) -> Result<Vec<OutCol>> {
let mut c = Self::new();
c.add_yes(rng)?;
c.lookup(fieldnames)?;
Ok(c.get_cols_full())
}
fn range(fieldnames: &[&str], rng: &str) -> Result<Vec<OutCol>> {
if rng.is_empty() {
return err!("Empty Range {}", rng);
}
let (name, range) = match rng.split_once(':') {
None => ("", rng),
Some(x) => x,
};
let ch = range.first();
if ch == '(' {
return Ok(Self::to_outcols(
&Self::match_range(fieldnames, &range[1..range.len() - 1])?,
name,
));
}
let mut parts: Vec<&str> = range.split('-').collect();
if parts.len() > 2 {
return err!("Malformed Range {}", rng);
}
if parts[0].is_empty() {
parts[0] = "1";
}
let start: usize;
let end = if parts.len() == 1 {
start = Self::single(fieldnames, parts[0])?;
start
} else {
if parts[1].is_empty() {
parts[1] = "+1";
}
start = Self::single(fieldnames, parts[0])?;
Self::single(fieldnames, parts[1])?
};
if start > end {
return err!("Start greater than end : {}", rng);
}
let mut res = Vec::new();
for i in start..=end {
res.push(OutCol::new(i, name));
}
Ok(res)
}
pub fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.did_lookup = true;
self.columns = Vec::new();
let mut no_cols: HashSet<usize> = HashSet::new();
for s in &self.neg {
for x in Self::range(fieldnames, &s.val)? {
no_cols.insert(x.num);
}
}
if self.pos.is_empty() {
for x in 0..fieldnames.len() {
if !no_cols.contains(&x) {
self.columns.push(OutCol::from_num(x));
}
}
} else {
for s in &self.pos {
for mut x in Self::range(fieldnames, &s.val)? {
if !no_cols.contains(&x.num) {
x.trans = s.trans;
self.columns.push(x);
}
}
}
}
Ok(())
}
pub fn select<T: AsRef<str> + Clone>(&self, cols: &[T], result: &mut Vec<T>) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
result.clear();
for x in &self.columns {
if x.num < cols.len() {
result.push(cols[x.num].clone());
} else {
return err!(
"Line has only {} columns, but column {} was requested.",
cols.len(),
x.num + 1
);
}
}
Ok(())
}
pub fn write(&self, w: &mut dyn Write, cols: &[&str], delim: &str) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
let mut iter = self.columns.iter();
match iter.next() {
None => {}
Some(first) => {
w.write_all(cols[first.num].as_bytes())?;
for x in iter {
w.write_all(delim.as_bytes())?;
if x.num < cols.len() {
w.write_all(cols[x.num].as_bytes())?;
} else {
return err!(
"Line has only {} columns, but column {} was requested.",
cols.len(),
x.num + 1
);
}
}
}
};
w.write_all(&[b'\n'])?;
Ok(())
}
pub fn write2(&self, w: &mut dyn Write, cols: &TextLine, delim: u8) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
let mut iter = self.columns.iter();
match iter.next() {
None => {}
Some(first) => {
w.write_all(&cols[first.num])?;
for x in iter {
w.write_all(&[delim])?;
w.write_all(cols.get(x.num))?;
}
}
};
w.write_all(&[b'\n'])?;
Ok(())
}
fn fetch<'a>(&'a mut self, cols: &'a TextLine, col: usize) -> &'a [u8] {
let col = &self.columns[col];
if let Some(trans) = col.trans {
self.trans[trans].trans(cols.get(col.num), cols).unwrap()
} else {
cols.get(col.num)
}
}
pub fn write3(
&mut self,
w: &mut dyn Write,
cols: &TextLine,
text: &TextFileMode,
) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
if self.columns.is_empty() {
return Ok(());
}
text.write(w, self.fetch(cols, 0))?;
for i in 1..self.columns.len() {
w.write_all(&[text.delim])?;
text.write(w, self.fetch(cols, i))?;
}
Ok(())
}
pub fn write3s(&self, w: &mut dyn Write, cols: &StringLine, delim: u8) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
let mut iter = self.columns.iter();
match iter.next() {
None => {}
Some(first) => {
w.write_all(cols.get(first.num).as_bytes())?;
for x in iter {
w.write_all(&[delim])?;
w.write_all(cols.get(x.num).as_bytes())?;
}
}
};
Ok(())
}
pub fn write_sloppy(&self, cols: &[&str], rest: &str, w: &mut dyn Write) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
for x in &self.columns {
if x.num < cols.len() {
w.write_all(cols[x.num].as_bytes())?;
} else {
w.write_all(rest.as_bytes())?;
}
}
Ok(())
}
pub fn select_sloppy<T: AsRef<str> + Clone>(
&self,
cols: &[T],
restval: &T,
result: &mut Vec<T>,
) -> Result<()> {
if !self.did_lookup {
return cdx_err(CdxError::NeedLookup);
}
result.clear();
for x in &self.columns {
if x.num < cols.len() {
result.push(cols[x.num].clone());
} else {
result.push(restval.clone());
}
}
Ok(())
}
pub const fn get_cols(&self) -> &Vec<OutCol> {
&self.columns
}
#[allow(clippy::missing_const_for_fn)] pub fn get_cols_full(self) -> Vec<OutCol> {
self.columns
}
pub fn get_cols_num(&self) -> Vec<usize> {
let mut v = Vec::with_capacity(self.columns.len());
for x in &self.columns {
v.push(x.num);
}
v
}
pub fn lookup_cols(spec: &str, names: &[&str]) -> Result<Vec<usize>> {
let mut s = Self::new();
s.add_yes(spec)?;
s.lookup(names)?;
Ok(s.get_cols_num())
}
pub fn lookup_cols_full(spec: &str, names: &[&str]) -> Result<Vec<OutCol>> {
let mut s = Self::new();
s.add_yes(spec)?;
s.lookup(names)?;
Ok(s.get_cols_full())
}
pub fn lookup1(spec: &str, names: &[&str]) -> Result<usize> {
let mut s = Self::new();
s.add_yes(spec)?;
s.lookup(names)?;
if s.get_cols().len() != 1 {
return err!(
"Spec {} resolves to {} columns, rather than a single column",
spec,
s.get_cols().len()
);
}
Ok(s.get_cols_num()[0])
}
}
pub struct ColumnClump {
cols: Box<dyn ColumnFun>,
name: String,
text: TextFileMode,
}
impl fmt::Debug for ColumnClump {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "ColumnClump")
}
}
impl ColumnClump {
pub fn new(cols: Box<dyn ColumnFun>, name: &str, delim: u8) -> Self {
let text = TextFileMode {
delim,
..Default::default()
};
Self {
cols,
name: name.to_string(),
text,
}
}
pub fn from_spec(orig_spec: &str) -> Result<Self> {
let mut spec = orig_spec;
if spec.is_empty() {
return err!("Can't construct a group from an empty string");
}
let delim = spec.take_first();
if delim.len_utf8() != 1 {
return err!("Delimiter must be a plain ascii character : {}", orig_spec);
}
let parts = spec.split_once(':');
if parts.is_none() {
return err!(
"No colon found. Group format is DelimOutname:Columns : {}",
orig_spec
);
}
let parts = parts.unwrap();
let mut g = ColumnSet::new();
g.add_yes(parts.1)?;
let cols = Box::new(ReaderColumns::new(g));
let text = TextFileMode {
delim: delim as u8,
..Default::default()
};
Ok(Self {
cols,
name: parts.0.to_string(),
text,
})
}
}
impl ColumnFun for ColumnClump {
fn write(&mut self, w: &mut dyn Write, line: &TextLine, _text: &TextFileMode) -> Result<()> {
self.cols.write(w, line, &self.text)?;
Ok(())
}
fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
w.push(&self.name)
}
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.cols.lookup(fieldnames)
}
}
#[derive(Debug)]
pub struct ReaderColumns {
columns: ColumnSet,
}
impl ReaderColumns {
pub const fn new(columns: ColumnSet) -> Self {
Self { columns }
}
}
pub fn write_colname(w: &mut dyn Write, col: &OutCol, head: &StringLine) -> Result<()> {
if col.name.is_empty() {
w.write_all(head.get(col.num).as_bytes())?;
} else {
w.write_all(col.name.as_bytes())?;
}
Ok(())
}
impl ColumnFun for ReaderColumns {
fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()> {
self.columns.write3(w, line, text)?;
Ok(())
}
fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
for x in self.columns.get_cols() {
if x.name.is_empty() {
w.push(head.get(x.num))?;
} else {
w.push(&x.name)?;
}
}
Ok(())
}
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.columns.lookup(fieldnames)
}
}
#[derive(Default)]
pub struct Writer {
v: Vec<Box<dyn ColumnFun>>,
text: TextFileMode,
}
impl fmt::Debug for Writer {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Writer")
}
}
impl Writer {
pub fn new(text: TextFileMode) -> Self {
Self {
v: Vec::new(),
text,
}
}
pub fn is_empty(&self) -> bool {
self.v.is_empty()
}
pub fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
for x in self.v.iter_mut() {
x.lookup(fieldnames)?
}
Ok(())
}
pub fn push(&mut self, x: Box<dyn ColumnFun>) {
self.v.push(x);
}
pub fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
for x in self.v.iter() {
x.add_names(w, head)?;
}
Ok(())
}
pub fn write(&mut self, w: &mut dyn Write, line: &TextLine) -> Result<()> {
let mut iter = self.v.iter_mut();
match iter.next() {
None => {}
Some(first) => {
first.write(w, line, &self.text)?;
for x in iter {
w.write_all(&[self.text.delim])?;
x.write(w, line, &self.text)?;
}
}
};
w.write_all(&[b'\n'])?;
Ok(())
}
}
#[derive(Debug, Clone, Default)]
pub struct NamedCol {
pub name: String,
pub num: usize,
pub from_end: usize,
}
impl fmt::Display for NamedCol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Column {}", self.num + 1)?;
if !self.name.is_empty() {
write!(f, " ({})", self.name)
} else if self.from_end != 0 {
write!(f, " (+{})", self.from_end)
} else {
Ok(())
}
}
}
impl NamedCol {
pub fn new() -> Self {
Self::default()
}
pub fn new_from(spec: &str) -> Result<Self> {
let mut x = Self::default();
let rest = x.parse(spec)?;
if rest.is_empty() {
Ok(x)
} else {
err!("Extra stuff {} at the end of column spec {}", rest, spec)
}
}
pub fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
if !self.name.is_empty() {
self.num = ColumnSet::lookup_col(fieldnames, &self.name)?
} else if self.from_end > 0 {
if self.from_end > fieldnames.len() {
return err!(
"Requested column +{}, but there are only {} columns",
self.from_end,
fieldnames.len()
);
}
self.num = fieldnames.len() - self.from_end
}
Ok(())
}
pub fn parse<'a>(&mut self, orig_spec: &'a str) -> Result<&'a str> {
let mut spec = orig_spec;
self.num = 0;
self.name.clear();
if spec.is_empty() {
return err!("Empty string found where column name expected");
}
let mut ch = spec.first();
let was_plus = ch == '+';
if was_plus {
ch = spec.take_first();
}
if ch.is_ascii_digit() && ch != '0' {
let (a, b) = spec.to_usize();
self.num = a;
spec = b;
if was_plus {
self.from_end = self.num;
} else {
self.num -= 1;
}
Ok(spec)
} else if ch.is_alphabetic() {
if was_plus {
return err!("'+' must be follwed by a number : {}", orig_spec);
}
let pos = get_col_name(spec);
self.name.clear();
self.name += &spec[0..pos];
Ok(&spec[pos..])
} else {
err!("Bad parse of compare spec for {}", spec)
}
}
}
pub fn get_col_name(spec: &str) -> usize {
if spec.is_empty() {
return 0;
}
let mut sp = spec;
let mut ch = sp.take_first();
if !ch.is_alphabetic() {
return 0;
}
while !sp.is_empty() {
ch = sp.first();
if ch.is_alphanumeric() || ch == '_' {
sp = sp.skip_first();
} else {
break;
}
}
spec.len() - sp.len()
}
#[derive(Debug, Clone, Default)]
struct CompositeColumnPart {
prefix: String,
col: NamedCol,
}
impl CompositeColumnPart {
fn new() -> Self {
Self::default()
}
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
self.col.lookup(fieldnames)
}
}
#[derive(Debug, Clone, Default)]
pub struct CompositeColumn {
parts: Vec<CompositeColumnPart>,
suffix: String,
name: String,
}
impl CompositeColumn {
pub fn new(s: &str) -> Result<Self> {
let mut c = Self::default();
c.set(s)?;
Ok(c)
}
pub fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
for x in &mut self.parts {
x.lookup(fieldnames)?;
}
Ok(())
}
pub fn get(&self, t: &mut Vec<u8>, fields: &[&[u8]]) {
t.clear();
for x in &self.parts {
t.extend(x.prefix.as_bytes());
t.extend(fields[x.col.num]);
}
t.extend(self.suffix.as_bytes());
}
pub fn set(&mut self, spec: &str) -> Result<()> {
let name = spec.split_once(':');
if name.is_none() {
return err!("Composite Column Spec must start with ColName: : {}", spec);
}
let name = name.unwrap();
self.name = name.0.to_string();
let mut curr = name.1;
self.suffix.clear();
self.parts.clear();
const TAG: char = '^';
while !curr.is_empty() {
let ch = curr.take_first();
if ch == TAG {
if curr.is_empty() {
self.suffix.push(TAG);
} else if curr.first() == TAG {
self.suffix.push(TAG);
curr = curr.skip_first();
} else {
let mut p = CompositeColumnPart::new();
std::mem::swap(&mut p.prefix, &mut self.suffix);
if curr.first() == '{' {
curr = curr.skip_first();
curr = p.col.parse(curr)?;
if curr.is_empty() || curr.first() != '}' {
return err!("Closing bracket not found : {}", spec);
}
curr = curr.skip_first();
} else {
curr = p.col.parse(curr)?;
}
self.parts.push(p);
}
} else {
self.suffix.push(ch);
}
}
Ok(())
}
}
impl ColumnFun for CompositeColumn {
fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
w.push(&self.name)
}
fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()> {
for x in &self.parts {
text.write(w, x.prefix.as_bytes())?;
text.write(w, line.get(x.col.num))?;
}
text.write(w, self.suffix.as_bytes())?;
Ok(())
}
fn lookup(&mut self, fieldnames: &[&str]) -> Result<()> {
Self::lookup(self, fieldnames)
}
}
#[cfg(test)]
mod tests {
use super::*;
macro_rules! assert_err {
($expression:expr, $($pattern:tt)+) => {
match $expression {
$($pattern)+ => (),
ref e => panic!("expected `{}` but got `{:?}`", stringify!($($pattern)+), e),
}
}
}
#[test]
fn range() -> Result<()> {
let f: [&str; 5] = ["zero", "one", "two", "three", "four"];
let res: [OutCol; 5] = [
OutCol::from_num(0),
OutCol::from_num(1),
OutCol::from_num(2),
OutCol::from_num(3),
OutCol::from_num(4),
];
assert_eq!(
ColumnSet::range(&f, "(range,<p)")?,
[OutCol::from_num(1), OutCol::from_num(4)]
);
assert_eq!(ColumnSet::range(&f, "2-+2")?, res[1..=3]);
assert_eq!(ColumnSet::range(&f, "-")?, res);
assert_eq!(ColumnSet::range(&f, "2-")?, res[1..]);
assert_eq!(ColumnSet::range(&f, "-2")?, res[..2]);
assert_err!(ColumnSet::range(&f, "1-2-3"), Err(_));
Ok(())
}
#[test]
fn named_range() -> Result<()> {
let f: [&str; 5] = ["zero", "one", "two", "three", "four"];
let res: [OutCol; 5] = [
OutCol::new(0, "stuff"),
OutCol::new(1, "junk"),
OutCol::new(2, "this"),
OutCol::new(3, "that"),
OutCol::new(4, "other"),
];
assert_eq!(ColumnSet::range(&f, "stuff:1")?, res[0..1]);
assert_eq!(ColumnSet::ranges(&f, "stuff:1,junk:2")?, res[0..2]);
Ok(())
}
#[test]
fn do_get_col_name() {
assert_eq!(get_col_name(""), 0);
assert_eq!(get_col_name("..."), 0);
assert_eq!(get_col_name("_aaa"), 0);
assert_eq!(get_col_name("1aaa"), 0);
assert_eq!(get_col_name("abc"), 3);
assert_eq!(get_col_name("abc,"), 3);
assert_eq!(get_col_name("abc,aaa"), 3);
assert_eq!(get_col_name("a_b_c"), 5);
assert_eq!(get_col_name("a_ñ_c"), 6);
}
}