use crate::matcher::MatchMaker;
use crate::prelude::*;
use crate::util::find_close;
use std::collections::HashSet;
use std::str;
/// Policy applied when a column name is pushed that is already in the header.
#[derive(Debug, Copy, Clone, Default)]
pub enum DupColHandling {
    /// Reject the duplicate with an error. This is the default.
    #[default]
    Fail,
    /// Keep the duplicate name, after printing a warning to stderr.
    Allow,
    /// Make the name unique by appending a number.
    Numeric,
}

impl DupColHandling {
    /// Parses a mode name, ignoring ASCII case.
    ///
    /// # Errors
    /// Fails when `spec` is none of `fail`, `allow` or `numeric`.
    pub fn new(spec: &str) -> Result<Self> {
        match spec.to_ascii_lowercase().as_str() {
            "fail" => Ok(Self::Fail),
            "allow" => Ok(Self::Allow),
            "numeric" => Ok(Self::Numeric),
            _ => err!("Duplicate Column Mode must be one of : Fail, Allow, Numeric"),
        }
    }
}
/// One `old.new` rename request.
#[derive(Default, Clone, Debug)]
struct NameMap {
    /// The name to be replaced (text before the first `.`).
    from: String,
    /// The replacement name (text after the first `.`).
    to: String,
}

impl NameMap {
    /// Parses a rename of the form `old.new`, splitting at the first `.`.
    ///
    /// # Errors
    /// Fails when `spec` contains no `.`.
    fn new(spec: &str) -> Result<Self> {
        match spec.split_once('.') {
            Some((old, new)) => Ok(Self { from: old.to_owned(), to: new.to_owned() }),
            None => err!("Format for rename is 'old.new' not '{spec}'"),
        }
    }
}

impl fmt::Display for NameMap {
    /// Formats the rename back into its `old.new` spelling.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { from, to } = self;
        write!(f, "{from}.{to}")
    }
}
/// Accumulates the column names that make up an output header.
#[derive(Debug, Default, Clone)]
pub struct ColumnHeader {
/// Column names collected so far, in the order they were pushed.
cols: Vec<String>,
/// What `push` does when the pushed name is already present.
dups: DupColHandling,
/// Pending renames; `push` removes an entry once it has applied it.
rename: Vec<NameMap>,
/// When set, `check_rename` does not report renames that were never applied.
sloppy: bool,
}
/// Returns `true` when the valid-name prefix found by [`get_col_name`] spans
/// all of `name`.
///
/// Because the comparison is on lengths, the empty string is reported as valid.
#[must_use]
pub fn is_valid_column_name(name: &str) -> bool {
    let valid_prefix = get_col_name(name);
    valid_prefix == name.len()
}
/// Builds a column name from arbitrary text.
///
/// * An empty input becomes `X_`.
/// * An alphabetic first character is kept; any other first character is
///   *replaced* by `X_` (it is not carried over into the result).
/// * Every later character that is not alphanumeric becomes `_`.
fn make_valid_column_name(name: &str) -> String {
    let mut chars = name.chars();
    let Some(first) = chars.next() else {
        return "X_".to_string();
    };
    let mut out = String::with_capacity(name.len() + 2);
    if first.is_alphabetic() {
        out.push(first);
    } else {
        out.push_str("X_");
    }
    out.extend(chars.map(|c| if c.is_alphanumeric() { c } else { '_' }));
    out
}
/// Checks `name` with [`is_valid_column_name`].
///
/// # Errors
/// Fails with "Invalid column name" when the check does not pass.
pub fn validate_column_name(name: &str) -> Result<()> {
    if !is_valid_column_name(name) {
        return err!("Invalid column name {}", name);
    }
    Ok(())
}
/// Returns the `col`-th (zero-based) field of `data`, splitting on `delim`.
///
/// A single trailing `\n` is removed from the returned field. When `data` has
/// fewer than `col + 1` fields, an empty slice is returned.
#[must_use]
pub fn get_col(data: &[u8], col: usize, delim: u8) -> &[u8] {
    data.split(|&byte| byte == delim)
        .nth(col)
        .map_or(&data[..0], |field| field.strip_suffix(b"\n").unwrap_or(field))
}
impl ColumnHeader {
    /// Creates an empty header with default settings.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Selects how [`Self::push`] treats a name that is already present.
    pub const fn set_handling(&mut self, dups: DupColHandling) {
        self.dups = dups;
    }

    /// Forgets all column names. Renames and settings are left untouched.
    pub fn clear(&mut self) {
        self.cols.clear();
    }

    /// Registers renames from a comma-separated list of `old.new` pairs.
    /// An empty `spec` registers nothing.
    ///
    /// # Errors
    /// Fails on the first pair that is not of the form `old.new`; pairs
    /// before it have already been registered.
    pub fn rename(&mut self, spec: &str) -> Result<()> {
        if spec.is_empty() {
            return Ok(());
        }
        for pair in spec.split(',') {
            self.rename.push(NameMap::new(pair)?);
        }
        Ok(())
    }

    /// Makes [`Self::check_rename`] tolerate renames that were never applied.
    pub const fn rename_sloppy(&mut self) {
        self.sloppy = true;
    }

    /// Returns the column names as borrowed string slices, in order.
    #[must_use]
    pub fn field_names(&self) -> Vec<&str> {
        self.cols.iter().map(String::as_str).collect()
    }

    /// Pushes every name in `cols` via [`Self::push`], stopping at the first error.
    pub fn push_all(&mut self, cols: &StringLine) -> Result<()> {
        for name in cols {
            self.push(name)?;
        }
        Ok(())
    }

    /// Pushes every name in `cols` via [`Self::push_unchecked`].
    pub fn push_all_unchecked(&mut self, cols: &StringLine) {
        for name in cols {
            self.push_unchecked(name);
        }
    }

    /// Returns `true` when `name` is already one of the column names.
    #[must_use]
    pub fn contains(&self, name: &str) -> bool {
        self.cols.iter().any(|col| col == name)
    }

    /// Appends `name` exactly as given: no validation, no duplicate handling.
    pub fn push_unchecked(&mut self, name: &str) {
        self.cols.push(name.to_string());
    }

    /// Reports renames that are still pending.
    ///
    /// # Errors
    /// Fails, listing the pending renames, unless sloppy mode is on or no
    /// renames are left.
    pub fn check_rename(&self) -> Result<()> {
        if self.sloppy || self.rename.is_empty() {
            return Ok(());
        }
        let unused = self.rename.iter().map(ToString::to_string).collect::<Vec<_>>().join(",");
        err!("Unused rename : '{}'", unused)
    }

    /// Returns `name` unchanged when it passes [`is_valid_column_name`],
    /// otherwise the result of `make_valid_column_name`.
    fn valid_column_name(name: &str) -> String {
        if is_valid_column_name(name) { name.to_string() } else { make_valid_column_name(name) }
    }

    /// Appends a column name, sanitising it first.
    ///
    /// If the sanitised name is already present, the first pending rename
    /// whose `from` matches is applied (and consumed). With no matching
    /// rename, the configured [`DupColHandling`] decides.
    ///
    /// # Errors
    /// * the rename target is itself already present;
    /// * the name is a duplicate and the mode is `Fail`;
    /// * the mode is `Numeric` and every suffix `1..10000` is already taken.
    ///   Previously this case returned `Ok` without adding any column,
    ///   leaving the header one column short.
    pub fn push(&mut self, name: &str) -> Result<()> {
        let name = Self::valid_column_name(name);
        if !self.contains(&name) {
            self.cols.push(name);
            return Ok(());
        }

        if let Some(k) = self.rename.iter().position(|r| r.from == name) {
            if self.contains(&self.rename[k].to) {
                return err!(
                    "Applied rename of {} to {}, but {} was already used",
                    self.rename[k].from,
                    self.rename[k].to,
                    self.rename[k].to
                );
            }
            // The rename is consumed so that check_rename can report leftovers.
            let applied = self.rename.remove(k);
            self.cols.push(applied.to);
            return Ok(());
        }

        match self.dups {
            DupColHandling::Fail => err!("Duplicate column name {}", name),
            DupColHandling::Allow => {
                eprintln!("Warning : creating duplicate column name {name}");
                self.cols.push(name);
                Ok(())
            }
            DupColHandling::Numeric => {
                let unique =
                    (1..10000).map(|n| format!("{name}{n}")).find(|cand| !self.contains(cand));
                match unique {
                    Some(new_name) => {
                        self.cols.push(new_name);
                        Ok(())
                    }
                    None => err!("No unused numeric suffix left for duplicate column name {}", name),
                }
            }
        }
    }

    /// Renders the header as one line terminated by `\n`. When the file
    /// mode's `head_mode.has_cdx()` is true, the line starts with ` CDX`
    /// followed by the delimiter.
    #[must_use]
    pub fn get_head(&self, text: &TextFileMode) -> String {
        // +6 covers " CDX", one delimiter and the newline.
        let mut res = String::with_capacity(self.get_size() + 6);
        if text.head_mode.has_cdx() {
            res.push_str(" CDX");
            res.push(text.delim as char);
        }
        self.add_head(&mut res, text);
        res.push('\n');
        res
    }

    /// Renders just the delimiter-joined names: no prefix, no newline.
    #[must_use]
    pub fn get_head_short(&self, text: &TextFileMode) -> String {
        let mut res = String::with_capacity(self.get_size());
        self.add_head(&mut res, text);
        res
    }

    /// Byte length of the delimiter-joined names.
    ///
    /// Uses a saturating subtraction: with no columns the original
    /// `len() - 1` underflowed (panic in debug builds, a huge capacity
    /// request in release builds).
    fn get_size(&self) -> usize {
        let names: usize = self.cols.iter().map(String::len).sum();
        names + self.cols.len().saturating_sub(1)
    }

    /// Appends the names to `res`, separated by the file mode's delimiter.
    #[allow(clippy::trivially_copy_pass_by_ref)]
    fn add_head(&self, res: &mut String, text: &TextFileMode) {
        let delim = text.delim as char;
        for (i, col) in self.cols.iter().enumerate() {
            if i > 0 {
                res.push(delim);
            }
            res.push_str(col);
        }
    }
}
/// A column selection: the textual specs plus the columns they resolve to.
#[derive(Debug, Default, Clone)]
pub struct ColumnSet {
/// Specs of columns to include.
pub pos: Vec<String>,
/// Specs of columns to exclude.
pub neg: Vec<String>,
/// Resolved output columns; rebuilt from scratch by every `lookup` call.
pub columns: Vec<OutCol>,
/// Set by `lookup`; the `select*` and `write*` methods return an error while it is false.
pub did_lookup: bool,
/// Transforms parsed by `add` from the tail of a spec (after a `+` followed by a letter).
pub trans: TransList,
/// Aggregators parsed by `add` from the part of a spec after `::`.
pub agg: AggList,
}
/// One resolved output column.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OutCol {
    /// Zero-based index of the column in the input.
    pub num: usize,
    /// Name to use for the column on output; empty when none was given.
    pub name: String,
    /// Not used within this file; both constructors set it to `None`.
    pub oc_num_trans: Option<usize>,
    /// Not used within this file; both constructors set it to `None`.
    pub oc_num_agg: Option<usize>,
}

impl OutCol {
    /// Creates a column with index `num` and output name `name`.
    #[must_use]
    pub fn new(num: usize, name: &str) -> Self {
        Self { name: name.to_owned(), ..Self::from_num(num) }
    }

    /// Creates a column with index `num` and no output name.
    #[must_use]
    pub const fn from_num(num: usize) -> Self {
        Self { num, name: String::new(), oc_num_trans: None, oc_num_agg: None }
    }
}
/// A value together with the set of columns it applies to.
#[derive(Debug, Default, Clone)]
pub struct ScopedValue {
    /// Columns the value applies to.
    cols: ColumnSet,
    /// The value itself.
    value: String,
}

impl ScopedValue {
    /// Parses `value<del>columns`.
    ///
    /// * An empty `spec` yields an empty value with no columns.
    /// * Without any `del`, the whole of `spec` is the value.
    /// * A leading `del` is skipped. If nothing follows it, the value is the
    ///   delimiter itself; if another `del` follows, the value starts with
    ///   one literal delimiter and that second `del` then acts as the
    ///   separator (so `,,cols` with `del = ','` means value `,` for `cols`).
    ///
    /// The original called `spec.skip_first()` without using its result, so
    /// the leading delimiter was never consumed and the "nothing follows"
    /// case could not be reached. The prefix is now really removed.
    ///
    /// # Errors
    /// Propagates any error from parsing the column part.
    pub fn new(spec: &str, del: char) -> Result<Self> {
        let mut s = Self::default();
        if spec.is_empty() {
            return Ok(s);
        }
        let mut spec = spec;
        if let Some(rest) = spec.strip_prefix(del) {
            spec = rest;
            if spec.is_empty() {
                s.value.push(del);
                return Ok(s);
            }
            if spec.starts_with(del) {
                s.value.push(del);
            }
        }
        // Everything up to the first delimiter is the value, the rest names the columns.
        match spec.split_once(del) {
            Some((value, cols)) => {
                s.value.push_str(value);
                s.cols.add_yes(cols)?;
            }
            None => s.value.push_str(spec),
        }
        Ok(s)
    }

    /// Builds a scoped value from an already separated value and column spec.
    ///
    /// # Errors
    /// Propagates any error from parsing `cols`.
    pub fn new2(value: &str, cols: &str) -> Result<Self> {
        let mut s = Self::default();
        s.cols.add_yes(cols)?;
        s.value = value.to_string();
        Ok(s)
    }

    /// Resolves the column spec against `field_names`.
    pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.cols.lookup(field_names)
    }
}
#[derive(Debug, Default, Clone)]
pub struct ScopedValues {
default: String,
data: Vec<ScopedValue>,
has_value: Vec<bool>,
strings: Vec<String>,
ints: Vec<isize>,
floats: Vec<f64>,
}
impl ScopedValues {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn set_default(&mut self, d: &str) {
self.default = d.to_string();
}
pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
self.has_value.clear();
self.has_value.resize(field_names.len(), false);
self.strings.clear();
self.strings.resize(field_names.len(), self.default.clone());
for x in &mut self.data {
x.lookup(field_names)?;
for i in 0..x.cols.columns.len() {
let j = x.cols.columns[i].num;
self.strings[j].clone_from(&x.value);
self.has_value[j] = true;
}
}
Ok(())
}
pub fn make_ints(&mut self) -> Result<()> {
self.ints.clear();
for i in 0..self.strings.len() {
self.ints[i] = self.get(i).to_isize_whole(self.get(i).as_bytes(), "number")?;
}
Ok(())
}
pub fn make_floats(&mut self) -> Result<()> {
self.floats.clear();
for i in 0..self.strings.len() {
self.floats[i] = self.get(i).to_f64_whole(self.get(i).as_bytes(), "float")?;
}
Ok(())
}
pub fn add(&mut self, spec: &str, del: char) -> Result<()> {
self.data.push(ScopedValue::new(spec, del)?);
Ok(())
}
pub fn add2(&mut self, value: &str, cols: &str) -> Result<()> {
self.data.push(ScopedValue::new2(value, cols)?);
Ok(())
}
#[must_use]
pub fn get(&self, col: usize) -> &str {
if col < self.strings.len() { &self.strings[col] } else { self.strings.last().unwrap() }
}
#[must_use]
pub fn get_int(&self, col: usize) -> isize {
if col < self.ints.len() { self.ints[col] } else { *self.ints.last().unwrap() }
}
#[must_use]
pub fn get_float(&self, col: usize) -> f64 {
if col < self.floats.len() { self.floats[col] } else { *self.floats.last().unwrap() }
}
#[must_use]
pub fn has_value(&self, col: usize) -> bool {
if col < self.has_value.len() { self.has_value[col] } else { false }
}
#[must_use]
pub const fn is_empty(&self) -> bool {
self.data.is_empty()
}
}
/// Something that contributes one or more columns to an output line.
pub trait ColumnFun {
/// Adds this item's output column name(s) to `w`; `head` is the input header line.
fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()>;
/// Writes this item's value(s) for the input `line` to `w`.
fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()>;
/// Resolves any column references against the input column names.
fn lookup(&mut self, field_names: &[&str]) -> Result<()>;
}
/// An output column whose value is computed by an expression.
#[derive(Debug, Default)]
pub struct ColumnExpr {
    /// Output column name; empty when the spec had no `name:` prefix.
    name: String,
    /// The expression that produces the value.
    expr: Expr,
}

impl ColumnExpr {
    /// Parses `name:expression`, or a bare expression when there is no `:`.
    /// The split happens at the first `:`.
    ///
    /// # Errors
    /// Propagates any error from building the expression.
    pub fn new(spec: &str) -> Result<Self> {
        let (name, body) = spec.split_once(':').unwrap_or(("", spec));
        Ok(Self { name: name.to_owned(), expr: Expr::new(body)? })
    }
}

impl ColumnFun for ColumnExpr {
    /// Contributes the column's name.
    fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
        w.push(&self.name)
    }

    /// Evaluates the expression on `line` and writes the result's `Display` form.
    fn write(&mut self, w: &mut dyn Write, line: &TextLine, _text: &TextFileMode) -> Result<()> {
        let value = self.expr.eval(line);
        write!(w, "{value}")?;
        Ok(())
    }

    /// Resolves the column references inside the expression.
    fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.expr.lookup(field_names)
    }
}
/// An output column that copies one input column.
#[derive(Debug, Default)]
pub struct ColumnSingle {
    /// The input column to copy.
    col: NamedCol,
    /// Output name override; both constructors leave it empty, in which case
    /// the input header's name is used.
    new_name: String,
}

impl ColumnSingle {
    /// Creates a column that copies `col` (cloned).
    #[must_use]
    pub fn with_named_col(col: &NamedCol) -> Self {
        Self { col: col.clone(), new_name: String::new() }
    }

    /// Creates a column from a textual column spec.
    ///
    /// # Errors
    /// Propagates any error from [`NamedCol::new_from`].
    pub fn with_name(col: &str) -> Result<Self> {
        let col = NamedCol::new_from(col)?;
        Ok(Self { col, new_name: String::new() })
    }
}

impl ColumnFun for ColumnSingle {
    /// Contributes the override name if set, else the input header's name for the column.
    fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
        if !self.new_name.is_empty() {
            return w.push(&self.new_name);
        }
        w.push(&head[self.col.num])
    }

    /// Writes the selected field of `line` through `text.write`.
    fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()> {
        let field = &line[self.col.num];
        text.write(w, field)?;
        Ok(())
    }

    /// Resolves the column against `field_names`.
    fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.col.lookup(field_names)
    }
}
/// An output item that reproduces the whole input line.
#[derive(Debug, Default, Copy, Clone)]
pub struct ColumnWhole;

impl ColumnFun for ColumnWhole {
    /// Contributes every name of the input header.
    fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
        w.push_all(head)
    }

    /// Writes the raw input line without its trailing newline.
    ///
    /// Only an actual trailing `\n` is removed. The original always dropped
    /// the last byte, which underflowed (`len() - 1`) on an empty line and
    /// cut off real data when the line had no newline.
    fn write(&mut self, w: &mut dyn Write, line: &TextLine, _text: &TextFileMode) -> Result<()> {
        let raw: &[u8] = &line.line()[..];
        let body = raw.strip_suffix(b"\n").unwrap_or(raw);
        w.write_all(body)?;
        Ok(())
    }

    /// Nothing to resolve.
    fn lookup(&mut self, _field_names: &[&str]) -> Result<()> {
        Ok(())
    }
}
/// An output column holding a running counter.
#[derive(Debug, Default, Clone)]
pub struct ColumnCount {
    /// The value that the next `write` will emit.
    num: isize,
    /// Output column name.
    name: String,
}

impl ColumnCount {
    /// Creates a counter that starts at `num` and is named `name`.
    #[must_use]
    pub fn new(num: isize, name: &str) -> Self {
        Self { name: name.to_owned(), num }
    }
}

impl ColumnFun for ColumnCount {
    /// Contributes the column's name.
    fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
        w.push(&self.name)
    }

    /// Writes the current counter in decimal, then increments it. The counter
    /// is not advanced when the write fails.
    fn write(&mut self, w: &mut dyn Write, _line: &TextLine, _text: &TextFileMode) -> Result<()> {
        let digits = self.num.to_string();
        w.write_all(digits.as_bytes())?;
        self.num += 1;
        Ok(())
    }

    /// Nothing to resolve.
    fn lookup(&mut self, _field_names: &[&str]) -> Result<()> {
        Ok(())
    }
}
/// An output column that holds the same fixed bytes on every line.
#[derive(Debug, Default, Clone)]
pub struct ColumnLiteral {
    /// The bytes to emit.
    value: Vec<u8>,
    /// Output column name.
    name: String,
}

impl ColumnLiteral {
    /// Creates a literal column; both arguments are copied.
    #[must_use]
    pub fn new(value: &[u8], name: &str) -> Self {
        Self { name: name.to_owned(), value: value.to_owned() }
    }
}

impl ColumnFun for ColumnLiteral {
    /// Contributes the column's name.
    fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
        w.push(&self.name)
    }

    /// Writes the fixed value through `text.write`; the input line is ignored.
    fn write(&mut self, w: &mut dyn Write, _line: &TextLine, text: &TextFileMode) -> Result<()> {
        text.write(w, &self.value)?;
        Ok(())
    }

    /// Nothing to resolve.
    fn lookup(&mut self, _field_names: &[&str]) -> Result<()> {
        Ok(())
    }
}
/// Opaque `Debug` for trait objects: always prints the fixed text `ColumnFun`.
impl fmt::Debug for dyn ColumnFun {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ColumnFun")
    }
}
impl ColumnSet {
    /// Creates an empty column set.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when neither include nor exclude specs have been added.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.pos.is_empty() && self.neg.is_empty()
    }

    /// Creates a set from an include spec.
    pub fn from_spec(spec: &str) -> Result<Self> {
        let mut s = Self::default();
        s.add_yes(spec)?;
        Ok(s)
    }

    /// Adds `spec` as columns to include.
    pub fn add_yes(&mut self, spec: &str) -> Result<()> {
        self.add(spec, false)
    }

    /// Adds `spec` as columns to exclude.
    pub fn add_no(&mut self, spec: &str) -> Result<()> {
        self.add(spec, true)
    }

    /// Adds a single item. A leading `~` is stripped and flips the meaning of
    /// `negate` for that item.
    pub fn add_one(&mut self, s: &str, negate: bool) {
        let (item, exclude) = match s.strip_prefix('~') {
            Some(rest) => (rest, !negate),
            None => (s, negate),
        };
        let target = if exclude { &mut self.neg } else { &mut self.pos };
        target.push(item.to_string());
    }

    /// Finds the first `+` that is directly followed by an alphabetic
    /// character and splits there: (text before the `+`, text after it).
    fn find_trans(spec: &str) -> Option<(&str, &str)> {
        let mut prev_plus = false;
        for (i, ch) in spec.char_indices() {
            if prev_plus && ch.is_alphabetic() {
                // `i - 1` is the '+' itself, which is one byte wide.
                return Some((&spec[..i - 1], &spec[i..]));
            }
            // A '+' only counts when the previous character did not.
            prev_plus = !prev_plus && ch == '+';
        }
        None
    }

    /// Parses a full spec: `columns[+transforms][::agg+agg...]`.
    ///
    /// The part after `::` is split on `+` into aggregators, the part after
    /// the transform `+` replaces `self.trans`, and the remainder is split
    /// into items that go to [`Self::add_one`]. An item that starts with `(`
    /// extends to the position reported by `find_close`; other items end at
    /// the next `,`.
    pub fn add(&mut self, in_spec: &str, negate: bool) -> Result<()> {
        let mut spc = in_spec;
        if let Some((cols, aggs)) = spc.split_once("::") {
            spc = cols;
            for agg in aggs.split('+') {
                self.agg.push(agg)?;
            }
        }
        if let Some((cols, trans)) = Self::find_trans(spc) {
            spc = cols;
            self.trans = TransList::new(trans)?;
        }
        loop {
            if spc.first() == '(' {
                // NOTE(review): this relies on `find_close` returning the offset just
                // past the item (including any separator); if it stops right after
                // ')', a following ",x" produces an empty item on the next pass.
                // Confirm against `find_close`.
                let pos = find_close(spc)?;
                self.add_one(&spc[..pos], negate);
                spc = &spc[pos..];
                if spc.is_empty() {
                    break;
                }
            } else if let Some((head, tail)) = spc.split_once(',') {
                self.add_one(head, negate);
                spc = tail;
            } else {
                self.add_one(spc, negate);
                break;
            }
        }
        Ok(())
    }

    /// Returns the index of the first entry of `field_names` equal to `colname`.
    ///
    /// # Errors
    /// Fails with "... not found" when no entry matches.
    pub fn lookup_col(field_names: &[&str], colname: &str) -> Result<usize> {
        match field_names.iter().position(|field| *field == colname) {
            Some(idx) => Ok(idx),
            None => err!("{} not found", colname),
        }
    }

    /// Resolves one column reference to a zero-based index.
    ///
    /// * `+N` counts from the end: `+1` is the last column.
    /// * Text starting with a digit `1`-`9` is a one-based column number.
    /// * Anything else is looked up by name.
    ///
    /// # Errors
    /// `+N` fails when `N` is zero or exceeds the number of columns. The
    /// original accepted `+0` and returned an index one past the last column.
    /// Number parsing and name lookup errors are propagated.
    pub fn single(field_names: &[&str], colname: &str) -> Result<usize> {
        if let Some(stripped) = colname.strip_prefix('+') {
            let n = stripped.to_usize_whole(colname.as_bytes(), "column")?;
            let len = field_names.len();
            if n == 0 || n > len {
                return err!("Column {} out of bounds", colname);
            }
            return Ok(len - n);
        }
        let ch = colname.first();
        if ch.is_ascii_digit() && ch != '0' {
            let n = colname.to_usize_whole(colname.as_bytes(), "column")?;
            if n < 1 { err!("Column {} out of bounds", colname) } else { Ok(n - 1) }
        } else {
            Self::lookup_col(field_names, colname)
        }
    }

    /// Returns the indices of all field names accepted by the matcher built from `rng`.
    fn match_range(field_names: &[&str], rng: &str) -> Result<Vec<usize>> {
        let matcher = MatchMaker::make(rng)?;
        let mut hits = Vec::new();
        for (idx, field) in field_names.iter().enumerate() {
            if matcher.smatch(field) {
                hits.push(idx);
            }
        }
        Ok(hits)
    }

    /// Wraps each index in an [`OutCol`] carrying `name`.
    fn to_outcols(data: &[usize], name: &str) -> Vec<OutCol> {
        data.iter().map(|&num| OutCol::new(num, name)).collect()
    }

    /// Builds a set from `spec` and resolves it against `names`.
    fn resolved(spec: &str, names: &[&str]) -> Result<Self> {
        let mut set = Self::from_spec(spec)?;
        set.lookup(names)?;
        Ok(set)
    }

    /// Fails with `NeedLookup` until [`Self::lookup`] has been called.
    fn ensure_lookup(&self) -> Result<()> {
        if self.did_lookup { Ok(()) } else { cdx_err(CdxError::NeedLookup) }
    }

    /// Parses a full spec and resolves it against `field_names`.
    pub fn ranges(field_names: &[&str], rng: &str) -> Result<Vec<OutCol>> {
        Ok(Self::resolved(rng, field_names)?.get_cols_full())
    }

    /// Resolves one item: `[name:]range`, where range is `(pattern)`, a single
    /// column, or `start-end`. A missing start means the first column and a
    /// missing end means the last one.
    ///
    /// # Errors
    /// Fails on an empty item, a `(` item that is too short to hold a pattern
    /// (the original panicked slicing it), more than one `-`, a start after
    /// the end, or any error from resolving the individual columns.
    fn range(field_names: &[&str], rng: &str) -> Result<Vec<OutCol>> {
        if rng.is_empty() {
            return err!("Empty Range {}", rng);
        }
        let (name, range) = rng.split_once(':').unwrap_or(("", rng));
        if range.first() == '(' {
            // Drop the opening '(' and the final byte (the closing ')').
            let Some(pattern) = range.get(1..range.len() - 1) else {
                return err!("Malformed Range {}", rng);
            };
            return Ok(Self::to_outcols(&Self::match_range(field_names, pattern)?, name));
        }
        let mut parts: Vec<&str> = range.split('-').collect();
        if parts.len() > 2 {
            return err!("Malformed Range {}", rng);
        }
        if parts[0].is_empty() {
            parts[0] = "1";
        }
        let start = Self::single(field_names, parts[0])?;
        let end = if parts.len() == 1 {
            start
        } else {
            if parts[1].is_empty() {
                parts[1] = "+1";
            }
            Self::single(field_names, parts[1])?
        };
        if start > end {
            return err!("Start greater than end : {}", rng);
        }
        Ok((start..=end).map(|num| OutCol::new(num, name)).collect())
    }

    /// Resolves the specs against `field_names` and rebuilds `columns`.
    ///
    /// Excluded columns are removed from the result. With no include specs,
    /// every column that is not excluded is selected, in input order.
    pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.trans.lookup(field_names)?;
        self.did_lookup = true;
        self.columns = Vec::new();

        let mut no_cols: HashSet<usize> = HashSet::new();
        for spec in &self.neg {
            no_cols.extend(Self::range(field_names, spec)?.into_iter().map(|col| col.num));
        }

        if self.pos.is_empty() {
            let kept = (0..field_names.len()).filter(|num| !no_cols.contains(num));
            self.columns.extend(kept.map(OutCol::from_num));
        } else {
            for spec in &self.pos {
                let found = Self::range(field_names, spec)?;
                self.columns.extend(found.into_iter().filter(|col| !no_cols.contains(&col.num)));
            }
        }
        Ok(())
    }

    /// Replaces the contents of `result` with clones of the selected entries of `cols`.
    ///
    /// # Errors
    /// Fails before `lookup`, or when a selected column is beyond `cols`.
    pub fn select<T: AsRef<str> + Clone>(&self, cols: &[T], result: &mut Vec<T>) -> Result<()> {
        self.ensure_lookup()?;
        result.clear();
        for col in &self.columns {
            match cols.get(col.num) {
                Some(value) => result.push(value.clone()),
                None => {
                    return err!(
                        "Line has only {} columns, but column {} was requested.",
                        cols.len(),
                        col.num + 1
                    );
                }
            }
        }
        Ok(())
    }

    /// Writes the selected entries of `cols`, joined by `delim` and followed by a newline.
    ///
    /// # Errors
    /// Fails before `lookup`, or when a selected column is beyond `cols`.
    /// The original indexed the first selected column without a bounds check
    /// and panicked; every column is now checked the same way.
    pub fn write(&self, w: &mut dyn Write, cols: &[&str], delim: &str) -> Result<()> {
        self.ensure_lookup()?;
        for (i, col) in self.columns.iter().enumerate() {
            if i > 0 {
                w.write_all(delim.as_bytes())?;
            }
            let Some(field) = cols.get(col.num) else {
                return err!(
                    "Line has only {} columns, but column {} was requested.",
                    cols.len(),
                    col.num + 1
                );
            };
            w.write_all(field.as_bytes())?;
        }
        w.write_all(b"\n")?;
        Ok(())
    }

    /// Writes the selected fields of `cols`, joined by `delim` and followed by a newline.
    ///
    /// All fields, including the first, are fetched with `cols.get`; the
    /// original used indexing for the first field only.
    pub fn write2(&self, w: &mut dyn Write, cols: &TextLine, delim: u8) -> Result<()> {
        self.ensure_lookup()?;
        for (i, col) in self.columns.iter().enumerate() {
            if i > 0 {
                w.write_all(&[delim])?;
            }
            w.write_all(cols.get(col.num))?;
        }
        w.write_all(b"\n")?;
        Ok(())
    }

    /// Fetches one field of `cols` and passes it through the transform list.
    fn fetch<'a>(
        col: &'a OutCol,
        mut_trans: &'a mut TransList,
        cols: &'a TextLine,
    ) -> Result<&'a [u8]> {
        mut_trans.trans(cols.get(col.num), cols)
    }

    /// Writes the selected, transformed fields without a trailing newline.
    ///
    /// Without aggregators, each field is written through `text.write`,
    /// separated by the file mode's delimiter. With aggregators, each one is
    /// reset, fed every selected field, and then writes its own result.
    /// Nothing is written when no columns are selected.
    pub fn write3(
        &mut self,
        w: &mut dyn Write,
        cols: &TextLine,
        text: &TextFileMode,
    ) -> Result<()> {
        self.ensure_lookup()?;
        if self.columns.is_empty() {
            return Ok(());
        }
        if self.agg.is_empty() {
            for (i, col) in self.columns.iter().enumerate() {
                if i > 0 {
                    w.write_all(&[text.delim])?;
                }
                text.write(w, Self::fetch(col, &mut self.trans, cols)?)?;
            }
        } else {
            let mut is_first = true;
            for agg in &mut self.agg.v {
                agg.reset();
                for col in &self.columns {
                    let val = Self::fetch(col, &mut self.trans, cols)?;
                    agg.add(val);
                }
                if is_first {
                    is_first = false;
                } else {
                    w.write_all(&[text.delim])?;
                }
                agg.write(w, cols, text)?;
            }
        }
        Ok(())
    }

    /// Writes the selected entries of a string line, joined by `delim`. No
    /// newline is written.
    pub fn write3s(&self, w: &mut dyn Write, cols: &StringLine, delim: u8) -> Result<()> {
        self.ensure_lookup()?;
        for (i, col) in self.columns.iter().enumerate() {
            if i > 0 {
                w.write_all(&[delim])?;
            }
            w.write_all(cols.get(col.num).as_bytes())?;
        }
        Ok(())
    }

    /// Writes the selected entries of `cols` back to back, substituting
    /// `rest` for any column beyond `cols`. No delimiter or newline is written.
    pub fn write_sloppy(&self, cols: &[&str], rest: &str, w: &mut dyn Write) -> Result<()> {
        self.ensure_lookup()?;
        for col in &self.columns {
            let field = cols.get(col.num).copied().unwrap_or(rest);
            w.write_all(field.as_bytes())?;
        }
        Ok(())
    }

    /// Like [`Self::select`], but a column beyond `cols` yields a clone of
    /// `restval` instead of an error.
    pub fn select_sloppy<T: AsRef<str> + Clone>(
        &self,
        cols: &[T],
        restval: &T,
        result: &mut Vec<T>,
    ) -> Result<()> {
        self.ensure_lookup()?;
        result.clear();
        result.extend(self.columns.iter().map(|col| cols.get(col.num).unwrap_or(restval).clone()));
        Ok(())
    }

    /// Borrows the resolved columns.
    #[must_use]
    pub const fn get_cols(&self) -> &Vec<OutCol> {
        &self.columns
    }

    /// Consumes the set and returns the resolved columns.
    // Not const: the method moves a Vec out of `self`.
    #[allow(clippy::missing_const_for_fn)]
    #[must_use]
    pub fn get_cols_full(self) -> Vec<OutCol> {
        self.columns
    }

    /// Returns just the indices of the resolved columns.
    #[must_use]
    pub fn get_cols_num(&self) -> Vec<usize> {
        self.columns.iter().map(|col| col.num).collect()
    }

    /// Parses `spec`, resolves it against `names`, and returns the column indices.
    pub fn lookup_cols(spec: &str, names: &[&str]) -> Result<Vec<usize>> {
        Ok(Self::resolved(spec, names)?.get_cols_num())
    }

    /// Parses `spec`, resolves it against `names`, and returns the columns.
    pub fn lookup_cols_full(spec: &str, names: &[&str]) -> Result<Vec<OutCol>> {
        Ok(Self::resolved(spec, names)?.get_cols_full())
    }

    /// Parses `spec` and resolves it to exactly one column index.
    ///
    /// # Errors
    /// Fails when the spec resolves to zero or to more than one column.
    pub fn lookup1(spec: &str, names: &[&str]) -> Result<usize> {
        let set = Self::resolved(spec, names)?;
        match set.columns.as_slice() {
            [only] => Ok(only.num),
            found => err!(
                "Spec {} resolves to {} columns, rather than a single column",
                spec,
                found.len()
            ),
        }
    }
}
/// Several columns written as one output column, using their own delimiter.
pub struct ColumnClump {
    /// Produces the inner columns.
    cols: Box<dyn ColumnFun>,
    /// Name of the single output column.
    name: String,
    /// File mode handed to the inner writer; carries the inner delimiter.
    text: TextFileMode,
}

impl fmt::Debug for ColumnClump {
    /// Prints the fixed text `ColumnClump`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("ColumnClump")
    }
}

impl ColumnClump {
    /// Creates a clump around `cols`, named `name`, with inner delimiter `delim`.
    #[must_use]
    pub fn new(cols: Box<dyn ColumnFun>, name: &str, delim: u8) -> Self {
        Self {
            cols,
            name: name.to_owned(),
            text: TextFileMode { delim, ..Default::default() },
        }
    }

    /// Parses `DelimOutname:Columns`: the first character is the inner
    /// delimiter, the text up to the first `:` is the output name, and the
    /// rest is a column spec.
    ///
    /// # Errors
    /// Fails on an empty spec, a delimiter that is not a single byte in
    /// UTF-8, a missing `:`, or an invalid column spec.
    pub fn from_spec(orig_spec: &str) -> Result<Self> {
        let mut spec = orig_spec;
        if spec.is_empty() {
            return err!("Can't construct a group from an empty string");
        }
        let delim = spec.take_first();
        if delim.len_utf8() != 1 {
            return err!("Delimiter must be a plain ascii character : {}", orig_spec);
        }
        let Some((name, columns)) = spec.split_once(':') else {
            return err!("No colon found. Group format is DelimOutname:Columns : {}", orig_spec);
        };
        let mut set = ColumnSet::new();
        set.add_yes(columns)?;
        let cols = Box::new(ReaderColumns::new(set));
        let text = TextFileMode { delim: delim as u8, ..Default::default() };
        Ok(Self { cols, name: name.to_string(), text })
    }
}

impl ColumnFun for ColumnClump {
    /// Writes the inner columns using the clump's own file mode; the caller's
    /// mode is ignored.
    fn write(&mut self, w: &mut dyn Write, line: &TextLine, _text: &TextFileMode) -> Result<()> {
        self.cols.write(w, line, &self.text)?;
        Ok(())
    }

    /// Contributes the clump's single name.
    fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
        w.push(&self.name)
    }

    /// Resolves the inner columns.
    fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.cols.lookup(field_names)
    }
}
/// Adapter that lets a [`ColumnSet`] act as a [`ColumnFun`].
#[derive(Debug)]
pub struct ReaderColumns {
    /// The wrapped column selection.
    columns: ColumnSet,
}

impl ReaderColumns {
    /// Wraps `columns`.
    #[must_use]
    pub const fn new(columns: ColumnSet) -> Self {
        Self { columns }
    }
}
/// Writes the output name of `col`: its own name when it has one, otherwise
/// the name found in `head` at the column's index.
pub fn write_colname(w: &mut dyn Write, col: &OutCol, head: &StringLine) -> Result<()> {
    let label: &str = if col.name.is_empty() { head.get(col.num) } else { &col.name };
    w.write_all(label.as_bytes())?;
    Ok(())
}
impl ColumnFun for ReaderColumns {
    /// Writes the selected columns via [`ColumnSet::write3`].
    fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()> {
        self.columns.write3(w, line, text)?;
        Ok(())
    }

    /// Contributes one name per resolved column: the column's own name when
    /// it has one, otherwise the input header's name at that index.
    fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
        for col in self.columns.get_cols() {
            let label: &str = if col.name.is_empty() { head.get(col.num) } else { &col.name };
            w.push(label)?;
        }
        Ok(())
    }

    /// Resolves the wrapped selection.
    fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.columns.lookup(field_names)
    }
}
/// An ordered list of column producers that together write one output line.
#[derive(Default)]
pub struct Writer {
    /// The producers, in output order.
    v: Vec<Box<dyn ColumnFun>>,
    /// File mode passed to every producer; its delimiter separates them.
    text: TextFileMode,
}

impl fmt::Debug for Writer {
    /// Prints the fixed text `Writer`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Writer")
    }
}

impl Writer {
    /// Creates a writer with no producers.
    #[must_use]
    pub fn new(text: TextFileMode) -> Self {
        Self { text, v: Vec::new() }
    }

    /// Returns `true` when no producers have been added.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.v.is_empty()
    }

    /// Resolves every producer against `field_names`, stopping at the first error.
    pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.v.iter_mut().try_for_each(|producer| producer.lookup(field_names))
    }

    /// Appends a producer.
    pub fn push(&mut self, x: Box<dyn ColumnFun>) {
        self.v.push(x);
    }

    /// Collects the output names of every producer into `w`.
    pub fn add_names(&self, w: &mut ColumnHeader, head: &StringLine) -> Result<()> {
        self.v.iter().try_for_each(|producer| producer.add_names(w, head))
    }

    /// Writes one output line: each producer's output, separated by the
    /// delimiter, followed by a newline. With no producers only the newline
    /// is written.
    pub fn write(&mut self, w: &mut dyn Write, line: &TextLine) -> Result<()> {
        for (i, producer) in self.v.iter_mut().enumerate() {
            if i > 0 {
                w.write_all(&[self.text.delim])?;
            }
            producer.write(w, line, &self.text)?;
        }
        w.write_all(b"\n")?;
        Ok(())
    }
}
/// A reference to one column: by name, by one-based number, or by distance
/// from the end (`+N`).
#[derive(Debug, Clone, Default)]
pub struct NamedCol {
    /// Column name; empty when the column was given numerically.
    pub name: String,
    /// Zero-based column index. For named and `+N` columns it is only
    /// meaningful after `lookup`.
    pub num: usize,
    /// For `+N` specs, `N` (so `+1` is the last column); otherwise 0.
    pub from_end: usize,
}

impl fmt::Display for NamedCol {
    /// Prints `Column <one-based index>`, followed by ` (name)` or ` (+N)`
    /// when the column was given that way.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Column {}", self.num + 1)?;
        if !self.name.is_empty() {
            write!(f, " ({})", self.name)
        } else if self.from_end != 0 {
            write!(f, " (+{})", self.from_end)
        } else {
            Ok(())
        }
    }
}

impl NamedCol {
    /// Creates an empty column reference (column index 0, no name).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Parses `spec`, which must consist of a column reference and nothing else.
    ///
    /// # Errors
    /// Fails when parsing fails or when text is left over after the reference.
    pub fn new_from(spec: &str) -> Result<Self> {
        let mut x = Self::default();
        let rest = x.parse(spec)?;
        if rest.is_empty() {
            Ok(x)
        } else {
            err!("Extra stuff {} at the end of column spec {}", rest, spec)
        }
    }

    /// Resolves the reference to a zero-based index in `field_names`.
    /// Purely numeric references are left as parsed.
    ///
    /// # Errors
    /// Fails when the name is not found, or when `+N` exceeds the number of columns.
    pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        if !self.name.is_empty() {
            self.num = ColumnSet::lookup_col(field_names, &self.name)?;
        } else if self.from_end > 0 {
            if self.from_end > field_names.len() {
                return err!(
                    "Requested column +{}, but there are only {} columns",
                    self.from_end,
                    field_names.len()
                );
            }
            self.num = field_names.len() - self.from_end;
        }
        Ok(())
    }

    /// Parses a column reference from the start of `orig_spec` and returns
    /// the unparsed remainder.
    ///
    /// Accepted forms: a one-based number not starting with `0`, `+N`
    /// (counted from the end), or a name as recognised by [`get_col_name`].
    ///
    /// Two defects of the original are fixed here:
    /// * after a leading `+`, the character that was inspected was the `+`
    ///   itself, so every `+N` spec was rejected with "Bad parse"; the
    ///   character after the `+` is inspected now;
    /// * `from_end` is reset along with `num` and `name`, so a reused value
    ///   does not keep a stale `+N` from an earlier parse.
    ///
    /// # Errors
    /// Fails on an empty spec, on `+` not followed by a number, or when the
    /// text starts with neither a letter nor a digit `1`-`9`.
    pub fn parse<'a>(&mut self, orig_spec: &'a str) -> Result<&'a str> {
        self.num = 0;
        self.from_end = 0;
        self.name.clear();
        if orig_spec.is_empty() {
            return err!("Empty string found where column name expected");
        }
        let (was_plus, spec) = match orig_spec.strip_prefix('+') {
            Some(rest) => (true, rest),
            None => (false, orig_spec),
        };
        if spec.is_empty() {
            // Only reachable for a lone "+".
            return err!("'+' must be followed by a number : {}", orig_spec);
        }
        let ch = spec.first();
        if ch.is_ascii_digit() && ch != '0' {
            let (value, rest) = spec.to_usize();
            self.num = value;
            if was_plus {
                self.from_end = self.num;
            } else {
                // One-based on input, zero-based internally; the first digit
                // is non-zero, so this cannot underflow.
                self.num -= 1;
            }
            Ok(rest)
        } else if ch.is_alphabetic() {
            if was_plus {
                return err!("'+' must be followed by a number : {}", orig_spec);
            }
            let pos = get_col_name(spec);
            self.name.push_str(&spec[..pos]);
            Ok(&spec[pos..])
        } else {
            err!("Bad parse of compare spec for {}", spec)
        }
    }
}
/// Returns the byte length of the column name at the start of `spec`.
///
/// A name starts with an alphabetic character and continues with
/// alphanumeric characters or `_`. The result is 0 when `spec` is empty or
/// does not start with an alphabetic character.
#[must_use]
pub fn get_col_name(spec: &str) -> usize {
    let mut chars = spec.char_indices();
    match chars.next() {
        Some((_, first)) if first.is_alphabetic() => {}
        _ => return 0,
    }
    // The name ends at the first character that cannot be part of it.
    chars
        .find(|&(_, ch)| !(ch.is_alphanumeric() || ch == '_'))
        .map_or(spec.len(), |(end, _)| end)
}
#[derive(Debug, Clone, Default)]
struct CompositeColumnPart {
prefix: String,
col: NamedCol,
}
impl CompositeColumnPart {
fn new() -> Self {
Self::default()
}
fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
self.col.lookup(field_names)
}
}
/// An output column assembled from literal text and input column values.
#[derive(Debug, Clone, Default)]
pub struct CompositeColumn {
    /// The (prefix, column) pieces, in order.
    parts: Vec<CompositeColumnPart>,
    /// Literal text emitted after the last piece.
    suffix: String,
    /// Name of the output column.
    name: String,
}

impl CompositeColumn {
    /// Creates a composite column from a spec; see [`Self::set`].
    pub fn new(s: &str) -> Result<Self> {
        let mut c = Self::default();
        c.set(s)?;
        Ok(c)
    }

    /// Resolves every referenced column against `field_names`.
    pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.parts.iter_mut().try_for_each(|part| part.lookup(field_names))
    }

    /// Replaces the contents of `t` with the assembled value for `fields`.
    ///
    /// # Panics
    /// Panics when a referenced column index is beyond `fields`.
    pub fn get(&self, t: &mut Vec<u8>, fields: &[&[u8]]) {
        t.clear();
        for part in &self.parts {
            t.extend_from_slice(part.prefix.as_bytes());
            t.extend_from_slice(fields[part.col.num]);
        }
        t.extend_from_slice(self.suffix.as_bytes());
    }

    /// Parses `ColName:template`, replacing any previous contents.
    ///
    /// In the template, `^col` or `^{col}` inserts a column, `^^` is a
    /// literal `^`, a `^` at the very end is also a literal `^`, and all
    /// other text is copied as is.
    ///
    /// # Errors
    /// Fails when there is no `:`, when a column reference does not parse, or
    /// when `^{` is not closed by `}`.
    pub fn set(&mut self, spec: &str) -> Result<()> {
        const TAG: char = '^';
        let Some((name, template)) = spec.split_once(':') else {
            return err!("Composite Column Spec must start with ColName: : {}", spec);
        };
        self.name = name.to_string();
        self.suffix.clear();
        self.parts.clear();

        let mut curr = template;
        while !curr.is_empty() {
            let ch = curr.take_first();
            if ch != TAG {
                self.suffix.push(ch);
                continue;
            }
            if curr.is_empty() {
                // A trailing tag stands for itself.
                self.suffix.push(TAG);
                continue;
            }
            if curr.first() == TAG {
                // A doubled tag is an escaped literal tag.
                self.suffix.push(TAG);
                curr = curr.skip_first();
                continue;
            }
            // The literal text collected so far becomes this part's prefix.
            let mut part = CompositeColumnPart::new();
            part.prefix = std::mem::take(&mut self.suffix);
            if curr.first() == '{' {
                curr = curr.skip_first();
                curr = part.col.parse(curr)?;
                if curr.is_empty() || curr.first() != '}' {
                    return err!("Closing bracket not found : {}", spec);
                }
                curr = curr.skip_first();
            } else {
                curr = part.col.parse(curr)?;
            }
            self.parts.push(part);
        }
        Ok(())
    }
}

impl ColumnFun for CompositeColumn {
    /// Contributes the composite column's name.
    fn add_names(&self, w: &mut ColumnHeader, _head: &StringLine) -> Result<()> {
        w.push(&self.name)
    }

    /// Writes each prefix and column value, then the suffix, all through `text.write`.
    fn write(&mut self, w: &mut dyn Write, line: &TextLine, text: &TextFileMode) -> Result<()> {
        for part in &self.parts {
            text.write(w, part.prefix.as_bytes())?;
            text.write(w, line.get(part.col.num))?;
        }
        text.write(w, self.suffix.as_bytes())?;
        Ok(())
    }

    /// Delegates to the inherent [`CompositeColumn::lookup`].
    fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        Self::lookup(self, field_names)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that an expression matches a pattern, printing the actual
    /// value when it does not.
    macro_rules! assert_err {
        ($expression:expr, $($pattern:tt)+) => {
            match $expression {
                $($pattern)+ => (),
                ref e => panic!("expected `{}` but got `{:?}`", stringify!($($pattern)+), e),
            }
        }
    }

    /// Unnamed ranges: pattern match, explicit bounds, open-ended bounds and
    /// a malformed spec.
    #[test]
    fn range() -> Result<()> {
        crate::util::init()?;
        let fields = ["zero", "one", "two", "three", "four"];
        let all: Vec<OutCol> = (0..fields.len()).map(OutCol::from_num).collect();

        let matched = ColumnSet::range(&fields, "(range,<p)")?;
        assert_eq!(matched, [OutCol::from_num(1), OutCol::from_num(4)]);

        assert_eq!(ColumnSet::range(&fields, "2-+2")?, all[1..=3]);
        assert_eq!(ColumnSet::range(&fields, "-")?, all);
        assert_eq!(ColumnSet::range(&fields, "2-")?, all[1..]);
        assert_eq!(ColumnSet::range(&fields, "-2")?, all[..2]);
        assert_err!(ColumnSet::range(&fields, "1-2-3"), Err(_));
        Ok(())
    }

    /// A `name:` prefix ends up in the resulting `OutCol`.
    #[test]
    fn named_range() -> Result<()> {
        crate::util::init()?;
        let fields = ["zero", "one", "two", "three", "four"];
        let expected = [OutCol::new(0, "stuff"), OutCol::new(1, "junk")];

        assert_eq!(ColumnSet::range(&fields, "stuff:1")?, expected[..1]);
        assert_eq!(ColumnSet::ranges(&fields, "stuff:1,junk:2")?, expected[..2]);
        Ok(())
    }

    /// `get_col_name` returns the byte length of the leading valid name.
    #[test]
    fn do_get_col_name() -> Result<()> {
        crate::util::init()?;
        let cases = [
            ("", 0),
            ("...", 0),
            ("_aaa", 0),
            ("1aaa", 0),
            ("abc", 3),
            ("abc,", 3),
            ("abc,aaa", 3),
            ("a_b_c", 5),
            ("a_ñ_c", 6),
        ];
        for (input, expected) in cases {
            assert_eq!(get_col_name(input), expected);
        }
        Ok(())
    }
}