use crate::prelude::*;
use crate::util::FakeSlice;
use base64::Engine;
use base64::engine::general_purpose;
use std::sync::Mutex;
/// One transformation step: turns an input byte slice into output bytes.
pub trait Trans {
/// Transform `src`, writing the result into `dst`.
///
/// None of the implementations in this file read `cont`; presumably it is the
/// line that `src` was taken from — TODO confirm against callers.
/// Returns an error if the implementation cannot process `src`.
fn trans(&mut self, src: &[u8], cont: &TextLine, dst: &mut Vec<u8>) -> Result<()>;
/// Hook that receives a list of field names.
///
/// The default implementation ignores the names and returns `Ok(())`.
fn lookup(&mut self, _field_names: &[&str]) -> Result<()> {
Ok(())
}
}
/// Splits a value into sub-columns and writes a selection of them back out.
#[derive(Default, Clone, Debug)]
struct SelectTrans {
    /// Which sub-columns to emit.
    cols: ColumnSet,
    /// How the incoming value is split.
    in_mode: TextFileMode,
    /// How the selected sub-columns are written.
    out_mode: TextFileMode,
    /// Scratch line, refilled on every call to `trans`.
    text: TextLine,
}

impl SelectTrans {
    /// Build a header-less, plain-quoted file mode that uses `delim` as its delimiter.
    #[must_use]
    const fn mode(delim: char) -> TextFileMode {
        TextFileMode {
            head_mode: crate::util::HeadMode::No,
            col_mode: crate::util::QuoteMode::Plain,
            delim: crate::util::auto_escape(delim),
            line_break: b'\n',
            repl: b' ',
        }
    }

    /// Parse a select spec.
    ///
    /// The first character is the input delimiter, the second is the output
    /// delimiter, and the remainder is handed to `ColumnSet::from_spec`.
    ///
    /// # Errors
    /// Fails when `spec` has fewer than two characters, or when the column
    /// spec is rejected.
    fn new(spec: &str) -> Result<Self> {
        let mut rest = spec.chars();
        let delims = (rest.next(), rest.next());
        if let (Some(in_delim), Some(out_delim)) = delims {
            let (in_mode, out_mode) = (Self::mode(in_delim), Self::mode(out_delim));
            let cols = ColumnSet::from_spec(rest.as_str())?;
            Ok(Self { cols, in_mode, out_mode, text: TextLine::default() })
        } else {
            err!("Invalid select spec : {}", spec)
        }
    }
}

impl Trans for SelectTrans {
    /// Load `src` into the scratch line, split it with the input mode, and
    /// write the selected columns to `dst` with the output mode.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        let Self { cols, in_mode, out_mode, text } = self;
        text.line.clear();
        text.line.extend_from_slice(src);
        text.split(&*in_mode);
        cols.write3(dst, &*text, &*out_mode)
    }

    /// Resolve the column set against an empty name list; the supplied field
    /// names are deliberately not forwarded.
    fn lookup(&mut self, _field_names: &[&str]) -> Result<()> {
        let no_names: &[&str] = &[];
        self.cols.lookup(no_names)?;
        Ok(())
    }
}
/// Emits selected byte ranges of the input, in spec order.
struct BytesTrans {
    /// One entry per comma-separated piece of the spec.
    v: Vec<FakeSlice>,
}

impl BytesTrans {
    /// Parse a comma-separated list of slice specs.
    ///
    /// # Errors
    /// Fails on the first piece that `FakeSlice::new` rejects.
    fn new(spec: &str) -> Result<Self> {
        let mut slices = Vec::new();
        for piece in spec.split(',') {
            let slice = FakeSlice::new(piece)?;
            slices.push(slice);
        }
        Ok(Self { v: slices })
    }
}

impl Trans for BytesTrans {
    /// Append to `dst` whatever each slice yields from `src` via `get_safe`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        self.v.iter().for_each(|range| dst.extend(range.get_safe(src)));
        Ok(())
    }
}
/// Base64 encoder/decoder using the standard alphabet with padding.
struct Base64Trans {
    /// `true` to encode, `false` to decode.
    encode: bool,
}

impl Base64Trans {
    /// Create an encoder (`encode == true`) or a decoder (`encode == false`).
    const fn new(encode: bool) -> Self {
        Self { encode }
    }
}

impl Trans for Base64Trans {
    /// Encode or decode `src`, appending the result to `dst`.
    ///
    /// # Errors
    /// Decoding fails when `src` is not valid base64.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        if self.encode {
            // Write after any bytes already in `dst`, so encoding appends just
            // like decoding (`decode_vec`) and the other transforms do, instead
            // of overwriting existing content.
            let start = dst.len();
            // `len * 4 / 3 + 4` is always at least the padded encoded length.
            dst.resize(start + src.len() * 4 / 3 + 4, 0);
            let bytes_written = general_purpose::STANDARD.encode_slice(src, &mut dst[start..])?;
            dst.truncate(start + bytes_written);
        } else {
            general_purpose::STANDARD.decode_vec(src, dst)?;
        }
        Ok(())
    }
}
/// ASCII-only lower-casing; bytes outside `A`-`Z` pass through untouched.
struct LowerTrans {}

impl Trans for LowerTrans {
    /// Append the ASCII-lowercased form of every byte of `src` to `dst`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        dst.extend(src.iter().map(u8::to_ascii_lowercase));
        Ok(())
    }
}
/// Lower-casing for text that is treated as UTF-8.
#[derive(Default)]
struct LowerUtfTrans {
    /// Scratch string handed to `assign_lower` on every call.
    tmp: String,
}

impl Trans for LowerUtfTrans {
    /// Lossily decode `src` as UTF-8, run `assign_lower` into the scratch
    /// string, and append that string's bytes to `dst`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        let text = String::from_utf8_lossy(src);
        text.as_ref().assign_lower(&mut self.tmp);
        dst.extend_from_slice(self.tmp.as_bytes());
        Ok(())
    }
}
/// Upper-casing for text that is treated as UTF-8.
#[derive(Default)]
struct UpperUtfTrans {
    /// Scratch string handed to `assign_upper` on every call.
    tmp: String,
}

impl Trans for UpperUtfTrans {
    /// Lossily decode `src` as UTF-8, run `assign_upper` into the scratch
    /// string, and append that string's bytes to `dst`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        let text = String::from_utf8_lossy(src);
        text.as_ref().assign_upper(&mut self.tmp);
        dst.extend_from_slice(self.tmp.as_bytes());
        Ok(())
    }
}
/// ASCII-only upper-casing; bytes outside `a`-`z` pass through untouched.
struct UpperTrans {}

impl Trans for UpperTrans {
    /// Append the ASCII-uppercased form of every byte of `src` to `dst`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        dst.extend(src.iter().map(u8::to_ascii_uppercase));
        Ok(())
    }
}
/// Byte-level whitespace normaliser.
///
/// Every byte `<= b' '` counts as whitespace. Runs of whitespace between
/// words collapse to a single space; trailing whitespace is dropped, and so
/// is leading whitespace when `dst` starts out empty.
struct NormSpace {}

impl Trans for NormSpace {
    /// Append the whitespace-normalised form of `src` to `dst`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        let mut pending_space = false;
        for &byte in src {
            if byte <= b' ' {
                pending_space = true;
                continue;
            }
            if pending_space && !dst.is_empty() {
                dst.push(b' ');
            }
            // Always clear the flag once a non-space byte is emitted. If it is
            // cleared only when a space is pushed, leading whitespace stays
            // "pending" and is wrongly inserted after the first output byte
            // (" ab" would become "a b").
            pending_space = false;
            dst.push(byte);
        }
        Ok(())
    }
}
/// UTF-8 aware whitespace normaliser.
///
/// The input is decoded lossily. Unicode whitespace and the replacement
/// character (produced for invalid byte sequences) are both treated as
/// whitespace. Leading and trailing whitespace is dropped and inner runs
/// collapse to a single ASCII space.
#[derive(Default)]
struct NormSpaceUtf8 {
    /// Scratch buffer holding the normalised text for the current call.
    tmp: String,
}

impl Trans for NormSpaceUtf8 {
    /// Append the whitespace-normalised form of `src` to `dst`.
    fn trans(&mut self, src: &[u8], _cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        let mut pending_space = false;
        self.tmp.clear();
        for ch in String::from_utf8_lossy(src).chars() {
            if ch.is_whitespace() || ch == std::char::REPLACEMENT_CHARACTER {
                pending_space = true;
                continue;
            }
            if pending_space && !self.tmp.is_empty() {
                self.tmp.push(' ');
            }
            // Always clear the flag once a real character is emitted. If it is
            // cleared only when a space is pushed, leading whitespace stays
            // "pending" and is wrongly inserted after the first character.
            pending_space = false;
            self.tmp.push(ch);
        }
        dst.extend_from_slice(self.tmp.as_bytes());
        Ok(())
    }
}
/// Modifier flags parsed from a transform spec and passed to the maker callbacks.
#[derive(Debug, Default, Copy, Clone)]
pub struct TransSettings {
/// Set when the spec carries the `utf8` modifier; the built-in makers use it
/// to choose the UTF-8 implementation over the byte-wise one.
utf8: bool,
}
/// A transform built from a textual spec, bundled with the spec and settings
/// it was built from.
pub struct Transform {
    /// Spec text this transform was built from; `Clone` re-parses it.
    pub spec: String,
    /// Modifier settings parsed from the spec.
    pub conf: TransSettings,
    /// The boxed implementation that does the actual work.
    pub trans: Box<dyn Trans>,
}

impl fmt::Debug for Transform {
    /// Prints `Transform ` followed by the spec; the boxed implementation is not shown.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Transform ")?;
        f.write_str(&self.spec)
    }
}

impl Clone for Transform {
    /// Rebuild an equivalent transform from the stored spec.
    ///
    /// # Panics
    /// Panics if `TransMaker::make` rejects the stored spec.
    fn clone(&self) -> Self {
        let rebuilt = TransMaker::make(&self.spec);
        rebuilt.unwrap()
    }
}

impl Transform {
    /// Forward to the boxed implementation's `trans`.
    fn trans(&mut self, src: &[u8], cont: &TextLine, dst: &mut Vec<u8>) -> Result<()> {
        let inner: &mut dyn Trans = &mut *self.trans;
        inner.trans(src, cont, dst)
    }

    /// Forward to the boxed implementation's `lookup`.
    fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        let inner: &mut dyn Trans = &mut *self.trans;
        inner.lookup(field_names)
    }
}
/// An ordered chain of transforms, applied one after another.
#[derive(Default, Clone, Debug)]
pub struct TransList {
    /// The steps, in application order.
    v: Vec<Transform>,
    /// First scratch buffer; receives the output of steps 0, 2, 4, ...
    tmp1: Vec<u8>,
    /// Second scratch buffer; receives the output of steps 1, 3, 5, ...
    tmp2: Vec<u8>,
}

impl TransList {
    /// Build a chain from a `+`-separated list of transform specs.
    ///
    /// # Errors
    /// Fails on the first piece that `TransMaker::make` rejects.
    pub fn new(spec: &str) -> Result<Self> {
        let mut list = Self::default();
        spec.split('+').try_for_each(|piece| list.push(piece))?;
        Ok(list)
    }

    /// Append one transform, built from `spec`, to the end of the chain.
    pub fn push(&mut self, spec: &str) -> Result<()> {
        TransMaker::make(spec).map(|step| self.v.push(step))
    }

    /// Run `src` through every step and return the final bytes.
    ///
    /// With no steps, `src` is returned unchanged. Otherwise the two scratch
    /// buffers alternate as input and output, and the returned slice borrows
    /// whichever buffer the last step wrote to.
    pub fn trans<'a>(&'a mut self, src: &'a [u8], cont: &'a TextLine) -> Result<&'a [u8]> {
        let Self { v, tmp1, tmp2 } = self;
        let mut steps = v.iter_mut();
        let first = match steps.next() {
            Some(first) => first,
            None => return Ok(src),
        };
        tmp1.clear();
        first.trans(src, cont, tmp1)?;
        // Tracks which buffer holds the most recent output.
        let mut latest_in_tmp1 = true;
        for step in steps {
            let (input, output) =
                if latest_in_tmp1 { (&*tmp1, &mut *tmp2) } else { (&*tmp2, &mut *tmp1) };
            output.clear();
            step.trans(input, cont, output)?;
            latest_in_tmp1 = !latest_in_tmp1;
        }
        Ok(if latest_in_tmp1 { tmp1.as_slice() } else { tmp2.as_slice() })
    }

    /// Pass `field_names` to every step's `lookup`, stopping at the first error.
    pub fn lookup(&mut self, field_names: &[&str]) -> Result<()> {
        self.v.iter_mut().try_for_each(|step| step.lookup(field_names))
    }
}
/// Boxed factory callback: given the parsed settings and the pattern text of a
/// spec, builds the transform or reports an error.
type MakerBox = Box<dyn Fn(&TransSettings, &str) -> Result<Box<dyn Trans>> + Send>;
/// One registered transform kind.
struct TransMakerItem {
/// Name the transform is looked up by.
tag: &'static str,
/// One-line description printed by `TransMaker::help`.
help: &'static str,
/// Callback that constructs the transform.
maker: MakerBox,
}
/// An alternative name for a transform tag.
struct TransMakerAlias {
/// The name the alias resolves to.
old_name: &'static str,
/// The alias itself, i.e. the name that gets replaced by `old_name`.
new_name: &'static str,
}
/// Global table of registered transform kinds; starts empty and is filled by
/// `TransMaker::init` and `TransMaker::push`.
static TRANS_MAKER: Mutex<Vec<TransMakerItem>> = Mutex::new(Vec::new());
/// Global table of aliases; starts empty and is filled by `TransMaker::init`
/// and `TransMaker::add_alias`.
static TRANS_ALIAS: Mutex<Vec<TransMakerAlias>> = Mutex::new(Vec::new());
/// Names reserved for modifiers; they may not be registered as tags or aliases.
const MODIFIERS: &[&str] = &["utf8"];
/// Entry point to the global transform registry: registers transform kinds
/// and aliases, and builds `Transform`s from textual specs.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Default, Hash)]
pub struct TransMaker {}

impl TransMaker {
    /// Register the built-in aliases and transform kinds.
    ///
    /// # Errors
    /// Fails if the registry already contains entries.
    pub(crate) fn init() -> Result<()> {
        let already_populated = !TRANS_MAKER.lock().unwrap().is_empty();
        if already_populated {
            return err!("Double init of TransMaker not allowed");
        }
        Self::do_add_alias("lower", "lowercase")?;
        Self::do_add_alias("upper", "uppercase")?;
        Self::do_push("normspace", "normalize white space", |conf, _pattern| {
            let made: Box<dyn Trans> =
                if conf.utf8 { Box::<NormSpaceUtf8>::default() } else { Box::new(NormSpace {}) };
            Ok(made)
        })?;
        Self::do_push("lower", "make lower case", |conf, _pattern| {
            let made: Box<dyn Trans> =
                if conf.utf8 { Box::<LowerUtfTrans>::default() } else { Box::new(LowerTrans {}) };
            Ok(made)
        })?;
        Self::do_push("upper", "make upper case", |conf, _pattern| {
            let made: Box<dyn Trans> =
                if conf.utf8 { Box::<UpperUtfTrans>::default() } else { Box::new(UpperTrans {}) };
            Ok(made)
        })?;
        Self::do_push("from_base64", "decode base64 encoding", |_conf, _pattern| {
            Ok(Box::new(Base64Trans::new(false)))
        })?;
        Self::do_push("to_base64", "encode base64 encoding", |_conf, _pattern| {
            Ok(Box::new(Base64Trans::new(true)))
        })?;
        Self::do_push("bytes", "Select bytes from value", |_conf, pattern| {
            let made = BytesTrans::new(pattern)?;
            Ok(Box::new(made))
        })?;
        Self::do_push("select", "Select sub-columns from value", |_conf, pattern| {
            let made = SelectTrans::new(pattern)?;
            Ok(Box::new(made))
        })?;
        Ok(())
    }

    /// Register a transform kind under `tag`, replacing any existing entry
    /// with the same tag.
    ///
    /// # Errors
    /// Fails if `tag` is a reserved modifier name.
    pub fn push<F>(tag: &'static str, help: &'static str, maker: F) -> Result<()>
    where
        F: Fn(&TransSettings, &str) -> Result<Box<dyn Trans>> + Send + 'static,
    {
        Self::do_push(tag, help, maker)
    }

    /// Make `new_name` resolve to `old_name`, replacing any existing alias
    /// called `new_name`.
    ///
    /// # Errors
    /// Fails if `new_name` is a reserved modifier name.
    pub fn add_alias(old_name: &'static str, new_name: &'static str) -> Result<()> {
        Self::do_add_alias(old_name, new_name)
    }

    /// Map an alias to the name it stands for; any other name is returned unchanged.
    fn resolve_alias(name: &str) -> &str {
        let aliases = TRANS_ALIAS.lock().unwrap();
        let target =
            aliases.iter().find(|alias| alias.new_name == name).map(|alias| alias.old_name);
        drop(aliases);
        target.unwrap_or(name)
    }

    /// Shared implementation behind `add_alias` and the built-in aliases.
    fn do_add_alias(old_name: &'static str, new_name: &'static str) -> Result<()> {
        if MODIFIERS.contains(&new_name) {
            return err!(
                "You can't add an alias named {new_name} because that is reserved for a modifier"
            );
        }
        let entry = TransMakerAlias { old_name, new_name };
        let mut aliases = TRANS_ALIAS.lock().unwrap();
        // Overwrite in place when the alias already exists, otherwise append.
        match aliases.iter().position(|existing| existing.new_name == new_name) {
            Some(slot) => aliases[slot] = entry,
            None => aliases.push(entry),
        }
        Ok(())
    }

    /// Shared implementation behind `push` and the built-in transform kinds.
    fn do_push<F>(tag: &'static str, help: &'static str, maker: F) -> Result<()>
    where
        F: Fn(&TransSettings, &str) -> Result<Box<dyn Trans>> + Send + 'static,
    {
        if MODIFIERS.contains(&tag) {
            return err!(
                "You can't add a trans named {tag} because that is reserved for a modifier"
            );
        }
        let entry = TransMakerItem { tag, help, maker: Box::new(maker) };
        let mut registry = TRANS_MAKER.lock().unwrap();
        // Overwrite in place when the tag already exists, otherwise append.
        match registry.iter().position(|existing| existing.tag == tag) {
            Some(slot) => registry[slot] = entry,
            None => registry.push(entry),
        }
        Ok(())
    }

    /// Print the modifiers and every registered transform kind (sorted) to stdout.
    pub fn help() {
        println!("Modifiers :");
        println!("utf8 Operations are on utf8 strings, rather than the default u8 bytes.");
        println!("Methods :");
        let mut lines: Vec<String> = TRANS_MAKER
            .lock()
            .unwrap()
            .iter()
            .map(|item| format!("{:12}{}", item.tag, item.help))
            .collect();
        lines.sort();
        for line in &lines {
            println!("{line}");
        }
        println!("See also https://avjewe.github.io/cdxdoc/Transform.html.");
    }

    /// Build a transform from a dotted `trans` name and a `pattern`.
    ///
    /// `trans` is split on `.`: a piece equal to `utf8` (ignoring ASCII case)
    /// sets the UTF-8 flag, and the last remaining piece is used as the kind,
    /// after alias resolution. The stored spec is `trans`, followed by
    /// `,pattern` when `pattern` is non-empty.
    ///
    /// # Errors
    /// Fails if the kind is not registered or its maker rejects `pattern`.
    pub fn make2(trans: &str, pattern: &str) -> Result<Transform> {
        let spec = if pattern.is_empty() {
            trans.to_string()
        } else {
            let mut joined = String::from(trans);
            joined.push(',');
            joined.push_str(pattern);
            joined
        };
        let mut conf = TransSettings::default();
        let mut kind = "";
        if !trans.is_empty() {
            for piece in trans.split('.') {
                if piece.eq_ignore_ascii_case("utf8") {
                    conf.utf8 = true;
                } else {
                    kind = piece;
                }
            }
            kind = Self::resolve_alias(kind);
        }
        let trans = Self::make_box(kind, &conf, pattern)?;
        Ok(Transform { spec, conf, trans })
    }

    /// Look up `kind` in the registry and invoke its maker.
    ///
    /// The registry lock is held while the maker runs.
    ///
    /// # Errors
    /// Fails if `kind` is not registered or the maker fails.
    pub fn make_box(kind: &str, conf: &TransSettings, pattern: &str) -> Result<Box<dyn Trans>> {
        let registry = TRANS_MAKER.lock().unwrap();
        match registry.iter().find(|item| item.tag == kind) {
            Some(item) => (item.maker)(conf, pattern),
            None => err!("Unknown trans : {}", kind),
        }
    }

    /// Build a transform from a full spec, split at the first comma into the
    /// dotted name and the pattern (empty when there is no comma).
    pub fn make(spec: &str) -> Result<Transform> {
        let (name, pattern) = spec.split_once(',').unwrap_or((spec, ""));
        Self::make2(name, pattern)
    }
}