#![allow(clippy::too_many_arguments)]
extern crate puruda_macro;
extern crate csv;
use puruda_macro::*;
use csv::{ReaderBuilder, WriterBuilder, Trim};
use std::error::Error;
use std::ops::{Index, IndexMut};
use std::str::FromStr;
use std::string::ToString;
use std::collections::HashMap;
/// A typed column of values whose contents can be borrowed as a `Vec`.
///
/// The only hand-written implementation in this file is for `Vec<&str>`; the
/// `col_vec_impl!` invocations presumably supply the implementations for the
/// other element types (their expansion is not visible here — confirm in
/// `puruda_macro`).
pub trait Column {
/// Type of a single element stored in the column.
type DType;
/// Returns the number of elements (rows) held by the column.
fn row(&self) -> usize;
/// Borrows the element at position `n`.
fn idx(&self, n: usize) -> &Self::DType;
/// Mutably borrows the element at position `n`.
fn idx_mut(&mut self, n: usize) -> &mut Self::DType;
/// Borrows the column's contents as a `Vec`.
fn to_vec(&self) -> &Vec<Self::DType>;
/// Appends `val` to the column.
fn push(&mut self, val: Self::DType);
}
/// Extension of [`Column`] for mutating elements through a caller-supplied closure.
pub trait ColumnApply: Column {
/// Calls `f` with a mutable reference to elements of the column.
///
/// The `Vec<&str>` implementation in this file visits every element once,
/// in order.
fn apply<F: FnMut(&mut Self::DType)>(&mut self, f: F);
}
impl<T> Index<usize> for dyn Column<DType = T> {
type Output = T;
fn index(&self, index: usize) -> &Self::Output {
self.idx(index)
}
}
impl<T> IndexMut<usize> for dyn Column<DType = T> {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
self.idx_mut(index)
}
}
// One `col_vec_impl!` invocation per supported element type. The macro comes
// from `puruda_macro` and its expansion is not visible in this file;
// presumably it implements `Column` (and possibly `ColumnApply`) for `Vec<T>`
// — the `Numeric` impls further down do require `Vec<T>: Column` for the
// numeric types. TODO confirm against the macro crate.
col_vec_impl!(bool);
col_vec_impl!(u32);
col_vec_impl!(u64);
col_vec_impl!(usize);
col_vec_impl!(i32);
col_vec_impl!(i64);
col_vec_impl!(isize);
col_vec_impl!(f32);
col_vec_impl!(f64);
col_vec_impl!(String);
/// [`Column`] implementation for vectors of borrowed string slices.
///
/// Every method is a thin forward to the underlying `Vec`; `idx` and
/// `idx_mut` panic when `n` is out of bounds, exactly like slice indexing.
impl<'a> Column for Vec<&'a str> {
    type DType = &'a str;

    fn row(&self) -> usize {
        self.as_slice().len()
    }

    fn idx(&self, n: usize) -> &&'a str {
        &self.as_slice()[n]
    }

    fn idx_mut(&mut self, n: usize) -> &mut &'a str {
        &mut self.as_mut_slice()[n]
    }

    fn to_vec(&self) -> &Vec<&'a str> {
        self
    }

    fn push(&mut self, val: &'a str) {
        // Fully qualified so the inherent `Vec::push` is called, not this trait method.
        Vec::<&'a str>::push(self, val);
    }
}
/// [`ColumnApply`] implementation for vectors of borrowed string slices.
impl ColumnApply for Vec<&str> {
    /// Invokes `f` once per element, front to back, with mutable access to it.
    fn apply<F: FnMut(&mut Self::DType)>(&mut self, f: F) {
        self.iter_mut().for_each(f);
    }
}
/// Printing support for columns whose elements implement `Display`.
pub trait ColumnDisplay: Column where Self::DType: std::fmt::Display {
    /// Writes the column to standard output as `[a, b, c]` followed by a newline.
    ///
    /// An empty column prints `[]`.
    fn print(&self) {
        let mut items = self.to_vec().iter();
        print!("[");
        if let Some(first) = items.next() {
            print!("{}", first);
            // Every later element is preceded by the separator.
            for item in items {
                print!(", ");
                print!("{}", item);
            }
        }
        println!("]");
    }
}
/// Blanket implementation: every [`Column`] whose element type implements
/// `Display` gets [`ColumnDisplay::print`] with its default body.
impl<C: Column> ColumnDisplay for C where C::DType: std::fmt::Display {}
/// Descriptive statistics for numeric columns.
///
/// Implemented in this file for `Vec<T>` of the primitive integer and float
/// types via `impl_numeric_int!` and `impl_numeric_float!`.
pub trait Numeric: Column {
/// Sum of all elements, in the column's own element type.
fn sum(&self) -> Self::DType;
/// Arithmetic mean as `f64`. The implementations in this file return `0.0`
/// for an empty column.
fn mean(&self) -> f64;
/// Reference to the smallest element; the implementations in this file
/// return `None` for an empty column.
fn min_val(&self) -> Option<&Self::DType>;
/// Reference to the largest element; the implementations in this file
/// return `None` for an empty column.
fn max_val(&self) -> Option<&Self::DType>;
/// Variance as `f64`. The implementations in this file compute the
/// population variance (sum of squared deviations divided by the element
/// count) and return `0.0` for an empty column.
fn var(&self) -> f64;
/// Standard deviation as `f64`; the implementations in this file take the
/// square root of [`Numeric::var`].
fn std_dev(&self) -> f64;
}
// Implements `Numeric` for `Vec<T>` of every listed primitive integer type.
//
// `mean` and `var` convert each element to `f64` before accumulating; an empty
// vector yields `0.0` for both. `var` is the population variance (divides by
// the element count).
macro_rules! impl_numeric_int {
    ($($t:ty),*) => {
        $(
            impl Numeric for Vec<$t> {
                fn sum(&self) -> $t {
                    self.iter().sum()
                }

                fn mean(&self) -> f64 {
                    match self.len() {
                        0 => 0.0,
                        count => {
                            let total: f64 = self.iter().map(|&x| x as f64).sum();
                            total / count as f64
                        }
                    }
                }

                fn min_val(&self) -> Option<&$t> {
                    self.iter().min_by(|a, b| a.cmp(b))
                }

                fn max_val(&self) -> Option<&$t> {
                    self.iter().max_by(|a, b| a.cmp(b))
                }

                fn var(&self) -> f64 {
                    let count = self.len();
                    if count == 0 {
                        return 0.0;
                    }
                    let center = self.mean();
                    let squared_total: f64 = self
                        .iter()
                        .map(|&x| {
                            let deviation = x as f64 - center;
                            deviation * deviation
                        })
                        .sum();
                    squared_total / count as f64
                }

                fn std_dev(&self) -> f64 {
                    f64::sqrt(self.var())
                }
            }
        )*
    };
}
// Implements `Numeric` for `Vec<T>` of every listed primitive float type.
//
// `mean` and `var` accumulate in `f64`; an empty vector yields `0.0` for both.
// `var` is the population variance (divides by the element count).
//
// `min_val` / `max_val` skip NaN elements, mirroring `f64::min` / `f64::max`:
// the result is NaN only when every element is NaN. Among equal candidates
// the first one is returned.
macro_rules! impl_numeric_float {
    ($($t:ty),*) => {
        $(
            impl Numeric for Vec<$t> {
                fn sum(&self) -> $t {
                    self.iter().copied().sum()
                }

                fn mean(&self) -> f64 {
                    if self.is_empty() {
                        return 0.0;
                    }
                    self.iter().copied().map(|x| x as f64).sum::<f64>() / self.len() as f64
                }

                fn min_val(&self) -> Option<&$t> {
                    // A NaN running minimum is always replaced; a NaN candidate
                    // never wins because `x < best` is false for NaN. This makes
                    // the result independent of where NaNs sit in the column.
                    self.iter()
                        .reduce(|best, x| if best.is_nan() || x < best { x } else { best })
                }

                fn max_val(&self) -> Option<&$t> {
                    // Same NaN policy as `min_val`, with the comparison flipped.
                    self.iter()
                        .reduce(|best, x| if best.is_nan() || x > best { x } else { best })
                }

                fn var(&self) -> f64 {
                    if self.is_empty() {
                        return 0.0;
                    }
                    let m = self.mean();
                    let n = self.len() as f64;
                    self.iter()
                        .copied()
                        .map(|x| {
                            let d = x as f64 - m;
                            d * d
                        })
                        .sum::<f64>()
                        / n
                }

                fn std_dev(&self) -> f64 {
                    self.var().sqrt()
                }
            }
        )*
    };
}
// Instantiate `Numeric` for `Vec<T>` of every supported integer and float
// element type. `bool` and `String` columns deliberately get no `Numeric` impl.
impl_numeric_int!(u32, u64, usize, i32, i64, isize);
impl_numeric_float!(f32, f64);
/// De-duplication helpers for columns whose elements are hashable.
pub trait ColumnUnique: Column where Self::DType: Clone + Eq + std::hash::Hash {
    /// Returns the distinct values of the column in order of first appearance.
    ///
    /// Only the returned values are cloned: membership is tracked with a set
    /// of references into the column, so duplicates cost a hash lookup and
    /// nothing more.
    fn unique(&self) -> Vec<Self::DType> {
        let mut seen = std::collections::HashSet::new();
        self.to_vec()
            .iter()
            // `insert` returns `true` only the first time a value is seen.
            .filter(|item| seen.insert(*item))
            .cloned()
            .collect()
    }

    /// Returns the number of distinct values in the column without cloning
    /// any element.
    fn n_unique(&self) -> usize {
        self.to_vec()
            .iter()
            .collect::<std::collections::HashSet<_>>()
            .len()
    }
}
/// Blanket implementation: every [`Column`] whose element type is
/// `Clone + Eq + Hash` gets the default `unique` / `n_unique` methods.
impl<C: Column> ColumnUnique for C where C::DType: Clone + Eq + std::hash::Hash {}
/// Applies `f` to every element of `col`, in order, and collects the results
/// into a new `Vec`.
///
/// The column itself is left untouched; `f` only receives shared references.
pub fn map_column<C: Column, U, F: Fn(&C::DType) -> U>(col: &C, f: F) -> Vec<U> {
    let source = col.to_vec();
    let mut mapped = Vec::with_capacity(source.len());
    for item in source {
        mapped.push(f(item));
    }
    mapped
}
// Code generated by `puruda_macro`; the expansions are not visible in this
// file. Judging by the macro names these presumably define the multi-column
// container types and their core, extra, display and describe
// implementations — TODO confirm against the macro crate.
multi_col_def!();
multi_col_impl!();
multi_col_extra_impl!();
multi_col_display_impl!();
multi_col_describe_impl!();
/// CSV persistence for a whole data container.
///
/// No implementation is written out in this file; implementations are
/// presumably generated by `multi_col_csv_impl!()` — verify in `puruda_macro`.
pub trait CSV: Sized {
/// Writes `self` to the file at `file_path`, using `delimiter` as the field
/// separator. Returns an error if writing fails.
fn write_csv(&self, file_path: &str, delimiter: char) -> Result<(), Box<dyn Error>>;
/// Reads a value of this type from the file at `file_path`, using
/// `delimiter` as the field separator. Returns an error if reading or
/// parsing fails.
fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>>;
}
// Generated by `puruda_macro` (expansion not visible here); presumably
// provides the `CSV` implementations for the multi-column types — TODO confirm.
multi_col_csv_impl!();
/// JSON persistence for a whole data container.
///
/// No implementation is written out in this file; implementations are
/// presumably generated by `multi_col_json_impl!()` — verify in `puruda_macro`.
pub trait JsonIO: Sized {
/// Writes `self` as JSON to the file at `file_path`.
fn write_json(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
/// Reads a value of this type from the JSON file at `file_path`.
fn read_json(file_path: &str) -> Result<Self, Box<dyn Error>>;
/// Serializes `self` to a JSON string.
fn to_json_string(&self) -> String;
/// Builds a value of this type from the JSON text `s`, returning an error
/// if the text cannot be interpreted.
fn from_json_string(s: &str) -> Result<Self, Box<dyn Error>>;
}
/// Return type of [`parse_puruda_json`]: on success, the header names in
/// document order together with a map from each data key to its array
/// elements as raw (still stringly-typed) values.
pub type JsonParseResult = Result<(Vec<String>, HashMap<String, Vec<String>>), Box<dyn Error>>;
/// Extracts the `"headers"` array and the `"data"` object from a puruda JSON
/// document of the shape
/// `{"headers": ["a", "b"], "data": {"a": [1, 2], "b": ["x", "y"]}}`.
///
/// This is a purpose-built scanner, not a general JSON parser: it locates the
/// two keys textually and returns every array element as a raw string.
/// Backslash escapes inside string literals are recognised (so `\"` does not
/// end a string) but are *not* decoded. Brackets, braces and commas that occur
/// inside string literals are treated as ordinary text.
///
/// # Returns
///
/// `(headers, data)` where `headers` keeps document order (empty names are
/// dropped) and `data` maps each key of the `"data"` object to its array
/// elements. A trailing entry of the data object that is cut off (missing
/// quote or bracket) is skipped rather than reported.
///
/// # Errors
///
/// Returns an error when the `"headers"` key, its array brackets, the `"data"`
/// key, or the data object's opening or closing brace cannot be found.
pub fn parse_puruda_json(s: &str) -> JsonParseResult {
    // Yields `(byte_offset, char)` for every character of `text` that lies
    // outside a double-quoted string, plus the opening and closing quote of
    // each string. String contents (including escaped quotes) are skipped.
    fn structural_chars(text: &str) -> impl Iterator<Item = (usize, char)> + '_ {
        let mut in_string = false;
        let mut escaped = false;
        text.char_indices().filter(move |&(_, ch)| {
            if !in_string {
                in_string = ch == '"';
                return true;
            }
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
                return true;
            }
            false
        })
    }

    // Byte offset of the first `target` in `text` that is not inside a string.
    fn find_structural(text: &str, target: char) -> Option<usize> {
        structural_chars(text)
            .find(|&(_, ch)| ch == target)
            .map(|(offset, _)| offset)
    }

    let s = s.trim();

    // --- headers -----------------------------------------------------------
    let headers_start = s.find("\"headers\"").ok_or("Missing 'headers' key")?;
    let arr_open = s[headers_start..]
        .find('[')
        .ok_or("Missing headers array")?;
    let headers_body = &s[headers_start + arr_open + 1..];
    let arr_close = find_structural(headers_body, ']').ok_or("Missing headers array end")?;
    let headers: Vec<String> = parse_json_array_values(&headers_body[..arr_close])
        .into_iter()
        .filter(|h| !h.is_empty())
        .collect();

    // --- data object -------------------------------------------------------
    let data_start = s.find("\"data\"").ok_or("Missing 'data' key")?;
    let data_brace = s[data_start..].find('{').ok_or("Missing data object")?;
    let data_section = &s[data_start + data_brace..];

    // Find the brace that closes the data object; braces inside strings are
    // never seen because `structural_chars` skips string contents.
    let mut depth = 0usize;
    let mut data_end = None;
    for (offset, ch) in structural_chars(data_section) {
        match ch {
            '{' => depth += 1,
            '}' => {
                // `data_section` starts with '{', so depth is at least 1 here.
                depth -= 1;
                if depth == 0 {
                    data_end = Some(offset);
                    break;
                }
            }
            _ => {}
        }
    }
    let data_end = data_end.ok_or("Unterminated 'data' object")?;
    let data_inner = &data_section[1..data_end];

    // --- "key": [values] entries ------------------------------------------
    let mut data: HashMap<String, Vec<String>> = HashMap::new();
    let mut pos = 0;
    while pos < data_inner.len() {
        // `pos` always sits outside a string, so the next two structural
        // quotes delimit the next key.
        let mut quotes = structural_chars(&data_inner[pos..])
            .filter(|&(_, ch)| ch == '"')
            .map(move |(offset, _)| pos + offset);
        let (key_open, key_close) = match (quotes.next(), quotes.next()) {
            (Some(open), Some(close)) => (open, close),
            _ => break,
        };
        let key = data_inner[key_open + 1..key_close].to_string();

        let arr_start = match data_inner[key_close..].find('[') {
            Some(i) => key_close + i + 1,
            None => break,
        };
        let arr_end = match find_structural(&data_inner[arr_start..], ']') {
            Some(i) => arr_start + i,
            None => break,
        };

        data.insert(key, parse_json_array_values(&data_inner[arr_start..arr_end]));
        pos = arr_end + 1;
    }

    Ok((headers, data))
}

/// Splits the inside of a JSON array (the text between `[` and `]`) into its
/// elements, returned as raw strings.
///
/// * Quoted elements are returned without their surrounding quotes; escape
///   sequences are kept verbatim (`\"` stays `\"`). An unterminated string
///   runs to the end of the input.
/// * Unquoted elements (numbers, `true`, `null`, …) run up to the next comma,
///   whitespace character or `]`.
/// * An unquoted `]` ends parsing, since it marks the end of the array.
fn parse_json_array_values(s: &str) -> Vec<String> {
    let chars: Vec<char> = s.trim().chars().collect();
    let len = chars.len();
    let mut values: Vec<String> = Vec::new();
    let mut i = 0;

    while i < len {
        match chars[i] {
            // Separators between elements.
            ' ' | ',' | '\n' | '\r' | '\t' => i += 1,
            // End of the array; nothing after it belongs to this array.
            ']' => break,
            '"' => {
                let start = i + 1;
                i = start;
                while i < len && chars[i] != '"' {
                    // A backslash consumes the following character as well.
                    i += if chars[i] == '\\' { 2 } else { 1 };
                }
                // A trailing backslash can push `i` one past the end.
                let end = i.min(len);
                values.push(chars[start..end].iter().collect());
                i = end + 1;
            }
            _ => {
                let start = i;
                while i < len && !matches!(chars[i], ',' | ']' | ' ' | '\n' | '\r' | '\t') {
                    i += 1;
                }
                // Never empty: the first character is not a terminator.
                values.push(chars[start..i].iter().collect());
            }
        }
    }

    values
}
// Generated by `puruda_macro` (expansion not visible here); presumably
// provides the `JsonIO` implementations for the multi-column types, built on
// `parse_puruda_json` — TODO confirm.
multi_col_json_impl!();