use std::io;
use io::BufRead;
use io::Write;
use std::collections::BTreeMap;
use serde_json::Map;
use serde_json::Value;
/// Running minimum / maximum / sum / count over the `f64` values seen for one key.
///
/// The derived `Default` sets every field to zero.
#[derive(Default, serde::Serialize)]
pub struct RealStat {
/// Running minimum, updated by `RealStat::process_value`.
pub minimum: f64,
/// Running maximum, updated by `RealStat::process_value`.
pub maximum: f64,
/// Sum of the values folded in.
pub total: f64,
/// Number of values folded in.
pub count: u64,
}
impl RealStat {
    /// Folds `val` into the running statistics.
    ///
    /// The first value recorded (`count == 0`) seeds both `minimum` and
    /// `maximum`. Without that, a `RealStat::default()` would keep its
    /// zero-initialised bounds in the comparison and report a minimum of `0`
    /// for strictly positive data (or a maximum of `0` for strictly negative
    /// data).
    ///
    /// Subsequent values widen the range with `f64::min` / `f64::max`, add to
    /// `total`, and bump `count`.
    pub fn process_value(&mut self, val: f64) {
        if self.count == 0 {
            // Nothing observed yet: the stored bounds are placeholders.
            self.minimum = val;
            self.maximum = val;
        } else {
            self.minimum = val.min(self.minimum);
            self.maximum = val.max(self.maximum);
        }
        self.total += val;
        self.count += 1;
    }
}
/// Running minimum / maximum / sum / count over the `i64` values seen for one key.
///
/// The derived `Default` sets every field to zero.
#[derive(Default, serde::Serialize)]
pub struct IntStat {
/// Running minimum, updated by `IntStat::process_value`.
pub minimum: i64,
/// Running maximum, updated by `IntStat::process_value`.
pub maximum: i64,
/// Sum of the values folded in.
pub total: i64,
/// Number of values folded in.
pub count: u64,
}
impl IntStat {
    /// Folds `val` into the running statistics.
    ///
    /// The first value recorded (`count == 0`) seeds both `minimum` and
    /// `maximum`, so an `IntStat::default()` does not leave its zero-initialised
    /// bounds in the comparison (which would pin the minimum at `0` for
    /// strictly positive data and the maximum at `0` for strictly negative
    /// data).
    ///
    /// # Panics
    ///
    /// In builds with overflow checks enabled, panics if adding `val` to
    /// `total` overflows `i64`.
    pub fn process_value(&mut self, val: i64) {
        if self.count == 0 {
            // Nothing observed yet: the stored bounds are placeholders.
            self.minimum = val;
            self.maximum = val;
        } else {
            self.minimum = val.min(self.minimum);
            self.maximum = val.max(self.maximum);
        }
        self.total += val;
        self.count += 1;
    }
}
/// Running minimum / maximum / count over the string values seen for one key.
///
/// Strings are compared with `String`'s `<` operator. The derived `Default`
/// yields empty strings and a zero count.
#[derive(Default, serde::Serialize)]
pub struct StrStat {
/// Running minimum, updated by `StrStat::process_value`.
pub minimum: String,
/// Running maximum, updated by `StrStat::process_value`.
pub maximum: String,
/// Number of values folded in.
pub count: u64,
}
impl StrStat {
    /// Folds `val` into the running statistics, taking ownership of it.
    ///
    /// The first value recorded (`count == 0`) seeds both `minimum` and
    /// `maximum`. Without that, a `StrStat::default()` would keep `""` as its
    /// minimum forever, because no string compares below the empty string.
    ///
    /// Later values replace `minimum` / `maximum` only when they compare
    /// strictly below / above the stored bound.
    pub fn process_value(&mut self, val: String) {
        if self.count == 0 {
            // Nothing observed yet: the stored bounds are placeholders.
            self.minimum = val.clone();
            self.maximum = val;
            self.count = 1;
            return;
        }

        self.count += 1;
        match (self.maximum < val, val < self.minimum) {
            // Only reachable when `maximum < minimum`, i.e. the public fields
            // were set inconsistently; repair both bounds.
            (true, true) => {
                self.maximum = val.clone();
                self.minimum = val;
            }
            (true, false) => {
                self.maximum = val;
            }
            (false, true) => {
                self.minimum = val;
            }
            (false, false) => {}
        }
    }
}
/// Per-key statistics, grouped by the value type each key was recorded as.
///
/// Each map is keyed by the name passed to the matching `process_*` method.
#[derive(Default, serde::Serialize)]
pub struct BasicStats {
/// Statistics for keys recorded through `process_real`.
pub real: BTreeMap<String, RealStat>,
/// Statistics for keys recorded through `process_int`.
pub integer: BTreeMap<String, IntStat>,
/// Statistics for keys recorded through `process_str`.
pub string: BTreeMap<String, StrStat>,
}
impl BasicStats {
    /// Records `real` under `key` in the `real` map.
    ///
    /// The first value seen for a key creates its entry (min = max = total =
    /// value, count = 1); later values are folded in with
    /// `RealStat::process_value`. The key is only copied into an owned
    /// `String` when a new entry is created.
    ///
    /// # Errors
    ///
    /// Never fails at present; the `Result` is kept so existing callers that
    /// use `?` keep compiling.
    pub fn process_real(&mut self, key: &str, real: f64) -> Result<(), io::Error> {
        if let Some(stat) = self.real.get_mut(key) {
            stat.process_value(real);
        } else {
            self.real.insert(
                key.to_owned(),
                RealStat {
                    minimum: real,
                    maximum: real,
                    total: real,
                    count: 1,
                },
            );
        }
        Ok(())
    }

    /// Records `integer` under `key` in the `integer` map.
    ///
    /// The first value seen for a key creates its entry (min = max = total =
    /// value, count = 1); later values are folded in with
    /// `IntStat::process_value`.
    ///
    /// # Errors
    ///
    /// Never fails at present; the `Result` is kept so existing callers that
    /// use `?` keep compiling.
    pub fn process_int(&mut self, key: &str, integer: i64) -> Result<(), io::Error> {
        if let Some(stat) = self.integer.get_mut(key) {
            stat.process_value(integer);
        } else {
            self.integer.insert(
                key.to_owned(),
                IntStat {
                    minimum: integer,
                    maximum: integer,
                    total: integer,
                    count: 1,
                },
            );
        }
        Ok(())
    }

    /// Records `string` under `key` in the `string` map, taking ownership of it.
    ///
    /// The first value seen for a key creates its entry (min = max = value,
    /// count = 1); later values are folded in with `StrStat::process_value`.
    ///
    /// # Errors
    ///
    /// Never fails at present; the `Result` is kept so existing callers that
    /// use `?` keep compiling.
    pub fn process_str(&mut self, key: &str, string: String) -> Result<(), io::Error> {
        if let Some(stat) = self.string.get_mut(key) {
            stat.process_value(string);
        } else {
            self.string.insert(
                key.to_owned(),
                StrStat {
                    // Both bounds need their own copy of the first value.
                    minimum: string.clone(),
                    maximum: string,
                    count: 1,
                },
            );
        }
        Ok(())
    }
}
impl BasicStats {
    /// Builds statistics from a stream of already-parsed JSON objects.
    ///
    /// For every object, the keys in `reals` are read with `Value::as_f64`,
    /// the keys in `ints` with `Value::as_i64`, and the keys in `strs` must
    /// hold a JSON string. Missing keys and values of another type are
    /// skipped.
    ///
    /// Each lookup *removes* the key from the object, and the lists are
    /// visited in the order `reals`, `ints`, `strs`. A key named in more than
    /// one list (or twice in the same list) is therefore only seen by the
    /// first lookup, even when that lookup then discards the value because of
    /// its type.
    ///
    /// # Errors
    ///
    /// Returns the first error yielded by `jsonl`, or any error reported by
    /// the `process_*` methods.
    pub fn jsonl2stats<I>(
        jsonl: I,
        reals: Vec<String>,
        ints: Vec<String>,
        strs: Vec<String>,
    ) -> Result<Self, io::Error>
    where
        I: Iterator<Item = Result<Map<String, Value>, io::Error>>,
    {
        let mut acc = BasicStats::default();
        for row in jsonl {
            let mut fields: Map<String, Value> = row?;

            for name in &reals {
                if let Some(num) = fields.remove(name).and_then(|v| v.as_f64()) {
                    acc.process_real(name, num)?;
                }
            }

            for name in &ints {
                if let Some(num) = fields.remove(name).and_then(|v| v.as_i64()) {
                    acc.process_int(name, num)?;
                }
            }

            for name in &strs {
                match fields.remove(name) {
                    Some(Value::String(text)) => acc.process_str(name, text)?,
                    _ => {}
                }
            }
        }
        Ok(acc)
    }
}
impl BasicStats {
    /// Serializes these statistics as JSON into `wtr`.
    ///
    /// # Errors
    ///
    /// Any failure reported by `serde_json::to_writer` is wrapped with
    /// `io::Error::other`.
    pub fn to_writer<W>(&self, wtr: W) -> Result<(), io::Error>
    where
        W: Write,
    {
        match serde_json::to_writer(wtr, self) {
            Ok(()) => Ok(()),
            Err(json_err) => Err(io::Error::other(json_err)),
        }
    }
}
pub fn jsonl2objs<I>(jsonl: I) -> impl Iterator<Item = Result<Map<String, Value>, io::Error>>
where
I: Iterator<Item = Result<String, io::Error>>,
{
jsonl.map(|rline| rline.and_then(|line| serde_json::from_str(&line).map_err(io::Error::other)))
}
/// Returns an iterator over the lines of standard input.
///
/// The iterator owns the stdin lock and yields one `Result<String, io::Error>`
/// per line, as produced by `BufRead::lines`.
pub fn stdin2jsonl() -> impl Iterator<Item = Result<String, io::Error>> {
    let locked = io::stdin().lock();
    BufRead::lines(locked)
}
pub fn stdin2jsonl2stats2stdout(
reals: Vec<String>,
ints: Vec<String>,
strs: Vec<String>,
) -> Result<(), io::Error> {
let jsonl = stdin2jsonl();
let objs = jsonl2objs(jsonl);
let stat: BasicStats = BasicStats::jsonl2stats(objs, reals, ints, strs)?;
let o = io::stdout();
let mut ol = o.lock();
stat.to_writer(&mut ol)?;
ol.flush()
}
/// Returns a closure that reads an environment variable and splits its value
/// on `split_char`.
///
/// An unset (or non-Unicode) variable is treated as the empty string. Empty
/// segments are dropped, so an unset or empty variable yields an empty `Vec`
/// rather than a single `""` entry, and stray separators (`"a,,b,"`) do not
/// produce empty keys.
pub fn env_key2val2splited(split_char: &'static str) -> impl Fn(&'static str) -> Vec<String> {
    move |env_key: &'static str| {
        std::env::var(env_key)
            .unwrap_or_default()
            .split(split_char)
            // `"".split(sep)` yields one empty item; it must not become a key.
            .filter(|segment| !segment.is_empty())
            .map(String::from)
            .collect()
    }
}
/// Separator passed to `env_key2val2splited` by `stdin2jsonl2stats2stdout_default`.
pub const SPLIT_CHAR_DEFAULT: &str = ",";
/// Environment variable whose value lists the keys to summarize as reals.
pub const ENV_KEY_REAL: &str = "ENV_REALS";
/// Environment variable whose value lists the keys to summarize as integers.
pub const ENV_KEY_INT: &str = "ENV_INTS";
/// Environment variable whose value lists the keys to summarize as strings.
pub const ENV_KEY_STR: &str = "ENV_STRS";
/// Runs `stdin2jsonl2stats2stdout` with key lists taken from the environment.
///
/// The real, integer and string key lists come from `ENV_KEY_REAL`,
/// `ENV_KEY_INT` and `ENV_KEY_STR` respectively, each split on
/// `SPLIT_CHAR_DEFAULT` via `env_key2val2splited`.
///
/// # Errors
///
/// Propagates any error from `stdin2jsonl2stats2stdout`.
pub fn stdin2jsonl2stats2stdout_default() -> Result<(), io::Error> {
    let keys_from_env = env_key2val2splited(SPLIT_CHAR_DEFAULT);
    let reals = keys_from_env(ENV_KEY_REAL);
    let ints = keys_from_env(ENV_KEY_INT);
    let strs = keys_from_env(ENV_KEY_STR);
    stdin2jsonl2stats2stdout(reals, ints, strs)
}