rs_jsonl2stats/
lib.rs

1use std::io;
2
3use io::BufRead;
4
5use io::Write;
6
7use std::collections::BTreeMap;
8
9use serde_json::Map;
10use serde_json::Value;
11
12#[derive(Default, serde::Serialize)]
13pub struct RealStat {
14    pub minimum: f64,
15    pub maximum: f64,
16    pub total: f64,
17    pub count: u64,
18}
19
20impl RealStat {
21    pub fn process_value(&mut self, val: f64) {
22        self.minimum = val.min(self.minimum);
23        self.maximum = val.max(self.maximum);
24        self.total += val;
25        self.count += 1;
26    }
27}
28
29#[derive(Default, serde::Serialize)]
30pub struct IntStat {
31    pub minimum: i64,
32    pub maximum: i64,
33    pub total: i64,
34    pub count: u64,
35}
36
37impl IntStat {
38    pub fn process_value(&mut self, val: i64) {
39        self.minimum = val.min(self.minimum);
40        self.maximum = val.max(self.maximum);
41        self.total += val;
42        self.count += 1;
43    }
44}
45
46#[derive(Default, serde::Serialize)]
47pub struct StrStat {
48    pub minimum: String,
49    pub maximum: String,
50    pub count: u64,
51}
52
53impl StrStat {
54    pub fn process_value(&mut self, val: String) {
55        self.count += 1;
56
57        match (self.maximum < val, val < self.minimum) {
58            (true, true) => {
59                self.maximum = val.clone();
60                self.minimum = val;
61            }
62            (true, false) => {
63                self.maximum = val;
64            }
65            (false, true) => {
66                self.minimum = val;
67            }
68            (false, false) => {}
69        }
70    }
71}
72
73#[derive(Default, serde::Serialize)]
74pub struct BasicStats {
75    pub real: BTreeMap<String, RealStat>,
76    pub integer: BTreeMap<String, IntStat>,
77    pub string: BTreeMap<String, StrStat>,
78}
79
80impl BasicStats {
81    pub fn process_real(&mut self, key: &str, real: f64) -> Result<(), io::Error> {
82        if !self.real.contains_key(key) {
83            self.real.insert(
84                key.into(),
85                RealStat {
86                    minimum: real,
87                    maximum: real,
88                    total: real,
89                    count: 1,
90                },
91            );
92            return Ok(());
93        }
94
95        let stat: &mut RealStat = self
96            .real
97            .get_mut(key)
98            .ok_or(io::Error::other("must exist"))?;
99
100        stat.process_value(real);
101
102        Ok(())
103    }
104
105    pub fn process_int(&mut self, key: &str, integer: i64) -> Result<(), io::Error> {
106        if !self.integer.contains_key(key) {
107            self.integer.insert(
108                key.into(),
109                IntStat {
110                    minimum: integer,
111                    maximum: integer,
112                    total: integer,
113                    count: 1,
114                },
115            );
116            return Ok(());
117        }
118
119        let stat: &mut IntStat = self
120            .integer
121            .get_mut(key)
122            .ok_or(io::Error::other("must exist"))?;
123
124        stat.process_value(integer);
125
126        Ok(())
127    }
128
129    pub fn process_str(&mut self, key: &str, string: String) -> Result<(), io::Error> {
130        if !self.string.contains_key(key) {
131            self.string.insert(
132                key.into(),
133                StrStat {
134                    minimum: string.clone(),
135                    maximum: string,
136                    count: 1,
137                },
138            );
139            return Ok(());
140        }
141
142        let stat: &mut StrStat = self
143            .string
144            .get_mut(key)
145            .ok_or(io::Error::other("must exist"))?;
146
147        stat.process_value(string);
148
149        Ok(())
150    }
151}
152
153impl BasicStats {
154    pub fn jsonl2stats<I>(
155        jsonl: I,
156        reals: Vec<String>,
157        ints: Vec<String>,
158        strs: Vec<String>,
159    ) -> Result<Self, io::Error>
160    where
161        I: Iterator<Item = Result<Map<String, Value>, io::Error>>,
162    {
163        let mut stats = BasicStats::default();
164        for robj in jsonl {
165            let mut obj: Map<String, Value> = robj?;
166
167            for rkey in &reals {
168                let oval: Option<Value> = obj.remove(rkey);
169                let of64: Option<f64> = oval.and_then(|v| v.as_f64());
170                if let Some(val) = of64 {
171                    stats.process_real(rkey, val)?;
172                }
173            }
174
175            for ikey in &ints {
176                let oval: Option<Value> = obj.remove(ikey);
177                let oi64: Option<i64> = oval.and_then(|v| v.as_i64());
178                if let Some(val) = oi64 {
179                    stats.process_int(ikey, val)?;
180                }
181            }
182
183            for skey in &strs {
184                if let Some(Value::String(s)) = obj.remove(skey) {
185                    stats.process_str(skey, s)?;
186                }
187            }
188        }
189        Ok(stats)
190    }
191}
192
193impl BasicStats {
194    pub fn to_writer<W>(&self, wtr: W) -> Result<(), io::Error>
195    where
196        W: Write,
197    {
198        serde_json::to_writer(wtr, self).map_err(io::Error::other)
199    }
200}
201
202pub fn jsonl2objs<I>(jsonl: I) -> impl Iterator<Item = Result<Map<String, Value>, io::Error>>
203where
204    I: Iterator<Item = Result<String, io::Error>>,
205{
206    jsonl.map(|rline| rline.and_then(|line| serde_json::from_str(&line).map_err(io::Error::other)))
207}
208
/// Returns an iterator over the lines of standard input.
///
/// Standard input is locked for as long as the returned iterator is alive.
pub fn stdin2jsonl() -> impl Iterator<Item = Result<String, io::Error>> {
    // `Stdin::lines` locks the handle and iterates its lines, which is what
    // `stdin().lock().lines()` spells out by hand.
    io::stdin().lines()
}
212
213pub fn stdin2jsonl2stats2stdout(
214    reals: Vec<String>,
215    ints: Vec<String>,
216    strs: Vec<String>,
217) -> Result<(), io::Error> {
218    let jsonl = stdin2jsonl();
219    let objs = jsonl2objs(jsonl);
220    let stat: BasicStats = BasicStats::jsonl2stats(objs, reals, ints, strs)?;
221
222    let o = io::stdout();
223    let mut ol = o.lock();
224
225    stat.to_writer(&mut ol)?;
226
227    ol.flush()
228}
229
/// Creates a function that reads an environment variable and splits its value
/// on `split_char`.
///
/// The returned closure treats a variable that is unset (or not valid Unicode)
/// as an empty string. Empty segments are dropped: splitting `""` would
/// otherwise produce a single empty key, and doubled or trailing separators
/// would add further empty keys. An unset variable therefore yields an empty
/// `Vec`.
pub fn env_key2val2splited(split_char: &'static str) -> impl Fn(&'static str) -> Vec<String> {
    move |env_key: &'static str| {
        let raw: String = std::env::var(env_key).unwrap_or_default();
        raw.split(split_char)
            .filter(|segment| !segment.is_empty())
            .map(String::from)
            .collect()
    }
}
237
/// Separator used to split the key lists read from the environment.
pub const SPLIT_CHAR_DEFAULT: &str = ",";

/// Environment variable naming the keys to aggregate as `f64` values.
pub const ENV_KEY_REAL: &str = "ENV_REALS";

/// Environment variable naming the keys to aggregate as `i64` values.
pub const ENV_KEY_INT: &str = "ENV_INTS";

/// Environment variable naming the keys to aggregate as string values.
pub const ENV_KEY_STR: &str = "ENV_STRS";
245
246pub fn stdin2jsonl2stats2stdout_default() -> Result<(), io::Error> {
247    let key2v = env_key2val2splited(SPLIT_CHAR_DEFAULT);
248
249    stdin2jsonl2stats2stdout(key2v(ENV_KEY_REAL), key2v(ENV_KEY_INT), key2v(ENV_KEY_STR))
250}