analyse_json/json/ndjson/
stats.rs1use owo_colors::{OwoColorize, Stream};
2use serde::{Deserialize, Serialize};
3use std::fmt;
4use std::io::IsTerminal;
5use std::iter::Sum;
6use std::ops::Add;
7
8use crate::json::IndexMap;
9
10#[derive(Debug, PartialEq, Eq, Default, Clone, Serialize, Deserialize)]
12pub struct Stats {
13 pub keys_count: IndexMap<String, usize>,
14 pub line_count: usize,
15 pub bad_lines: Vec<String>,
16 pub keys_types_count: IndexMap<String, usize>,
17 pub empty_lines: Vec<String>,
18 }
20
21impl Stats {
22 pub fn new() -> Stats {
23 Stats {
24 keys_count: IndexMap::new(),
25 line_count: 0,
26 bad_lines: Vec::new(),
27 keys_types_count: IndexMap::new(),
28 empty_lines: Vec::new(),
29 }
30 }
31
32 pub fn key_occurance(&self) -> IndexMap<String, f64> {
33 self.keys_count
34 .iter()
35 .map(|(k, v)| (k.to_owned(), 100f64 * *v as f64 / self.line_count as f64))
36 .collect()
37 }
38
39 pub fn key_type_occurance(&self) -> IndexMap<String, f64> {
40 self.keys_types_count
41 .iter()
42 .map(|(k, v)| (k.to_owned(), 100f64 * *v as f64 / self.line_count as f64))
43 .collect()
44 }
45}
46
47impl fmt::Display for Stats {
48 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49 let stream = Stream::Stdout;
50 writeln!(f, "Keys:\n{:#?}\n", self.keys_count.keys())?;
51 writeln!(f, "Key occurance counts:\n{:#?}", self.keys_count)?;
52 writeln!(f, "\nKey occurance rate:")?;
53 for (k, v) in self.key_occurance() {
54 writeln!(f, "{}: {:.3}%", k, v)?;
55 }
56 writeln!(f, "\nKey type occurance rate:")?;
57 for (k, v) in self.key_type_occurance() {
58 writeln!(f, "{}: {:.3}%", k, v)?;
59 }
60 if !self.bad_lines.is_empty() {
61 writeln!(
62 f,
63 "{}\n{:?}",
64 "Corrupted lines:".if_supports_color(stream, |text| text.red()),
65 self.bad_lines.if_supports_color(stream, |text| text.red())
66 )?;
67 }
68 if !self.empty_lines.is_empty() {
69 writeln!(
70 f,
71 "{}\n{:?}",
72 "Empty lines:".if_supports_color(stream, |text| text.red()),
73 self.empty_lines
74 .if_supports_color(stream, |text| text.red())
75 )?;
76 }
77 Ok(())
78 }
79}
80
81impl Stats {
82 pub fn print(&self) -> std::result::Result<(), serde_json::Error> {
83 if std::io::stdout().is_terminal() {
84 println!("{}", self);
85 Ok(())
86 } else {
87 let json_out = serde_json::to_string_pretty(self)?;
88 println!("{}", json_out);
89 Ok(())
90 }
91 }
92}
93
94#[derive(Debug, PartialEq, Eq, Default, Clone, Serialize, Deserialize)]
95pub struct FileStats {
96 pub file_path: String,
97 pub stats: Stats,
98}
99
100impl FileStats {
101 pub fn new(file_path: String, stats: Stats) -> Self {
102 Self { file_path, stats }
103 }
104}
105
106impl Add for FileStats {
107 type Output = Stats;
108
109 fn add(self, rhs: Self) -> Self::Output {
110 let mut output = self.stats;
111
112 for (k, v) in rhs.stats.keys_count {
113 let counter = output.keys_count.entry(k).or_insert(0);
114 *counter += v
115 }
116
117 for (k, v) in rhs.stats.keys_types_count {
118 let counter = output.keys_types_count.entry(k).or_insert(0);
119 *counter += v
120 }
121
122 output.line_count += rhs.stats.line_count;
123
124 output.bad_lines = output
125 .bad_lines
126 .into_iter()
127 .map(|line_id| format!("{}:{line_id}", self.file_path))
128 .collect();
129 output.bad_lines.extend(
130 rhs.stats
131 .bad_lines
132 .into_iter()
133 .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
134 );
135
136 output.empty_lines = output
137 .empty_lines
138 .into_iter()
139 .map(|line_id| format!("{}:{line_id}", self.file_path))
140 .collect();
141 output.empty_lines.extend(
142 rhs.stats
143 .empty_lines
144 .into_iter()
145 .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
146 );
147
148 output
149 }
150}
151
152impl Add<&Self> for FileStats {
153 type Output = Stats;
154
155 fn add(self, rhs: &Self) -> Self::Output {
156 self.add(rhs.clone())
157 }
158}
159
160impl<'a> Sum<&'a FileStats> for Stats {
161 fn sum<I: Iterator<Item = &'a FileStats>>(iter: I) -> Stats {
162 iter.fold(Self::default(), |acc, x| acc + x)
163 }
164}
165
166impl Add<FileStats> for Stats {
167 type Output = Self;
168
169 fn add(self, rhs: FileStats) -> Self::Output {
170 let mut output = self;
171
172 for (k, v) in rhs.stats.keys_count {
173 let counter = output.keys_count.entry(k).or_insert(0);
174 *counter += v
175 }
176
177 for (k, v) in rhs.stats.keys_types_count {
178 let counter = output.keys_types_count.entry(k).or_insert(0);
179 *counter += v
180 }
181
182 output.line_count += rhs.stats.line_count;
183
184 output.bad_lines.extend(
185 rhs.stats
186 .bad_lines
187 .into_iter()
188 .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
189 );
190
191 output.empty_lines.extend(
192 rhs.stats
193 .empty_lines
194 .into_iter()
195 .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
196 );
197
198 output
199 }
200}
201
202impl Add<&FileStats> for Stats {
203 type Output = Self;
204
205 fn add(self, rhs: &FileStats) -> Self::Output {
206 self.add(rhs.clone())
207 }
208}