analyse_json/json/ndjson/
stats.rs

1use owo_colors::{OwoColorize, Stream};
2use serde::{Deserialize, Serialize};
3use std::fmt;
4use std::io::IsTerminal;
5use std::iter::Sum;
6use std::ops::Add;
7
8use crate::json::IndexMap;
9
10/// Container for the data collected about the JSONs along the way
11#[derive(Debug, PartialEq, Eq, Default, Clone, Serialize, Deserialize)]
12pub struct Stats {
13    pub keys_count: IndexMap<String, usize>,
14    pub line_count: usize,
15    pub bad_lines: Vec<String>,
16    pub keys_types_count: IndexMap<String, usize>,
17    pub empty_lines: Vec<String>,
18    // TODO: Add this: pub json_count: usize,
19}
20
21impl Stats {
22    pub fn new() -> Stats {
23        Stats {
24            keys_count: IndexMap::new(),
25            line_count: 0,
26            bad_lines: Vec::new(),
27            keys_types_count: IndexMap::new(),
28            empty_lines: Vec::new(),
29        }
30    }
31
32    pub fn key_occurance(&self) -> IndexMap<String, f64> {
33        self.keys_count
34            .iter()
35            .map(|(k, v)| (k.to_owned(), 100f64 * *v as f64 / self.line_count as f64))
36            .collect()
37    }
38
39    pub fn key_type_occurance(&self) -> IndexMap<String, f64> {
40        self.keys_types_count
41            .iter()
42            .map(|(k, v)| (k.to_owned(), 100f64 * *v as f64 / self.line_count as f64))
43            .collect()
44    }
45}
46
47impl fmt::Display for Stats {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        let stream = Stream::Stdout;
50        writeln!(f, "Keys:\n{:#?}\n", self.keys_count.keys())?;
51        writeln!(f, "Key occurance counts:\n{:#?}", self.keys_count)?;
52        writeln!(f, "\nKey occurance rate:")?;
53        for (k, v) in self.key_occurance() {
54            writeln!(f, "{}: {:.3}%", k, v)?;
55        }
56        writeln!(f, "\nKey type occurance rate:")?;
57        for (k, v) in self.key_type_occurance() {
58            writeln!(f, "{}: {:.3}%", k, v)?;
59        }
60        if !self.bad_lines.is_empty() {
61            writeln!(
62                f,
63                "{}\n{:?}",
64                "Corrupted lines:".if_supports_color(stream, |text| text.red()),
65                self.bad_lines.if_supports_color(stream, |text| text.red())
66            )?;
67        }
68        if !self.empty_lines.is_empty() {
69            writeln!(
70                f,
71                "{}\n{:?}",
72                "Empty lines:".if_supports_color(stream, |text| text.red()),
73                self.empty_lines
74                    .if_supports_color(stream, |text| text.red())
75            )?;
76        }
77        Ok(())
78    }
79}
80
81impl Stats {
82    pub fn print(&self) -> std::result::Result<(), serde_json::Error> {
83        if std::io::stdout().is_terminal() {
84            println!("{}", self);
85            Ok(())
86        } else {
87            let json_out = serde_json::to_string_pretty(self)?;
88            println!("{}", json_out);
89            Ok(())
90        }
91    }
92}
93
94#[derive(Debug, PartialEq, Eq, Default, Clone, Serialize, Deserialize)]
95pub struct FileStats {
96    pub file_path: String,
97    pub stats: Stats,
98}
99
100impl FileStats {
101    pub fn new(file_path: String, stats: Stats) -> Self {
102        Self { file_path, stats }
103    }
104}
105
106impl Add for FileStats {
107    type Output = Stats;
108
109    fn add(self, rhs: Self) -> Self::Output {
110        let mut output = self.stats;
111
112        for (k, v) in rhs.stats.keys_count {
113            let counter = output.keys_count.entry(k).or_insert(0);
114            *counter += v
115        }
116
117        for (k, v) in rhs.stats.keys_types_count {
118            let counter = output.keys_types_count.entry(k).or_insert(0);
119            *counter += v
120        }
121
122        output.line_count += rhs.stats.line_count;
123
124        output.bad_lines = output
125            .bad_lines
126            .into_iter()
127            .map(|line_id| format!("{}:{line_id}", self.file_path))
128            .collect();
129        output.bad_lines.extend(
130            rhs.stats
131                .bad_lines
132                .into_iter()
133                .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
134        );
135
136        output.empty_lines = output
137            .empty_lines
138            .into_iter()
139            .map(|line_id| format!("{}:{line_id}", self.file_path))
140            .collect();
141        output.empty_lines.extend(
142            rhs.stats
143                .empty_lines
144                .into_iter()
145                .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
146        );
147
148        output
149    }
150}
151
152impl Add<&Self> for FileStats {
153    type Output = Stats;
154
155    fn add(self, rhs: &Self) -> Self::Output {
156        self.add(rhs.clone())
157    }
158}
159
160impl<'a> Sum<&'a FileStats> for Stats {
161    fn sum<I: Iterator<Item = &'a FileStats>>(iter: I) -> Stats {
162        iter.fold(Self::default(), |acc, x| acc + x)
163    }
164}
165
166impl Add<FileStats> for Stats {
167    type Output = Self;
168
169    fn add(self, rhs: FileStats) -> Self::Output {
170        let mut output = self;
171
172        for (k, v) in rhs.stats.keys_count {
173            let counter = output.keys_count.entry(k).or_insert(0);
174            *counter += v
175        }
176
177        for (k, v) in rhs.stats.keys_types_count {
178            let counter = output.keys_types_count.entry(k).or_insert(0);
179            *counter += v
180        }
181
182        output.line_count += rhs.stats.line_count;
183
184        output.bad_lines.extend(
185            rhs.stats
186                .bad_lines
187                .into_iter()
188                .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
189        );
190
191        output.empty_lines.extend(
192            rhs.stats
193                .empty_lines
194                .into_iter()
195                .map(|line_id| format!("{}:{line_id}", rhs.file_path)),
196        );
197
198        output
199    }
200}
201
202impl Add<&FileStats> for Stats {
203    type Output = Self;
204
205    fn add(self, rhs: &FileStats) -> Self::Output {
206        self.add(rhs.clone())
207    }
208}