1use super::hashable_value::HashableValue;
2use itertools::Itertools;
3use nu_engine::command_prelude::*;
4
5use std::collections::HashMap;
6
/// The `histogram` command.
///
/// A stateless unit type; all behaviour is provided by its `Command`
/// implementation.
#[derive(Clone)]
pub struct Histogram;
9
/// Selects the denominator used when turning a value's count into a quantile.
///
/// The type is a plain tag, so it derives the cheap standard traits: it can be
/// copied, compared and debug-printed without ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PercentageCalcMethod {
    /// Divide each count by the total number of counted values.
    Normalize,
    /// Divide each count by the highest count among all values.
    Relative,
}
14
15impl Command for Histogram {
16 fn name(&self) -> &str {
17 "histogram"
18 }
19
20 fn signature(&self) -> Signature {
21 Signature::build("histogram")
22 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::table())])
23 .optional(
24 "column-name",
25 SyntaxShape::String,
26 "Column name to calc frequency, no need to provide if input is a list.",
27 )
28 .optional(
29 "frequency-column-name",
30 SyntaxShape::String,
31 "Histogram's frequency column, default to be frequency column output.",
32 )
33 .param(
34 Flag::new("percentage-type")
35 .short('t')
36 .arg(SyntaxShape::String)
37 .desc(
38 "percentage calculate method, can be 'normalize' or 'relative', in \
39 'normalize', defaults to be 'normalize'",
40 )
41 .completion(Completion::new_list(&["normalize", "relative"])),
42 )
43 .category(Category::Chart)
44 }
45
46 fn description(&self) -> &str {
47 "Creates a new table with a histogram based on the column name passed in."
48 }
49
50 fn examples(&self) -> Vec<Example<'_>> {
51 vec![
52 Example {
53 description: "Compute a histogram of file types",
54 example: "ls | histogram type",
55 result: None,
56 },
57 Example {
58 description: "Compute a histogram for the types of files, with frequency column \
59 named freq",
60 example: "ls | histogram type freq",
61 result: None,
62 },
63 Example {
64 description: "Compute a histogram for a list of numbers",
65 example: "[1 2 1] | histogram",
66 result: Some(Value::test_list(vec![
67 Value::test_record(record! {
68 "value" => Value::test_int(1),
69 "count" => Value::test_int(2),
70 "quantile" => Value::test_float(0.6666666666666666),
71 "percentage" => Value::test_string("66.67%"),
72 "frequency" => Value::test_string("******************************************************************"),
73 }),
74 Value::test_record(record! {
75 "value" => Value::test_int(2),
76 "count" => Value::test_int(1),
77 "quantile" => Value::test_float(0.3333333333333333),
78 "percentage" => Value::test_string("33.33%"),
79 "frequency" => Value::test_string("*********************************"),
80 }),
81 ])),
82 },
83 Example {
84 description: "Compute a histogram for a list of numbers, and percentage is based \
85 on the maximum value",
86 example: "[1 2 3 1 1 1 2 2 1 1] | histogram --percentage-type relative",
87 result: None,
88 },
89 ]
90 }
91
92 fn run(
93 &self,
94 engine_state: &EngineState,
95 stack: &mut Stack,
96 call: &Call,
97 input: PipelineData,
98 ) -> Result<PipelineData, ShellError> {
99 let column_name: Option<Spanned<String>> = call.opt(engine_state, stack, 0)?;
101 let frequency_name_arg = call.opt::<Spanned<String>>(engine_state, stack, 1)?;
102 let frequency_column_name = match frequency_name_arg {
103 Some(inner) => {
104 let forbidden_column_names = ["value", "count", "quantile", "percentage"];
105 if forbidden_column_names.contains(&inner.item.as_str()) {
106 return Err(ShellError::TypeMismatch {
107 err_message: format!(
108 "frequency-column-name can't be {}",
109 forbidden_column_names
110 .iter()
111 .map(|val| format!("'{val}'"))
112 .collect::<Vec<_>>()
113 .join(", ")
114 ),
115 span: inner.span,
116 });
117 }
118 inner.item
119 }
120 None => "frequency".to_string(),
121 };
122
123 let calc_method: Option<Spanned<String>> =
124 call.get_flag(engine_state, stack, "percentage-type")?;
125 let calc_method = match calc_method {
126 None => PercentageCalcMethod::Normalize,
127 Some(inner) => match inner.item.as_str() {
128 "normalize" => PercentageCalcMethod::Normalize,
129 "relative" => PercentageCalcMethod::Relative,
130 _ => {
131 return Err(ShellError::TypeMismatch {
132 err_message: "calc method can only be 'normalize' or 'relative'"
133 .to_string(),
134 span: inner.span,
135 });
136 }
137 },
138 };
139
140 let span = call.head;
141 let data_as_value = input.into_value(span)?;
142 let value_span = data_as_value.span();
143 run_histogram(
145 data_as_value.into_list()?,
146 column_name,
147 frequency_column_name,
148 calc_method,
149 span,
150 value_span,
152 )
153 }
154}
155
156fn run_histogram(
157 values: Vec<Value>,
158 column_name: Option<Spanned<String>>,
159 freq_column: String,
160 calc_method: PercentageCalcMethod,
161 head_span: Span,
162 list_span: Span,
163) -> Result<PipelineData, ShellError> {
164 let mut inputs = vec![];
165 match column_name {
167 None => {
168 for v in values {
171 match v {
172 Value::Error { error, .. } => return Err(*error),
174 _ => {
175 let t = v.get_type();
176 let span = v.span();
177 inputs.push(HashableValue::from_value(v, head_span).map_err(|_| {
178 ShellError::UnsupportedInput {
179 msg: "Since column-name was not provided, only lists of hashable \
180 values are supported."
181 .to_string(),
182 input: format!("input type: {t:?}"),
183 msg_span: head_span,
184 input_span: span,
185 }
186 })?)
187 }
188 }
189 }
190 }
191 Some(ref col) => {
192 let col_name = &col.item;
198 for v in values {
199 match v {
200 Value::Record { val, .. } => {
202 if let Some(v) = val.get(col_name)
203 && let Ok(v) = HashableValue::from_value(v.clone(), head_span)
204 {
205 inputs.push(v);
206 }
207 }
208 Value::Error { error, .. } => return Err(*error),
210 _ => continue,
211 }
212 }
213
214 if inputs.is_empty() {
215 return Err(ShellError::CantFindColumn {
216 col_name: col_name.clone(),
217 span: Some(head_span),
218 src_span: list_span,
219 });
220 }
221 }
222 }
223
224 let value_column_name = column_name
225 .map(|x| x.item)
226 .unwrap_or_else(|| "value".to_string());
227 Ok(histogram_impl(
228 inputs,
229 &value_column_name,
230 calc_method,
231 &freq_column,
232 head_span,
233 ))
234}
235
236fn histogram_impl(
237 inputs: Vec<HashableValue>,
238 value_column_name: &str,
239 calc_method: PercentageCalcMethod,
240 freq_column: &str,
241 span: Span,
242) -> PipelineData {
243 let mut counter = HashMap::new();
246 let mut max_cnt = 0;
247 let total_cnt = inputs.len();
248 for i in inputs {
249 let new_cnt = *counter.get(&i).unwrap_or(&0) + 1;
250 counter.insert(i, new_cnt);
251 if new_cnt > max_cnt {
252 max_cnt = new_cnt;
253 }
254 }
255
256 let mut result = vec![];
257 const MAX_FREQ_COUNT: f64 = 100.0;
258 for (val, count) in counter.into_iter().sorted() {
259 let quantile = match calc_method {
260 PercentageCalcMethod::Normalize => count as f64 / total_cnt as f64,
261 PercentageCalcMethod::Relative => count as f64 / max_cnt as f64,
262 };
263
264 let percentage = format!("{:.2}%", quantile * 100_f64);
265 let freq = "*".repeat((MAX_FREQ_COUNT * quantile).floor() as usize);
266
267 result.push((
268 count, Value::record(
270 record! {
271 value_column_name => val.into_value(),
272 "count" => Value::int(count, span),
273 "quantile" => Value::float(quantile, span),
274 "percentage" => Value::string(percentage, span),
275 freq_column => Value::string(freq, span),
276 },
277 span,
278 ),
279 ));
280 }
281 result.sort_by(|a, b| b.0.cmp(&a.0));
282 Value::list(result.into_iter().map(|x| x.1).collect(), span).into_pipeline_data()
283}
284
#[cfg(test)]
mod tests {
    use super::Histogram;

    /// Runs the examples declared by `Histogram::examples` through the crate's
    /// shared example checker.
    #[test]
    fn test_examples() {
        // Called by path: importing the helper at module level would clash
        // with this test function's own name.
        crate::test_examples(Histogram)
    }
}
295}