1use super::hashable_value::HashableValue;
2use itertools::Itertools;
3use nu_engine::command_prelude::*;
4
5use std::collections::HashMap;
6
/// Unit type that carries the `Command` implementation for the `histogram`
/// command; it holds no state of its own.
#[derive(Clone)]
pub struct Histogram;
9
/// Selects the denominator used when turning a value's count into a quantile.
enum PercentageCalcMethod {
    /// Divide each count by the total number of counted inputs.
    Normalize,
    /// Divide each count by the largest count observed for any value.
    Relative,
}
14
15impl Command for Histogram {
16 fn name(&self) -> &str {
17 "histogram"
18 }
19
20 fn signature(&self) -> Signature {
21 Signature::build("histogram")
22 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::table()),])
23 .optional("column-name", SyntaxShape::String, "Column name to calc frequency, no need to provide if input is a list.")
24 .optional("frequency-column-name", SyntaxShape::String, "Histogram's frequency column, default to be frequency column output.")
25 .named("percentage-type", SyntaxShape::String, "percentage calculate method, can be 'normalize' or 'relative', in 'normalize', defaults to be 'normalize'", Some('t'))
26 .category(Category::Chart)
27 }
28
29 fn description(&self) -> &str {
30 "Creates a new table with a histogram based on the column name passed in."
31 }
32
33 fn examples(&self) -> Vec<Example> {
34 vec![
35 Example {
36 description: "Compute a histogram of file types",
37 example: "ls | histogram type",
38 result: None,
39 },
40 Example {
41 description: "Compute a histogram for the types of files, with frequency column named freq",
42 example: "ls | histogram type freq",
43 result: None,
44 },
45 Example {
46 description: "Compute a histogram for a list of numbers",
47 example: "[1 2 1] | histogram",
48 result: Some(Value::test_list(vec![
49 Value::test_record(record! {
50 "value" => Value::test_int(1),
51 "count" => Value::test_int(2),
52 "quantile" => Value::test_float(0.6666666666666666),
53 "percentage" => Value::test_string("66.67%"),
54 "frequency" => Value::test_string("******************************************************************"),
55 }),
56 Value::test_record(record! {
57 "value" => Value::test_int(2),
58 "count" => Value::test_int(1),
59 "quantile" => Value::test_float(0.3333333333333333),
60 "percentage" => Value::test_string("33.33%"),
61 "frequency" => Value::test_string("*********************************"),
62 }),
63 ])),
64 },
65 Example {
66 description: "Compute a histogram for a list of numbers, and percentage is based on the maximum value",
67 example: "[1 2 3 1 1 1 2 2 1 1] | histogram --percentage-type relative",
68 result: None,
69 },
70 ]
71 }
72
73 fn run(
74 &self,
75 engine_state: &EngineState,
76 stack: &mut Stack,
77 call: &Call,
78 input: PipelineData,
79 ) -> Result<PipelineData, ShellError> {
80 let column_name: Option<Spanned<String>> = call.opt(engine_state, stack, 0)?;
82 let frequency_name_arg = call.opt::<Spanned<String>>(engine_state, stack, 1)?;
83 let frequency_column_name = match frequency_name_arg {
84 Some(inner) => {
85 let forbidden_column_names = ["value", "count", "quantile", "percentage"];
86 if forbidden_column_names.contains(&inner.item.as_str()) {
87 return Err(ShellError::TypeMismatch {
88 err_message: format!(
89 "frequency-column-name can't be {}",
90 forbidden_column_names
91 .iter()
92 .map(|val| format!("'{val}'"))
93 .collect::<Vec<_>>()
94 .join(", ")
95 ),
96 span: inner.span,
97 });
98 }
99 inner.item
100 }
101 None => "frequency".to_string(),
102 };
103
104 let calc_method: Option<Spanned<String>> =
105 call.get_flag(engine_state, stack, "percentage-type")?;
106 let calc_method = match calc_method {
107 None => PercentageCalcMethod::Normalize,
108 Some(inner) => match inner.item.as_str() {
109 "normalize" => PercentageCalcMethod::Normalize,
110 "relative" => PercentageCalcMethod::Relative,
111 _ => {
112 return Err(ShellError::TypeMismatch {
113 err_message: "calc method can only be 'normalize' or 'relative'"
114 .to_string(),
115 span: inner.span,
116 });
117 }
118 },
119 };
120
121 let span = call.head;
122 let data_as_value = input.into_value(span)?;
123 let value_span = data_as_value.span();
124 run_histogram(
126 data_as_value.into_list()?,
127 column_name,
128 frequency_column_name,
129 calc_method,
130 span,
131 value_span,
133 )
134 }
135}
136
137fn run_histogram(
138 values: Vec<Value>,
139 column_name: Option<Spanned<String>>,
140 freq_column: String,
141 calc_method: PercentageCalcMethod,
142 head_span: Span,
143 list_span: Span,
144) -> Result<PipelineData, ShellError> {
145 let mut inputs = vec![];
146 match column_name {
148 None => {
149 for v in values {
152 match v {
153 Value::Error { error, .. } => return Err(*error),
155 _ => {
156 let t = v.get_type();
157 let span = v.span();
158 inputs.push(HashableValue::from_value(v, head_span).map_err(|_| {
159 ShellError::UnsupportedInput { msg: "Since column-name was not provided, only lists of hashable values are supported.".to_string(), input: format!(
160 "input type: {t:?}"
161 ), msg_span: head_span, input_span: span }
162 })?)
163 }
164 }
165 }
166 }
167 Some(ref col) => {
168 let col_name = &col.item;
174 for v in values {
175 match v {
176 Value::Record { val, .. } => {
178 if let Some(v) = val.get(col_name) {
179 if let Ok(v) = HashableValue::from_value(v.clone(), head_span) {
180 inputs.push(v);
181 }
182 }
183 }
184 Value::Error { error, .. } => return Err(*error),
186 _ => continue,
187 }
188 }
189
190 if inputs.is_empty() {
191 return Err(ShellError::CantFindColumn {
192 col_name: col_name.clone(),
193 span: Some(head_span),
194 src_span: list_span,
195 });
196 }
197 }
198 }
199
200 let value_column_name = column_name
201 .map(|x| x.item)
202 .unwrap_or_else(|| "value".to_string());
203 Ok(histogram_impl(
204 inputs,
205 &value_column_name,
206 calc_method,
207 &freq_column,
208 head_span,
209 ))
210}
211
212fn histogram_impl(
213 inputs: Vec<HashableValue>,
214 value_column_name: &str,
215 calc_method: PercentageCalcMethod,
216 freq_column: &str,
217 span: Span,
218) -> PipelineData {
219 let mut counter = HashMap::new();
222 let mut max_cnt = 0;
223 let total_cnt = inputs.len();
224 for i in inputs {
225 let new_cnt = *counter.get(&i).unwrap_or(&0) + 1;
226 counter.insert(i, new_cnt);
227 if new_cnt > max_cnt {
228 max_cnt = new_cnt;
229 }
230 }
231
232 let mut result = vec![];
233 const MAX_FREQ_COUNT: f64 = 100.0;
234 for (val, count) in counter.into_iter().sorted() {
235 let quantile = match calc_method {
236 PercentageCalcMethod::Normalize => count as f64 / total_cnt as f64,
237 PercentageCalcMethod::Relative => count as f64 / max_cnt as f64,
238 };
239
240 let percentage = format!("{:.2}%", quantile * 100_f64);
241 let freq = "*".repeat((MAX_FREQ_COUNT * quantile).floor() as usize);
242
243 result.push((
244 count, Value::record(
246 record! {
247 value_column_name => val.into_value(),
248 "count" => Value::int(count, span),
249 "quantile" => Value::float(quantile, span),
250 "percentage" => Value::string(percentage, span),
251 freq_column => Value::string(freq, span),
252 },
253 span,
254 ),
255 ));
256 }
257 result.sort_by(|a, b| b.0.cmp(&a.0));
258 Value::list(result.into_iter().map(|x| x.1).collect(), span).into_pipeline_data()
259}
260
#[cfg(test)]
mod tests {
    use super::Histogram;

    /// Runs the crate's example checker against the examples declared by
    /// `Histogram`.
    #[test]
    fn test_examples() {
        // Called by full path: importing the helper at module level would
        // clash with this test function's own name.
        crate::test_examples(Histogram)
    }
}