1use super::hashable_value::HashableValue;
2use itertools::Itertools;
3use nu_engine::command_prelude::*;
4
5use std::collections::HashMap;
6
/// The `histogram` command: builds a frequency table (with a textual bar
/// column) from a list of values or from one column of a table.
#[derive(Clone)]
pub struct Histogram;
9
/// How the `quantile` / `percentage` columns of the histogram are computed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PercentageCalcMethod {
    /// Each count is divided by the total number of counted values.
    Normalize,
    /// Each count is divided by the largest count among all values.
    Relative,
}
14
15impl Command for Histogram {
16 fn name(&self) -> &str {
17 "histogram"
18 }
19
20 fn signature(&self) -> Signature {
21 Signature::build("histogram")
22 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::table()),])
23 .optional("column-name", SyntaxShape::String, "Column name to calc frequency, no need to provide if input is a list.")
24 .optional("frequency-column-name", SyntaxShape::String, "Histogram's frequency column, default to be frequency column output.")
25 .named("percentage-type", SyntaxShape::String, "percentage calculate method, can be 'normalize' or 'relative', in 'normalize', defaults to be 'normalize'", Some('t'))
26 .category(Category::Chart)
27 }
28
29 fn description(&self) -> &str {
30 "Creates a new table with a histogram based on the column name passed in."
31 }
32
33 fn examples(&self) -> Vec<Example> {
34 vec![
35 Example {
36 description: "Compute a histogram of file types",
37 example: "ls | histogram type",
38 result: None,
39 },
40 Example {
41 description:
42 "Compute a histogram for the types of files, with frequency column named freq",
43 example: "ls | histogram type freq",
44 result: None,
45 },
46 Example {
47 description: "Compute a histogram for a list of numbers",
48 example: "[1 2 1] | histogram",
49 result: Some(Value::test_list (
50 vec![Value::test_record(record! {
51 "value" => Value::test_int(1),
52 "count" => Value::test_int(2),
53 "quantile" => Value::test_float(0.6666666666666666),
54 "percentage" => Value::test_string("66.67%"),
55 "frequency" => Value::test_string("******************************************************************"),
56 }),
57 Value::test_record(record! {
58 "value" => Value::test_int(2),
59 "count" => Value::test_int(1),
60 "quantile" => Value::test_float(0.3333333333333333),
61 "percentage" => Value::test_string("33.33%"),
62 "frequency" => Value::test_string("*********************************"),
63 })],
64 )
65 ),
66 },
67 Example {
68 description: "Compute a histogram for a list of numbers, and percentage is based on the maximum value",
69 example: "[1 2 3 1 1 1 2 2 1 1] | histogram --percentage-type relative",
70 result: None,
71 }
72 ]
73 }
74
75 fn run(
76 &self,
77 engine_state: &EngineState,
78 stack: &mut Stack,
79 call: &Call,
80 input: PipelineData,
81 ) -> Result<PipelineData, ShellError> {
82 let column_name: Option<Spanned<String>> = call.opt(engine_state, stack, 0)?;
84 let frequency_name_arg = call.opt::<Spanned<String>>(engine_state, stack, 1)?;
85 let frequency_column_name = match frequency_name_arg {
86 Some(inner) => {
87 let forbidden_column_names = ["value", "count", "quantile", "percentage"];
88 if forbidden_column_names.contains(&inner.item.as_str()) {
89 return Err(ShellError::TypeMismatch {
90 err_message: format!(
91 "frequency-column-name can't be {}",
92 forbidden_column_names
93 .iter()
94 .map(|val| format!("'{}'", val))
95 .collect::<Vec<_>>()
96 .join(", ")
97 ),
98 span: inner.span,
99 });
100 }
101 inner.item
102 }
103 None => "frequency".to_string(),
104 };
105
106 let calc_method: Option<Spanned<String>> =
107 call.get_flag(engine_state, stack, "percentage-type")?;
108 let calc_method = match calc_method {
109 None => PercentageCalcMethod::Normalize,
110 Some(inner) => match inner.item.as_str() {
111 "normalize" => PercentageCalcMethod::Normalize,
112 "relative" => PercentageCalcMethod::Relative,
113 _ => {
114 return Err(ShellError::TypeMismatch {
115 err_message: "calc method can only be 'normalize' or 'relative'"
116 .to_string(),
117 span: inner.span,
118 })
119 }
120 },
121 };
122
123 let span = call.head;
124 let data_as_value = input.into_value(span)?;
125 let value_span = data_as_value.span();
126 run_histogram(
128 data_as_value.into_list()?,
129 column_name,
130 frequency_column_name,
131 calc_method,
132 span,
133 value_span,
135 )
136 }
137}
138
139fn run_histogram(
140 values: Vec<Value>,
141 column_name: Option<Spanned<String>>,
142 freq_column: String,
143 calc_method: PercentageCalcMethod,
144 head_span: Span,
145 list_span: Span,
146) -> Result<PipelineData, ShellError> {
147 let mut inputs = vec![];
148 match column_name {
150 None => {
151 for v in values {
154 match v {
155 Value::Error { error, .. } => return Err(*error),
157 _ => {
158 let t = v.get_type();
159 let span = v.span();
160 inputs.push(HashableValue::from_value(v, head_span).map_err(|_| {
161 ShellError::UnsupportedInput { msg: "Since column-name was not provided, only lists of hashable values are supported.".to_string(), input: format!(
162 "input type: {t:?}"
163 ), msg_span: head_span, input_span: span }
164 })?)
165 }
166 }
167 }
168 }
169 Some(ref col) => {
170 let col_name = &col.item;
176 for v in values {
177 match v {
178 Value::Record { val, .. } => {
180 if let Some(v) = val.get(col_name) {
181 if let Ok(v) = HashableValue::from_value(v.clone(), head_span) {
182 inputs.push(v);
183 }
184 }
185 }
186 Value::Error { error, .. } => return Err(*error),
188 _ => continue,
189 }
190 }
191
192 if inputs.is_empty() {
193 return Err(ShellError::CantFindColumn {
194 col_name: col_name.clone(),
195 span: Some(head_span),
196 src_span: list_span,
197 });
198 }
199 }
200 }
201
202 let value_column_name = column_name
203 .map(|x| x.item)
204 .unwrap_or_else(|| "value".to_string());
205 Ok(histogram_impl(
206 inputs,
207 &value_column_name,
208 calc_method,
209 &freq_column,
210 head_span,
211 ))
212}
213
214fn histogram_impl(
215 inputs: Vec<HashableValue>,
216 value_column_name: &str,
217 calc_method: PercentageCalcMethod,
218 freq_column: &str,
219 span: Span,
220) -> PipelineData {
221 let mut counter = HashMap::new();
224 let mut max_cnt = 0;
225 let total_cnt = inputs.len();
226 for i in inputs {
227 let new_cnt = *counter.get(&i).unwrap_or(&0) + 1;
228 counter.insert(i, new_cnt);
229 if new_cnt > max_cnt {
230 max_cnt = new_cnt;
231 }
232 }
233
234 let mut result = vec![];
235 const MAX_FREQ_COUNT: f64 = 100.0;
236 for (val, count) in counter.into_iter().sorted() {
237 let quantile = match calc_method {
238 PercentageCalcMethod::Normalize => count as f64 / total_cnt as f64,
239 PercentageCalcMethod::Relative => count as f64 / max_cnt as f64,
240 };
241
242 let percentage = format!("{:.2}%", quantile * 100_f64);
243 let freq = "*".repeat((MAX_FREQ_COUNT * quantile).floor() as usize);
244
245 result.push((
246 count, Value::record(
248 record! {
249 value_column_name => val.into_value(),
250 "count" => Value::int(count, span),
251 "quantile" => Value::float(quantile, span),
252 "percentage" => Value::string(percentage, span),
253 freq_column => Value::string(freq, span),
254 },
255 span,
256 ),
257 ));
258 }
259 result.sort_by(|a, b| b.0.cmp(&a.0));
260 Value::list(result.into_iter().map(|x| x.1).collect(), span).into_pipeline_data()
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Hands the command to the crate-level `test_examples` helper, which is
    /// expected to check the results declared in `examples()`.
    #[test]
    fn test_examples() {
        crate::test_examples(Histogram)
    }
}