1use super::hashable_value::HashableValue;
2use itertools::Itertools;
3use nu_engine::command_prelude::*;
4
5use std::collections::HashMap;
6
/// The `histogram` command.
///
/// A stateless unit type: all behavior lives in its `Command` implementation,
/// which counts how often each value occurs in the input list and renders the
/// result as a table with a text bar per distinct value.
#[derive(Debug, Clone)]
pub struct Histogram;
9
/// Selects the divisor used to turn a value's count into its quantile.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PercentageCalcMethod {
    /// Divide each count by the total number of counted inputs.
    Normalize,
    /// Divide each count by the largest count among all values.
    Relative,
}
14
15impl Command for Histogram {
16 fn name(&self) -> &str {
17 "histogram"
18 }
19
20 fn signature(&self) -> Signature {
21 Signature::build("histogram")
22 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::table())])
23 .optional(
24 "column-name",
25 SyntaxShape::String,
26 "Column name to calc frequency, no need to provide if input is a list.",
27 )
28 .optional(
29 "frequency-column-name",
30 SyntaxShape::String,
31 "Histogram's frequency column, default to be frequency column output.",
32 )
33 .param(
34 Flag::new("percentage-type")
35 .short('t')
36 .arg(SyntaxShape::String)
37 .desc(
38 "percentage calculate method, can be 'normalize' or 'relative', in \
39 'normalize', defaults to be 'normalize'",
40 )
41 .completion(Completion::new_list(&["normalize", "relative"])),
42 )
43 .category(Category::Chart)
44 }
45
46 fn description(&self) -> &str {
47 "Creates a new table with a histogram based on the column name passed in."
48 }
49
50 fn examples(&self) -> Vec<Example<'_>> {
51 vec![
52 Example {
53 description: "Compute a histogram of file types",
54 example: "ls | histogram type",
55 result: None,
56 },
57 Example {
58 description: "Compute a histogram for the types of files, with frequency column \
59 named freq",
60 example: "ls | histogram type freq",
61 result: None,
62 },
63 Example {
64 description: "Compute a histogram for a list of numbers",
65 example: "[1 2 1] | histogram",
66 result: Some(Value::test_list(vec![
67 Value::test_record(record! {
68 "value" => Value::test_int(1),
69 "count" => Value::test_int(2),
70 "quantile" => Value::test_float(0.6666666666666666),
71 "percentage" => Value::test_string("66.67%"),
72 "frequency" => Value::test_string("******************************************************************"),
73 }),
74 Value::test_record(record! {
75 "value" => Value::test_int(2),
76 "count" => Value::test_int(1),
77 "quantile" => Value::test_float(0.3333333333333333),
78 "percentage" => Value::test_string("33.33%"),
79 "frequency" => Value::test_string("*********************************"),
80 }),
81 ])),
82 },
83 Example {
84 description: "Compute a histogram for a list of numbers, and percentage is based \
85 on the maximum value",
86 example: "[1 2 3 1 1 1 2 2 1 1] | histogram --percentage-type relative",
87 result: None,
88 },
89 ]
90 }
91
92 fn run(
93 &self,
94 engine_state: &EngineState,
95 stack: &mut Stack,
96 call: &Call,
97 input: PipelineData,
98 ) -> Result<PipelineData, ShellError> {
99 let column_name: Option<Spanned<String>> = call.opt(engine_state, stack, 0)?;
101 let frequency_name_arg = call.opt::<Spanned<String>>(engine_state, stack, 1)?;
102 let frequency_column_name = match frequency_name_arg {
103 Some(inner) => {
104 let forbidden_column_names = ["value", "count", "quantile", "percentage"];
105 if forbidden_column_names.contains(&inner.item.as_str()) {
106 return Err(ShellError::TypeMismatch {
107 err_message: format!(
108 "frequency-column-name can't be {}",
109 forbidden_column_names
110 .iter()
111 .map(|val| format!("'{val}'"))
112 .collect::<Vec<_>>()
113 .join(", ")
114 ),
115 span: inner.span,
116 });
117 }
118 inner.item
119 }
120 None => "frequency".to_string(),
121 };
122
123 let calc_method: Option<Spanned<String>> =
124 call.get_flag(engine_state, stack, "percentage-type")?;
125 let calc_method = match calc_method {
126 None => PercentageCalcMethod::Normalize,
127 Some(inner) => match inner.item.as_str() {
128 "normalize" => PercentageCalcMethod::Normalize,
129 "relative" => PercentageCalcMethod::Relative,
130 _ => {
131 return Err(ShellError::TypeMismatch {
132 err_message: "calc method can only be 'normalize' or 'relative'"
133 .to_string(),
134 span: inner.span,
135 });
136 }
137 },
138 };
139
140 let span = call.head;
141 let data_as_value = input.into_value(span)?;
142 let value_span = data_as_value.span();
143 run_histogram(
145 data_as_value.into_list()?,
146 column_name,
147 frequency_column_name,
148 calc_method,
149 span,
150 value_span,
152 )
153 }
154}
155
156fn run_histogram(
157 values: Vec<Value>,
158 column_name: Option<Spanned<String>>,
159 freq_column: String,
160 calc_method: PercentageCalcMethod,
161 head_span: Span,
162 list_span: Span,
163) -> Result<PipelineData, ShellError> {
164 let mut inputs = vec![];
165 match column_name {
167 None => {
168 for v in values {
171 match v {
172 Value::Error { error, .. } => return Err(*error),
174 _ => {
175 let t = v.get_type();
176 let span = v.span();
177 inputs.push(HashableValue::from_value(v, head_span).map_err(|_| {
178 ShellError::UnsupportedInput {
179 msg: "Since column-name was not provided, only lists of hashable \
180 values are supported."
181 .to_string(),
182 input: format!("input type: {t:?}"),
183 msg_span: head_span,
184 input_span: span,
185 }
186 })?)
187 }
188 }
189 }
190 }
191 Some(ref col) => {
192 let col_name = &col.item;
198 for v in values {
199 match v {
200 Value::Record { val, .. } => {
202 if let Some(v) = val.get(col_name)
203 && let Ok(v) = HashableValue::from_value(v.clone(), head_span)
204 {
205 inputs.push(v);
206 }
207 }
208 Value::Error { error, .. } => return Err(*error),
210 _ => continue,
211 }
212 }
213
214 if inputs.is_empty() {
215 return Err(ShellError::CantFindColumn {
216 col_name: col_name.clone(),
217 span: Some(head_span),
218 src_span: list_span,
219 });
220 }
221 }
222 }
223
224 let forbidden_colun_names = ["count", "quantile", "percentage", freq_column.as_str()];
225 let value_column_name = column_name
226 .map(|x| x.item)
227 .map(|name| {
228 if forbidden_colun_names.contains(&name.as_str()) {
229 "value".to_string()
230 } else {
231 name
232 }
233 })
234 .unwrap_or_else(|| "value".to_string());
235
236 Ok(histogram_impl(
237 inputs,
238 &value_column_name,
239 calc_method,
240 &freq_column,
241 head_span,
242 ))
243}
244
245fn histogram_impl(
246 inputs: Vec<HashableValue>,
247 value_column_name: &str,
248 calc_method: PercentageCalcMethod,
249 freq_column: &str,
250 span: Span,
251) -> PipelineData {
252 let mut counter = HashMap::new();
255 let mut max_cnt = 0;
256 let total_cnt = inputs.len();
257 for i in inputs {
258 let new_cnt = *counter.get(&i).unwrap_or(&0) + 1;
259 counter.insert(i, new_cnt);
260 if new_cnt > max_cnt {
261 max_cnt = new_cnt;
262 }
263 }
264
265 let mut result = vec![];
266 const MAX_FREQ_COUNT: f64 = 100.0;
267 for (val, count) in counter.into_iter().sorted() {
268 let quantile = match calc_method {
269 PercentageCalcMethod::Normalize => count as f64 / total_cnt as f64,
270 PercentageCalcMethod::Relative => count as f64 / max_cnt as f64,
271 };
272
273 let percentage = format!("{:.2}%", quantile * 100_f64);
274 let freq = "*".repeat((MAX_FREQ_COUNT * quantile).floor() as usize);
275
276 result.push((
277 count, Value::record(
279 record! {
280 value_column_name => val.into_value(),
281 "count" => Value::int(count, span),
282 "quantile" => Value::float(quantile, span),
283 "percentage" => Value::string(percentage, span),
284 freq_column => Value::string(freq, span),
285 },
286 span,
287 ),
288 ));
289 }
290 result.sort_by(|a, b| b.0.cmp(&a.0));
291 Value::list(result.into_iter().map(|x| x.1).collect(), span).into_pipeline_data()
292}
293
#[cfg(test)]
mod tests {
    use super::Histogram;

    /// Hands the command to the `nu_test_support` harness via `examples`;
    /// presumably this checks the entries returned by `Histogram::examples`
    /// (confirm against the harness).
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(Histogram)
    }
}