nu_command/filters/
group_by.rs

1use indexmap::IndexMap;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, IntoValue, engine::Closure};
4
/// Marker type for the `group-by` filter command; all behavior lives in its `Command` impl.
#[derive(Clone)]
pub struct GroupBy;
7
8impl Command for GroupBy {
9    fn name(&self) -> &str {
10        "group-by"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("group-by")
15            .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
16            .switch(
17                "to-table",
18                "Return a table with \"groups\" and \"items\" columns",
19                None,
20            )
21            .rest(
22                "grouper",
23                SyntaxShape::OneOf(vec![
24                    SyntaxShape::CellPath,
25                    SyntaxShape::Closure(None),
26                    SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
27                ]),
28                "The path to the column to group on.",
29            )
30            .category(Category::Filters)
31    }
32
33    fn description(&self) -> &str {
34        "Splits a list or table into groups, and returns a record containing those groups."
35    }
36
37    fn extra_description(&self) -> &str {
38        r#"the group-by command makes some assumptions:
39    - if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
40    - datetime is formatted based on your configuration setting. use `format date` to change the format.
41    - filesize is formatted based on your configuration setting. use `format filesize` to change the format.
42    - some nushell values are not supported, such as closures."#
43    }
44
45    fn run(
46        &self,
47        engine_state: &EngineState,
48        stack: &mut Stack,
49        call: &Call,
50        input: PipelineData,
51    ) -> Result<PipelineData, ShellError> {
52        group_by(engine_state, stack, call, input)
53    }
54
55    fn examples(&self) -> Vec<Example<'_>> {
56        vec![
57            Example {
58                description: "Group items by the \"type\" column's values",
59                example: r#"ls | group-by type"#,
60                result: None,
61            },
62            Example {
63                description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column",
64                example: r#"open cool.json | group-by foo?"#,
65                result: None,
66            },
67            Example {
68                description: "Group using a block which is evaluated against each input value",
69                example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
70                result: Some(Value::test_record(record! {
71                    "txt" => Value::test_list(vec![
72                        Value::test_string("foo.txt"),
73                        Value::test_string("baz.txt"),
74                    ]),
75                    "csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
76                })),
77            },
78            Example {
79                description: "You can also group by raw values by leaving out the argument",
80                example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
81                result: Some(Value::test_record(record! {
82                    "1" => Value::test_list(vec![
83                        Value::test_string("1"),
84                        Value::test_string("1"),
85                        Value::test_string("1"),
86                        Value::test_string("1"),
87                    ]),
88                    "3" => Value::test_list(vec![
89                        Value::test_string("3"),
90                        Value::test_string("3"),
91                    ]),
92                    "2" => Value::test_list(vec![Value::test_string("2")]),
93                })),
94            },
95            Example {
96                description: "You can also output a table instead of a record",
97                example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
98                result: Some(Value::test_list(vec![
99                    Value::test_record(record! {
100                        "group" => Value::test_string("1"),
101                        "items" => Value::test_list(vec![
102                            Value::test_string("1"),
103                            Value::test_string("1"),
104                            Value::test_string("1"),
105                            Value::test_string("1"),
106                        ]),
107                    }),
108                    Value::test_record(record! {
109                        "group" => Value::test_string("3"),
110                        "items" => Value::test_list(vec![
111                            Value::test_string("3"),
112                            Value::test_string("3"),
113                        ]),
114                    }),
115                    Value::test_record(record! {
116                        "group" => Value::test_string("2"),
117                        "items" => Value::test_list(vec![Value::test_string("2")]),
118                    }),
119                ])),
120            },
121            Example {
122                description: "Group bools, whether they are strings or actual bools",
123                example: r#"[true "true" false "false"] | group-by"#,
124                result: Some(Value::test_record(record! {
125                    "true" => Value::test_list(vec![
126                        Value::test_bool(true),
127                        Value::test_string("true"),
128                    ]),
129                    "false" => Value::test_list(vec![
130                        Value::test_bool(false),
131                        Value::test_string("false"),
132                    ]),
133                })),
134            },
135            Example {
136                description: "Group items by multiple columns' values",
137                example: r#"[
138        [name, lang, year];
139        [andres, rb, "2019"],
140        [jt, rs, "2019"],
141        [storm, rs, "2021"]
142    ]
143    | group-by lang year"#,
144                result: Some(Value::test_record(record! {
145                    "rb" => Value::test_record(record! {
146                        "2019" => Value::test_list(
147                            vec![Value::test_record(record! {
148                                    "name" => Value::test_string("andres"),
149                                    "lang" => Value::test_string("rb"),
150                                    "year" => Value::test_string("2019"),
151                            })],
152                        ),
153                    }),
154                    "rs" => Value::test_record(record! {
155                            "2019" => Value::test_list(
156                                vec![Value::test_record(record! {
157                                        "name" => Value::test_string("jt"),
158                                        "lang" => Value::test_string("rs"),
159                                        "year" => Value::test_string("2019"),
160                                })],
161                            ),
162                            "2021" => Value::test_list(
163                                vec![Value::test_record(record! {
164                                        "name" => Value::test_string("storm"),
165                                        "lang" => Value::test_string("rs"),
166                                        "year" => Value::test_string("2021"),
167                                })],
168                            ),
169                    }),
170                })),
171            },
172            Example {
173                description: "Group items by multiple columns' values",
174                example: r#"[
175        [name, lang, year];
176        [andres, rb, "2019"],
177        [jt, rs, "2019"],
178        [storm, rs, "2021"]
179    ]
180    | group-by lang year --to-table"#,
181                result: Some(Value::test_list(vec![
182                    Value::test_record(record! {
183                        "lang" => Value::test_string("rb"),
184                        "year" => Value::test_string("2019"),
185                        "items" => Value::test_list(vec![
186                            Value::test_record(record! {
187                                "name" => Value::test_string("andres"),
188                                "lang" => Value::test_string("rb"),
189                                "year" => Value::test_string("2019"),
190                            })
191                        ]),
192                    }),
193                    Value::test_record(record! {
194                        "lang" => Value::test_string("rs"),
195                        "year" => Value::test_string("2019"),
196                        "items" => Value::test_list(vec![
197                            Value::test_record(record! {
198                                "name" => Value::test_string("jt"),
199                                "lang" => Value::test_string("rs"),
200                                "year" => Value::test_string("2019"),
201                            })
202                        ]),
203                    }),
204                    Value::test_record(record! {
205                        "lang" => Value::test_string("rs"),
206                        "year" => Value::test_string("2021"),
207                        "items" => Value::test_list(vec![
208                            Value::test_record(record! {
209                                "name" => Value::test_string("storm"),
210                                "lang" => Value::test_string("rs"),
211                                "year" => Value::test_string("2021"),
212                            })
213                        ]),
214                    }),
215                ])),
216            },
217        ]
218    }
219}
220
221pub fn group_by(
222    engine_state: &EngineState,
223    stack: &mut Stack,
224    call: &Call,
225    input: PipelineData,
226) -> Result<PipelineData, ShellError> {
227    let head = call.head;
228    let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
229    let to_table = call.has_flag(engine_state, stack, "to-table")?;
230    let config = engine_state.get_config();
231
232    let values: Vec<Value> = input.into_iter().collect();
233    if values.is_empty() {
234        let val = if to_table {
235            Value::list(Vec::new(), head)
236        } else {
237            Value::record(Record::new(), head)
238        };
239        return Ok(val.into_pipeline_data());
240    }
241
242    let grouped = match &groupers[..] {
243        [first, rest @ ..] => {
244            let mut grouped = Grouped::new(first.as_ref(), values, config, engine_state, stack)?;
245            for grouper in rest {
246                grouped.subgroup(grouper.as_ref(), config, engine_state, stack)?;
247            }
248            grouped
249        }
250        [] => Grouped::empty(values, config),
251    };
252
253    let value = if to_table {
254        let column_names = groupers_to_column_names(&groupers)?;
255        grouped.into_table(&column_names, head)
256    } else {
257        grouped.into_record(head)
258    };
259
260    Ok(value.into_pipeline_data())
261}
262
263fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
264    if groupers.is_empty() {
265        return Ok(vec!["group".into(), "items".into()]);
266    }
267
268    let mut closure_idx: usize = 0;
269    let grouper_names = groupers.iter().map(|grouper| {
270        grouper.as_ref().map(|item| match item {
271            Grouper::CellPath { val } => val.to_column_name(),
272            Grouper::Closure { .. } => {
273                closure_idx += 1;
274                format!("closure_{}", closure_idx - 1)
275            }
276        })
277    });
278
279    let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
280
281    for name in grouper_names {
282        if name.item == "items" {
283            return Err(ShellError::GenericError {
284                error: "grouper arguments can't be named `items`".into(),
285                msg: "here".into(),
286                span: Some(name.span),
287                help: Some("instead of a cell-path, try using a closure: { get items }".into()),
288                inner: vec![],
289            });
290        }
291
292        if let Some(conflicting_name) = name_set
293            .iter()
294            .find(|elem| elem.as_ref().item == name.item.as_str())
295        {
296            return Err(ShellError::GenericError {
297                error: "grouper arguments result in colliding column names".into(),
298                msg: "duplicate column names".into(),
299                span: Some(conflicting_name.span.append(name.span)),
300                help: Some(
301                    "instead of a cell-path, try using a closure or renaming columns".into(),
302                ),
303                inner: vec![ShellError::ColumnDefinedTwice {
304                    col_name: conflicting_name.item.clone(),
305                    first_use: conflicting_name.span,
306                    second_use: name.span,
307                }],
308            });
309        }
310
311        name_set.push(name);
312    }
313
314    let column_names: Vec<String> = name_set
315        .into_iter()
316        .map(|elem| elem.item)
317        .chain(["items".into()])
318        .collect();
319    Ok(column_names)
320}
321
322fn group_cell_path(
323    column_name: &CellPath,
324    values: Vec<Value>,
325    config: &nu_protocol::Config,
326) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
327    let mut groups = IndexMap::<_, Vec<_>>::new();
328
329    for value in values.into_iter() {
330        let key = value.follow_cell_path(&column_name.members)?;
331
332        if key.is_nothing() {
333            continue; // likely the result of a failed optional access, ignore this value
334        }
335
336        let key = key.to_abbreviated_string(config);
337        groups.entry(key).or_default().push(value);
338    }
339
340    Ok(groups)
341}
342
343fn group_closure(
344    values: Vec<Value>,
345    span: Span,
346    closure: Closure,
347    engine_state: &EngineState,
348    stack: &mut Stack,
349) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
350    let mut groups = IndexMap::<_, Vec<_>>::new();
351    let mut closure = ClosureEval::new(engine_state, stack, closure);
352    let config = engine_state.get_config();
353
354    for value in values {
355        let key = closure
356            .run_with_value(value.clone())?
357            .into_value(span)?
358            .to_abbreviated_string(config);
359
360        groups.entry(key).or_default().push(value);
361    }
362
363    Ok(groups)
364}
365
/// A single grouping criterion taken from the command's `grouper` arguments.
enum Grouper {
    /// Group by the value a cell path resolves to.
    CellPath { val: CellPath },
    /// Group by the value a closure returns.
    Closure { val: Box<Closure> },
}
370
371impl FromValue for Grouper {
372    fn from_value(v: Value) -> Result<Self, ShellError> {
373        match v {
374            Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
375            Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
376            _ => Err(ShellError::TypeMismatch {
377                err_message: "unsupported grouper type".to_string(),
378                span: v.span(),
379            }),
380        }
381    }
382}
383
/// Result of grouping: a tree of groups that may be nested one level per grouper.
struct Grouped {
    /// Either the final groups of values or a further level of nested groupings.
    groups: Tree,
}
387
/// One level of a grouping, keyed by the group's string key.
enum Tree {
    /// Innermost level: each key maps to the values in that group.
    Leaf(IndexMap<String, Vec<Value>>),
    /// Intermediate level: each key maps to a nested grouping.
    Branch(IndexMap<String, Grouped>),
}
392
393impl Grouped {
394    fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
395        let mut groups = IndexMap::<_, Vec<_>>::new();
396
397        for value in values.into_iter() {
398            let key = value.to_abbreviated_string(config);
399            groups.entry(key).or_default().push(value);
400        }
401
402        Self {
403            groups: Tree::Leaf(groups),
404        }
405    }
406
407    fn new(
408        grouper: Spanned<&Grouper>,
409        values: Vec<Value>,
410        config: &nu_protocol::Config,
411        engine_state: &EngineState,
412        stack: &mut Stack,
413    ) -> Result<Self, ShellError> {
414        let groups = match grouper.item {
415            Grouper::CellPath { val } => group_cell_path(val, values, config)?,
416            Grouper::Closure { val } => group_closure(
417                values,
418                grouper.span,
419                Closure::clone(val),
420                engine_state,
421                stack,
422            )?,
423        };
424        Ok(Self {
425            groups: Tree::Leaf(groups),
426        })
427    }
428
429    fn subgroup(
430        &mut self,
431        grouper: Spanned<&Grouper>,
432        config: &nu_protocol::Config,
433        engine_state: &EngineState,
434        stack: &mut Stack,
435    ) -> Result<(), ShellError> {
436        let groups = match &mut self.groups {
437            Tree::Leaf(groups) => std::mem::take(groups)
438                .into_iter()
439                .map(|(key, values)| -> Result<_, ShellError> {
440                    let leaf = Self::new(grouper, values, config, engine_state, stack)?;
441                    Ok((key, leaf))
442                })
443                .collect::<Result<IndexMap<_, _>, ShellError>>()?,
444            Tree::Branch(nested_groups) => {
445                let mut nested_groups = std::mem::take(nested_groups);
446                for v in nested_groups.values_mut() {
447                    v.subgroup(grouper, config, engine_state, stack)?;
448                }
449                nested_groups
450            }
451        };
452        self.groups = Tree::Branch(groups);
453        Ok(())
454    }
455
456    fn into_table(self, column_names: &[String], head: Span) -> Value {
457        self._into_table(head)
458            .into_iter()
459            .map(|row| {
460                row.into_iter()
461                    .rev()
462                    .zip(column_names)
463                    .map(|(val, key)| (key.clone(), val))
464                    .collect::<Record>()
465                    .into_value(head)
466            })
467            .collect::<Vec<_>>()
468            .into_value(head)
469    }
470
471    fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
472        match self.groups {
473            Tree::Leaf(leaf) => leaf
474                .into_iter()
475                .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
476                .collect::<Vec<Vec<Value>>>(),
477            Tree::Branch(branch) => branch
478                .into_iter()
479                .flat_map(|(group, items)| {
480                    let mut inner = items._into_table(head);
481                    for row in &mut inner {
482                        row.push(group.clone().into_value(head));
483                    }
484                    inner
485                })
486                .collect(),
487        }
488    }
489
490    fn into_record(self, head: Span) -> Value {
491        match self.groups {
492            Tree::Leaf(leaf) => Value::record(
493                leaf.into_iter()
494                    .map(|(k, v)| (k, v.into_value(head)))
495                    .collect(),
496                head,
497            ),
498            Tree::Branch(branch) => {
499                let values = branch
500                    .into_iter()
501                    .map(|(k, v)| (k, v.into_record(head)))
502                    .collect();
503                Value::record(values, head)
504            }
505        }
506    }
507}
508
#[cfg(test)]
mod test {
    use super::GroupBy;

    /// Passes the command to the crate's `test_examples` helper.
    #[test]
    fn test_examples() {
        // Called by full path: importing it at module level would clash with this test's name.
        crate::test_examples(GroupBy)
    }
}
519}