nu_command/filters/
group_by.rs

1use indexmap::IndexMap;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, IntoValue, engine::Closure};
4
/// The `group-by` filter command.
///
/// Carries no state of its own; all behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct GroupBy;
7
8impl Command for GroupBy {
9    fn name(&self) -> &str {
10        "group-by"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("group-by")
15            .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
16            .switch(
17                "to-table",
18                "Return a table with \"groups\" and \"items\" columns",
19                None,
20            )
21            .rest(
22                "grouper",
23                SyntaxShape::OneOf(vec![
24                    SyntaxShape::CellPath,
25                    SyntaxShape::Closure(None),
26                    SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
27                ]),
28                "The path to the column to group on.",
29            )
30            .category(Category::Filters)
31    }
32
33    fn description(&self) -> &str {
34        "Splits a list or table into groups, and returns a record containing those groups."
35    }
36
37    fn extra_description(&self) -> &str {
38        r#"the group-by command makes some assumptions:
39    - if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
40    - datetime is formatted based on your configuration setting. use `format date` to change the format.
41    - filesize is formatted based on your configuration setting. use `format filesize` to change the format.
42    - some nushell values are not supported, such as closures."#
43    }
44
45    fn run(
46        &self,
47        engine_state: &EngineState,
48        stack: &mut Stack,
49        call: &Call,
50        input: PipelineData,
51    ) -> Result<PipelineData, ShellError> {
52        group_by(engine_state, stack, call, input)
53    }
54
55    fn examples(&self) -> Vec<Example<'_>> {
56        vec![
57            Example {
58                description: "Group items by the \"type\" column's values",
59                example: r#"ls | group-by type"#,
60                result: None,
61            },
62            Example {
63                description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column",
64                example: r#"open cool.json | group-by foo?"#,
65                result: None,
66            },
67            Example {
68                description: "Group using a block which is evaluated against each input value",
69                example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
70                result: Some(Value::test_record(record! {
71                    "txt" => Value::test_list(vec![
72                        Value::test_string("foo.txt"),
73                        Value::test_string("baz.txt"),
74                    ]),
75                    "csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
76                })),
77            },
78            Example {
79                description: "You can also group by raw values by leaving out the argument",
80                example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
81                result: Some(Value::test_record(record! {
82                    "1" => Value::test_list(vec![
83                        Value::test_string("1"),
84                        Value::test_string("1"),
85                        Value::test_string("1"),
86                        Value::test_string("1"),
87                    ]),
88                    "3" => Value::test_list(vec![
89                        Value::test_string("3"),
90                        Value::test_string("3"),
91                    ]),
92                    "2" => Value::test_list(vec![Value::test_string("2")]),
93                })),
94            },
95            Example {
96                description: "You can also output a table instead of a record",
97                example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
98                result: Some(Value::test_list(vec![
99                    Value::test_record(record! {
100                        "group" => Value::test_string("1"),
101                        "items" => Value::test_list(vec![
102                            Value::test_string("1"),
103                            Value::test_string("1"),
104                            Value::test_string("1"),
105                            Value::test_string("1"),
106                        ]),
107                    }),
108                    Value::test_record(record! {
109                        "group" => Value::test_string("3"),
110                        "items" => Value::test_list(vec![
111                            Value::test_string("3"),
112                            Value::test_string("3"),
113                        ]),
114                    }),
115                    Value::test_record(record! {
116                        "group" => Value::test_string("2"),
117                        "items" => Value::test_list(vec![Value::test_string("2")]),
118                    }),
119                ])),
120            },
121            Example {
122                description: "Group bools, whether they are strings or actual bools",
123                example: r#"[true "true" false "false"] | group-by"#,
124                result: Some(Value::test_record(record! {
125                    "true" => Value::test_list(vec![
126                        Value::test_bool(true),
127                        Value::test_string("true"),
128                    ]),
129                    "false" => Value::test_list(vec![
130                        Value::test_bool(false),
131                        Value::test_string("false"),
132                    ]),
133                })),
134            },
135            Example {
136                description: "Group items by multiple columns' values",
137                example: r#"[
138        [name, lang, year];
139        [andres, rb, "2019"],
140        [jt, rs, "2019"],
141        [storm, rs, "2021"]
142    ]
143    | group-by lang year"#,
144                result: Some(Value::test_record(record! {
145                    "rb" => Value::test_record(record! {
146                        "2019" => Value::test_list(
147                            vec![Value::test_record(record! {
148                                    "name" => Value::test_string("andres"),
149                                    "lang" => Value::test_string("rb"),
150                                    "year" => Value::test_string("2019"),
151                            })],
152                        ),
153                    }),
154                    "rs" => Value::test_record(record! {
155                            "2019" => Value::test_list(
156                                vec![Value::test_record(record! {
157                                        "name" => Value::test_string("jt"),
158                                        "lang" => Value::test_string("rs"),
159                                        "year" => Value::test_string("2019"),
160                                })],
161                            ),
162                            "2021" => Value::test_list(
163                                vec![Value::test_record(record! {
164                                        "name" => Value::test_string("storm"),
165                                        "lang" => Value::test_string("rs"),
166                                        "year" => Value::test_string("2021"),
167                                })],
168                            ),
169                    }),
170                })),
171            },
172            Example {
173                description: "Group items by multiple columns' values",
174                example: r#"[
175        [name, lang, year];
176        [andres, rb, "2019"],
177        [jt, rs, "2019"],
178        [storm, rs, "2021"]
179    ]
180    | group-by lang year --to-table"#,
181                result: Some(Value::test_list(vec![
182                    Value::test_record(record! {
183                        "lang" => Value::test_string("rb"),
184                        "year" => Value::test_string("2019"),
185                        "items" => Value::test_list(vec![
186                            Value::test_record(record! {
187                                "name" => Value::test_string("andres"),
188                                "lang" => Value::test_string("rb"),
189                                "year" => Value::test_string("2019"),
190                            })
191                        ]),
192                    }),
193                    Value::test_record(record! {
194                        "lang" => Value::test_string("rs"),
195                        "year" => Value::test_string("2019"),
196                        "items" => Value::test_list(vec![
197                            Value::test_record(record! {
198                                "name" => Value::test_string("jt"),
199                                "lang" => Value::test_string("rs"),
200                                "year" => Value::test_string("2019"),
201                            })
202                        ]),
203                    }),
204                    Value::test_record(record! {
205                        "lang" => Value::test_string("rs"),
206                        "year" => Value::test_string("2021"),
207                        "items" => Value::test_list(vec![
208                            Value::test_record(record! {
209                                "name" => Value::test_string("storm"),
210                                "lang" => Value::test_string("rs"),
211                                "year" => Value::test_string("2021"),
212                            })
213                        ]),
214                    }),
215                ])),
216            },
217            Example {
218                description: "Group items by column and delete the original",
219                example: r#"[
220        [name, lang, year];
221        [andres, rb, "2019"],
222        [jt, rs, "2019"],
223        [storm, rs, "2021"]
224    ]
225    | group-by lang | update cells { reject lang }"#,
226                #[cfg(test)] // Cannot test this example, it requires the nu-cmd-extra crate.
227                result: None,
228                #[cfg(not(test))]
229                result: Some(Value::test_record(record! {
230                        "rb" => Value::test_list(vec![Value::test_record(record! {
231                                        "name" => Value::test_string("andres"),
232                                        "year" => Value::test_string("2019"),
233                                })],
234                            ),
235                        "rs" => Value::test_list(
236                                    vec![
237                                    Value::test_record(record! {
238                                            "name" => Value::test_string("jt"),
239                                            "year" => Value::test_string("2019"),
240                                    }),
241                                    Value::test_record(record! {
242                                            "name" => Value::test_string("storm"),
243                                            "year" => Value::test_string("2021"),
244                                    })
245                            ]),
246                })),
247            },
248        ]
249    }
250}
251
252pub fn group_by(
253    engine_state: &EngineState,
254    stack: &mut Stack,
255    call: &Call,
256    input: PipelineData,
257) -> Result<PipelineData, ShellError> {
258    let head = call.head;
259    let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
260    let to_table = call.has_flag(engine_state, stack, "to-table")?;
261    let config = engine_state.get_config();
262
263    let values: Vec<Value> = input.into_iter().collect();
264    if values.is_empty() {
265        let val = if to_table {
266            Value::list(Vec::new(), head)
267        } else {
268            Value::record(Record::new(), head)
269        };
270        return Ok(val.into_pipeline_data());
271    }
272
273    let grouped = match &groupers[..] {
274        [first, rest @ ..] => {
275            let mut grouped = Grouped::new(first.as_ref(), values, config, engine_state, stack)?;
276            for grouper in rest {
277                grouped.subgroup(grouper.as_ref(), config, engine_state, stack)?;
278            }
279            grouped
280        }
281        [] => Grouped::empty(values, config),
282    };
283
284    let value = if to_table {
285        let column_names = groupers_to_column_names(&groupers)?;
286        grouped.into_table(&column_names, head)
287    } else {
288        grouped.into_record(head)
289    };
290
291    Ok(value.into_pipeline_data())
292}
293
294fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
295    if groupers.is_empty() {
296        return Ok(vec!["group".into(), "items".into()]);
297    }
298
299    let mut closure_idx: usize = 0;
300    let grouper_names = groupers.iter().map(|grouper| {
301        grouper.as_ref().map(|item| match item {
302            Grouper::CellPath { val } => val.to_column_name(),
303            Grouper::Closure { .. } => {
304                closure_idx += 1;
305                format!("closure_{}", closure_idx - 1)
306            }
307        })
308    });
309
310    let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
311
312    for name in grouper_names {
313        if name.item == "items" {
314            return Err(ShellError::GenericError {
315                error: "grouper arguments can't be named `items`".into(),
316                msg: "here".into(),
317                span: Some(name.span),
318                help: Some("instead of a cell-path, try using a closure: { get items }".into()),
319                inner: vec![],
320            });
321        }
322
323        if let Some(conflicting_name) = name_set
324            .iter()
325            .find(|elem| elem.as_ref().item == name.item.as_str())
326        {
327            return Err(ShellError::GenericError {
328                error: "grouper arguments result in colliding column names".into(),
329                msg: "duplicate column names".into(),
330                span: Some(conflicting_name.span.append(name.span)),
331                help: Some(
332                    "instead of a cell-path, try using a closure or renaming columns".into(),
333                ),
334                inner: vec![ShellError::ColumnDefinedTwice {
335                    col_name: conflicting_name.item.clone(),
336                    first_use: conflicting_name.span,
337                    second_use: name.span,
338                }],
339            });
340        }
341
342        name_set.push(name);
343    }
344
345    let column_names: Vec<String> = name_set
346        .into_iter()
347        .map(|elem| elem.item)
348        .chain(["items".into()])
349        .collect();
350    Ok(column_names)
351}
352
353fn group_cell_path(
354    column_name: &CellPath,
355    values: Vec<Value>,
356    config: &nu_protocol::Config,
357) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
358    let mut groups = IndexMap::<_, Vec<_>>::new();
359
360    for value in values.into_iter() {
361        let key = value.follow_cell_path(&column_name.members)?;
362
363        if key.is_nothing() {
364            continue; // likely the result of a failed optional access, ignore this value
365        }
366
367        let key = key.to_abbreviated_string(config);
368        groups.entry(key).or_default().push(value);
369    }
370
371    Ok(groups)
372}
373
374fn group_closure(
375    values: Vec<Value>,
376    span: Span,
377    closure: Closure,
378    engine_state: &EngineState,
379    stack: &mut Stack,
380) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
381    let mut groups = IndexMap::<_, Vec<_>>::new();
382    let mut closure = ClosureEval::new(engine_state, stack, closure);
383    let config = engine_state.get_config();
384
385    for value in values {
386        let key = closure
387            .run_with_value(value.clone())?
388            .into_value(span)?
389            .to_abbreviated_string(config);
390
391        groups.entry(key).or_default().push(value);
392    }
393
394    Ok(groups)
395}
396
/// A single `grouper` argument: the rule used to derive a group key from a value.
enum Grouper {
    /// Key is whatever this cell path resolves to on each value.
    CellPath { val: CellPath },
    /// Key is the output of running this closure on each value.
    Closure { val: Box<Closure> },
}
401
402impl FromValue for Grouper {
403    fn from_value(v: Value) -> Result<Self, ShellError> {
404        match v {
405            Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
406            Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
407            _ => Err(ShellError::TypeMismatch {
408                err_message: "unsupported grouper type".to_string(),
409                span: v.span(),
410            }),
411        }
412    }
413}
414
/// The result of grouping: a tree with one level per applied grouper.
struct Grouped {
    /// Either the final groups of values, or nested `Grouped` nodes keyed by group.
    groups: Tree,
}
418
/// One level of a [`Grouped`] tree, keyed by group name in insertion order.
enum Tree {
    /// Innermost level: group key -> the values belonging to that group.
    Leaf(IndexMap<String, Vec<Value>>),
    /// Intermediate level: group key -> the sub-grouping of that group's values.
    Branch(IndexMap<String, Grouped>),
}
423
424impl Grouped {
425    fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
426        let mut groups = IndexMap::<_, Vec<_>>::new();
427
428        for value in values.into_iter() {
429            let key = value.to_abbreviated_string(config);
430            groups.entry(key).or_default().push(value);
431        }
432
433        Self {
434            groups: Tree::Leaf(groups),
435        }
436    }
437
438    fn new(
439        grouper: Spanned<&Grouper>,
440        values: Vec<Value>,
441        config: &nu_protocol::Config,
442        engine_state: &EngineState,
443        stack: &mut Stack,
444    ) -> Result<Self, ShellError> {
445        let groups = match grouper.item {
446            Grouper::CellPath { val } => group_cell_path(val, values, config)?,
447            Grouper::Closure { val } => group_closure(
448                values,
449                grouper.span,
450                Closure::clone(val),
451                engine_state,
452                stack,
453            )?,
454        };
455        Ok(Self {
456            groups: Tree::Leaf(groups),
457        })
458    }
459
460    fn subgroup(
461        &mut self,
462        grouper: Spanned<&Grouper>,
463        config: &nu_protocol::Config,
464        engine_state: &EngineState,
465        stack: &mut Stack,
466    ) -> Result<(), ShellError> {
467        let groups = match &mut self.groups {
468            Tree::Leaf(groups) => std::mem::take(groups)
469                .into_iter()
470                .map(|(key, values)| -> Result<_, ShellError> {
471                    let leaf = Self::new(grouper, values, config, engine_state, stack)?;
472                    Ok((key, leaf))
473                })
474                .collect::<Result<IndexMap<_, _>, ShellError>>()?,
475            Tree::Branch(nested_groups) => {
476                let mut nested_groups = std::mem::take(nested_groups);
477                for v in nested_groups.values_mut() {
478                    v.subgroup(grouper, config, engine_state, stack)?;
479                }
480                nested_groups
481            }
482        };
483        self.groups = Tree::Branch(groups);
484        Ok(())
485    }
486
487    fn into_table(self, column_names: &[String], head: Span) -> Value {
488        self._into_table(head)
489            .into_iter()
490            .map(|row| {
491                row.into_iter()
492                    .rev()
493                    .zip(column_names)
494                    .map(|(val, key)| (key.clone(), val))
495                    .collect::<Record>()
496                    .into_value(head)
497            })
498            .collect::<Vec<_>>()
499            .into_value(head)
500    }
501
502    fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
503        match self.groups {
504            Tree::Leaf(leaf) => leaf
505                .into_iter()
506                .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
507                .collect::<Vec<Vec<Value>>>(),
508            Tree::Branch(branch) => branch
509                .into_iter()
510                .flat_map(|(group, items)| {
511                    let mut inner = items._into_table(head);
512                    for row in &mut inner {
513                        row.push(group.clone().into_value(head));
514                    }
515                    inner
516                })
517                .collect(),
518        }
519    }
520
521    fn into_record(self, head: Span) -> Value {
522        match self.groups {
523            Tree::Leaf(leaf) => Value::record(
524                leaf.into_iter()
525                    .map(|(k, v)| (k, v.into_value(head)))
526                    .collect(),
527                head,
528            ),
529            Tree::Branch(branch) => {
530                let values = branch
531                    .into_iter()
532                    .map(|(k, v)| (k, v.into_record(head)))
533                    .collect();
534                Value::record(values, head)
535            }
536        }
537    }
538}
539
#[cfg(test)]
mod test {
    use super::GroupBy;

    /// Passes the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        // Called by path: importing the helper at module level would clash with this
        // function's own name.
        crate::test_examples(GroupBy)
    }
}