Skip to main content

nu_command/filters/
group_by.rs

1use indexmap::IndexMap;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, IntoValue, engine::Closure, shell_error::generic::GenericError};
4
/// The `group-by` filter command.
///
/// It carries no state; all behaviour lives in its `Command` implementation.
#[derive(Clone)]
pub struct GroupBy;
7
8impl Command for GroupBy {
9    fn name(&self) -> &str {
10        "group-by"
11    }
12
13    fn signature(&self) -> Signature {
14        Signature::build("group-by")
15            .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
16            .switch(
17                "to-table",
18                "Return a table with \"groups\" and \"items\" columns.",
19                None,
20            )
21            .switch(
22                "prune",
23                "Remove a column after grouping, if applicable.",
24                None,
25            )
26            .rest(
27                "grouper",
28                SyntaxShape::OneOf(vec![
29                    SyntaxShape::CellPath,
30                    SyntaxShape::Closure(None),
31                    SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
32                ]),
33                "The path to the column to group on.",
34            )
35            .category(Category::Filters)
36    }
37
38    fn description(&self) -> &str {
39        "Splits a list or table into groups, and returns a record containing those groups."
40    }
41
42    fn extra_description(&self) -> &str {
43        r#"the group-by command makes some assumptions:
44    - if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
45    - datetime is formatted based on your configuration setting. use `format date` to change the format.
46    - filesize is formatted based on your configuration setting. use `format filesize` to change the format.
47    - some nushell values are not supported, such as closures."#
48    }
49
50    fn run(
51        &self,
52        engine_state: &EngineState,
53        stack: &mut Stack,
54        call: &Call,
55        input: PipelineData,
56    ) -> Result<PipelineData, ShellError> {
57        group_by(engine_state, stack, call, input)
58    }
59
60    fn examples(&self) -> Vec<Example<'_>> {
61        vec![
62            Example {
63                description: "Group items by the \"type\" column's values.",
64                example: "ls | group-by type",
65                result: None,
66            },
67            Example {
68                description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column.",
69                example: "open cool.json | group-by foo?",
70                result: None,
71            },
72            Example {
73                description: "Group using a block which is evaluated against each input value.",
74                example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
75                result: Some(Value::test_record(record! {
76                    "txt" => Value::test_list(vec![
77                        Value::test_string("foo.txt"),
78                        Value::test_string("baz.txt"),
79                    ]),
80                    "csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
81                })),
82            },
83            Example {
84                description: "You can also group by raw values by leaving out the argument.",
85                example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
86                result: Some(Value::test_record(record! {
87                    "1" => Value::test_list(vec![
88                        Value::test_string("1"),
89                        Value::test_string("1"),
90                        Value::test_string("1"),
91                        Value::test_string("1"),
92                    ]),
93                    "3" => Value::test_list(vec![
94                        Value::test_string("3"),
95                        Value::test_string("3"),
96                    ]),
97                    "2" => Value::test_list(vec![Value::test_string("2")]),
98                })),
99            },
100            Example {
101                description: "You can also output a table instead of a record.",
102                example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
103                result: Some(Value::test_list(vec![
104                    Value::test_record(record! {
105                        "group" => Value::test_string("1"),
106                        "items" => Value::test_list(vec![
107                            Value::test_string("1"),
108                            Value::test_string("1"),
109                            Value::test_string("1"),
110                            Value::test_string("1"),
111                        ]),
112                    }),
113                    Value::test_record(record! {
114                        "group" => Value::test_string("3"),
115                        "items" => Value::test_list(vec![
116                            Value::test_string("3"),
117                            Value::test_string("3"),
118                        ]),
119                    }),
120                    Value::test_record(record! {
121                        "group" => Value::test_string("2"),
122                        "items" => Value::test_list(vec![Value::test_string("2")]),
123                    }),
124                ])),
125            },
126            Example {
127                description: "Group bools, whether they are strings or actual bools.",
128                example: r#"[true "true" false "false"] | group-by"#,
129                result: Some(Value::test_record(record! {
130                    "true" => Value::test_list(vec![
131                        Value::test_bool(true),
132                        Value::test_string("true"),
133                    ]),
134                    "false" => Value::test_list(vec![
135                        Value::test_bool(false),
136                        Value::test_string("false"),
137                    ]),
138                })),
139            },
140            Example {
141                description: "Group items by multiple columns' values.",
142                example: r#"[
143        [name, lang, year];
144        [andres, rb, "2019"],
145        [jt, rs, "2019"],
146        [storm, rs, "2021"]
147    ]
148    | group-by lang year"#,
149                result: Some(Value::test_record(record! {
150                    "rb" => Value::test_record(record! {
151                        "2019" => Value::test_list(
152                            vec![Value::test_record(record! {
153                                    "name" => Value::test_string("andres"),
154                                    "lang" => Value::test_string("rb"),
155                                    "year" => Value::test_string("2019"),
156                            })],
157                        ),
158                    }),
159                    "rs" => Value::test_record(record! {
160                            "2019" => Value::test_list(
161                                vec![Value::test_record(record! {
162                                        "name" => Value::test_string("jt"),
163                                        "lang" => Value::test_string("rs"),
164                                        "year" => Value::test_string("2019"),
165                                })],
166                            ),
167                            "2021" => Value::test_list(
168                                vec![Value::test_record(record! {
169                                        "name" => Value::test_string("storm"),
170                                        "lang" => Value::test_string("rs"),
171                                        "year" => Value::test_string("2021"),
172                                })],
173                            ),
174                    }),
175                })),
176            },
177            Example {
178                description: "Group items by multiple columns' values.",
179                example: r#"[
180        [name, lang, year];
181        [andres, rb, "2019"],
182        [jt, rs, "2019"],
183        [storm, rs, "2021"]
184    ]
185    | group-by lang year --to-table"#,
186                result: Some(Value::test_list(vec![
187                    Value::test_record(record! {
188                        "lang" => Value::test_string("rb"),
189                        "year" => Value::test_string("2019"),
190                        "items" => Value::test_list(vec![
191                            Value::test_record(record! {
192                                "name" => Value::test_string("andres"),
193                                "lang" => Value::test_string("rb"),
194                                "year" => Value::test_string("2019"),
195                            })
196                        ]),
197                    }),
198                    Value::test_record(record! {
199                        "lang" => Value::test_string("rs"),
200                        "year" => Value::test_string("2019"),
201                        "items" => Value::test_list(vec![
202                            Value::test_record(record! {
203                                "name" => Value::test_string("jt"),
204                                "lang" => Value::test_string("rs"),
205                                "year" => Value::test_string("2019"),
206                            })
207                        ]),
208                    }),
209                    Value::test_record(record! {
210                        "lang" => Value::test_string("rs"),
211                        "year" => Value::test_string("2021"),
212                        "items" => Value::test_list(vec![
213                            Value::test_record(record! {
214                                "name" => Value::test_string("storm"),
215                                "lang" => Value::test_string("rs"),
216                                "year" => Value::test_string("2021"),
217                            })
218                        ]),
219                    }),
220                ])),
221            },
222            Example {
223                description: "Group items by column and delete the original.",
224                example: r#"[
225        [name, lang, year];
226        [andres, rb, "2019"],
227        [jt, rs, "2019"],
228        [storm, rs, "2021"]
229    ]
230    | group-by lang --prune"#,
231                #[cfg(test)] // Cannot test this example, it requires the nu-cmd-extra crate.
232                result: None,
233                #[cfg(not(test))]
234                result: Some(Value::test_record(record! {
235                        "rb" => Value::test_list(vec![Value::test_record(record! {
236                                        "name" => Value::test_string("andres"),
237                                        "year" => Value::test_string("2019"),
238                                })],
239                            ),
240                        "rs" => Value::test_list(
241                                    vec![
242                                    Value::test_record(record! {
243                                            "name" => Value::test_string("jt"),
244                                            "year" => Value::test_string("2019"),
245                                    }),
246                                    Value::test_record(record! {
247                                            "name" => Value::test_string("storm"),
248                                            "year" => Value::test_string("2021"),
249                                    })
250                            ]),
251                })),
252            },
253        ]
254    }
255}
256
257pub fn group_by(
258    engine_state: &EngineState,
259    stack: &mut Stack,
260    call: &Call,
261    input: PipelineData,
262) -> Result<PipelineData, ShellError> {
263    let head = call.head;
264    let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
265    let to_table = call.has_flag(engine_state, stack, "to-table")?;
266    let prune = call.has_flag(engine_state, stack, "prune")?;
267    let config = &stack.get_config(engine_state);
268
269    let values: Vec<Value> = input.into_iter().collect();
270    if values.is_empty() {
271        let val = if to_table {
272            Value::list(Vec::new(), head)
273        } else {
274            Value::record(Record::new(), head)
275        };
276        return Ok(val.into_pipeline_data());
277    }
278
279    let grouped = match &groupers[..] {
280        [first, rest @ ..] => {
281            let mut grouped =
282                Grouped::new(first.as_ref(), prune, values, config, engine_state, stack)?;
283            for grouper in rest {
284                grouped.subgroup(grouper.as_ref(), prune, config, engine_state, stack)?;
285            }
286            grouped
287        }
288        [] => Grouped::empty(values, config),
289    };
290
291    let value = if to_table {
292        let column_names = groupers_to_column_names(&groupers)?;
293        grouped.into_table(&column_names, head)
294    } else {
295        grouped.into_record(head)
296    };
297
298    Ok(value.into_pipeline_data())
299}
300
301fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
302    if groupers.is_empty() {
303        return Ok(vec!["group".into(), "items".into()]);
304    }
305
306    let mut closure_idx: usize = 0;
307    let grouper_names = groupers.iter().map(|grouper| {
308        grouper.as_ref().map(|item| match item {
309            Grouper::CellPath { val } => val.to_column_name(),
310            Grouper::Closure { .. } => {
311                closure_idx += 1;
312                format!("closure_{}", closure_idx - 1)
313            }
314        })
315    });
316
317    let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
318
319    for name in grouper_names {
320        if name.item == "items" {
321            return Err(ShellError::Generic(
322                GenericError::new(
323                    "grouper arguments can't be named `items`",
324                    "here",
325                    name.span,
326                )
327                .with_help("instead of a cell-path, try using a closure: { get items }"),
328            ));
329        }
330
331        if let Some(conflicting_name) = name_set
332            .iter()
333            .find(|elem| elem.as_ref().item == name.item.as_str())
334        {
335            return Err(ShellError::Generic(
336                GenericError::new(
337                    "grouper arguments result in colliding column names",
338                    "duplicate column names",
339                    conflicting_name.span.append(name.span),
340                )
341                .with_help("instead of a cell-path, try using a closure or renaming columns")
342                .with_inner([ShellError::ColumnDefinedTwice {
343                    col_name: conflicting_name.item.clone(),
344                    first_use: conflicting_name.span,
345                    second_use: name.span,
346                }]),
347            ));
348        }
349
350        name_set.push(name);
351    }
352
353    let column_names: Vec<String> = name_set
354        .into_iter()
355        .map(|elem| elem.item)
356        .chain(["items".into()])
357        .collect();
358    Ok(column_names)
359}
360
361fn group_cell_path(
362    column_name: &CellPath,
363    prune: bool,
364    values: Vec<Value>,
365    config: &nu_protocol::Config,
366) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
367    let mut groups = IndexMap::<_, Vec<_>>::new();
368
369    for mut value in values.into_iter() {
370        let key = value.follow_cell_path(&column_name.members)?;
371
372        if key.is_nothing() {
373            continue; // likely the result of a failed optional access, ignore this value
374        }
375
376        let key = key.to_expanded_string(", ", config);
377
378        if prune {
379            // it's okay if this fails since pruning is best-effort
380            let _ = value.remove_data_at_cell_path(&column_name.members);
381
382            // also try pruning parent, if it has now become empty
383            let parent = column_name.members.split_last().map(|(_, head)| head);
384
385            if let Some(parent) = parent
386                && let Ok(parent_value) = value.follow_cell_path(parent)
387                && parent_value.is_empty()
388            {
389                let _ = value.remove_data_at_cell_path(parent);
390            }
391        }
392
393        groups.entry(key).or_default().push(value);
394    }
395
396    Ok(groups)
397}
398
399fn group_closure(
400    values: Vec<Value>,
401    span: Span,
402    closure: Closure,
403    engine_state: &EngineState,
404    stack: &mut Stack,
405) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
406    let mut groups = IndexMap::<_, Vec<_>>::new();
407    let mut closure = ClosureEval::new(engine_state, stack, closure);
408    let config = &stack.get_config(engine_state);
409
410    for value in values {
411        let key = closure
412            .run_with_value(value.clone())?
413            .into_value(span)?
414            .to_expanded_string(", ", config);
415
416        groups.entry(key).or_default().push(value);
417    }
418
419    Ok(groups)
420}
421
/// A single `grouper` argument of `group-by`: how to derive the group key of a value.
enum Grouper {
    /// Use the data found at this cell path inside each value.
    CellPath { val: CellPath },
    /// Use the output of this closure, run once per value.
    Closure { val: Box<Closure> },
}
426
427impl FromValue for Grouper {
428    fn from_value(v: Value) -> Result<Self, ShellError> {
429        match v {
430            Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
431            Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
432            _ => Err(ShellError::TypeMismatch {
433                err_message: "unsupported grouper type".to_string(),
434                span: v.span(),
435            }),
436        }
437    }
438}
439
/// The result of grouping: a tree with one level per grouper applied so far.
struct Grouped {
    // Either the final groups of values (leaf) or further nested groupings (branch).
    groups: Tree,
}
443
/// One level of a [`Grouped`] tree, keyed by group name.
enum Tree {
    /// Innermost level: each key maps to the values that belong to that group.
    Leaf(IndexMap<String, Vec<Value>>),
    /// Intermediate level: each key maps to a nested grouping.
    Branch(IndexMap<String, Grouped>),
}
448
449impl Grouped {
450    fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
451        let mut groups = IndexMap::<_, Vec<_>>::new();
452
453        for value in values.into_iter() {
454            let key = value.to_expanded_string(", ", config);
455            groups.entry(key).or_default().push(value);
456        }
457
458        Self {
459            groups: Tree::Leaf(groups),
460        }
461    }
462
463    fn new(
464        grouper: Spanned<&Grouper>,
465        prune: bool,
466        values: Vec<Value>,
467        config: &nu_protocol::Config,
468        engine_state: &EngineState,
469        stack: &mut Stack,
470    ) -> Result<Self, ShellError> {
471        let groups = match grouper.item {
472            Grouper::CellPath { val } => group_cell_path(val, prune, values, config)?,
473            Grouper::Closure { val } => group_closure(
474                values,
475                grouper.span,
476                Closure::clone(val),
477                engine_state,
478                stack,
479            )?,
480        };
481        Ok(Self {
482            groups: Tree::Leaf(groups),
483        })
484    }
485
486    fn subgroup(
487        &mut self,
488        grouper: Spanned<&Grouper>,
489        prune: bool,
490        config: &nu_protocol::Config,
491        engine_state: &EngineState,
492        stack: &mut Stack,
493    ) -> Result<(), ShellError> {
494        let groups = match &mut self.groups {
495            Tree::Leaf(groups) => std::mem::take(groups)
496                .into_iter()
497                .map(|(key, values)| -> Result<_, ShellError> {
498                    let leaf = Self::new(grouper, prune, values, config, engine_state, stack)?;
499                    Ok((key, leaf))
500                })
501                .collect::<Result<IndexMap<_, _>, ShellError>>()?,
502            Tree::Branch(nested_groups) => {
503                let mut nested_groups = std::mem::take(nested_groups);
504                for v in nested_groups.values_mut() {
505                    v.subgroup(grouper, prune, config, engine_state, stack)?;
506                }
507                nested_groups
508            }
509        };
510        self.groups = Tree::Branch(groups);
511        Ok(())
512    }
513
514    fn into_table(self, column_names: &[String], head: Span) -> Value {
515        self._into_table(head)
516            .into_iter()
517            .map(|row| {
518                row.into_iter()
519                    .rev()
520                    .zip(column_names)
521                    .map(|(val, key)| (key.clone(), val))
522                    .collect::<Record>()
523                    .into_value(head)
524            })
525            .collect::<Vec<_>>()
526            .into_value(head)
527    }
528
529    fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
530        match self.groups {
531            Tree::Leaf(leaf) => leaf
532                .into_iter()
533                .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
534                .collect::<Vec<Vec<Value>>>(),
535            Tree::Branch(branch) => branch
536                .into_iter()
537                .flat_map(|(group, items)| {
538                    let mut inner = items._into_table(head);
539                    for row in &mut inner {
540                        row.push(group.clone().into_value(head));
541                    }
542                    inner
543                })
544                .collect(),
545        }
546    }
547
548    fn into_record(self, head: Span) -> Value {
549        match self.groups {
550            Tree::Leaf(leaf) => Value::record(
551                leaf.into_iter()
552                    .map(|(k, v)| (k, v.into_value(head)))
553                    .collect(),
554                head,
555            ),
556            Tree::Branch(branch) => {
557                let values = branch
558                    .into_iter()
559                    .map(|(k, v)| (k, v.into_record(head)))
560                    .collect();
561                Value::record(values, head)
562            }
563        }
564    }
565}
566
#[cfg(test)]
mod test {
    use super::*;

    /// Hands `GroupBy` to the `nu_test_support` example runner; presumably this checks
    /// each example that declares an expected result.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(GroupBy)
    }
}