1use indexmap::IndexMap;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, IntoValue, engine::Closure, shell_error::generic::GenericError};
4
5#[derive(Clone)]
6pub struct GroupBy;
7
8impl Command for GroupBy {
9 fn name(&self) -> &str {
10 "group-by"
11 }
12
13 fn signature(&self) -> Signature {
14 Signature::build("group-by")
15 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
16 .switch(
17 "to-table",
18 "Return a table with \"groups\" and \"items\" columns.",
19 None,
20 )
21 .switch(
22 "prune",
23 "Remove a column after grouping, if applicable.",
24 None,
25 )
26 .rest(
27 "grouper",
28 SyntaxShape::OneOf(vec![
29 SyntaxShape::CellPath,
30 SyntaxShape::Closure(None),
31 SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
32 ]),
33 "The path to the column to group on.",
34 )
35 .category(Category::Filters)
36 }
37
38 fn description(&self) -> &str {
39 "Splits a list or table into groups, and returns a record containing those groups."
40 }
41
42 fn extra_description(&self) -> &str {
43 r#"the group-by command makes some assumptions:
44 - if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
45 - datetime is formatted based on your configuration setting. use `format date` to change the format.
46 - filesize is formatted based on your configuration setting. use `format filesize` to change the format.
47 - some nushell values are not supported, such as closures."#
48 }
49
50 fn run(
51 &self,
52 engine_state: &EngineState,
53 stack: &mut Stack,
54 call: &Call,
55 input: PipelineData,
56 ) -> Result<PipelineData, ShellError> {
57 group_by(engine_state, stack, call, input)
58 }
59
60 fn examples(&self) -> Vec<Example<'_>> {
61 vec![
62 Example {
63 description: "Group items by the \"type\" column's values.",
64 example: "ls | group-by type",
65 result: None,
66 },
67 Example {
68 description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column.",
69 example: "open cool.json | group-by foo?",
70 result: None,
71 },
72 Example {
73 description: "Group using a block which is evaluated against each input value.",
74 example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
75 result: Some(Value::test_record(record! {
76 "txt" => Value::test_list(vec![
77 Value::test_string("foo.txt"),
78 Value::test_string("baz.txt"),
79 ]),
80 "csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
81 })),
82 },
83 Example {
84 description: "You can also group by raw values by leaving out the argument.",
85 example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
86 result: Some(Value::test_record(record! {
87 "1" => Value::test_list(vec![
88 Value::test_string("1"),
89 Value::test_string("1"),
90 Value::test_string("1"),
91 Value::test_string("1"),
92 ]),
93 "3" => Value::test_list(vec![
94 Value::test_string("3"),
95 Value::test_string("3"),
96 ]),
97 "2" => Value::test_list(vec![Value::test_string("2")]),
98 })),
99 },
100 Example {
101 description: "You can also output a table instead of a record.",
102 example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
103 result: Some(Value::test_list(vec![
104 Value::test_record(record! {
105 "group" => Value::test_string("1"),
106 "items" => Value::test_list(vec![
107 Value::test_string("1"),
108 Value::test_string("1"),
109 Value::test_string("1"),
110 Value::test_string("1"),
111 ]),
112 }),
113 Value::test_record(record! {
114 "group" => Value::test_string("3"),
115 "items" => Value::test_list(vec![
116 Value::test_string("3"),
117 Value::test_string("3"),
118 ]),
119 }),
120 Value::test_record(record! {
121 "group" => Value::test_string("2"),
122 "items" => Value::test_list(vec![Value::test_string("2")]),
123 }),
124 ])),
125 },
126 Example {
127 description: "Group bools, whether they are strings or actual bools.",
128 example: r#"[true "true" false "false"] | group-by"#,
129 result: Some(Value::test_record(record! {
130 "true" => Value::test_list(vec![
131 Value::test_bool(true),
132 Value::test_string("true"),
133 ]),
134 "false" => Value::test_list(vec![
135 Value::test_bool(false),
136 Value::test_string("false"),
137 ]),
138 })),
139 },
140 Example {
141 description: "Group items by multiple columns' values.",
142 example: r#"[
143 [name, lang, year];
144 [andres, rb, "2019"],
145 [jt, rs, "2019"],
146 [storm, rs, "2021"]
147 ]
148 | group-by lang year"#,
149 result: Some(Value::test_record(record! {
150 "rb" => Value::test_record(record! {
151 "2019" => Value::test_list(
152 vec![Value::test_record(record! {
153 "name" => Value::test_string("andres"),
154 "lang" => Value::test_string("rb"),
155 "year" => Value::test_string("2019"),
156 })],
157 ),
158 }),
159 "rs" => Value::test_record(record! {
160 "2019" => Value::test_list(
161 vec![Value::test_record(record! {
162 "name" => Value::test_string("jt"),
163 "lang" => Value::test_string("rs"),
164 "year" => Value::test_string("2019"),
165 })],
166 ),
167 "2021" => Value::test_list(
168 vec![Value::test_record(record! {
169 "name" => Value::test_string("storm"),
170 "lang" => Value::test_string("rs"),
171 "year" => Value::test_string("2021"),
172 })],
173 ),
174 }),
175 })),
176 },
177 Example {
178 description: "Group items by multiple columns' values.",
179 example: r#"[
180 [name, lang, year];
181 [andres, rb, "2019"],
182 [jt, rs, "2019"],
183 [storm, rs, "2021"]
184 ]
185 | group-by lang year --to-table"#,
186 result: Some(Value::test_list(vec![
187 Value::test_record(record! {
188 "lang" => Value::test_string("rb"),
189 "year" => Value::test_string("2019"),
190 "items" => Value::test_list(vec![
191 Value::test_record(record! {
192 "name" => Value::test_string("andres"),
193 "lang" => Value::test_string("rb"),
194 "year" => Value::test_string("2019"),
195 })
196 ]),
197 }),
198 Value::test_record(record! {
199 "lang" => Value::test_string("rs"),
200 "year" => Value::test_string("2019"),
201 "items" => Value::test_list(vec![
202 Value::test_record(record! {
203 "name" => Value::test_string("jt"),
204 "lang" => Value::test_string("rs"),
205 "year" => Value::test_string("2019"),
206 })
207 ]),
208 }),
209 Value::test_record(record! {
210 "lang" => Value::test_string("rs"),
211 "year" => Value::test_string("2021"),
212 "items" => Value::test_list(vec![
213 Value::test_record(record! {
214 "name" => Value::test_string("storm"),
215 "lang" => Value::test_string("rs"),
216 "year" => Value::test_string("2021"),
217 })
218 ]),
219 }),
220 ])),
221 },
222 Example {
223 description: "Group items by column and delete the original.",
224 example: r#"[
225 [name, lang, year];
226 [andres, rb, "2019"],
227 [jt, rs, "2019"],
228 [storm, rs, "2021"]
229 ]
230 | group-by lang --prune"#,
231 #[cfg(test)] result: None,
233 #[cfg(not(test))]
234 result: Some(Value::test_record(record! {
235 "rb" => Value::test_list(vec![Value::test_record(record! {
236 "name" => Value::test_string("andres"),
237 "year" => Value::test_string("2019"),
238 })],
239 ),
240 "rs" => Value::test_list(
241 vec![
242 Value::test_record(record! {
243 "name" => Value::test_string("jt"),
244 "year" => Value::test_string("2019"),
245 }),
246 Value::test_record(record! {
247 "name" => Value::test_string("storm"),
248 "year" => Value::test_string("2021"),
249 })
250 ]),
251 })),
252 },
253 ]
254 }
255}
256
257pub fn group_by(
258 engine_state: &EngineState,
259 stack: &mut Stack,
260 call: &Call,
261 input: PipelineData,
262) -> Result<PipelineData, ShellError> {
263 let head = call.head;
264 let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
265 let to_table = call.has_flag(engine_state, stack, "to-table")?;
266 let prune = call.has_flag(engine_state, stack, "prune")?;
267 let config = &stack.get_config(engine_state);
268
269 let values: Vec<Value> = input.into_iter().collect();
270 if values.is_empty() {
271 let val = if to_table {
272 Value::list(Vec::new(), head)
273 } else {
274 Value::record(Record::new(), head)
275 };
276 return Ok(val.into_pipeline_data());
277 }
278
279 let grouped = match &groupers[..] {
280 [first, rest @ ..] => {
281 let mut grouped =
282 Grouped::new(first.as_ref(), prune, values, config, engine_state, stack)?;
283 for grouper in rest {
284 grouped.subgroup(grouper.as_ref(), prune, config, engine_state, stack)?;
285 }
286 grouped
287 }
288 [] => Grouped::empty(values, config),
289 };
290
291 let value = if to_table {
292 let column_names = groupers_to_column_names(&groupers)?;
293 grouped.into_table(&column_names, head)
294 } else {
295 grouped.into_record(head)
296 };
297
298 Ok(value.into_pipeline_data())
299}
300
301fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
302 if groupers.is_empty() {
303 return Ok(vec!["group".into(), "items".into()]);
304 }
305
306 let mut closure_idx: usize = 0;
307 let grouper_names = groupers.iter().map(|grouper| {
308 grouper.as_ref().map(|item| match item {
309 Grouper::CellPath { val } => val.to_column_name(),
310 Grouper::Closure { .. } => {
311 closure_idx += 1;
312 format!("closure_{}", closure_idx - 1)
313 }
314 })
315 });
316
317 let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
318
319 for name in grouper_names {
320 if name.item == "items" {
321 return Err(ShellError::Generic(
322 GenericError::new(
323 "grouper arguments can't be named `items`",
324 "here",
325 name.span,
326 )
327 .with_help("instead of a cell-path, try using a closure: { get items }"),
328 ));
329 }
330
331 if let Some(conflicting_name) = name_set
332 .iter()
333 .find(|elem| elem.as_ref().item == name.item.as_str())
334 {
335 return Err(ShellError::Generic(
336 GenericError::new(
337 "grouper arguments result in colliding column names",
338 "duplicate column names",
339 conflicting_name.span.append(name.span),
340 )
341 .with_help("instead of a cell-path, try using a closure or renaming columns")
342 .with_inner([ShellError::ColumnDefinedTwice {
343 col_name: conflicting_name.item.clone(),
344 first_use: conflicting_name.span,
345 second_use: name.span,
346 }]),
347 ));
348 }
349
350 name_set.push(name);
351 }
352
353 let column_names: Vec<String> = name_set
354 .into_iter()
355 .map(|elem| elem.item)
356 .chain(["items".into()])
357 .collect();
358 Ok(column_names)
359}
360
361fn group_cell_path(
362 column_name: &CellPath,
363 prune: bool,
364 values: Vec<Value>,
365 config: &nu_protocol::Config,
366) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
367 let mut groups = IndexMap::<_, Vec<_>>::new();
368
369 for mut value in values.into_iter() {
370 let key = value.follow_cell_path(&column_name.members)?;
371
372 if key.is_nothing() {
373 continue; }
375
376 let key = key.to_expanded_string(", ", config);
377
378 if prune {
379 let _ = value.remove_data_at_cell_path(&column_name.members);
381
382 let parent = column_name.members.split_last().map(|(_, head)| head);
384
385 if let Some(parent) = parent
386 && let Ok(parent_value) = value.follow_cell_path(parent)
387 && parent_value.is_empty()
388 {
389 let _ = value.remove_data_at_cell_path(parent);
390 }
391 }
392
393 groups.entry(key).or_default().push(value);
394 }
395
396 Ok(groups)
397}
398
399fn group_closure(
400 values: Vec<Value>,
401 span: Span,
402 closure: Closure,
403 engine_state: &EngineState,
404 stack: &mut Stack,
405) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
406 let mut groups = IndexMap::<_, Vec<_>>::new();
407 let mut closure = ClosureEval::new(engine_state, stack, closure);
408 let config = &stack.get_config(engine_state);
409
410 for value in values {
411 let key = closure
412 .run_with_value(value.clone())?
413 .into_value(span)?
414 .to_expanded_string(", ", config);
415
416 groups.entry(key).or_default().push(value);
417 }
418
419 Ok(groups)
420}
421
422enum Grouper {
423 CellPath { val: CellPath },
424 Closure { val: Box<Closure> },
425}
426
427impl FromValue for Grouper {
428 fn from_value(v: Value) -> Result<Self, ShellError> {
429 match v {
430 Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
431 Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
432 _ => Err(ShellError::TypeMismatch {
433 err_message: "unsupported grouper type".to_string(),
434 span: v.span(),
435 }),
436 }
437 }
438}
439
440struct Grouped {
441 groups: Tree,
442}
443
444enum Tree {
445 Leaf(IndexMap<String, Vec<Value>>),
446 Branch(IndexMap<String, Grouped>),
447}
448
449impl Grouped {
450 fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
451 let mut groups = IndexMap::<_, Vec<_>>::new();
452
453 for value in values.into_iter() {
454 let key = value.to_expanded_string(", ", config);
455 groups.entry(key).or_default().push(value);
456 }
457
458 Self {
459 groups: Tree::Leaf(groups),
460 }
461 }
462
463 fn new(
464 grouper: Spanned<&Grouper>,
465 prune: bool,
466 values: Vec<Value>,
467 config: &nu_protocol::Config,
468 engine_state: &EngineState,
469 stack: &mut Stack,
470 ) -> Result<Self, ShellError> {
471 let groups = match grouper.item {
472 Grouper::CellPath { val } => group_cell_path(val, prune, values, config)?,
473 Grouper::Closure { val } => group_closure(
474 values,
475 grouper.span,
476 Closure::clone(val),
477 engine_state,
478 stack,
479 )?,
480 };
481 Ok(Self {
482 groups: Tree::Leaf(groups),
483 })
484 }
485
486 fn subgroup(
487 &mut self,
488 grouper: Spanned<&Grouper>,
489 prune: bool,
490 config: &nu_protocol::Config,
491 engine_state: &EngineState,
492 stack: &mut Stack,
493 ) -> Result<(), ShellError> {
494 let groups = match &mut self.groups {
495 Tree::Leaf(groups) => std::mem::take(groups)
496 .into_iter()
497 .map(|(key, values)| -> Result<_, ShellError> {
498 let leaf = Self::new(grouper, prune, values, config, engine_state, stack)?;
499 Ok((key, leaf))
500 })
501 .collect::<Result<IndexMap<_, _>, ShellError>>()?,
502 Tree::Branch(nested_groups) => {
503 let mut nested_groups = std::mem::take(nested_groups);
504 for v in nested_groups.values_mut() {
505 v.subgroup(grouper, prune, config, engine_state, stack)?;
506 }
507 nested_groups
508 }
509 };
510 self.groups = Tree::Branch(groups);
511 Ok(())
512 }
513
514 fn into_table(self, column_names: &[String], head: Span) -> Value {
515 self._into_table(head)
516 .into_iter()
517 .map(|row| {
518 row.into_iter()
519 .rev()
520 .zip(column_names)
521 .map(|(val, key)| (key.clone(), val))
522 .collect::<Record>()
523 .into_value(head)
524 })
525 .collect::<Vec<_>>()
526 .into_value(head)
527 }
528
529 fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
530 match self.groups {
531 Tree::Leaf(leaf) => leaf
532 .into_iter()
533 .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
534 .collect::<Vec<Vec<Value>>>(),
535 Tree::Branch(branch) => branch
536 .into_iter()
537 .flat_map(|(group, items)| {
538 let mut inner = items._into_table(head);
539 for row in &mut inner {
540 row.push(group.clone().into_value(head));
541 }
542 inner
543 })
544 .collect(),
545 }
546 }
547
548 fn into_record(self, head: Span) -> Value {
549 match self.groups {
550 Tree::Leaf(leaf) => Value::record(
551 leaf.into_iter()
552 .map(|(k, v)| (k, v.into_value(head)))
553 .collect(),
554 head,
555 ),
556 Tree::Branch(branch) => {
557 let values = branch
558 .into_iter()
559 .map(|(k, v)| (k, v.into_record(head)))
560 .collect();
561 Value::record(values, head)
562 }
563 }
564 }
565}
566
// Unit tests for the `group-by` command; compiled only for test builds.
#[cfg(test)]
mod test {
    use super::*;

    /// Passes `GroupBy` to the `nu_test_support` example tester.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(GroupBy)
    }
}