1use indexmap::IndexMap;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, IntoValue, engine::Closure};
4
/// The `group-by` command: splits a list or table into groups keyed by a
/// cell path, by a closure's result, or (with no grouper) by the value itself.
#[derive(Clone)]
pub struct GroupBy;
7
8impl Command for GroupBy {
9 fn name(&self) -> &str {
10 "group-by"
11 }
12
13 fn signature(&self) -> Signature {
14 Signature::build("group-by")
15 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
16 .switch(
17 "to-table",
18 "Return a table with \"groups\" and \"items\" columns",
19 None,
20 )
21 .rest(
22 "grouper",
23 SyntaxShape::OneOf(vec![
24 SyntaxShape::CellPath,
25 SyntaxShape::Closure(None),
26 SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
27 ]),
28 "The path to the column to group on.",
29 )
30 .category(Category::Filters)
31 }
32
33 fn description(&self) -> &str {
34 "Splits a list or table into groups, and returns a record containing those groups."
35 }
36
37 fn extra_description(&self) -> &str {
38 r#"the group-by command makes some assumptions:
39 - if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
40 - datetime is formatted based on your configuration setting. use `format date` to change the format.
41 - filesize is formatted based on your configuration setting. use `format filesize` to change the format.
42 - some nushell values are not supported, such as closures."#
43 }
44
45 fn run(
46 &self,
47 engine_state: &EngineState,
48 stack: &mut Stack,
49 call: &Call,
50 input: PipelineData,
51 ) -> Result<PipelineData, ShellError> {
52 group_by(engine_state, stack, call, input)
53 }
54
55 fn examples(&self) -> Vec<Example<'_>> {
56 vec![
57 Example {
58 description: "Group items by the \"type\" column's values",
59 example: r#"ls | group-by type"#,
60 result: None,
61 },
62 Example {
63 description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column",
64 example: r#"open cool.json | group-by foo?"#,
65 result: None,
66 },
67 Example {
68 description: "Group using a block which is evaluated against each input value",
69 example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
70 result: Some(Value::test_record(record! {
71 "txt" => Value::test_list(vec![
72 Value::test_string("foo.txt"),
73 Value::test_string("baz.txt"),
74 ]),
75 "csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
76 })),
77 },
78 Example {
79 description: "You can also group by raw values by leaving out the argument",
80 example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
81 result: Some(Value::test_record(record! {
82 "1" => Value::test_list(vec![
83 Value::test_string("1"),
84 Value::test_string("1"),
85 Value::test_string("1"),
86 Value::test_string("1"),
87 ]),
88 "3" => Value::test_list(vec![
89 Value::test_string("3"),
90 Value::test_string("3"),
91 ]),
92 "2" => Value::test_list(vec![Value::test_string("2")]),
93 })),
94 },
95 Example {
96 description: "You can also output a table instead of a record",
97 example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
98 result: Some(Value::test_list(vec![
99 Value::test_record(record! {
100 "group" => Value::test_string("1"),
101 "items" => Value::test_list(vec![
102 Value::test_string("1"),
103 Value::test_string("1"),
104 Value::test_string("1"),
105 Value::test_string("1"),
106 ]),
107 }),
108 Value::test_record(record! {
109 "group" => Value::test_string("3"),
110 "items" => Value::test_list(vec![
111 Value::test_string("3"),
112 Value::test_string("3"),
113 ]),
114 }),
115 Value::test_record(record! {
116 "group" => Value::test_string("2"),
117 "items" => Value::test_list(vec![Value::test_string("2")]),
118 }),
119 ])),
120 },
121 Example {
122 description: "Group bools, whether they are strings or actual bools",
123 example: r#"[true "true" false "false"] | group-by"#,
124 result: Some(Value::test_record(record! {
125 "true" => Value::test_list(vec![
126 Value::test_bool(true),
127 Value::test_string("true"),
128 ]),
129 "false" => Value::test_list(vec![
130 Value::test_bool(false),
131 Value::test_string("false"),
132 ]),
133 })),
134 },
135 Example {
136 description: "Group items by multiple columns' values",
137 example: r#"[
138 [name, lang, year];
139 [andres, rb, "2019"],
140 [jt, rs, "2019"],
141 [storm, rs, "2021"]
142 ]
143 | group-by lang year"#,
144 result: Some(Value::test_record(record! {
145 "rb" => Value::test_record(record! {
146 "2019" => Value::test_list(
147 vec![Value::test_record(record! {
148 "name" => Value::test_string("andres"),
149 "lang" => Value::test_string("rb"),
150 "year" => Value::test_string("2019"),
151 })],
152 ),
153 }),
154 "rs" => Value::test_record(record! {
155 "2019" => Value::test_list(
156 vec![Value::test_record(record! {
157 "name" => Value::test_string("jt"),
158 "lang" => Value::test_string("rs"),
159 "year" => Value::test_string("2019"),
160 })],
161 ),
162 "2021" => Value::test_list(
163 vec![Value::test_record(record! {
164 "name" => Value::test_string("storm"),
165 "lang" => Value::test_string("rs"),
166 "year" => Value::test_string("2021"),
167 })],
168 ),
169 }),
170 })),
171 },
172 Example {
173 description: "Group items by multiple columns' values",
174 example: r#"[
175 [name, lang, year];
176 [andres, rb, "2019"],
177 [jt, rs, "2019"],
178 [storm, rs, "2021"]
179 ]
180 | group-by lang year --to-table"#,
181 result: Some(Value::test_list(vec![
182 Value::test_record(record! {
183 "lang" => Value::test_string("rb"),
184 "year" => Value::test_string("2019"),
185 "items" => Value::test_list(vec![
186 Value::test_record(record! {
187 "name" => Value::test_string("andres"),
188 "lang" => Value::test_string("rb"),
189 "year" => Value::test_string("2019"),
190 })
191 ]),
192 }),
193 Value::test_record(record! {
194 "lang" => Value::test_string("rs"),
195 "year" => Value::test_string("2019"),
196 "items" => Value::test_list(vec![
197 Value::test_record(record! {
198 "name" => Value::test_string("jt"),
199 "lang" => Value::test_string("rs"),
200 "year" => Value::test_string("2019"),
201 })
202 ]),
203 }),
204 Value::test_record(record! {
205 "lang" => Value::test_string("rs"),
206 "year" => Value::test_string("2021"),
207 "items" => Value::test_list(vec![
208 Value::test_record(record! {
209 "name" => Value::test_string("storm"),
210 "lang" => Value::test_string("rs"),
211 "year" => Value::test_string("2021"),
212 })
213 ]),
214 }),
215 ])),
216 },
217 Example {
218 description: "Group items by column and delete the original",
219 example: r#"[
220 [name, lang, year];
221 [andres, rb, "2019"],
222 [jt, rs, "2019"],
223 [storm, rs, "2021"]
224 ]
225 | group-by lang | update cells { reject lang }"#,
226 #[cfg(test)] result: None,
228 #[cfg(not(test))]
229 result: Some(Value::test_record(record! {
230 "rb" => Value::test_list(vec![Value::test_record(record! {
231 "name" => Value::test_string("andres"),
232 "year" => Value::test_string("2019"),
233 })],
234 ),
235 "rs" => Value::test_list(
236 vec![
237 Value::test_record(record! {
238 "name" => Value::test_string("jt"),
239 "year" => Value::test_string("2019"),
240 }),
241 Value::test_record(record! {
242 "name" => Value::test_string("storm"),
243 "year" => Value::test_string("2021"),
244 })
245 ]),
246 })),
247 },
248 ]
249 }
250}
251
252pub fn group_by(
253 engine_state: &EngineState,
254 stack: &mut Stack,
255 call: &Call,
256 input: PipelineData,
257) -> Result<PipelineData, ShellError> {
258 let head = call.head;
259 let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
260 let to_table = call.has_flag(engine_state, stack, "to-table")?;
261 let config = engine_state.get_config();
262
263 let values: Vec<Value> = input.into_iter().collect();
264 if values.is_empty() {
265 let val = if to_table {
266 Value::list(Vec::new(), head)
267 } else {
268 Value::record(Record::new(), head)
269 };
270 return Ok(val.into_pipeline_data());
271 }
272
273 let grouped = match &groupers[..] {
274 [first, rest @ ..] => {
275 let mut grouped = Grouped::new(first.as_ref(), values, config, engine_state, stack)?;
276 for grouper in rest {
277 grouped.subgroup(grouper.as_ref(), config, engine_state, stack)?;
278 }
279 grouped
280 }
281 [] => Grouped::empty(values, config),
282 };
283
284 let value = if to_table {
285 let column_names = groupers_to_column_names(&groupers)?;
286 grouped.into_table(&column_names, head)
287 } else {
288 grouped.into_record(head)
289 };
290
291 Ok(value.into_pipeline_data())
292}
293
294fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
295 if groupers.is_empty() {
296 return Ok(vec!["group".into(), "items".into()]);
297 }
298
299 let mut closure_idx: usize = 0;
300 let grouper_names = groupers.iter().map(|grouper| {
301 grouper.as_ref().map(|item| match item {
302 Grouper::CellPath { val } => val.to_column_name(),
303 Grouper::Closure { .. } => {
304 closure_idx += 1;
305 format!("closure_{}", closure_idx - 1)
306 }
307 })
308 });
309
310 let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
311
312 for name in grouper_names {
313 if name.item == "items" {
314 return Err(ShellError::GenericError {
315 error: "grouper arguments can't be named `items`".into(),
316 msg: "here".into(),
317 span: Some(name.span),
318 help: Some("instead of a cell-path, try using a closure: { get items }".into()),
319 inner: vec![],
320 });
321 }
322
323 if let Some(conflicting_name) = name_set
324 .iter()
325 .find(|elem| elem.as_ref().item == name.item.as_str())
326 {
327 return Err(ShellError::GenericError {
328 error: "grouper arguments result in colliding column names".into(),
329 msg: "duplicate column names".into(),
330 span: Some(conflicting_name.span.append(name.span)),
331 help: Some(
332 "instead of a cell-path, try using a closure or renaming columns".into(),
333 ),
334 inner: vec![ShellError::ColumnDefinedTwice {
335 col_name: conflicting_name.item.clone(),
336 first_use: conflicting_name.span,
337 second_use: name.span,
338 }],
339 });
340 }
341
342 name_set.push(name);
343 }
344
345 let column_names: Vec<String> = name_set
346 .into_iter()
347 .map(|elem| elem.item)
348 .chain(["items".into()])
349 .collect();
350 Ok(column_names)
351}
352
353fn group_cell_path(
354 column_name: &CellPath,
355 values: Vec<Value>,
356 config: &nu_protocol::Config,
357) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
358 let mut groups = IndexMap::<_, Vec<_>>::new();
359
360 for value in values.into_iter() {
361 let key = value.follow_cell_path(&column_name.members)?;
362
363 if key.is_nothing() {
364 continue; }
366
367 let key = key.to_abbreviated_string(config);
368 groups.entry(key).or_default().push(value);
369 }
370
371 Ok(groups)
372}
373
374fn group_closure(
375 values: Vec<Value>,
376 span: Span,
377 closure: Closure,
378 engine_state: &EngineState,
379 stack: &mut Stack,
380) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
381 let mut groups = IndexMap::<_, Vec<_>>::new();
382 let mut closure = ClosureEval::new(engine_state, stack, closure);
383 let config = engine_state.get_config();
384
385 for value in values {
386 let key = closure
387 .run_with_value(value.clone())?
388 .into_value(span)?
389 .to_abbreviated_string(config);
390
391 groups.entry(key).or_default().push(value);
392 }
393
394 Ok(groups)
395}
396
/// How one grouping level derives its key from each input value.
enum Grouper {
    /// Follow this cell path into the value and use what it resolves to.
    CellPath { val: CellPath },
    /// Run this closure on the value and use its output.
    Closure { val: Box<Closure> },
}
401
402impl FromValue for Grouper {
403 fn from_value(v: Value) -> Result<Self, ShellError> {
404 match v {
405 Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
406 Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
407 _ => Err(ShellError::TypeMismatch {
408 err_message: "unsupported grouper type".to_string(),
409 span: v.span(),
410 }),
411 }
412 }
413}
414
/// The result of grouping: a tree with one level per grouper applied.
struct Grouped {
    /// Root level of the tree.
    groups: Tree,
}
418
/// One level of a [`Grouped`] tree.
enum Tree {
    /// Innermost level: group key mapped to the values in that group.
    Leaf(IndexMap<String, Vec<Value>>),
    /// Outer level: group key mapped to the next, nested level of grouping.
    Branch(IndexMap<String, Grouped>),
}
423
424impl Grouped {
425 fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
426 let mut groups = IndexMap::<_, Vec<_>>::new();
427
428 for value in values.into_iter() {
429 let key = value.to_abbreviated_string(config);
430 groups.entry(key).or_default().push(value);
431 }
432
433 Self {
434 groups: Tree::Leaf(groups),
435 }
436 }
437
438 fn new(
439 grouper: Spanned<&Grouper>,
440 values: Vec<Value>,
441 config: &nu_protocol::Config,
442 engine_state: &EngineState,
443 stack: &mut Stack,
444 ) -> Result<Self, ShellError> {
445 let groups = match grouper.item {
446 Grouper::CellPath { val } => group_cell_path(val, values, config)?,
447 Grouper::Closure { val } => group_closure(
448 values,
449 grouper.span,
450 Closure::clone(val),
451 engine_state,
452 stack,
453 )?,
454 };
455 Ok(Self {
456 groups: Tree::Leaf(groups),
457 })
458 }
459
460 fn subgroup(
461 &mut self,
462 grouper: Spanned<&Grouper>,
463 config: &nu_protocol::Config,
464 engine_state: &EngineState,
465 stack: &mut Stack,
466 ) -> Result<(), ShellError> {
467 let groups = match &mut self.groups {
468 Tree::Leaf(groups) => std::mem::take(groups)
469 .into_iter()
470 .map(|(key, values)| -> Result<_, ShellError> {
471 let leaf = Self::new(grouper, values, config, engine_state, stack)?;
472 Ok((key, leaf))
473 })
474 .collect::<Result<IndexMap<_, _>, ShellError>>()?,
475 Tree::Branch(nested_groups) => {
476 let mut nested_groups = std::mem::take(nested_groups);
477 for v in nested_groups.values_mut() {
478 v.subgroup(grouper, config, engine_state, stack)?;
479 }
480 nested_groups
481 }
482 };
483 self.groups = Tree::Branch(groups);
484 Ok(())
485 }
486
487 fn into_table(self, column_names: &[String], head: Span) -> Value {
488 self._into_table(head)
489 .into_iter()
490 .map(|row| {
491 row.into_iter()
492 .rev()
493 .zip(column_names)
494 .map(|(val, key)| (key.clone(), val))
495 .collect::<Record>()
496 .into_value(head)
497 })
498 .collect::<Vec<_>>()
499 .into_value(head)
500 }
501
502 fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
503 match self.groups {
504 Tree::Leaf(leaf) => leaf
505 .into_iter()
506 .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
507 .collect::<Vec<Vec<Value>>>(),
508 Tree::Branch(branch) => branch
509 .into_iter()
510 .flat_map(|(group, items)| {
511 let mut inner = items._into_table(head);
512 for row in &mut inner {
513 row.push(group.clone().into_value(head));
514 }
515 inner
516 })
517 .collect(),
518 }
519 }
520
521 fn into_record(self, head: Span) -> Value {
522 match self.groups {
523 Tree::Leaf(leaf) => Value::record(
524 leaf.into_iter()
525 .map(|(k, v)| (k, v.into_value(head)))
526 .collect(),
527 head,
528 ),
529 Tree::Branch(branch) => {
530 let values = branch
531 .into_iter()
532 .map(|(k, v)| (k, v.into_record(head)))
533 .collect();
534 Value::record(values, head)
535 }
536 }
537 }
538}
539
#[cfg(test)]
mod test {
    use super::GroupBy;

    /// Hands the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(GroupBy)
    }
}