1use indexmap::IndexMap;
2use nu_engine::{ClosureEval, command_prelude::*};
3use nu_protocol::{FromValue, IntoValue, engine::Closure};
4
/// Command type backing the `group-by` filter; its behavior lives in the
/// `Command` implementation for this type.
#[derive(Clone)]
pub struct GroupBy;
7
8impl Command for GroupBy {
9 fn name(&self) -> &str {
10 "group-by"
11 }
12
13 fn signature(&self) -> Signature {
14 Signature::build("group-by")
15 .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
16 .switch(
17 "to-table",
18 "Return a table with \"groups\" and \"items\" columns",
19 None,
20 )
21 .rest(
22 "grouper",
23 SyntaxShape::OneOf(vec![
24 SyntaxShape::CellPath,
25 SyntaxShape::Closure(None),
26 SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
27 ]),
28 "The path to the column to group on.",
29 )
30 .category(Category::Filters)
31 }
32
33 fn description(&self) -> &str {
34 "Splits a list or table into groups, and returns a record containing those groups."
35 }
36
37 fn extra_description(&self) -> &str {
38 r#"the group-by command makes some assumptions:
39 - if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
40 - datetime is formatted based on your configuration setting. use `format date` to change the format.
41 - filesize is formatted based on your configuration setting. use `format filesize` to change the format.
42 - some nushell values are not supported, such as closures."#
43 }
44
45 fn run(
46 &self,
47 engine_state: &EngineState,
48 stack: &mut Stack,
49 call: &Call,
50 input: PipelineData,
51 ) -> Result<PipelineData, ShellError> {
52 group_by(engine_state, stack, call, input)
53 }
54
55 fn examples(&self) -> Vec<Example<'_>> {
56 vec![
57 Example {
58 description: "Group items by the \"type\" column's values",
59 example: r#"ls | group-by type"#,
60 result: None,
61 },
62 Example {
63 description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column",
64 example: r#"open cool.json | group-by foo?"#,
65 result: None,
66 },
67 Example {
68 description: "Group using a block which is evaluated against each input value",
69 example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
70 result: Some(Value::test_record(record! {
71 "txt" => Value::test_list(vec![
72 Value::test_string("foo.txt"),
73 Value::test_string("baz.txt"),
74 ]),
75 "csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
76 })),
77 },
78 Example {
79 description: "You can also group by raw values by leaving out the argument",
80 example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
81 result: Some(Value::test_record(record! {
82 "1" => Value::test_list(vec![
83 Value::test_string("1"),
84 Value::test_string("1"),
85 Value::test_string("1"),
86 Value::test_string("1"),
87 ]),
88 "3" => Value::test_list(vec![
89 Value::test_string("3"),
90 Value::test_string("3"),
91 ]),
92 "2" => Value::test_list(vec![Value::test_string("2")]),
93 })),
94 },
95 Example {
96 description: "You can also output a table instead of a record",
97 example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
98 result: Some(Value::test_list(vec![
99 Value::test_record(record! {
100 "group" => Value::test_string("1"),
101 "items" => Value::test_list(vec![
102 Value::test_string("1"),
103 Value::test_string("1"),
104 Value::test_string("1"),
105 Value::test_string("1"),
106 ]),
107 }),
108 Value::test_record(record! {
109 "group" => Value::test_string("3"),
110 "items" => Value::test_list(vec![
111 Value::test_string("3"),
112 Value::test_string("3"),
113 ]),
114 }),
115 Value::test_record(record! {
116 "group" => Value::test_string("2"),
117 "items" => Value::test_list(vec![Value::test_string("2")]),
118 }),
119 ])),
120 },
121 Example {
122 description: "Group bools, whether they are strings or actual bools",
123 example: r#"[true "true" false "false"] | group-by"#,
124 result: Some(Value::test_record(record! {
125 "true" => Value::test_list(vec![
126 Value::test_bool(true),
127 Value::test_string("true"),
128 ]),
129 "false" => Value::test_list(vec![
130 Value::test_bool(false),
131 Value::test_string("false"),
132 ]),
133 })),
134 },
135 Example {
136 description: "Group items by multiple columns' values",
137 example: r#"[
138 [name, lang, year];
139 [andres, rb, "2019"],
140 [jt, rs, "2019"],
141 [storm, rs, "2021"]
142 ]
143 | group-by lang year"#,
144 result: Some(Value::test_record(record! {
145 "rb" => Value::test_record(record! {
146 "2019" => Value::test_list(
147 vec![Value::test_record(record! {
148 "name" => Value::test_string("andres"),
149 "lang" => Value::test_string("rb"),
150 "year" => Value::test_string("2019"),
151 })],
152 ),
153 }),
154 "rs" => Value::test_record(record! {
155 "2019" => Value::test_list(
156 vec![Value::test_record(record! {
157 "name" => Value::test_string("jt"),
158 "lang" => Value::test_string("rs"),
159 "year" => Value::test_string("2019"),
160 })],
161 ),
162 "2021" => Value::test_list(
163 vec![Value::test_record(record! {
164 "name" => Value::test_string("storm"),
165 "lang" => Value::test_string("rs"),
166 "year" => Value::test_string("2021"),
167 })],
168 ),
169 }),
170 })),
171 },
172 Example {
173 description: "Group items by multiple columns' values",
174 example: r#"[
175 [name, lang, year];
176 [andres, rb, "2019"],
177 [jt, rs, "2019"],
178 [storm, rs, "2021"]
179 ]
180 | group-by lang year --to-table"#,
181 result: Some(Value::test_list(vec![
182 Value::test_record(record! {
183 "lang" => Value::test_string("rb"),
184 "year" => Value::test_string("2019"),
185 "items" => Value::test_list(vec![
186 Value::test_record(record! {
187 "name" => Value::test_string("andres"),
188 "lang" => Value::test_string("rb"),
189 "year" => Value::test_string("2019"),
190 })
191 ]),
192 }),
193 Value::test_record(record! {
194 "lang" => Value::test_string("rs"),
195 "year" => Value::test_string("2019"),
196 "items" => Value::test_list(vec![
197 Value::test_record(record! {
198 "name" => Value::test_string("jt"),
199 "lang" => Value::test_string("rs"),
200 "year" => Value::test_string("2019"),
201 })
202 ]),
203 }),
204 Value::test_record(record! {
205 "lang" => Value::test_string("rs"),
206 "year" => Value::test_string("2021"),
207 "items" => Value::test_list(vec![
208 Value::test_record(record! {
209 "name" => Value::test_string("storm"),
210 "lang" => Value::test_string("rs"),
211 "year" => Value::test_string("2021"),
212 })
213 ]),
214 }),
215 ])),
216 },
217 ]
218 }
219}
220
221pub fn group_by(
222 engine_state: &EngineState,
223 stack: &mut Stack,
224 call: &Call,
225 input: PipelineData,
226) -> Result<PipelineData, ShellError> {
227 let head = call.head;
228 let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
229 let to_table = call.has_flag(engine_state, stack, "to-table")?;
230 let config = engine_state.get_config();
231
232 let values: Vec<Value> = input.into_iter().collect();
233 if values.is_empty() {
234 let val = if to_table {
235 Value::list(Vec::new(), head)
236 } else {
237 Value::record(Record::new(), head)
238 };
239 return Ok(val.into_pipeline_data());
240 }
241
242 let grouped = match &groupers[..] {
243 [first, rest @ ..] => {
244 let mut grouped = Grouped::new(first.as_ref(), values, config, engine_state, stack)?;
245 for grouper in rest {
246 grouped.subgroup(grouper.as_ref(), config, engine_state, stack)?;
247 }
248 grouped
249 }
250 [] => Grouped::empty(values, config),
251 };
252
253 let value = if to_table {
254 let column_names = groupers_to_column_names(&groupers)?;
255 grouped.into_table(&column_names, head)
256 } else {
257 grouped.into_record(head)
258 };
259
260 Ok(value.into_pipeline_data())
261}
262
263fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
264 if groupers.is_empty() {
265 return Ok(vec!["group".into(), "items".into()]);
266 }
267
268 let mut closure_idx: usize = 0;
269 let grouper_names = groupers.iter().map(|grouper| {
270 grouper.as_ref().map(|item| match item {
271 Grouper::CellPath { val } => val.to_column_name(),
272 Grouper::Closure { .. } => {
273 closure_idx += 1;
274 format!("closure_{}", closure_idx - 1)
275 }
276 })
277 });
278
279 let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
280
281 for name in grouper_names {
282 if name.item == "items" {
283 return Err(ShellError::GenericError {
284 error: "grouper arguments can't be named `items`".into(),
285 msg: "here".into(),
286 span: Some(name.span),
287 help: Some("instead of a cell-path, try using a closure: { get items }".into()),
288 inner: vec![],
289 });
290 }
291
292 if let Some(conflicting_name) = name_set
293 .iter()
294 .find(|elem| elem.as_ref().item == name.item.as_str())
295 {
296 return Err(ShellError::GenericError {
297 error: "grouper arguments result in colliding column names".into(),
298 msg: "duplicate column names".into(),
299 span: Some(conflicting_name.span.append(name.span)),
300 help: Some(
301 "instead of a cell-path, try using a closure or renaming columns".into(),
302 ),
303 inner: vec![ShellError::ColumnDefinedTwice {
304 col_name: conflicting_name.item.clone(),
305 first_use: conflicting_name.span,
306 second_use: name.span,
307 }],
308 });
309 }
310
311 name_set.push(name);
312 }
313
314 let column_names: Vec<String> = name_set
315 .into_iter()
316 .map(|elem| elem.item)
317 .chain(["items".into()])
318 .collect();
319 Ok(column_names)
320}
321
322fn group_cell_path(
323 column_name: &CellPath,
324 values: Vec<Value>,
325 config: &nu_protocol::Config,
326) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
327 let mut groups = IndexMap::<_, Vec<_>>::new();
328
329 for value in values.into_iter() {
330 let key = value.follow_cell_path(&column_name.members)?;
331
332 if key.is_nothing() {
333 continue; }
335
336 let key = key.to_abbreviated_string(config);
337 groups.entry(key).or_default().push(value);
338 }
339
340 Ok(groups)
341}
342
343fn group_closure(
344 values: Vec<Value>,
345 span: Span,
346 closure: Closure,
347 engine_state: &EngineState,
348 stack: &mut Stack,
349) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
350 let mut groups = IndexMap::<_, Vec<_>>::new();
351 let mut closure = ClosureEval::new(engine_state, stack, closure);
352 let config = engine_state.get_config();
353
354 for value in values {
355 let key = closure
356 .run_with_value(value.clone())?
357 .into_value(span)?
358 .to_abbreviated_string(config);
359
360 groups.entry(key).or_default().push(value);
361 }
362
363 Ok(groups)
364}
365
/// A single `grouper` argument of `group-by`.
enum Grouper {
    /// Group by the value found at this cell path in each input value.
    CellPath { val: CellPath },
    /// Group by the result of running this closure on each input value.
    Closure { val: Box<Closure> },
}
370
371impl FromValue for Grouper {
372 fn from_value(v: Value) -> Result<Self, ShellError> {
373 match v {
374 Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
375 Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
376 _ => Err(ShellError::TypeMismatch {
377 err_message: "unsupported grouper type".to_string(),
378 span: v.span(),
379 }),
380 }
381 }
382}
383
/// Result of grouping: a tree with one level per applied grouper.
struct Grouped {
    /// Root of the grouping tree (see `Tree`).
    groups: Tree,
}
387
/// One level of a `Grouped` tree, keyed by group name in insertion order.
enum Tree {
    /// Innermost level: each group key maps to the values in that group.
    Leaf(IndexMap<String, Vec<Value>>),
    /// Intermediate level: each group key maps to a further-subdivided group.
    Branch(IndexMap<String, Grouped>),
}
392
393impl Grouped {
394 fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
395 let mut groups = IndexMap::<_, Vec<_>>::new();
396
397 for value in values.into_iter() {
398 let key = value.to_abbreviated_string(config);
399 groups.entry(key).or_default().push(value);
400 }
401
402 Self {
403 groups: Tree::Leaf(groups),
404 }
405 }
406
407 fn new(
408 grouper: Spanned<&Grouper>,
409 values: Vec<Value>,
410 config: &nu_protocol::Config,
411 engine_state: &EngineState,
412 stack: &mut Stack,
413 ) -> Result<Self, ShellError> {
414 let groups = match grouper.item {
415 Grouper::CellPath { val } => group_cell_path(val, values, config)?,
416 Grouper::Closure { val } => group_closure(
417 values,
418 grouper.span,
419 Closure::clone(val),
420 engine_state,
421 stack,
422 )?,
423 };
424 Ok(Self {
425 groups: Tree::Leaf(groups),
426 })
427 }
428
429 fn subgroup(
430 &mut self,
431 grouper: Spanned<&Grouper>,
432 config: &nu_protocol::Config,
433 engine_state: &EngineState,
434 stack: &mut Stack,
435 ) -> Result<(), ShellError> {
436 let groups = match &mut self.groups {
437 Tree::Leaf(groups) => std::mem::take(groups)
438 .into_iter()
439 .map(|(key, values)| -> Result<_, ShellError> {
440 let leaf = Self::new(grouper, values, config, engine_state, stack)?;
441 Ok((key, leaf))
442 })
443 .collect::<Result<IndexMap<_, _>, ShellError>>()?,
444 Tree::Branch(nested_groups) => {
445 let mut nested_groups = std::mem::take(nested_groups);
446 for v in nested_groups.values_mut() {
447 v.subgroup(grouper, config, engine_state, stack)?;
448 }
449 nested_groups
450 }
451 };
452 self.groups = Tree::Branch(groups);
453 Ok(())
454 }
455
456 fn into_table(self, column_names: &[String], head: Span) -> Value {
457 self._into_table(head)
458 .into_iter()
459 .map(|row| {
460 row.into_iter()
461 .rev()
462 .zip(column_names)
463 .map(|(val, key)| (key.clone(), val))
464 .collect::<Record>()
465 .into_value(head)
466 })
467 .collect::<Vec<_>>()
468 .into_value(head)
469 }
470
471 fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
472 match self.groups {
473 Tree::Leaf(leaf) => leaf
474 .into_iter()
475 .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
476 .collect::<Vec<Vec<Value>>>(),
477 Tree::Branch(branch) => branch
478 .into_iter()
479 .flat_map(|(group, items)| {
480 let mut inner = items._into_table(head);
481 for row in &mut inner {
482 row.push(group.clone().into_value(head));
483 }
484 inner
485 })
486 .collect(),
487 }
488 }
489
490 fn into_record(self, head: Span) -> Value {
491 match self.groups {
492 Tree::Leaf(leaf) => Value::record(
493 leaf.into_iter()
494 .map(|(k, v)| (k, v.into_value(head)))
495 .collect(),
496 head,
497 ),
498 Tree::Branch(branch) => {
499 let values = branch
500 .into_iter()
501 .map(|(k, v)| (k, v.into_record(head)))
502 .collect();
503 Value::record(values, head)
504 }
505 }
506 }
507}
508
#[cfg(test)]
mod test {
    use super::GroupBy;

    /// Runs every example declared by `GroupBy::examples` through the crate's
    /// shared example checker.
    #[test]
    fn test_examples() {
        // Called by full path: importing it at module level would clash with
        // this function's own name.
        crate::test_examples(GroupBy);
    }
}