use indexmap::IndexMap;
use nu_engine::{ClosureEval, command_prelude::*};
use nu_protocol::{FromValue, IntoValue, engine::Closure};
#[derive(Clone)]
pub struct GroupBy;
impl Command for GroupBy {
fn name(&self) -> &str {
"group-by"
}
fn signature(&self) -> Signature {
Signature::build("group-by")
.input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)])
.switch(
"to-table",
"Return a table with \"groups\" and \"items\" columns",
None,
)
.rest(
"grouper",
SyntaxShape::OneOf(vec![
SyntaxShape::CellPath,
SyntaxShape::Closure(None),
SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
]),
"The path to the column to group on.",
)
.category(Category::Filters)
}
fn description(&self) -> &str {
"Splits a list or table into groups, and returns a record containing those groups."
}
fn extra_description(&self) -> &str {
r#"the group-by command makes some assumptions:
- if the input data is not a string, the grouper will convert the key to string but the values will remain in their original format. e.g. with bools, "true" and true would be in the same group (see example).
- datetime is formatted based on your configuration setting. use `format date` to change the format.
- filesize is formatted based on your configuration setting. use `format filesize` to change the format.
- some nushell values are not supported, such as closures."#
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
group_by(engine_state, stack, call, input)
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group items by the \"type\" column's values",
example: r#"ls | group-by type"#,
result: None,
},
Example {
description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column",
example: r#"open cool.json | group-by foo?"#,
result: None,
},
Example {
description: "Group using a block which is evaluated against each input value",
example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }",
result: Some(Value::test_record(record! {
"txt" => Value::test_list(vec![
Value::test_string("foo.txt"),
Value::test_string("baz.txt"),
]),
"csv" => Value::test_list(vec![Value::test_string("bar.csv")]),
})),
},
Example {
description: "You can also group by raw values by leaving out the argument",
example: "['1' '3' '1' '3' '2' '1' '1'] | group-by",
result: Some(Value::test_record(record! {
"1" => Value::test_list(vec![
Value::test_string("1"),
Value::test_string("1"),
Value::test_string("1"),
Value::test_string("1"),
]),
"3" => Value::test_list(vec![
Value::test_string("3"),
Value::test_string("3"),
]),
"2" => Value::test_list(vec![Value::test_string("2")]),
})),
},
Example {
description: "You can also output a table instead of a record",
example: "['1' '3' '1' '3' '2' '1' '1'] | group-by --to-table",
result: Some(Value::test_list(vec![
Value::test_record(record! {
"group" => Value::test_string("1"),
"items" => Value::test_list(vec![
Value::test_string("1"),
Value::test_string("1"),
Value::test_string("1"),
Value::test_string("1"),
]),
}),
Value::test_record(record! {
"group" => Value::test_string("3"),
"items" => Value::test_list(vec![
Value::test_string("3"),
Value::test_string("3"),
]),
}),
Value::test_record(record! {
"group" => Value::test_string("2"),
"items" => Value::test_list(vec![Value::test_string("2")]),
}),
])),
},
Example {
description: "Group bools, whether they are strings or actual bools",
example: r#"[true "true" false "false"] | group-by"#,
result: Some(Value::test_record(record! {
"true" => Value::test_list(vec![
Value::test_bool(true),
Value::test_string("true"),
]),
"false" => Value::test_list(vec![
Value::test_bool(false),
Value::test_string("false"),
]),
})),
},
Example {
description: "Group items by multiple columns' values",
example: r#"[
[name, lang, year];
[andres, rb, "2019"],
[jt, rs, "2019"],
[storm, rs, "2021"]
]
| group-by lang year"#,
result: Some(Value::test_record(record! {
"rb" => Value::test_record(record! {
"2019" => Value::test_list(
vec![Value::test_record(record! {
"name" => Value::test_string("andres"),
"lang" => Value::test_string("rb"),
"year" => Value::test_string("2019"),
})],
),
}),
"rs" => Value::test_record(record! {
"2019" => Value::test_list(
vec![Value::test_record(record! {
"name" => Value::test_string("jt"),
"lang" => Value::test_string("rs"),
"year" => Value::test_string("2019"),
})],
),
"2021" => Value::test_list(
vec![Value::test_record(record! {
"name" => Value::test_string("storm"),
"lang" => Value::test_string("rs"),
"year" => Value::test_string("2021"),
})],
),
}),
})),
},
Example {
description: "Group items by multiple columns' values",
example: r#"[
[name, lang, year];
[andres, rb, "2019"],
[jt, rs, "2019"],
[storm, rs, "2021"]
]
| group-by lang year --to-table"#,
result: Some(Value::test_list(vec![
Value::test_record(record! {
"lang" => Value::test_string("rb"),
"year" => Value::test_string("2019"),
"items" => Value::test_list(vec![
Value::test_record(record! {
"name" => Value::test_string("andres"),
"lang" => Value::test_string("rb"),
"year" => Value::test_string("2019"),
})
]),
}),
Value::test_record(record! {
"lang" => Value::test_string("rs"),
"year" => Value::test_string("2019"),
"items" => Value::test_list(vec![
Value::test_record(record! {
"name" => Value::test_string("jt"),
"lang" => Value::test_string("rs"),
"year" => Value::test_string("2019"),
})
]),
}),
Value::test_record(record! {
"lang" => Value::test_string("rs"),
"year" => Value::test_string("2021"),
"items" => Value::test_list(vec![
Value::test_record(record! {
"name" => Value::test_string("storm"),
"lang" => Value::test_string("rs"),
"year" => Value::test_string("2021"),
})
]),
}),
])),
},
]
}
}
pub fn group_by(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let head = call.head;
let groupers: Vec<Spanned<Grouper>> = call.rest(engine_state, stack, 0)?;
let to_table = call.has_flag(engine_state, stack, "to-table")?;
let config = engine_state.get_config();
let values: Vec<Value> = input.into_iter().collect();
if values.is_empty() {
let val = if to_table {
Value::list(Vec::new(), head)
} else {
Value::record(Record::new(), head)
};
return Ok(val.into_pipeline_data());
}
let grouped = match &groupers[..] {
[first, rest @ ..] => {
let mut grouped = Grouped::new(first.as_ref(), values, config, engine_state, stack)?;
for grouper in rest {
grouped.subgroup(grouper.as_ref(), config, engine_state, stack)?;
}
grouped
}
[] => Grouped::empty(values, config),
};
let value = if to_table {
let column_names = groupers_to_column_names(&groupers)?;
grouped.into_table(&column_names, head)
} else {
grouped.into_record(head)
};
Ok(value.into_pipeline_data())
}
fn groupers_to_column_names(groupers: &[Spanned<Grouper>]) -> Result<Vec<String>, ShellError> {
if groupers.is_empty() {
return Ok(vec!["group".into(), "items".into()]);
}
let mut closure_idx: usize = 0;
let grouper_names = groupers.iter().map(|grouper| {
grouper.as_ref().map(|item| match item {
Grouper::CellPath { val } => val.to_column_name(),
Grouper::Closure { .. } => {
closure_idx += 1;
format!("closure_{}", closure_idx - 1)
}
})
});
let mut name_set: Vec<Spanned<String>> = Vec::with_capacity(grouper_names.len());
for name in grouper_names {
if name.item == "items" {
return Err(ShellError::GenericError {
error: "grouper arguments can't be named `items`".into(),
msg: "here".into(),
span: Some(name.span),
help: Some("instead of a cell-path, try using a closure: { get items }".into()),
inner: vec![],
});
}
if let Some(conflicting_name) = name_set
.iter()
.find(|elem| elem.as_ref().item == name.item.as_str())
{
return Err(ShellError::GenericError {
error: "grouper arguments result in colliding column names".into(),
msg: "duplicate column names".into(),
span: Some(conflicting_name.span.append(name.span)),
help: Some(
"instead of a cell-path, try using a closure or renaming columns".into(),
),
inner: vec![ShellError::ColumnDefinedTwice {
col_name: conflicting_name.item.clone(),
first_use: conflicting_name.span,
second_use: name.span,
}],
});
}
name_set.push(name);
}
let column_names: Vec<String> = name_set
.into_iter()
.map(|elem| elem.item)
.chain(["items".into()])
.collect();
Ok(column_names)
}
fn group_cell_path(
column_name: &CellPath,
values: Vec<Value>,
config: &nu_protocol::Config,
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups = IndexMap::<_, Vec<_>>::new();
for value in values.into_iter() {
let key = value.follow_cell_path(&column_name.members)?;
if key.is_nothing() {
continue; }
let key = key.to_abbreviated_string(config);
groups.entry(key).or_default().push(value);
}
Ok(groups)
}
fn group_closure(
values: Vec<Value>,
span: Span,
closure: Closure,
engine_state: &EngineState,
stack: &mut Stack,
) -> Result<IndexMap<String, Vec<Value>>, ShellError> {
let mut groups = IndexMap::<_, Vec<_>>::new();
let mut closure = ClosureEval::new(engine_state, stack, closure);
let config = engine_state.get_config();
for value in values {
let key = closure
.run_with_value(value.clone())?
.into_value(span)?
.to_abbreviated_string(config);
groups.entry(key).or_default().push(value);
}
Ok(groups)
}
enum Grouper {
CellPath { val: CellPath },
Closure { val: Box<Closure> },
}
impl FromValue for Grouper {
fn from_value(v: Value) -> Result<Self, ShellError> {
match v {
Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }),
Value::Closure { val, .. } => Ok(Grouper::Closure { val }),
_ => Err(ShellError::TypeMismatch {
err_message: "unsupported grouper type".to_string(),
span: v.span(),
}),
}
}
}
struct Grouped {
groups: Tree,
}
enum Tree {
Leaf(IndexMap<String, Vec<Value>>),
Branch(IndexMap<String, Grouped>),
}
impl Grouped {
fn empty(values: Vec<Value>, config: &nu_protocol::Config) -> Self {
let mut groups = IndexMap::<_, Vec<_>>::new();
for value in values.into_iter() {
let key = value.to_abbreviated_string(config);
groups.entry(key).or_default().push(value);
}
Self {
groups: Tree::Leaf(groups),
}
}
fn new(
grouper: Spanned<&Grouper>,
values: Vec<Value>,
config: &nu_protocol::Config,
engine_state: &EngineState,
stack: &mut Stack,
) -> Result<Self, ShellError> {
let groups = match grouper.item {
Grouper::CellPath { val } => group_cell_path(val, values, config)?,
Grouper::Closure { val } => group_closure(
values,
grouper.span,
Closure::clone(val),
engine_state,
stack,
)?,
};
Ok(Self {
groups: Tree::Leaf(groups),
})
}
fn subgroup(
&mut self,
grouper: Spanned<&Grouper>,
config: &nu_protocol::Config,
engine_state: &EngineState,
stack: &mut Stack,
) -> Result<(), ShellError> {
let groups = match &mut self.groups {
Tree::Leaf(groups) => std::mem::take(groups)
.into_iter()
.map(|(key, values)| -> Result<_, ShellError> {
let leaf = Self::new(grouper, values, config, engine_state, stack)?;
Ok((key, leaf))
})
.collect::<Result<IndexMap<_, _>, ShellError>>()?,
Tree::Branch(nested_groups) => {
let mut nested_groups = std::mem::take(nested_groups);
for v in nested_groups.values_mut() {
v.subgroup(grouper, config, engine_state, stack)?;
}
nested_groups
}
};
self.groups = Tree::Branch(groups);
Ok(())
}
fn into_table(self, column_names: &[String], head: Span) -> Value {
self._into_table(head)
.into_iter()
.map(|row| {
row.into_iter()
.rev()
.zip(column_names)
.map(|(val, key)| (key.clone(), val))
.collect::<Record>()
.into_value(head)
})
.collect::<Vec<_>>()
.into_value(head)
}
fn _into_table(self, head: Span) -> Vec<Vec<Value>> {
match self.groups {
Tree::Leaf(leaf) => leaf
.into_iter()
.map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))])
.collect::<Vec<Vec<Value>>>(),
Tree::Branch(branch) => branch
.into_iter()
.flat_map(|(group, items)| {
let mut inner = items._into_table(head);
for row in &mut inner {
row.push(group.clone().into_value(head));
}
inner
})
.collect(),
}
}
fn into_record(self, head: Span) -> Value {
match self.groups {
Tree::Leaf(leaf) => Value::record(
leaf.into_iter()
.map(|(k, v)| (k, v.into_value(head)))
.collect(),
head,
),
Tree::Branch(branch) => {
let values = branch
.into_iter()
.map(|(k, v)| (k, v.into_record(head)))
.collect();
Value::record(values, head)
}
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_examples() {
use crate::test_examples;
test_examples(GroupBy {})
}
}