nu_command/filters/
uniq.rs

1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::PipelineMetadata;
4use nu_utils::IgnoreCaseExt;
5use std::collections::{HashMap, hash_map::IntoIter};
6
/// The `uniq` filter command, which returns the distinct values in its input.
#[derive(Clone)]
pub struct Uniq;
9
10impl Command for Uniq {
11    fn name(&self) -> &str {
12        "uniq"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build("uniq")
17            .input_output_types(vec![(
18                Type::List(Box::new(Type::Any)),
19                Type::List(Box::new(Type::Any)),
20            )])
21            .switch(
22                "count",
23                "Return a table containing the distinct input values together with their counts",
24                Some('c'),
25            )
26            .switch(
27                "repeated",
28                "Return the input values that occur more than once",
29                Some('d'),
30            )
31            .switch(
32                "ignore-case",
33                "Compare input values case-insensitively",
34                Some('i'),
35            )
36            .switch(
37                "unique",
38                "Return the input values that occur once only",
39                Some('u'),
40            )
41            .category(Category::Filters)
42    }
43
44    fn description(&self) -> &str {
45        "Return the distinct values in the input."
46    }
47
48    fn search_terms(&self) -> Vec<&str> {
49        vec!["distinct", "deduplicate", "count"]
50    }
51
52    fn run(
53        &self,
54        engine_state: &EngineState,
55        stack: &mut Stack,
56        call: &Call,
57        input: PipelineData,
58    ) -> Result<PipelineData, ShellError> {
59        let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
60            item_mapper(ms.item, ms.flag_ignore_case, ms.index)
61        });
62
63        let metadata = input.metadata();
64        uniq(
65            engine_state,
66            stack,
67            call,
68            input.into_iter().collect(),
69            mapper,
70            metadata,
71        )
72    }
73
74    fn examples(&self) -> Vec<Example<'_>> {
75        vec![
76            Example {
77                description: "Return the distinct values of a list/table (remove duplicates so that each value occurs once only)",
78                example: "[2 3 3 4] | uniq",
79                result: Some(Value::list(
80                    vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
81                    Span::test_data(),
82                )),
83            },
84            Example {
85                description: "Return the input values that occur more than once",
86                example: "[1 2 2] | uniq -d",
87                result: Some(Value::list(vec![Value::test_int(2)], Span::test_data())),
88            },
89            Example {
90                description: "Return the input values that occur once only",
91                example: "[1 2 2] | uniq --unique",
92                result: Some(Value::list(vec![Value::test_int(1)], Span::test_data())),
93            },
94            Example {
95                description: "Ignore differences in case when comparing input values",
96                example: "['hello' 'goodbye' 'Hello'] | uniq --ignore-case",
97                result: Some(Value::test_list(vec![
98                    Value::test_string("hello"),
99                    Value::test_string("goodbye"),
100                ])),
101            },
102            Example {
103                description: "Return a table containing the distinct input values together with their counts",
104                example: "[1 2 2] | uniq --count",
105                result: Some(Value::test_list(vec![
106                    Value::test_record(record! {
107                        "value" => Value::test_int(1),
108                        "count" => Value::test_int(1),
109                    }),
110                    Value::test_record(record! {
111                        "value" => Value::test_int(2),
112                        "count" => Value::test_int(2),
113                    }),
114                ])),
115            },
116        ]
117    }
118}
119
/// Arguments passed to the item-mapper callback of [`uniq`] for a single input element.
pub struct ItemMapperState {
    /// The input value being mapped.
    pub item: Value,
    /// Whether the `ignore-case` flag was given on the call.
    pub flag_ignore_case: bool,
    /// Zero-based position of `item` in the input.
    pub index: usize,
}
125
/// Default mapper used by the `uniq` command: wraps `item` in a [`ValueCounter`]
/// that is compared by the item itself.
fn item_mapper(item: Value, flag_ignore_case: bool, index: usize) -> ValueCounter {
    ValueCounter::new(item, flag_ignore_case, index)
}
129
/// A distinct input value together with its bookkeeping data.
pub struct ValueCounter {
    // The value as it appeared in the input; this is what ends up in the output.
    val: Value,
    // The value the de-duplication key is generated from (case-folded when
    // case is ignored).
    val_to_compare: Value,
    // Number of occurrences seen so far; starts at 1.
    count: i64,
    // Input position of the first occurrence; used to order the output.
    index: usize,
}
136
137impl PartialEq<Self> for ValueCounter {
138    fn eq(&self, other: &Self) -> bool {
139        self.val == other.val
140    }
141}
142
143impl ValueCounter {
144    fn new(val: Value, flag_ignore_case: bool, index: usize) -> Self {
145        Self::new_vals_to_compare(val.clone(), flag_ignore_case, val, index)
146    }
147    pub fn new_vals_to_compare(
148        val: Value,
149        flag_ignore_case: bool,
150        vals_to_compare: Value,
151        index: usize,
152    ) -> Self {
153        ValueCounter {
154            val,
155            val_to_compare: if flag_ignore_case {
156                clone_to_folded_case(&vals_to_compare.with_span(Span::unknown()))
157            } else {
158                vals_to_compare.with_span(Span::unknown())
159            },
160            count: 1,
161            index,
162        }
163    }
164}
165
166fn clone_to_folded_case(value: &Value) -> Value {
167    let span = value.span();
168    match value {
169        Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span),
170        Value::List { vals: vec, .. } => {
171            Value::list(vec.iter().map(clone_to_folded_case).collect(), span)
172        }
173        Value::Record { val: record, .. } => Value::record(
174            record
175                .iter()
176                .map(|(k, v)| (k.to_owned(), clone_to_folded_case(v)))
177                .collect(),
178            span,
179        ),
180        other => other.clone(),
181    }
182}
183
184fn sort_attributes(val: Value) -> Value {
185    let span = val.span();
186    match val {
187        Value::Record { val, .. } => {
188            // TODO: sort inplace
189            let sorted = val
190                .into_owned()
191                .into_iter()
192                .sorted_by(|a, b| a.0.cmp(&b.0))
193                .collect_vec();
194
195            let record = sorted
196                .into_iter()
197                .map(|(k, v)| (k, sort_attributes(v)))
198                .collect();
199
200            Value::record(record, span)
201        }
202        Value::List { vals, .. } => {
203            Value::list(vals.into_iter().map(sort_attributes).collect_vec(), span)
204        }
205        other => other,
206    }
207}
208
209fn generate_key(engine_state: &EngineState, item: &ValueCounter) -> Result<String, ShellError> {
210    let value = sort_attributes(item.val_to_compare.clone()); //otherwise, keys could be different for Records
211    nuon::to_nuon(
212        engine_state,
213        &value,
214        nuon::ToStyle::Default,
215        Some(Span::unknown()),
216        false,
217    )
218}
219
220fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Vec<Value> {
221    uniq_values
222        .into_iter()
223        .map(|item| {
224            Value::record(
225                record! {
226                    "value" => item.val,
227                    "count" => Value::int(item.count, head),
228                },
229                head,
230            )
231        })
232        .collect()
233}
234
235pub fn uniq(
236    engine_state: &EngineState,
237    stack: &mut Stack,
238    call: &Call,
239    input: Vec<Value>,
240    item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
241    metadata: Option<PipelineMetadata>,
242) -> Result<PipelineData, ShellError> {
243    let head = call.head;
244    let flag_show_count = call.has_flag(engine_state, stack, "count")?;
245    let flag_show_repeated = call.has_flag(engine_state, stack, "repeated")?;
246    let flag_ignore_case = call.has_flag(engine_state, stack, "ignore-case")?;
247    let flag_only_uniques = call.has_flag(engine_state, stack, "unique")?;
248
249    let signals = engine_state.signals().clone();
250    let uniq_values = input
251        .into_iter()
252        .enumerate()
253        .map_while(|(index, item)| {
254            if signals.interrupted() {
255                return None;
256            }
257            Some(item_mapper(ItemMapperState {
258                item,
259                flag_ignore_case,
260                index,
261            }))
262        })
263        .try_fold(
264            HashMap::<String, ValueCounter>::new(),
265            |mut counter, item| {
266                let key = generate_key(engine_state, &item);
267
268                match key {
269                    Ok(key) => {
270                        match counter.get_mut(&key) {
271                            Some(x) => x.count += 1,
272                            None => {
273                                counter.insert(key, item);
274                            }
275                        };
276                        Ok(counter)
277                    }
278                    Err(err) => Err(err),
279                }
280            },
281        );
282
283    let mut uniq_values: HashMap<String, ValueCounter> = uniq_values?;
284
285    if flag_show_repeated {
286        uniq_values.retain(|_v, value_count_pair| value_count_pair.count > 1);
287    }
288
289    if flag_only_uniques {
290        uniq_values.retain(|_v, value_count_pair| value_count_pair.count == 1);
291    }
292
293    let uniq_values = sort(uniq_values.into_iter());
294
295    let result = if flag_show_count {
296        generate_results_with_count(head, uniq_values)
297    } else {
298        uniq_values.into_iter().map(|v| v.val).collect()
299    };
300
301    Ok(Value::list(result, head).into_pipeline_data_with_metadata(metadata))
302}
303
304fn sort(iter: IntoIter<String, ValueCounter>) -> Vec<ValueCounter> {
305    iter.map(|item| item.1)
306        .sorted_by(|a, b| a.index.cmp(&b.index))
307        .collect()
308}
309
#[cfg(test)]
mod test {
    use super::Uniq;

    /// Passes the command to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        // Called by path: importing the helper at module level would clash
        // with this function's own name.
        crate::test_examples(Uniq)
    }
}
320}