Skip to main content

nu_command/filters/
uniq.rs

1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::PipelineMetadata;
4use nu_utils::IgnoreCaseExt;
5use std::collections::{HashMap, hash_map::IntoIter};
6
/// The `uniq` filter command, which returns the distinct values in its input.
#[derive(Clone)]
pub struct Uniq;
9
10impl Command for Uniq {
11    fn name(&self) -> &str {
12        "uniq"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build("uniq")
17            .input_output_types(vec![(
18                Type::List(Box::new(Type::Any)),
19                Type::List(Box::new(Type::Any)),
20            )])
21            .switch(
22                "count",
23                "Return a table containing the distinct input values together with their counts.",
24                Some('c'),
25            )
26            .switch(
27                "repeated",
28                "Return the input values that occur more than once.",
29                Some('d'),
30            )
31            .switch(
32                "ignore-case",
33                "Compare input values case-insensitively.",
34                Some('i'),
35            )
36            .switch(
37                "unique",
38                "Return the input values that occur once only.",
39                Some('u'),
40            )
41            .category(Category::Filters)
42    }
43
44    fn description(&self) -> &str {
45        "Return the distinct values in the input."
46    }
47
48    fn search_terms(&self) -> Vec<&str> {
49        vec!["distinct", "deduplicate", "count"]
50    }
51
52    fn run(
53        &self,
54        engine_state: &EngineState,
55        stack: &mut Stack,
56        call: &Call,
57        mut input: PipelineData,
58    ) -> Result<PipelineData, ShellError> {
59        let head = call.head;
60        let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
61            item_mapper(ms.item, ms.flag_ignore_case, ms.index, head)
62        });
63
64        let metadata = input.take_metadata();
65        uniq(
66            engine_state,
67            stack,
68            call,
69            input.into_iter().collect(),
70            mapper,
71            metadata,
72        )
73    }
74
75    fn examples(&self) -> Vec<Example<'_>> {
76        vec![
77            Example {
78                description: "Return the distinct values of a list/table (remove duplicates so that each value occurs once only).",
79                example: "[2 3 3 4] | uniq",
80                result: Some(Value::list(
81                    vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
82                    Span::test_data(),
83                )),
84            },
85            Example {
86                description: "Return the input values that occur more than once.",
87                example: "[1 2 2] | uniq -d",
88                result: Some(Value::list(vec![Value::test_int(2)], Span::test_data())),
89            },
90            Example {
91                description: "Return the input values that occur once only.",
92                example: "[1 2 2] | uniq --unique",
93                result: Some(Value::list(vec![Value::test_int(1)], Span::test_data())),
94            },
95            Example {
96                description: "Ignore differences in case when comparing input values.",
97                example: "['hello' 'goodbye' 'Hello'] | uniq --ignore-case",
98                result: Some(Value::test_list(vec![
99                    Value::test_string("hello"),
100                    Value::test_string("goodbye"),
101                ])),
102            },
103            Example {
104                description: "Return a table containing the distinct input values together with their counts.",
105                example: "[1 2 2] | uniq --count",
106                result: Some(Value::test_list(vec![
107                    Value::test_record(record! {
108                        "value" => Value::test_int(1),
109                        "count" => Value::test_int(1),
110                    }),
111                    Value::test_record(record! {
112                        "value" => Value::test_int(2),
113                        "count" => Value::test_int(2),
114                    }),
115                ])),
116            },
117        ]
118    }
119}
120
/// Per-item arguments that [`uniq`] passes to its mapper closure for every input value.
pub struct ItemMapperState {
    /// The input value being mapped.
    pub item: Value,
    /// Whether the `--ignore-case` flag was set on the call.
    pub flag_ignore_case: bool,
    /// Zero-based position of `item` in the input.
    pub index: usize,
    /// Span of the command call (`call.head`).
    pub head: Span,
}
127
/// Default mapper for `uniq`: wraps `item` in a [`ValueCounter`] that uses the
/// item itself as the value to compare.
fn item_mapper(item: Value, flag_ignore_case: bool, index: usize, head: Span) -> ValueCounter {
    ValueCounter::new(item, flag_ignore_case, index, head)
}
131
/// An input value together with the bookkeeping `uniq` needs to deduplicate it.
pub struct ValueCounter {
    /// The value that ends up in the command output.
    val: Value,
    /// The value the deduplication key is generated from (case-folded when
    /// `--ignore-case` is set).
    val_to_compare: Value,
    /// Number of times an equal key has been seen; starts at 1.
    count: i64,
    /// Position of the first occurrence in the input; used to order the output.
    index: usize,
}
138
139impl PartialEq<Self> for ValueCounter {
140    fn eq(&self, other: &Self) -> bool {
141        self.val == other.val
142    }
143}
144
145impl ValueCounter {
146    fn new(val: Value, flag_ignore_case: bool, index: usize, head: Span) -> Self {
147        Self::new_vals_to_compare(val.clone(), flag_ignore_case, val, index, head)
148    }
149    pub fn new_vals_to_compare(
150        val: Value,
151        flag_ignore_case: bool,
152        vals_to_compare: Value,
153        index: usize,
154        head: Span,
155    ) -> Self {
156        ValueCounter {
157            val,
158            val_to_compare: if flag_ignore_case {
159                clone_to_folded_case(&vals_to_compare.with_span(head))
160            } else {
161                vals_to_compare.with_span(head)
162            },
163            count: 1,
164            index,
165        }
166    }
167}
168
169fn clone_to_folded_case(value: &Value) -> Value {
170    let span = value.span();
171    match value {
172        Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span),
173        Value::List { vals: vec, .. } => {
174            Value::list(vec.iter().map(clone_to_folded_case).collect(), span)
175        }
176        Value::Record { val: record, .. } => Value::record(
177            record
178                .iter()
179                .map(|(k, v)| (k.to_owned(), clone_to_folded_case(v)))
180                .collect(),
181            span,
182        ),
183        other => other.clone(),
184    }
185}
186
187fn sort_attributes(val: Value) -> Value {
188    let span = val.span();
189    match val {
190        Value::Record { val, .. } => {
191            // TODO: sort inplace
192            let sorted = val
193                .into_owned()
194                .into_iter()
195                .sorted_by(|a, b| a.0.cmp(&b.0))
196                .collect_vec();
197
198            let record = sorted
199                .into_iter()
200                .map(|(k, v)| (k, sort_attributes(v)))
201                .collect();
202
203            Value::record(record, span)
204        }
205        Value::List { vals, .. } => {
206            Value::list(vals.into_iter().map(sort_attributes).collect_vec(), span)
207        }
208        other => other,
209    }
210}
211
212fn generate_key(
213    engine_state: &EngineState,
214    item: &ValueCounter,
215    head: Span,
216) -> Result<String, ShellError> {
217    let value = sort_attributes(item.val_to_compare.clone()); //otherwise, keys could be different for Records
218    nuon::to_nuon(
219        engine_state,
220        &value,
221        nuon::ToNuonConfig::default().span(Some(head)),
222    )
223}
224
225fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Vec<Value> {
226    uniq_values
227        .into_iter()
228        .map(|item| {
229            Value::record(
230                record! {
231                    "value" => item.val,
232                    "count" => Value::int(item.count, head),
233                },
234                head,
235            )
236        })
237        .collect()
238}
239
240pub fn uniq(
241    engine_state: &EngineState,
242    stack: &mut Stack,
243    call: &Call,
244    input: Vec<Value>,
245    item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
246    metadata: Option<PipelineMetadata>,
247) -> Result<PipelineData, ShellError> {
248    let head = call.head;
249    let flag_show_count = call.has_flag(engine_state, stack, "count")?;
250    let flag_show_repeated = call.has_flag(engine_state, stack, "repeated")?;
251    let flag_ignore_case = call.has_flag(engine_state, stack, "ignore-case")?;
252    let flag_only_uniques = call.has_flag(engine_state, stack, "unique")?;
253
254    // for uniq-by command
255    let flag_keep_last = call.has_flag(engine_state, stack, "keep-last")?;
256
257    let signals = engine_state.signals().clone();
258    let uniq_values = input
259        .into_iter()
260        .enumerate()
261        .map_while(|(index, item)| {
262            if signals.interrupted() {
263                return None;
264            }
265            Some(item_mapper(ItemMapperState {
266                item,
267                flag_ignore_case,
268                index,
269                head,
270            }))
271        })
272        .try_fold(
273            HashMap::<String, ValueCounter>::new(),
274            |mut counter, item| {
275                let key = generate_key(engine_state, &item, head);
276
277                match key {
278                    Ok(key) => {
279                        match counter.get_mut(&key) {
280                            Some(x) => {
281                                if flag_keep_last {
282                                    x.val = item.val;
283                                }
284                                x.count += 1;
285                            }
286                            None => {
287                                counter.insert(key, item);
288                            }
289                        };
290                        Ok(counter)
291                    }
292                    Err(err) => Err(err),
293                }
294            },
295        );
296
297    let mut uniq_values: HashMap<String, ValueCounter> = uniq_values?;
298
299    if flag_show_repeated {
300        uniq_values.retain(|_v, value_count_pair| value_count_pair.count > 1);
301    }
302
303    if flag_only_uniques {
304        uniq_values.retain(|_v, value_count_pair| value_count_pair.count == 1);
305    }
306
307    let uniq_values = sort(uniq_values.into_iter());
308
309    let result = if flag_show_count {
310        generate_results_with_count(head, uniq_values)
311    } else {
312        uniq_values.into_iter().map(|v| v.val).collect()
313    };
314
315    Ok(Value::list(result, head).into_pipeline_data_with_metadata(metadata))
316}
317
318fn sort(iter: IntoIter<String, ValueCounter>) -> Vec<ValueCounter> {
319    iter.map(|item| item.1)
320        .sorted_by(|a, b| a.index.cmp(&b.index))
321        .collect()
322}
323
#[cfg(test)]
mod test {
    use super::*;

    /// Hands `Uniq` to the `nu_test_support` example runner.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(Uniq)
    }
}