Skip to main content

nu_command/filters/
uniq.rs

1use super::utils;
2#[cfg(feature = "sqlite")]
3use crate::database::QueryPlan;
4use itertools::Itertools;
5use nu_engine::command_prelude::*;
6use nu_protocol::PipelineMetadata;
7use nu_utils::IgnoreCaseExt;
8use std::collections::{HashMap, hash_map::IntoIter};
9
10#[derive(Clone)]
11pub struct Uniq;
12
13impl Command for Uniq {
14    fn name(&self) -> &str {
15        "uniq"
16    }
17
18    fn signature(&self) -> Signature {
19        Signature::build("uniq")
20            .input_output_types(vec![(
21                Type::List(Box::new(Type::Any)),
22                Type::List(Box::new(Type::Any)),
23            )])
24            .switch(
25                "count",
26                "Return a table containing the distinct input values together with their counts.",
27                Some('c'),
28            )
29            .switch(
30                "repeated",
31                "Return the input values that occur more than once.",
32                Some('d'),
33            )
34            .switch(
35                "ignore-case",
36                "Compare input values case-insensitively.",
37                Some('i'),
38            )
39            .switch(
40                "unique",
41                "Return the input values that occur once only.",
42                Some('u'),
43            )
44            .category(Category::Filters)
45    }
46
47    fn description(&self) -> &str {
48        "Return the distinct values in the input."
49    }
50
51    fn search_terms(&self) -> Vec<&str> {
52        vec!["distinct", "deduplicate", "count"]
53    }
54
55    fn run(
56        &self,
57        engine_state: &EngineState,
58        stack: &mut Stack,
59        call: &Call,
60        mut input: PipelineData,
61    ) -> Result<PipelineData, ShellError> {
62        let head = call.head;
63
64        #[cfg(feature = "sqlite")]
65        // Pushdown optimization: bare `uniq` (no flags) via SELECT DISTINCT
66        if !call.has_flag(engine_state, stack, "count")?
67            && !call.has_flag(engine_state, stack, "repeated")?
68            && !call.has_flag(engine_state, stack, "unique")?
69            && !call.has_flag(engine_state, stack, "ignore-case")?
70            && let PipelineData::Value(Value::Custom { val, .. }, metadata) = &input
71            && let Some(plan) = QueryPlan::try_from_any(val.as_any())
72        {
73            let plan = plan.with_distinct();
74            return plan
75                .execute(call.head)
76                .map(|data| data.set_metadata(metadata.clone()));
77        }
78
79        let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
80            item_mapper(ms.item, ms.flag_ignore_case, ms.index, head)
81        });
82
83        let metadata = input.take_metadata();
84        uniq(
85            engine_state,
86            stack,
87            call,
88            input.into_iter().collect(),
89            mapper,
90            metadata,
91        )
92    }
93
94    fn examples(&self) -> Vec<Example<'_>> {
95        vec![
96            Example {
97                description: "Return the distinct values of a list/table (remove duplicates so that each value occurs once only).",
98                example: "[2 3 3 4] | uniq",
99                result: Some(Value::list(
100                    vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
101                    Span::test_data(),
102                )),
103            },
104            Example {
105                description: "Return the input values that occur more than once.",
106                example: "[1 2 2] | uniq -d",
107                result: Some(Value::list(vec![Value::test_int(2)], Span::test_data())),
108            },
109            Example {
110                description: "Return the input values that occur once only.",
111                example: "[1 2 2] | uniq --unique",
112                result: Some(Value::list(vec![Value::test_int(1)], Span::test_data())),
113            },
114            Example {
115                description: "Ignore differences in case when comparing input values.",
116                example: "['hello' 'goodbye' 'Hello'] | uniq --ignore-case",
117                result: Some(Value::test_list(vec![
118                    Value::test_string("hello"),
119                    Value::test_string("goodbye"),
120                ])),
121            },
122            Example {
123                description: "Return a table containing the distinct input values together with their counts.",
124                example: "[1 2 2] | uniq --count",
125                result: Some(Value::test_list(vec![
126                    Value::test_record(record! {
127                        "value" => Value::test_int(1),
128                        "count" => Value::test_int(1),
129                    }),
130                    Value::test_record(record! {
131                        "value" => Value::test_int(2),
132                        "count" => Value::test_int(2),
133                    }),
134                ])),
135            },
136        ]
137    }
138}
139
140pub struct ItemMapperState {
141    pub item: Value,
142    pub flag_ignore_case: bool,
143    pub index: usize,
144    pub head: Span,
145}
146
147fn item_mapper(item: Value, flag_ignore_case: bool, index: usize, head: Span) -> ValueCounter {
148    ValueCounter::new(item, flag_ignore_case, index, head)
149}
150
151pub struct ValueCounter {
152    val: Value,
153    val_to_compare: Value,
154    count: i64,
155    index: usize,
156}
157
158impl PartialEq<Self> for ValueCounter {
159    fn eq(&self, other: &Self) -> bool {
160        self.val == other.val
161    }
162}
163
164impl ValueCounter {
165    fn new(val: Value, flag_ignore_case: bool, index: usize, head: Span) -> Self {
166        Self::new_vals_to_compare(val.clone(), flag_ignore_case, val, index, head)
167    }
168    pub fn new_vals_to_compare(
169        val: Value,
170        flag_ignore_case: bool,
171        vals_to_compare: Value,
172        index: usize,
173        head: Span,
174    ) -> Self {
175        ValueCounter {
176            val,
177            val_to_compare: if flag_ignore_case {
178                clone_to_folded_case(&vals_to_compare.with_span(head))
179            } else {
180                vals_to_compare.with_span(head)
181            },
182            count: 1,
183            index,
184        }
185    }
186}
187
188fn clone_to_folded_case(value: &Value) -> Value {
189    let span = value.span();
190    match value {
191        Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span),
192        Value::List { vals: vec, .. } => {
193            Value::list(vec.iter().map(clone_to_folded_case).collect(), span)
194        }
195        Value::Record { val: record, .. } => Value::record(
196            record
197                .iter()
198                .map(|(k, v)| (k.to_owned(), clone_to_folded_case(v)))
199                .collect(),
200            span,
201        ),
202        other => other.clone(),
203    }
204}
205
206fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Vec<Value> {
207    uniq_values
208        .into_iter()
209        .map(|item| {
210            Value::record(
211                record! {
212                    "value" => item.val,
213                    "count" => Value::int(item.count, head),
214                },
215                head,
216            )
217        })
218        .collect()
219}
220
221pub fn uniq(
222    engine_state: &EngineState,
223    stack: &mut Stack,
224    call: &Call,
225    input: Vec<Value>,
226    item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
227    metadata: Option<PipelineMetadata>,
228) -> Result<PipelineData, ShellError> {
229    let head = call.head;
230    let flag_show_count = call.has_flag(engine_state, stack, "count")?;
231    let flag_show_repeated = call.has_flag(engine_state, stack, "repeated")?;
232    let flag_ignore_case = call.has_flag(engine_state, stack, "ignore-case")?;
233    let flag_only_uniques = call.has_flag(engine_state, stack, "unique")?;
234
235    // for uniq-by command
236    let flag_keep_last = call.has_flag(engine_state, stack, "keep-last")?;
237
238    let signals = engine_state.signals().clone();
239    let uniq_values = input
240        .into_iter()
241        .enumerate()
242        .map_while(|(index, item)| {
243            if signals.interrupted() {
244                return None;
245            }
246            Some(item_mapper(ItemMapperState {
247                item,
248                flag_ignore_case,
249                index,
250                head,
251            }))
252        })
253        .try_fold(
254            HashMap::<String, ValueCounter>::new(),
255            |mut counter, item| {
256                let key = utils::value_to_key(engine_state, &item.val_to_compare, head);
257
258                match key {
259                    Ok(key) => {
260                        match counter.get_mut(&key) {
261                            Some(x) => {
262                                if flag_keep_last {
263                                    x.val = item.val;
264                                }
265                                x.count += 1;
266                            }
267                            None => {
268                                counter.insert(key, item);
269                            }
270                        };
271                        Ok(counter)
272                    }
273                    Err(err) => Err(err),
274                }
275            },
276        );
277
278    let mut uniq_values: HashMap<String, ValueCounter> = uniq_values?;
279
280    if flag_show_repeated {
281        uniq_values.retain(|_v, value_count_pair| value_count_pair.count > 1);
282    }
283
284    if flag_only_uniques {
285        uniq_values.retain(|_v, value_count_pair| value_count_pair.count == 1);
286    }
287
288    let uniq_values = sort(uniq_values.into_iter());
289
290    let result = if flag_show_count {
291        generate_results_with_count(head, uniq_values)
292    } else {
293        uniq_values.into_iter().map(|v| v.val).collect()
294    };
295
296    Ok(Value::list(result, head).into_pipeline_data_with_metadata(metadata))
297}
298
299fn sort(iter: IntoIter<String, ValueCounter>) -> Vec<ValueCounter> {
300    iter.map(|item| item.1)
301        .sorted_by(|a, b| a.index.cmp(&b.index))
302        .collect()
303}
304
#[cfg(test)]
mod test {
    use super::Uniq;

    // Hands `Uniq` to the `nu_test_support` examples check.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(Uniq)
    }
}
313}