1use super::utils;
2#[cfg(feature = "sqlite")]
3use crate::database::QueryPlan;
4use itertools::Itertools;
5use nu_engine::command_prelude::*;
6use nu_protocol::PipelineMetadata;
7use nu_utils::IgnoreCaseExt;
8use std::collections::{HashMap, hash_map::IntoIter};
9
/// The `uniq` filter command, which returns the distinct values of its input.
///
/// The type carries no state; all behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct Uniq;
12
13impl Command for Uniq {
14 fn name(&self) -> &str {
15 "uniq"
16 }
17
18 fn signature(&self) -> Signature {
19 Signature::build("uniq")
20 .input_output_types(vec![(
21 Type::List(Box::new(Type::Any)),
22 Type::List(Box::new(Type::Any)),
23 )])
24 .switch(
25 "count",
26 "Return a table containing the distinct input values together with their counts.",
27 Some('c'),
28 )
29 .switch(
30 "repeated",
31 "Return the input values that occur more than once.",
32 Some('d'),
33 )
34 .switch(
35 "ignore-case",
36 "Compare input values case-insensitively.",
37 Some('i'),
38 )
39 .switch(
40 "unique",
41 "Return the input values that occur once only.",
42 Some('u'),
43 )
44 .category(Category::Filters)
45 }
46
47 fn description(&self) -> &str {
48 "Return the distinct values in the input."
49 }
50
51 fn search_terms(&self) -> Vec<&str> {
52 vec!["distinct", "deduplicate", "count"]
53 }
54
55 fn run(
56 &self,
57 engine_state: &EngineState,
58 stack: &mut Stack,
59 call: &Call,
60 mut input: PipelineData,
61 ) -> Result<PipelineData, ShellError> {
62 let head = call.head;
63
64 #[cfg(feature = "sqlite")]
65 if !call.has_flag(engine_state, stack, "count")?
67 && !call.has_flag(engine_state, stack, "repeated")?
68 && !call.has_flag(engine_state, stack, "unique")?
69 && !call.has_flag(engine_state, stack, "ignore-case")?
70 && let PipelineData::Value(Value::Custom { val, .. }, metadata) = &input
71 && let Some(plan) = QueryPlan::try_from_any(val.as_any())
72 {
73 let plan = plan.with_distinct();
74 return plan
75 .execute(call.head)
76 .map(|data| data.set_metadata(metadata.clone()));
77 }
78
79 let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
80 item_mapper(ms.item, ms.flag_ignore_case, ms.index, head)
81 });
82
83 let metadata = input.take_metadata();
84 uniq(
85 engine_state,
86 stack,
87 call,
88 input.into_iter().collect(),
89 mapper,
90 metadata,
91 )
92 }
93
94 fn examples(&self) -> Vec<Example<'_>> {
95 vec![
96 Example {
97 description: "Return the distinct values of a list/table (remove duplicates so that each value occurs once only).",
98 example: "[2 3 3 4] | uniq",
99 result: Some(Value::list(
100 vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
101 Span::test_data(),
102 )),
103 },
104 Example {
105 description: "Return the input values that occur more than once.",
106 example: "[1 2 2] | uniq -d",
107 result: Some(Value::list(vec![Value::test_int(2)], Span::test_data())),
108 },
109 Example {
110 description: "Return the input values that occur once only.",
111 example: "[1 2 2] | uniq --unique",
112 result: Some(Value::list(vec![Value::test_int(1)], Span::test_data())),
113 },
114 Example {
115 description: "Ignore differences in case when comparing input values.",
116 example: "['hello' 'goodbye' 'Hello'] | uniq --ignore-case",
117 result: Some(Value::test_list(vec![
118 Value::test_string("hello"),
119 Value::test_string("goodbye"),
120 ])),
121 },
122 Example {
123 description: "Return a table containing the distinct input values together with their counts.",
124 example: "[1 2 2] | uniq --count",
125 result: Some(Value::test_list(vec![
126 Value::test_record(record! {
127 "value" => Value::test_int(1),
128 "count" => Value::test_int(1),
129 }),
130 Value::test_record(record! {
131 "value" => Value::test_int(2),
132 "count" => Value::test_int(2),
133 }),
134 ])),
135 },
136 ]
137 }
138}
139
140pub struct ItemMapperState {
141 pub item: Value,
142 pub flag_ignore_case: bool,
143 pub index: usize,
144 pub head: Span,
145}
146
147fn item_mapper(item: Value, flag_ignore_case: bool, index: usize, head: Span) -> ValueCounter {
148 ValueCounter::new(item, flag_ignore_case, index, head)
149}
150
151pub struct ValueCounter {
152 val: Value,
153 val_to_compare: Value,
154 count: i64,
155 index: usize,
156}
157
158impl PartialEq<Self> for ValueCounter {
159 fn eq(&self, other: &Self) -> bool {
160 self.val == other.val
161 }
162}
163
164impl ValueCounter {
165 fn new(val: Value, flag_ignore_case: bool, index: usize, head: Span) -> Self {
166 Self::new_vals_to_compare(val.clone(), flag_ignore_case, val, index, head)
167 }
168 pub fn new_vals_to_compare(
169 val: Value,
170 flag_ignore_case: bool,
171 vals_to_compare: Value,
172 index: usize,
173 head: Span,
174 ) -> Self {
175 ValueCounter {
176 val,
177 val_to_compare: if flag_ignore_case {
178 clone_to_folded_case(&vals_to_compare.with_span(head))
179 } else {
180 vals_to_compare.with_span(head)
181 },
182 count: 1,
183 index,
184 }
185 }
186}
187
188fn clone_to_folded_case(value: &Value) -> Value {
189 let span = value.span();
190 match value {
191 Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span),
192 Value::List { vals: vec, .. } => {
193 Value::list(vec.iter().map(clone_to_folded_case).collect(), span)
194 }
195 Value::Record { val: record, .. } => Value::record(
196 record
197 .iter()
198 .map(|(k, v)| (k.to_owned(), clone_to_folded_case(v)))
199 .collect(),
200 span,
201 ),
202 other => other.clone(),
203 }
204}
205
206fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Vec<Value> {
207 uniq_values
208 .into_iter()
209 .map(|item| {
210 Value::record(
211 record! {
212 "value" => item.val,
213 "count" => Value::int(item.count, head),
214 },
215 head,
216 )
217 })
218 .collect()
219}
220
221pub fn uniq(
222 engine_state: &EngineState,
223 stack: &mut Stack,
224 call: &Call,
225 input: Vec<Value>,
226 item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
227 metadata: Option<PipelineMetadata>,
228) -> Result<PipelineData, ShellError> {
229 let head = call.head;
230 let flag_show_count = call.has_flag(engine_state, stack, "count")?;
231 let flag_show_repeated = call.has_flag(engine_state, stack, "repeated")?;
232 let flag_ignore_case = call.has_flag(engine_state, stack, "ignore-case")?;
233 let flag_only_uniques = call.has_flag(engine_state, stack, "unique")?;
234
235 let flag_keep_last = call.has_flag(engine_state, stack, "keep-last")?;
237
238 let signals = engine_state.signals().clone();
239 let uniq_values = input
240 .into_iter()
241 .enumerate()
242 .map_while(|(index, item)| {
243 if signals.interrupted() {
244 return None;
245 }
246 Some(item_mapper(ItemMapperState {
247 item,
248 flag_ignore_case,
249 index,
250 head,
251 }))
252 })
253 .try_fold(
254 HashMap::<String, ValueCounter>::new(),
255 |mut counter, item| {
256 let key = utils::value_to_key(engine_state, &item.val_to_compare, head);
257
258 match key {
259 Ok(key) => {
260 match counter.get_mut(&key) {
261 Some(x) => {
262 if flag_keep_last {
263 x.val = item.val;
264 }
265 x.count += 1;
266 }
267 None => {
268 counter.insert(key, item);
269 }
270 };
271 Ok(counter)
272 }
273 Err(err) => Err(err),
274 }
275 },
276 );
277
278 let mut uniq_values: HashMap<String, ValueCounter> = uniq_values?;
279
280 if flag_show_repeated {
281 uniq_values.retain(|_v, value_count_pair| value_count_pair.count > 1);
282 }
283
284 if flag_only_uniques {
285 uniq_values.retain(|_v, value_count_pair| value_count_pair.count == 1);
286 }
287
288 let uniq_values = sort(uniq_values.into_iter());
289
290 let result = if flag_show_count {
291 generate_results_with_count(head, uniq_values)
292 } else {
293 uniq_values.into_iter().map(|v| v.val).collect()
294 };
295
296 Ok(Value::list(result, head).into_pipeline_data_with_metadata(metadata))
297}
298
299fn sort(iter: IntoIter<String, ValueCounter>) -> Vec<ValueCounter> {
300 iter.map(|item| item.1)
301 .sorted_by(|a, b| a.index.cmp(&b.index))
302 .collect()
303}
304
#[cfg(test)]
mod test {
    use super::Uniq;

    // Hands the command to the test-support harness so that the examples it
    // declares are checked.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(Uniq)
    }
}