1use itertools::Itertools;
2use nu_engine::command_prelude::*;
3use nu_protocol::PipelineMetadata;
4use nu_utils::IgnoreCaseExt;
5use std::collections::{HashMap, hash_map::IntoIter};
6
/// The `uniq` filter command, which returns the distinct values in its input.
///
/// The type is a stateless unit struct; all behaviour lives in its `Command` impl.
#[derive(Clone)]
pub struct Uniq;
9
10impl Command for Uniq {
11 fn name(&self) -> &str {
12 "uniq"
13 }
14
15 fn signature(&self) -> Signature {
16 Signature::build("uniq")
17 .input_output_types(vec![(
18 Type::List(Box::new(Type::Any)),
19 Type::List(Box::new(Type::Any)),
20 )])
21 .switch(
22 "count",
23 "Return a table containing the distinct input values together with their counts",
24 Some('c'),
25 )
26 .switch(
27 "repeated",
28 "Return the input values that occur more than once",
29 Some('d'),
30 )
31 .switch(
32 "ignore-case",
33 "Compare input values case-insensitively",
34 Some('i'),
35 )
36 .switch(
37 "unique",
38 "Return the input values that occur once only",
39 Some('u'),
40 )
41 .category(Category::Filters)
42 }
43
44 fn description(&self) -> &str {
45 "Return the distinct values in the input."
46 }
47
48 fn search_terms(&self) -> Vec<&str> {
49 vec!["distinct", "deduplicate", "count"]
50 }
51
52 fn run(
53 &self,
54 engine_state: &EngineState,
55 stack: &mut Stack,
56 call: &Call,
57 input: PipelineData,
58 ) -> Result<PipelineData, ShellError> {
59 let mapper = Box::new(move |ms: ItemMapperState| -> ValueCounter {
60 item_mapper(ms.item, ms.flag_ignore_case, ms.index)
61 });
62
63 let metadata = input.metadata();
64 uniq(
65 engine_state,
66 stack,
67 call,
68 input.into_iter().collect(),
69 mapper,
70 metadata,
71 )
72 }
73
74 fn examples(&self) -> Vec<Example<'_>> {
75 vec![
76 Example {
77 description: "Return the distinct values of a list/table (remove duplicates so that each value occurs once only)",
78 example: "[2 3 3 4] | uniq",
79 result: Some(Value::list(
80 vec![Value::test_int(2), Value::test_int(3), Value::test_int(4)],
81 Span::test_data(),
82 )),
83 },
84 Example {
85 description: "Return the input values that occur more than once",
86 example: "[1 2 2] | uniq -d",
87 result: Some(Value::list(vec![Value::test_int(2)], Span::test_data())),
88 },
89 Example {
90 description: "Return the input values that occur once only",
91 example: "[1 2 2] | uniq --unique",
92 result: Some(Value::list(vec![Value::test_int(1)], Span::test_data())),
93 },
94 Example {
95 description: "Ignore differences in case when comparing input values",
96 example: "['hello' 'goodbye' 'Hello'] | uniq --ignore-case",
97 result: Some(Value::test_list(vec![
98 Value::test_string("hello"),
99 Value::test_string("goodbye"),
100 ])),
101 },
102 Example {
103 description: "Return a table containing the distinct input values together with their counts",
104 example: "[1 2 2] | uniq --count",
105 result: Some(Value::test_list(vec![
106 Value::test_record(record! {
107 "value" => Value::test_int(1),
108 "count" => Value::test_int(1),
109 }),
110 Value::test_record(record! {
111 "value" => Value::test_int(2),
112 "count" => Value::test_int(2),
113 }),
114 ])),
115 },
116 ]
117 }
118}
119
120pub struct ItemMapperState {
121 pub item: Value,
122 pub flag_ignore_case: bool,
123 pub index: usize,
124}
125
126fn item_mapper(item: Value, flag_ignore_case: bool, index: usize) -> ValueCounter {
127 ValueCounter::new(item, flag_ignore_case, index)
128}
129
130pub struct ValueCounter {
131 val: Value,
132 val_to_compare: Value,
133 count: i64,
134 index: usize,
135}
136
137impl PartialEq<Self> for ValueCounter {
138 fn eq(&self, other: &Self) -> bool {
139 self.val == other.val
140 }
141}
142
143impl ValueCounter {
144 fn new(val: Value, flag_ignore_case: bool, index: usize) -> Self {
145 Self::new_vals_to_compare(val.clone(), flag_ignore_case, val, index)
146 }
147 pub fn new_vals_to_compare(
148 val: Value,
149 flag_ignore_case: bool,
150 vals_to_compare: Value,
151 index: usize,
152 ) -> Self {
153 ValueCounter {
154 val,
155 val_to_compare: if flag_ignore_case {
156 clone_to_folded_case(&vals_to_compare.with_span(Span::unknown()))
157 } else {
158 vals_to_compare.with_span(Span::unknown())
159 },
160 count: 1,
161 index,
162 }
163 }
164}
165
166fn clone_to_folded_case(value: &Value) -> Value {
167 let span = value.span();
168 match value {
169 Value::String { val: s, .. } => Value::string(s.clone().to_folded_case(), span),
170 Value::List { vals: vec, .. } => {
171 Value::list(vec.iter().map(clone_to_folded_case).collect(), span)
172 }
173 Value::Record { val: record, .. } => Value::record(
174 record
175 .iter()
176 .map(|(k, v)| (k.to_owned(), clone_to_folded_case(v)))
177 .collect(),
178 span,
179 ),
180 other => other.clone(),
181 }
182}
183
184fn sort_attributes(val: Value) -> Value {
185 let span = val.span();
186 match val {
187 Value::Record { val, .. } => {
188 let sorted = val
190 .into_owned()
191 .into_iter()
192 .sorted_by(|a, b| a.0.cmp(&b.0))
193 .collect_vec();
194
195 let record = sorted
196 .into_iter()
197 .map(|(k, v)| (k, sort_attributes(v)))
198 .collect();
199
200 Value::record(record, span)
201 }
202 Value::List { vals, .. } => {
203 Value::list(vals.into_iter().map(sort_attributes).collect_vec(), span)
204 }
205 other => other,
206 }
207}
208
209fn generate_key(engine_state: &EngineState, item: &ValueCounter) -> Result<String, ShellError> {
210 let value = sort_attributes(item.val_to_compare.clone()); nuon::to_nuon(
212 engine_state,
213 &value,
214 nuon::ToStyle::Default,
215 Some(Span::unknown()),
216 false,
217 )
218}
219
220fn generate_results_with_count(head: Span, uniq_values: Vec<ValueCounter>) -> Vec<Value> {
221 uniq_values
222 .into_iter()
223 .map(|item| {
224 Value::record(
225 record! {
226 "value" => item.val,
227 "count" => Value::int(item.count, head),
228 },
229 head,
230 )
231 })
232 .collect()
233}
234
235pub fn uniq(
236 engine_state: &EngineState,
237 stack: &mut Stack,
238 call: &Call,
239 input: Vec<Value>,
240 item_mapper: Box<dyn Fn(ItemMapperState) -> ValueCounter>,
241 metadata: Option<PipelineMetadata>,
242) -> Result<PipelineData, ShellError> {
243 let head = call.head;
244 let flag_show_count = call.has_flag(engine_state, stack, "count")?;
245 let flag_show_repeated = call.has_flag(engine_state, stack, "repeated")?;
246 let flag_ignore_case = call.has_flag(engine_state, stack, "ignore-case")?;
247 let flag_only_uniques = call.has_flag(engine_state, stack, "unique")?;
248
249 let signals = engine_state.signals().clone();
250 let uniq_values = input
251 .into_iter()
252 .enumerate()
253 .map_while(|(index, item)| {
254 if signals.interrupted() {
255 return None;
256 }
257 Some(item_mapper(ItemMapperState {
258 item,
259 flag_ignore_case,
260 index,
261 }))
262 })
263 .try_fold(
264 HashMap::<String, ValueCounter>::new(),
265 |mut counter, item| {
266 let key = generate_key(engine_state, &item);
267
268 match key {
269 Ok(key) => {
270 match counter.get_mut(&key) {
271 Some(x) => x.count += 1,
272 None => {
273 counter.insert(key, item);
274 }
275 };
276 Ok(counter)
277 }
278 Err(err) => Err(err),
279 }
280 },
281 );
282
283 let mut uniq_values: HashMap<String, ValueCounter> = uniq_values?;
284
285 if flag_show_repeated {
286 uniq_values.retain(|_v, value_count_pair| value_count_pair.count > 1);
287 }
288
289 if flag_only_uniques {
290 uniq_values.retain(|_v, value_count_pair| value_count_pair.count == 1);
291 }
292
293 let uniq_values = sort(uniq_values.into_iter());
294
295 let result = if flag_show_count {
296 generate_results_with_count(head, uniq_values)
297 } else {
298 uniq_values.into_iter().map(|v| v.val).collect()
299 };
300
301 Ok(Value::list(result, head).into_pipeline_data_with_metadata(metadata))
302}
303
304fn sort(iter: IntoIter<String, ValueCounter>) -> Vec<ValueCounter> {
305 iter.map(|item| item.1)
306 .sorted_by(|a, b| a.index.cmp(&b.index))
307 .collect()
308}
309
#[cfg(test)]
mod test {
    use super::Uniq;

    /// Runs the crate's example checker over the examples declared by `Uniq`.
    #[test]
    fn test_examples() {
        crate::test_examples(Uniq);
    }
}