chie_shared/utils/
collections.rs

//! Collection utility functions for working with vectors, hash maps, and other data structures.
2
3use std::collections::HashMap;
4
/// Remove repeated elements from a vector, keeping the first occurrence of each.
///
/// The surviving elements appear in the same relative order as in `items`.
/// Each element is cloned once into an internal `HashSet` that tracks what
/// has already been emitted.
///
/// # Examples
///
/// ```
/// use chie_shared::deduplicate_preserve_order;
///
/// let items = vec![1, 2, 3, 2, 4, 1, 5];
/// let deduped = deduplicate_preserve_order(items);
/// assert_eq!(deduped, vec![1, 2, 3, 4, 5]);
///
/// // Order is preserved - first occurrence is kept
/// let words = vec!["hello", "world", "hello", "rust"];
/// let deduped_words = deduplicate_preserve_order(words);
/// assert_eq!(deduped_words, vec!["hello", "world", "rust"]);
/// ```
#[allow(dead_code)]
pub fn deduplicate_preserve_order<T: Clone + Eq + std::hash::Hash>(items: Vec<T>) -> Vec<T> {
    let mut already_emitted = std::collections::HashSet::new();
    let mut unique = Vec::new();

    for candidate in items {
        // `insert` returns `true` only the first time a value is seen.
        if already_emitted.insert(candidate.clone()) {
            unique.push(candidate);
        }
    }

    unique
}
29
/// Split a vector into two vectors according to a predicate.
///
/// Returns `(matching, non_matching)`: items for which `predicate` returns
/// `true` go to the first vector, all others to the second. Both vectors keep
/// the relative order of the input.
///
/// # Examples
///
/// ```
/// use chie_shared::partition;
///
/// // Separate even and odd numbers
/// let numbers = vec![1, 2, 3, 4, 5, 6];
/// let (evens, odds) = partition(numbers, |n| n % 2 == 0);
/// assert_eq!(evens, vec![2, 4, 6]);
/// assert_eq!(odds, vec![1, 3, 5]);
///
/// // Filter strings by length
/// let words = vec!["hi", "hello", "bye", "goodbye"];
/// let (long, short) = partition(words, |w| w.len() > 3);
/// assert_eq!(long, vec!["hello", "goodbye"]);
/// assert_eq!(short, vec!["hi", "bye"]);
/// ```
#[allow(dead_code)]
pub fn partition<T, F>(items: Vec<T>, predicate: F) -> (Vec<T>, Vec<T>)
where
    F: Fn(&T) -> bool,
{
    // The standard iterator adaptor performs the same in-order split.
    items.into_iter().partition(predicate)
}
68
/// Bucket items by a key derived from each item.
///
/// `key_fn` is called once per item; the returned `HashMap` maps every
/// produced key to the vector of items that yielded it. Within a bucket,
/// items keep the order in which they appeared in `items`.
///
/// # Examples
///
/// ```
/// use chie_shared::group_by;
///
/// // Group numbers by their remainder when divided by 3
/// let numbers = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
/// let groups = group_by(numbers, |n| n % 3);
/// assert_eq!(groups[&0], vec![3, 6, 9]);
/// assert_eq!(groups[&1], vec![1, 4, 7]);
/// assert_eq!(groups[&2], vec![2, 5, 8]);
///
/// // Group strings by their first character
/// let words = vec!["apple", "apricot", "banana", "berry", "cherry"];
/// let by_first = group_by(words, |w| w.chars().next().unwrap());
/// assert_eq!(by_first[&'a'].len(), 2);
/// assert_eq!(by_first[&'b'].len(), 2);
/// assert_eq!(by_first[&'c'].len(), 1);
/// ```
#[allow(dead_code)]
pub fn group_by<T, K, F>(items: Vec<T>, key_fn: F) -> HashMap<K, Vec<T>>
where
    K: Eq + std::hash::Hash,
    F: Fn(&T) -> K,
{
    items
        .into_iter()
        .fold(HashMap::<K, Vec<T>>::new(), |mut buckets, element| {
            // Compute the key before moving the element into its bucket.
            let bucket_key = key_fn(&element);
            buckets.entry(bucket_key).or_default().push(element);
            buckets
        })
}
106
/// Find the items that appear more than once in a slice.
///
/// Each duplicated value is returned exactly once, no matter how many times
/// it repeats. The result is ordered by the position at which each value is
/// seen for the *second* time, so the output is deterministic for a given
/// input. Only the values that end up in the result are cloned.
///
/// # Examples
///
/// ```
/// use chie_shared::find_duplicates;
///
/// let numbers = vec![1, 2, 3, 2, 4, 3, 5];
/// // `2` repeats before `3` does, so it is reported first.
/// assert_eq!(find_duplicates(&numbers), vec![2, 3]);
///
/// // Works with strings too
/// let words = vec!["cat", "dog", "cat", "bird", "dog"];
/// assert_eq!(find_duplicates(&words), vec!["cat", "dog"]);
///
/// // No duplicates returns empty vector
/// let unique = vec![1, 2, 3, 4];
/// assert_eq!(find_duplicates(&unique), Vec::<i32>::new());
/// ```
#[allow(dead_code)]
pub fn find_duplicates<T: Clone + Eq + std::hash::Hash>(items: &[T]) -> Vec<T> {
    // Both sets hold borrowed items, so membership tracking never clones.
    let mut seen = std::collections::HashSet::with_capacity(items.len());
    let mut reported = std::collections::HashSet::new();
    let mut duplicates = Vec::new();

    for item in items {
        // `seen.insert` is false on a repeat; `reported.insert` is true only
        // the first time that repeat is noticed, so each value is pushed once.
        if !seen.insert(item) && reported.insert(item) {
            duplicates.push(item.clone());
        }
    }

    duplicates
}
143
/// Merge two already-sorted slices into one sorted vector.
///
/// Elements are cloned into the result. When an element of `left` compares
/// equal to an element of `right`, the one from `left` is emitted first.
/// The inputs are not checked for sortedness.
#[allow(dead_code)]
pub fn merge_sorted<T: Ord + Clone>(left: &[T], right: &[T]) -> Vec<T> {
    let mut merged = Vec::with_capacity(left.len() + right.len());
    let mut lhs = left;
    let mut rhs = right;

    // Repeatedly peel the smaller head off whichever slice holds it.
    while let (Some((l_head, l_tail)), Some((r_head, r_tail))) =
        (lhs.split_first(), rhs.split_first())
    {
        if l_head <= r_head {
            merged.push(l_head.clone());
            lhs = l_tail;
        } else {
            merged.push(r_head.clone());
            rhs = r_tail;
        }
    }

    // At most one of these is non-empty; its contents are already in order.
    merged.extend_from_slice(lhs);
    merged.extend_from_slice(rhs);

    merged
}
166
/// Clone the first `n` items of a slice into a new vector.
///
/// If `n` is larger than the slice length, the whole slice is returned.
#[allow(dead_code)]
pub fn take<T: Clone>(items: &[T], n: usize) -> Vec<T> {
    // `get(..n)` is `None` when `n` is past the end; fall back to everything.
    items.get(..n).unwrap_or(items).to_vec()
}
172
/// Clone everything after the first `n` items of a slice into a new vector.
///
/// If `n` is at least the slice length, the result is empty.
#[allow(dead_code)]
pub fn skip<T: Clone>(items: &[T], n: usize) -> Vec<T> {
    // `get(n..)` is `None` when `n` is past the end; nothing remains then.
    items.get(n..).unwrap_or(&[]).to_vec()
}
178
/// Batch items into consecutive groups whose combined size stays within `max_size`.
///
/// `size_fn` reports the size of each item. Items are appended to the current
/// batch in input order; a new batch is started as soon as adding the next
/// item would push the running total above `max_size`.
///
/// A single item whose own size exceeds `max_size` is never dropped: it is
/// placed in a batch by itself, and that batch is the only kind that can
/// exceed `max_size`. Size totals that would overflow `usize` are treated as
/// exceeding `max_size` rather than panicking or wrapping around.
///
/// # Examples
///
/// ```
/// use chie_shared::batch_by_size;
///
/// // Batch strings by character count, max 10 chars per batch
/// let words = vec!["hi", "hello", "world", "rust", "code"];
/// let batches = batch_by_size(words, |s| s.len(), 10);
/// // First batch: "hi" (2) + "hello" (5) = 7 chars
/// // Second batch: "world" (5) + "rust" (4) = 9 chars
/// // Third batch: "code" (4) chars
/// assert_eq!(batches.len(), 3);
/// assert_eq!(batches[0], vec!["hi", "hello"]);
/// assert_eq!(batches[1], vec!["world", "rust"]);
/// assert_eq!(batches[2], vec!["code"]);
///
/// // Batch numbers by value, max sum of 100
/// let numbers = vec![30, 40, 50, 20, 60];
/// let num_batches = batch_by_size(numbers, |n| *n, 100);
/// assert_eq!(num_batches.len(), 3);
/// assert_eq!(num_batches[0], vec![30, 40]); // 70 total
/// assert_eq!(num_batches[1], vec![50, 20]); // 70 total
/// assert_eq!(num_batches[2], vec![60]); // 60 total
///
/// // An item larger than the limit still gets its own batch
/// let oversized = batch_by_size(vec![5, 500, 5], |n| *n, 100);
/// assert_eq!(oversized, vec![vec![5], vec![500], vec![5]]);
/// ```
#[allow(dead_code)]
pub fn batch_by_size<T>(
    items: Vec<T>,
    size_fn: impl Fn(&T) -> usize,
    max_size: usize,
) -> Vec<Vec<T>> {
    let mut batches = Vec::new();
    let mut current_batch = Vec::new();
    let mut current_size: usize = 0;

    for item in items {
        let item_size = size_fn(&item);

        // A total that overflows `usize` is necessarily larger than `max_size`,
        // so an overflowing sum simply counts as "does not fit".
        let fits = current_size
            .checked_add(item_size)
            .map_or(false, |total| total <= max_size);

        // Only close a batch that has content; otherwise an oversized item
        // would produce an empty batch in front of it.
        if !fits && !current_batch.is_empty() {
            batches.push(std::mem::take(&mut current_batch));
            current_size = 0;
        }

        current_batch.push(item);
        // Cannot overflow: either the sum was just verified to fit, or
        // `current_size` is 0 (fresh batch).
        current_size += item_size;
    }

    if !current_batch.is_empty() {
        batches.push(current_batch);
    }

    batches
}
234
/// Combine two vectors element-wise with `f`.
///
/// The i-th output is `f(a[i], b[i])`. Pairing stops at the end of the shorter
/// vector; surplus elements of the longer one are discarded.
#[allow(dead_code)]
pub fn zip_with<A, B, C, F>(a: Vec<A>, b: Vec<B>, f: F) -> Vec<C>
where
    F: Fn(A, B) -> C,
{
    let mut combined = Vec::new();
    let mut right_side = b.into_iter();

    for left_item in a {
        // Stop as soon as the right-hand vector runs dry.
        match right_side.next() {
            Some(right_item) => combined.push(f(left_item, right_item)),
            None => break,
        }
    }

    combined
}
243
/// Concatenate a vector of vectors into one vector.
///
/// Inner vectors are emptied in order, so the result lists every element of
/// the first inner vector, then the second, and so on.
#[allow(dead_code)]
pub fn flatten<T>(items: Vec<Vec<T>>) -> Vec<T> {
    let mut flat = Vec::new();

    for inner in items {
        flat.extend(inner);
    }

    flat
}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Repeats are dropped and the first occurrences keep their order.
    #[test]
    fn test_deduplicate_preserve_order() {
        assert_eq!(
            deduplicate_preserve_order(vec![1, 2, 3, 2, 4, 1, 5]),
            vec![1, 2, 3, 4, 5]
        );

        let strings: Vec<String> = ["a", "b", "a"].iter().map(|s| s.to_string()).collect();
        assert_eq!(
            deduplicate_preserve_order(strings),
            vec!["a".to_string(), "b".to_string()]
        );

        assert_eq!(
            deduplicate_preserve_order(Vec::<i32>::new()),
            Vec::<i32>::new()
        );
    }

    /// Matching items land in the first vector, the rest in the second.
    #[test]
    fn test_partition() {
        let (evens, odds) = partition(vec![1, 2, 3, 4, 5, 6], |x| x % 2 == 0);

        assert_eq!(evens, vec![2, 4, 6]);
        assert_eq!(odds, vec![1, 3, 5]);
    }

    /// Every remainder class gets its own bucket, in input order.
    #[test]
    fn test_group_by() {
        let groups = group_by(vec![1, 2, 3, 4, 5, 6], |x| x % 3);

        let expected = [(0, vec![3, 6]), (1, vec![1, 4]), (2, vec![2, 5])];
        for (key, bucket) in &expected {
            assert_eq!(groups.get(key), Some(bucket));
        }
    }

    /// Each repeated value is reported once; unique input reports nothing.
    #[test]
    fn test_find_duplicates() {
        let mut dups = find_duplicates(&[1, 2, 3, 2, 4, 1, 5, 1]);
        dups.sort();
        assert_eq!(dups, vec![1, 2]);

        assert_eq!(find_duplicates(&[1, 2, 3, 4, 5]), Vec::<i32>::new());
    }

    /// Interleaved inputs merge in order; an empty side is a no-op.
    #[test]
    fn test_merge_sorted() {
        assert_eq!(
            merge_sorted(&[1, 3, 5, 7], &[2, 4, 6, 8]),
            vec![1, 2, 3, 4, 5, 6, 7, 8]
        );

        let nothing: Vec<i32> = Vec::new();
        assert_eq!(merge_sorted(&[1, 2, 3], &nothing), vec![1, 2, 3]);
    }

    /// `take` and `skip` clamp `n` to the slice length.
    #[test]
    fn test_take_skip() {
        let items = vec![1, 2, 3, 4, 5];

        // take
        assert_eq!(take(&items, 3), vec![1, 2, 3]);
        assert_eq!(take(&items, 10), vec![1, 2, 3, 4, 5]);
        assert_eq!(take(&items, 0), Vec::<i32>::new());

        // skip
        assert_eq!(skip(&items, 2), vec![3, 4, 5]);
        assert_eq!(skip(&items, 10), Vec::<i32>::new());
        assert_eq!(skip(&items, 0), vec![1, 2, 3, 4, 5]);
    }

    /// A batch may reach the limit exactly but never grows past it.
    #[test]
    fn test_batch_by_size() {
        let items = vec![10, 20, 30, 40, 50];
        let batches = batch_by_size(items, |&x| x as usize, 60);

        assert_eq!(batches.len(), 3);
        assert_eq!(batches[0], vec![10, 20, 30]); // Total size: 60
        assert_eq!(batches[1], vec![40]); // Size: 40
        assert_eq!(batches[2], vec![50]); // Size: 50
    }

    /// Pairs are combined element-wise up to the shorter input.
    #[test]
    fn test_zip_with() {
        let sums = zip_with(vec![1, 2, 3], vec![10, 20, 30], |x, y| x + y);
        assert_eq!(sums, vec![11, 22, 33]);

        // Stops at shorter vector
        let products = zip_with(vec![1, 2, 3, 4], vec![10, 20], |x, y| x * y);
        assert_eq!(products, vec![10, 40]);
    }

    /// Inner vectors are concatenated in order; empty input stays empty.
    #[test]
    fn test_flatten() {
        assert_eq!(
            flatten(vec![vec![1, 2], vec![3, 4, 5], vec![6]]),
            vec![1, 2, 3, 4, 5, 6]
        );

        assert_eq!(flatten(Vec::<Vec<i32>>::new()), Vec::<i32>::new());
    }
}