Skip to main content

zsh/
stringsort.rs

1//! String manipulation and sorting for zshrs
2//!
3//! Direct port from zsh/Src/string.c and zsh/Src/sort.c
4//!
5//! Provides:
6//! - String duplication and concatenation utilities
7//! - Locale-aware string comparison
8//! - Numeric-aware string sorting
9//! - Case-insensitive and backslash-ignoring comparison
10
11use std::cmp::Ordering;
12
/// Return an owned copy of `s`.
///
/// Rust counterpart of the C helpers `dupstring` / `ztrdup`: the caller gets
/// a freshly allocated `String` holding the same text.
#[inline]
pub fn dupstring(s: &str) -> String {
    String::from(s)
}
18
/// Duplicate at most the first `len` bytes of `s`.
///
/// * If `len` is at least `s.len()`, the whole string is copied.
/// * Otherwise the copy is cut at byte offset `len`. When that offset falls
///   inside a multi-byte UTF-8 character the cut is moved back to the start
///   of that character, so the result is always valid UTF-8 and the call
///   never panics (slicing a `str` off a character boundary would).
pub fn dupstring_wlen(s: &str, len: usize) -> String {
    if len >= s.len() {
        return s.to_owned();
    }

    // Offset 0 is always a character boundary, so this loop terminates.
    let mut end = len;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].to_owned()
}
27
/// Join `s1`, `s2` and `s3`, in that order, into one newly allocated string.
pub fn tricat(s1: &str, s2: &str, s3: &str) -> String {
    [s1, s2, s3].concat()
}
36
/// Join `s1` and `s2`, in that order, into one newly allocated string.
pub fn bicat(s1: &str, s2: &str) -> String {
    [s1, s2].concat()
}
44
45/// Duplicate a prefix of a string
46pub fn dupstrpfx(s: &str, len: usize) -> String {
47    dupstring_wlen(s, len)
48}
49
/// Return a new string made of `base` followed by `append`.
///
/// Neither argument is modified; the result is a fresh allocation.
pub fn appstr(base: &str, append: &str) -> String {
    let mut combined = String::with_capacity(base.len() + append.len());
    combined += base;
    combined += append;
    combined
}
54
/// Return the last character of `s`, or `None` when `s` is empty.
///
/// The C original hands back a pointer to the final byte; here the final
/// `char` itself is returned by value.
pub fn strend(s: &str) -> Option<char> {
    s.chars().next_back()
}
59
/// Bit flags accepted by the comparison and sorting functions in this file.
///
/// Each constant occupies its own bit, so flags can be combined with `|`.
pub mod sort_flags {
    /// Reverse the final order (checked by the sorting functions).
    pub const SORTIT_BACKWARDS: u32 = 1 << 0;
    /// Compare embedded runs of digits by numeric value.
    pub const SORTIT_NUMERICALLY: u32 = 1 << 1;
    /// Like `SORTIT_NUMERICALLY`, with sign handling enabled.
    pub const SORTIT_NUMERICALLY_SIGNED: u32 = 1 << 2;
    /// Lowercase both strings before comparing.
    pub const SORTIT_IGNORING_CASE: u32 = 1 << 3;
    /// Remove backslashes from both strings before comparing.
    pub const SORTIT_IGNORING_BACKSLASHES: u32 = 1 << 4;
}
68
69/// Compare two strings with various options
70pub fn zstrcmp(a: &str, b: &str, flags: u32) -> Ordering {
71    let ignore_case = flags & sort_flags::SORTIT_IGNORING_CASE != 0;
72    let ignore_backslash = flags & sort_flags::SORTIT_IGNORING_BACKSLASHES != 0;
73    let numeric = flags & sort_flags::SORTIT_NUMERICALLY != 0;
74    let numeric_signed = flags & sort_flags::SORTIT_NUMERICALLY_SIGNED != 0;
75
76    // Prepare strings for comparison
77    let (a_cmp, b_cmp): (std::borrow::Cow<str>, std::borrow::Cow<str>) = if ignore_case {
78        (
79            std::borrow::Cow::Owned(a.to_lowercase()),
80            std::borrow::Cow::Owned(b.to_lowercase()),
81        )
82    } else {
83        (std::borrow::Cow::Borrowed(a), std::borrow::Cow::Borrowed(b))
84    };
85
86    let (a_final, b_final): (std::borrow::Cow<str>, std::borrow::Cow<str>) = if ignore_backslash {
87        (
88            std::borrow::Cow::Owned(a_cmp.replace('\\', "")),
89            std::borrow::Cow::Owned(b_cmp.replace('\\', "")),
90        )
91    } else {
92        (a_cmp, b_cmp)
93    };
94
95    if numeric || numeric_signed {
96        numeric_compare(&a_final, &b_final, numeric_signed)
97    } else {
98        a_final.cmp(&b_final)
99    }
100}
101
/// Numeric-aware string comparison.
///
/// Both strings are walked from the left in lockstep:
///
/// * When **both** sides are positioned at the start of a number, the two
///   numbers are compared by value. Leading zeros are ignored and the digit
///   runs may be arbitrarily long (nothing is parsed into a fixed-width
///   integer, so huge numbers never collapse into one another). If the values
///   are equal the walk continues after both numbers.
/// * In every other case the current characters are compared as plain
///   characters. In particular a digit facing a non-digit is ordered
///   lexically instead of the non-digit side being read as the number zero.
/// * A string that runs out first sorts before the longer one.
///
/// A number is a run of ASCII digits. When `signed` is true, a `-` that is
/// immediately followed by an ASCII digit starts a negative number; a `-`
/// without a digit after it is an ordinary character. A negative number
/// always sorts before a non-negative one, and two negative numbers are
/// ordered by reversed magnitude. `+` is never treated as a sign.
fn numeric_compare(a: &str, b: &str, signed: bool) -> Ordering {
    let mut a_chars = a.chars().peekable();
    let mut b_chars = b.chars().peekable();

    loop {
        let (ac, bc) = match (a_chars.peek().copied(), b_chars.peek().copied()) {
            (None, None) => return Ordering::Equal,
            (None, Some(_)) => return Ordering::Less,
            (Some(_), None) => return Ordering::Greater,
            (Some(ac), Some(bc)) => (ac, bc),
        };

        let a_neg = signed && starts_negative_number(&a_chars);
        let b_neg = signed && starts_negative_number(&b_chars);
        let a_is_number = a_neg || ac.is_ascii_digit();
        let b_is_number = b_neg || bc.is_ascii_digit();

        let step = if a_is_number && b_is_number {
            match (a_neg, b_neg) {
                // Differing signs decide immediately; nothing needs consuming
                // because the function returns on any non-equal step.
                (true, false) => Ordering::Less,
                (false, true) => Ordering::Greater,
                (negative, _) => {
                    if negative {
                        // Step over the two minus signs.
                        a_chars.next();
                        b_chars.next();
                    }
                    let magnitude = compare_digit_runs(&mut a_chars, &mut b_chars);
                    if negative {
                        // For negative numbers the larger magnitude is the smaller value.
                        magnitude.reverse()
                    } else {
                        magnitude
                    }
                }
            }
        } else {
            // At most one side is a number: fall back to plain character order.
            a_chars.next();
            b_chars.next();
            ac.cmp(&bc)
        };

        if step != Ordering::Equal {
            return step;
        }
    }
}

/// Report whether the iterator is positioned at `-` directly followed by an
/// ASCII digit. Works on a clone, so the iterator itself is not advanced.
fn starts_negative_number(chars: &std::iter::Peekable<std::str::Chars<'_>>) -> bool {
    let mut lookahead = chars.clone();
    lookahead.next() == Some('-') && matches!(lookahead.next(), Some(c) if c.is_ascii_digit())
}

/// Compare the ASCII digit runs at the front of `a` and `b` by numeric value.
///
/// Leading zeros are skipped, then the longer run of remaining digits wins;
/// runs of equal length are decided by their first differing digit. When the
/// result is `Equal` (or the runs have equal length) both runs have been
/// consumed completely. When the lengths differ the function returns as soon
/// as that is known and the iterators are left inside the runs.
fn compare_digit_runs<I>(
    a: &mut std::iter::Peekable<I>,
    b: &mut std::iter::Peekable<I>,
) -> Ordering
where
    I: Iterator<Item = char>,
{
    while a.peek() == Some(&'0') {
        a.next();
    }
    while b.peek() == Some(&'0') {
        b.next();
    }

    // Ordering of the first pair of digits that differ, if any.
    let mut first_difference = Ordering::Equal;
    loop {
        let a_digit = a.peek().copied().filter(|c| c.is_ascii_digit());
        let b_digit = b.peek().copied().filter(|c| c.is_ascii_digit());
        match (a_digit, b_digit) {
            (Some(x), Some(y)) => {
                if first_difference == Ordering::Equal {
                    first_difference = x.cmp(&y);
                }
                a.next();
                b.next();
            }
            // More significant digits on one side means a larger number.
            (Some(_), None) => return Ordering::Greater,
            (None, Some(_)) => return Ordering::Less,
            (None, None) => return first_difference,
        }
    }
}
205
206/// Sort an array of strings with various options
207pub fn strmetasort(array: &mut [String], flags: u32) {
208    if array.len() < 2 {
209        return;
210    }
211
212    let backwards = flags & sort_flags::SORTIT_BACKWARDS != 0;
213
214    array.sort_by(|a, b| {
215        let cmp = zstrcmp(a, b, flags);
216        if backwards {
217            cmp.reverse()
218        } else {
219            cmp
220        }
221    });
222}
223
224/// Sort string slices with various options
225pub fn sort_strings(array: &mut [&str], flags: u32) {
226    if array.len() < 2 {
227        return;
228    }
229
230    let backwards = flags & sort_flags::SORTIT_BACKWARDS != 0;
231
232    array.sort_by(|a, b| {
233        let cmp = zstrcmp(a, b, flags);
234        if backwards {
235            cmp.reverse()
236        } else {
237            cmp
238        }
239    });
240}
241
242/// Natural sort comparison (numbers sorted numerically within strings)
243pub fn natural_cmp(a: &str, b: &str) -> Ordering {
244    zstrcmp(a, b, sort_flags::SORTIT_NUMERICALLY)
245}
246
/// Compare two strings ignoring case.
///
/// Both inputs are lowercased with `str::to_lowercase` and the lowercased
/// strings are then compared.
pub fn strcasecmp(a: &str, b: &str) -> Ordering {
    let (lhs, rhs) = (a.to_lowercase(), b.to_lowercase());
    lhs.as_str().cmp(rhs.as_str())
}
251
/// Locate the first occurrence of `needle` inside `haystack`.
///
/// Returns the byte offset of the match, or `None` when `needle` does not
/// occur.
pub fn strstr(haystack: &str, needle: &str) -> Option<usize> {
    haystack
        .match_indices(needle)
        .next()
        .map(|(offset, _)| offset)
}
256
/// Return `true` when `s` begins with `prefix`.
pub fn strprefix(s: &str, prefix: &str) -> bool {
    s.strip_prefix(prefix).is_some()
}
261
/// Return `true` when `s` finishes with `suffix`.
pub fn strsuffix(s: &str, suffix: &str) -> bool {
    s.strip_suffix(suffix).is_some()
}
266
/// Join the items of `iter` into one string, inserting `sep` between
/// consecutive items.
///
/// An empty iterator yields an empty string; a single item is returned
/// without any separator. Items are appended straight into the result, so no
/// per-item `String` or intermediate `Vec` is allocated.
pub fn strjoin<I, S>(iter: I, sep: &str) -> String
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut joined = String::new();
    for (index, piece) in iter.into_iter().enumerate() {
        if index > 0 {
            joined.push_str(sep);
        }
        joined.push_str(piece.as_ref());
    }
    joined
}
278
/// Split `s` at every occurrence of `sep` and return the pieces in order.
///
/// The pieces borrow from `s`. Empty pieces are kept, so an empty input
/// produces a single empty piece.
pub fn strsplit(s: &str, sep: char) -> Vec<&str> {
    let mut fields = Vec::new();
    for field in s.split(sep) {
        fields.push(field);
    }
    fields
}
283
/// Strip leading and trailing whitespace from `s`.
///
/// The result is a sub-slice of `s`; nothing is allocated.
pub fn strtrim(s: &str) -> &str {
    s.trim_matches(char::is_whitespace)
}
288
/// Return a lowercased copy of `s` (via `str::to_lowercase`).
pub fn strlower(s: &str) -> String {
    str::to_lowercase(s)
}
293
/// Return an uppercased copy of `s` (via `str::to_uppercase`).
pub fn strupper(s: &str) -> String {
    str::to_uppercase(s)
}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned `Vec<String>` from string literals.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    // ---- duplication and concatenation helpers ----

    #[test]
    fn test_dupstring() {
        for &input in &["hello", ""] {
            assert_eq!(dupstring(input), input);
        }
    }

    #[test]
    fn test_dupstring_wlen() {
        for &(input, len, expected) in &[("hello", 3, "hel"), ("hi", 10, "hi")] {
            assert_eq!(dupstring_wlen(input, len), expected);
        }
    }

    #[test]
    fn test_tricat() {
        for &(s1, s2, s3, expected) in &[
            ("a", "b", "c", "abc"),
            ("hello", " ", "world", "hello world"),
        ] {
            assert_eq!(tricat(s1, s2, s3), expected);
        }
    }

    #[test]
    fn test_bicat() {
        let joined = bicat("hello", "world");
        assert_eq!(joined, "helloworld");
    }

    #[test]
    fn test_strend() {
        for &(input, expected) in &[("hello", Some('o')), ("", None)] {
            assert_eq!(strend(input), expected);
        }
    }

    // ---- zstrcmp under the individual flags ----

    #[test]
    fn test_zstrcmp_basic() {
        for &(a, b, expected) in &[
            ("abc", "abc", Ordering::Equal),
            ("abc", "abd", Ordering::Less),
            ("abd", "abc", Ordering::Greater),
        ] {
            assert_eq!(zstrcmp(a, b, 0), expected);
        }
    }

    #[test]
    fn test_zstrcmp_case_insensitive() {
        let flags = sort_flags::SORTIT_IGNORING_CASE;
        for &(a, b, expected) in &[
            ("ABC", "abc", Ordering::Equal),
            ("ABC", "ABD", Ordering::Less),
        ] {
            assert_eq!(zstrcmp(a, b, flags), expected);
        }
    }

    #[test]
    fn test_zstrcmp_ignore_backslash() {
        let result = zstrcmp("a\\bc", "abc", sort_flags::SORTIT_IGNORING_BACKSLASHES);
        assert_eq!(result, Ordering::Equal);
    }

    #[test]
    fn test_zstrcmp_numeric() {
        let flags = sort_flags::SORTIT_NUMERICALLY;
        for &(a, b, expected) in &[
            ("file2", "file10", Ordering::Less),
            ("file10", "file2", Ordering::Greater),
            ("file10", "file10", Ordering::Equal),
        ] {
            assert_eq!(zstrcmp(a, b, flags), expected);
        }
    }

    #[test]
    fn test_zstrcmp_numeric_signed() {
        let flags = sort_flags::SORTIT_NUMERICALLY_SIGNED;
        for &(a, b) in &[("-5", "3"), ("-10", "-2")] {
            assert_eq!(zstrcmp(a, b, flags), Ordering::Less);
        }
    }

    // ---- sorting ----

    #[test]
    fn test_strmetasort() {
        let mut arr = owned(&["file10", "file2", "file1"]);
        strmetasort(&mut arr, sort_flags::SORTIT_NUMERICALLY);
        assert_eq!(arr, ["file1", "file2", "file10"]);
    }

    #[test]
    fn test_strmetasort_backwards() {
        let mut arr = owned(&["a", "c", "b"]);
        strmetasort(&mut arr, sort_flags::SORTIT_BACKWARDS);
        assert_eq!(arr, ["c", "b", "a"]);
    }

    #[test]
    fn test_natural_cmp() {
        let result = natural_cmp("item2", "item10");
        assert_eq!(result, Ordering::Less);
    }

    // ---- small string utilities ----

    #[test]
    fn test_strcasecmp() {
        for &(a, b, expected) in &[
            ("Hello", "HELLO", Ordering::Equal),
            ("abc", "ABD", Ordering::Less),
        ] {
            assert_eq!(strcasecmp(a, b), expected);
        }
    }

    #[test]
    fn test_strstr() {
        for &(haystack, needle, expected) in &[
            ("hello world", "world", Some(6)),
            ("hello", "xyz", None),
        ] {
            assert_eq!(strstr(haystack, needle), expected);
        }
    }

    #[test]
    fn test_strprefix_suffix() {
        assert_eq!(strprefix("hello", "hel"), true);
        assert_eq!(strprefix("hello", "ell"), false);
        assert_eq!(strsuffix("hello", "llo"), true);
        assert_eq!(strsuffix("hello", "ell"), false);
    }

    #[test]
    fn test_strjoin() {
        let joined = strjoin(["a", "b", "c"], ",");
        assert_eq!(joined, "a,b,c");

        let nothing = strjoin(Vec::<&str>::new(), ",");
        assert_eq!(nothing, "");
    }

    #[test]
    fn test_strsplit() {
        let parts = strsplit("a,b,c", ',');
        assert_eq!(parts, ["a", "b", "c"]);
    }

    #[test]
    fn test_strtrim() {
        let trimmed = strtrim("  hello  ");
        assert_eq!(trimmed, "hello");
    }

    #[test]
    fn test_case_conversion() {
        let mixed = "HeLLo";
        assert_eq!(strlower(mixed), "hello");
        assert_eq!(strupper(mixed), "HELLO");
    }
}