Skip to main content

zsh/
sort.rs

1//! Zsh string sorting - Direct port from zsh/Src/sort.c
2//!
3//! Provides comparison and sorting functions for shell strings,
4//! including numeric sorting and various comparison modes.
5
6use std::cmp::Ordering;
7
8/// Sort flags from sort.c
9pub mod flags {
10    pub const NUMERIC: u32 = 1 << 0; // -n: numeric sort
11    pub const REVERSE: u32 = 1 << 1; // -O: reverse order
12    pub const CASE_INSENSITIVE: u32 = 1 << 2; // -i: case insensitive
13    pub const NO_BACKSLASH: u32 = 1 << 3; // ignore backslashes
14    pub const NUMERIC_SIGNED: u32 = 1 << 4; // handle negative numbers
15}
16
17/// Sort element with comparison string and length
18#[derive(Clone, Debug)]
19pub struct SortElt {
20    pub orig: String,
21    pub cmp: String,
22    pub len: Option<usize>, // None = standard null-terminated, Some = embedded nulls
23}
24
25impl SortElt {
26    pub fn new(s: &str) -> Self {
27        SortElt {
28            orig: s.to_string(),
29            cmp: s.to_string(),
30            len: None,
31        }
32    }
33
34    pub fn with_len(s: &str, len: usize) -> Self {
35        SortElt {
36            orig: s.to_string(),
37            cmp: s.to_string(),
38            len: Some(len),
39        }
40    }
41}
42
43/// Compare two strings according to sort flags (from sort.c eltpcmp)
44pub fn zstrcmp(a: &str, b: &str, sort_flags: u32) -> Ordering {
45    let reverse = (sort_flags & flags::REVERSE) != 0;
46    let numeric = (sort_flags & flags::NUMERIC) != 0;
47    let numeric_signed = (sort_flags & flags::NUMERIC_SIGNED) != 0;
48    let no_backslash = (sort_flags & flags::NO_BACKSLASH) != 0;
49    let case_insensitive = (sort_flags & flags::CASE_INSENSITIVE) != 0;
50
51    let mut result = compare_strings(
52        a,
53        b,
54        numeric,
55        numeric_signed,
56        no_backslash,
57        case_insensitive,
58    );
59
60    if reverse {
61        result = result.reverse();
62    }
63    result
64}
65
66fn compare_strings(
67    a: &str,
68    b: &str,
69    numeric: bool,
70    numeric_signed: bool,
71    no_backslash: bool,
72    case_insensitive: bool,
73) -> Ordering {
74    let a_chars: Vec<char> = if no_backslash {
75        a.chars().filter(|&c| c != '\\').collect()
76    } else {
77        a.chars().collect()
78    };
79
80    let b_chars: Vec<char> = if no_backslash {
81        b.chars().filter(|&c| c != '\\').collect()
82    } else {
83        b.chars().collect()
84    };
85
86    let a_str: String = a_chars.into_iter().collect();
87    let b_str: String = b_chars.into_iter().collect();
88
89    if numeric {
90        return compare_numeric(&a_str, &b_str, numeric_signed);
91    }
92
93    if case_insensitive {
94        a_str.to_lowercase().cmp(&b_str.to_lowercase())
95    } else {
96        a_str.cmp(&b_str)
97    }
98}
99
100/// Numeric comparison from sort.c
101fn compare_numeric(a: &str, b: &str, signed_mode: bool) -> Ordering {
102    let a_num = parse_leading_number(a, signed_mode);
103    let b_num = parse_leading_number(b, signed_mode);
104
105    match (a_num, b_num) {
106        (Some(an), Some(bn)) => {
107            let cmp = an.partial_cmp(&bn).unwrap_or(Ordering::Equal);
108            if cmp != Ordering::Equal {
109                return cmp;
110            }
111            // Numbers are equal, compare remaining strings
112            let a_rest = skip_number(a, signed_mode);
113            let b_rest = skip_number(b, signed_mode);
114            compare_numeric(a_rest, b_rest, signed_mode)
115        }
116        (Some(_), None) => Ordering::Greater, // Numbers before non-numbers? Or vice versa?
117        (None, Some(_)) => Ordering::Less,
118        (None, None) => {
119            // Both are non-numeric at this point, do string comparison
120            let (a_head, a_tail) = split_at_number(a);
121            let (b_head, b_tail) = split_at_number(b);
122
123            let head_cmp = a_head.cmp(&b_head);
124            if head_cmp != Ordering::Equal {
125                return head_cmp;
126            }
127
128            if a_tail.is_empty() && b_tail.is_empty() {
129                return Ordering::Equal;
130            }
131
132            compare_numeric(a_tail, b_tail, signed_mode)
133        }
134    }
135}
136
137fn parse_leading_number(s: &str, signed_mode: bool) -> Option<f64> {
138    let s = s.trim_start();
139    if s.is_empty() {
140        return None;
141    }
142
143    let mut chars = s.chars().peekable();
144    let mut num_str = String::new();
145
146    // Handle sign
147    if signed_mode {
148        if let Some(&c) = chars.peek() {
149            if c == '-' || c == '+' {
150                num_str.push(chars.next().unwrap());
151            }
152        }
153    }
154
155    // Check if next char is digit
156    if chars.peek().map_or(true, |c| !c.is_ascii_digit()) {
157        return None;
158    }
159
160    // Collect digits
161    while let Some(&c) = chars.peek() {
162        if c.is_ascii_digit() {
163            num_str.push(chars.next().unwrap());
164        } else if c == '.' {
165            num_str.push(chars.next().unwrap());
166            // Collect decimal digits
167            while let Some(&c) = chars.peek() {
168                if c.is_ascii_digit() {
169                    num_str.push(chars.next().unwrap());
170                } else {
171                    break;
172                }
173            }
174            break;
175        } else {
176            break;
177        }
178    }
179
180    num_str.parse::<f64>().ok()
181}
182
183fn skip_number(s: &str, signed_mode: bool) -> &str {
184    let s = s.trim_start();
185    let mut idx = 0;
186    let chars: Vec<char> = s.chars().collect();
187
188    // Skip sign
189    if signed_mode && !chars.is_empty() && (chars[0] == '-' || chars[0] == '+') {
190        idx += 1;
191    }
192
193    // Skip digits
194    while idx < chars.len() && chars[idx].is_ascii_digit() {
195        idx += 1;
196    }
197
198    // Skip decimal part
199    if idx < chars.len() && chars[idx] == '.' {
200        idx += 1;
201        while idx < chars.len() && chars[idx].is_ascii_digit() {
202            idx += 1;
203        }
204    }
205
206    &s[s.chars().take(idx).map(|c| c.len_utf8()).sum::<usize>()..]
207}
208
209fn split_at_number(s: &str) -> (&str, &str) {
210    let idx = s
211        .chars()
212        .position(|c| c.is_ascii_digit())
213        .unwrap_or(s.len());
214
215    let byte_idx = s.chars().take(idx).map(|c| c.len_utf8()).sum::<usize>();
216    (&s[..byte_idx], &s[byte_idx..])
217}
218
219/// Sort an array of strings (from sort.c strmetasort)
220pub fn strmetasort(arr: &mut [String], sort_flags: u32) {
221    arr.sort_by(|a, b| zstrcmp(a, b, sort_flags));
222}
223
224/// Sort array in place with natural (numeric) ordering
225pub fn natural_sort(arr: &mut [String]) {
226    strmetasort(arr, flags::NUMERIC | flags::NUMERIC_SIGNED);
227}
228
229/// Sort array in place with reverse order
230pub fn reverse_sort(arr: &mut [String]) {
231    strmetasort(arr, flags::REVERSE);
232}
233
234/// Sort array case-insensitively
235pub fn case_insensitive_sort(arr: &mut [String]) {
236    strmetasort(arr, flags::CASE_INSENSITIVE);
237}
238
239/// Sort array of SortElt structures
240pub fn sort_elts(elts: &mut [SortElt], sort_flags: u32) {
241    let reverse = (sort_flags & flags::REVERSE) != 0;
242    let numeric = (sort_flags & flags::NUMERIC) != 0;
243    let numeric_signed = (sort_flags & flags::NUMERIC_SIGNED) != 0;
244    let no_backslash = (sort_flags & flags::NO_BACKSLASH) != 0;
245    let case_insensitive = (sort_flags & flags::CASE_INSENSITIVE) != 0;
246
247    elts.sort_by(|a, b| {
248        let mut result = compare_strings(
249            &a.cmp,
250            &b.cmp,
251            numeric,
252            numeric_signed,
253            no_backslash,
254            case_insensitive,
255        );
256        if reverse {
257            result = result.reverse();
258        }
259        result
260    });
261}
262
263/// Create comparison key for sorting (from sort.c tricat style)
264pub fn make_sort_key(s: &str, case_insensitive: bool) -> String {
265    if case_insensitive {
266        s.to_lowercase()
267    } else {
268        s.to_string()
269    }
270}
271
272#[cfg(test)]
273mod tests {
274    use super::*;
275
276    #[test]
277    fn test_zstrcmp_basic() {
278        assert_eq!(zstrcmp("abc", "def", 0), Ordering::Less);
279        assert_eq!(zstrcmp("def", "abc", 0), Ordering::Greater);
280        assert_eq!(zstrcmp("abc", "abc", 0), Ordering::Equal);
281    }
282
283    #[test]
284    fn test_zstrcmp_reverse() {
285        assert_eq!(zstrcmp("abc", "def", flags::REVERSE), Ordering::Greater);
286        assert_eq!(zstrcmp("def", "abc", flags::REVERSE), Ordering::Less);
287    }
288
289    #[test]
290    fn test_zstrcmp_case_insensitive() {
291        assert_eq!(
292            zstrcmp("ABC", "abc", flags::CASE_INSENSITIVE),
293            Ordering::Equal
294        );
295        assert_eq!(
296            zstrcmp("ABC", "def", flags::CASE_INSENSITIVE),
297            Ordering::Less
298        );
299    }
300
301    #[test]
302    fn test_zstrcmp_numeric() {
303        assert_eq!(zstrcmp("file2", "file10", flags::NUMERIC), Ordering::Less);
304        assert_eq!(
305            zstrcmp("file10", "file2", flags::NUMERIC),
306            Ordering::Greater
307        );
308        assert_eq!(zstrcmp("100", "20", flags::NUMERIC), Ordering::Greater);
309    }
310
311    #[test]
312    fn test_zstrcmp_numeric_signed() {
313        let f = flags::NUMERIC | flags::NUMERIC_SIGNED;
314        assert_eq!(zstrcmp("-5", "3", f), Ordering::Less);
315        assert_eq!(zstrcmp("-10", "-2", f), Ordering::Less);
316        assert_eq!(zstrcmp("5", "-3", f), Ordering::Greater);
317    }
318
319    #[test]
320    fn test_natural_sort() {
321        let mut arr = vec![
322            "file10".to_string(),
323            "file2".to_string(),
324            "file1".to_string(),
325            "file20".to_string(),
326        ];
327        natural_sort(&mut arr);
328        assert_eq!(arr, vec!["file1", "file2", "file10", "file20"]);
329    }
330
331    #[test]
332    fn test_strmetasort() {
333        let mut arr = vec![
334            "zebra".to_string(),
335            "apple".to_string(),
336            "mango".to_string(),
337        ];
338        strmetasort(&mut arr, 0);
339        assert_eq!(arr, vec!["apple", "mango", "zebra"]);
340    }
341
342    #[test]
343    fn test_reverse_sort() {
344        let mut arr = vec!["a".to_string(), "b".to_string(), "c".to_string()];
345        reverse_sort(&mut arr);
346        assert_eq!(arr, vec!["c", "b", "a"]);
347    }
348
349    #[test]
350    fn test_case_insensitive_sort() {
351        let mut arr = vec![
352            "Banana".to_string(),
353            "apple".to_string(),
354            "Cherry".to_string(),
355        ];
356        case_insensitive_sort(&mut arr);
357        assert_eq!(arr, vec!["apple", "Banana", "Cherry"]);
358    }
359
360    #[test]
361    fn test_no_backslash() {
362        assert_eq!(
363            zstrcmp("a\\bc", "abc", flags::NO_BACKSLASH),
364            Ordering::Equal
365        );
366    }
367
368    #[test]
369    fn test_make_sort_key() {
370        assert_eq!(make_sort_key("Hello", false), "Hello");
371        assert_eq!(make_sort_key("Hello", true), "hello");
372    }
373}