alphanumeric_sort/
lib.rs

1/*!
2# Alphanumeric Sort
3
4This crate can help you sort order for files and folders whose names contain numerals.
5
6## Motives and Examples
7
8With the Rust native `sort` method, strings and paths are arranged into lexicographical order. In some cases, it is not so intuitive. For example, there are screen snap shots named by **shot-%N** like **shot-2**, **shot-1**, **shot-11**. After a lexicographical sorting, they will be ordered into **shot-1**, **shot-11**, **shot-2**. However, we would prefer **shot-1**, **shot-2**, **shot-11** mostly.
9
10```rust
11let mut names = ["shot-2", "shot-1", "shot-11"];
12
13names.sort();
14
15assert_eq!(["shot-1", "shot-11", "shot-2"], names);
16```
17
18Thus, in this kind of case, an alphanumeric sort might come in handy.
19
20```rust
21let mut names = ["shot-2", "shot-1", "shot-11"];
22
23alphanumeric_sort::sort_str_slice(&mut names);
24
25assert_eq!(["shot-1", "shot-2", "shot-11"], names);
26```
27
28```rust
29# #[cfg(feature = "std")] {
30use std::path::Path;
31
32let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
33
34alphanumeric_sort::sort_path_slice(&mut paths);
35
36assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
37# }
38```
39
40## About the `compare_*` Functions and the `sort_*` Functions
41
42To sort a slice, the code can also be written like,
43
44```rust
45# #[cfg(feature = "std")] {
46use std::path::Path;
47
48let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
49
50paths.sort_by(|a, b| alphanumeric_sort::compare_path(a, b));
51
52assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
53# }
54```
55
56But it is not recommended because the `compare_*` functions try to convert data (e.g `Path`, `CStr`) to `&str` every time in its execution and thus they are slower than the `sort_*` functions when sorting a slice.
57
58## Version `1.3` to `1.4`
59
60No breaking change in API is made, though the order has some changes.
61
62* `"0001"` is greater than `"001"` instead of being equal.
63* `"中"` is greater than `"1"` instead of being less. `"第1章"` is still less than `"第1-2章"`, even though `"章"` is greater than `"-"`.
64
65## No Std
66
67Disable the default features to compile this crate without std.
68
69```toml
70[dependencies.alphanumeric-sort]
71version = "*"
72default-features = false
73```
74
75## Benchmark
76
77```bash
78cargo bench
79```
80 */
81
82#![cfg_attr(not(feature = "std"), no_std)]
83#![cfg_attr(docsrs, feature(doc_auto_cfg))]
84
85extern crate alloc; // used for sorting
86
87#[cfg(feature = "std")]
88mod std_functions;
89
90use core::{cmp::Ordering, str::Chars};
91
92#[cfg(feature = "std")]
93pub use std_functions::*;
94
95/// Compare two strings.
96pub fn compare_str<A: AsRef<str>, B: AsRef<str>>(a: A, b: B) -> Ordering {
97    let mut c1 = a.as_ref().chars();
98    let mut c2 = b.as_ref().chars();
99
100    // this flag is to handle something like "1點" < "1-1點"
101    let mut last_is_number = false;
102
103    let mut v1: Option<char> = None;
104    let mut v2: Option<char> = None;
105
106    loop {
107        let mut ca = {
108            match v1.take() {
109                Some(c) => c,
110                None => match c1.next() {
111                    Some(c) => c,
112                    None => {
113                        if v2.take().is_some() || c2.next().is_some() {
114                            return Ordering::Less;
115                        } else {
116                            return Ordering::Equal;
117                        }
118                    },
119                },
120            }
121        };
122
123        let mut cb = {
124            match v2.take() {
125                Some(c) => c,
126                None => match c2.next() {
127                    Some(c) => c,
128                    None => {
129                        return Ordering::Greater;
130                    },
131                },
132            }
133        };
134
135        if ca.is_ascii_digit() && cb.is_ascii_digit() {
136            // count the digit length, but ignore the leading zeros and the following same part (prefix)
137            let mut la = 1usize;
138            let mut lb = 1usize;
139
140            // this counter is to handle something like "001" > "01"
141            let mut lc = 0isize;
142
143            // find the first non-zero digit in c1
144            while ca == '0' {
145                lc += 1;
146                if let Some(c) = c1.next() {
147                    if c.is_ascii_digit() {
148                        ca = c;
149                    } else {
150                        v1 = Some(c);
151                        la = 0;
152                        break;
153                    }
154                } else {
155                    la = 0;
156                    break;
157                }
158            }
159
160            // find the first non-zero digit in c2
161            while cb == '0' {
162                lc -= 1;
163                if let Some(c) = c2.next() {
164                    if c.is_ascii_digit() {
165                        cb = c;
166                    } else {
167                        v2 = Some(c);
168                        lb = 0;
169                        break;
170                    }
171                } else {
172                    lb = 0;
173                    break;
174                }
175            }
176
177            // consume the remaining ascii digit
178            let consume_ascii_digit = |chars: &mut Chars, store: &mut Option<char>| {
179                let mut counter = 0;
180
181                for c in chars.by_ref() {
182                    if c.is_ascii_digit() {
183                        counter += 1;
184                    } else {
185                        *store = Some(c);
186                        break;
187                    }
188                }
189
190                counter
191            };
192
193            let mut ordering = Ordering::Equal;
194
195            if la == 0 {
196                if lb == 0 {
197                    // e.g. 000 vs 000, 000 vs 0000, 0000 vs 000
198                } else {
199                    // e.g. 0000 vs 001
200
201                    return Ordering::Less;
202                }
203            } else if lb == 0 {
204                // e.g. 001 vs 0000
205
206                return Ordering::Greater;
207            } else {
208                // e.g. 1 vs 12, 001 vs 0012
209
210                // skip the same prefix and compare the next ascii digit
211                loop {
212                    ordering = ca.cmp(&cb);
213
214                    if ordering == Ordering::Equal {
215                        if let Some(c) = c1.next() {
216                            if c.is_ascii_digit() {
217                                if let Some(cc) = c2.next() {
218                                    if cc.is_ascii_digit() {
219                                        ca = c;
220                                        cb = cc;
221                                    } else {
222                                        return Ordering::Greater;
223                                    }
224                                } else {
225                                    return Ordering::Greater;
226                                }
227                            } else {
228                                let n = consume_ascii_digit(&mut c2, &mut v2);
229                                v1 = Some(c);
230
231                                if n > 0 {
232                                    return Ordering::Less;
233                                }
234
235                                break;
236                            }
237                        } else if c2.next().is_some() {
238                            return Ordering::Less;
239                        } else {
240                            break;
241                        }
242                    } else {
243                        la += consume_ascii_digit(&mut c1, &mut v1);
244                        lb += consume_ascii_digit(&mut c2, &mut v2);
245
246                        if la != lb {
247                            ordering = la.cmp(&lb);
248                        }
249
250                        break;
251                    }
252                }
253            }
254
255            if ordering == Ordering::Equal {
256                match lc.cmp(&0) {
257                    Ordering::Equal => {
258                        last_is_number = true;
259                    },
260                    Ordering::Greater => return Ordering::Greater,
261                    Ordering::Less => return Ordering::Less,
262                }
263            } else {
264                return ordering;
265            }
266        } else {
267            match ca.cmp(&cb) {
268                Ordering::Equal => last_is_number = false,
269                Ordering::Greater => {
270                    return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
271                        Ordering::Less
272                    } else {
273                        Ordering::Greater
274                    };
275                },
276                Ordering::Less => {
277                    return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
278                        Ordering::Greater
279                    } else {
280                        Ordering::Less
281                    };
282                },
283            }
284        }
285    }
286}
287
288// TODO -----------
289
290/// Sort a slice by a `str` key, but may not preserve the order of equal elements.
291#[inline]
292pub fn sort_slice_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
293    slice: &mut [A],
294    mut f: F,
295) {
296    slice.sort_unstable_by(|a, b| compare_str(f(a), f(b)));
297}
298
299/// Sort a slice by a `str` key.
300#[inline]
301pub fn sort_slice_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
302    slice: &mut [A],
303    mut f: F,
304) {
305    slice.sort_by(|a, b| compare_str(f(a), f(b)));
306}
307
308/// Reversely sort a slice by a `str` key, but may not preserve the order of equal elements.
309#[inline]
310pub fn sort_slice_rev_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
311    slice: &mut [A],
312    mut f: F,
313) {
314    slice.sort_unstable_by(|a, b| compare_str(f(b), f(a)));
315}
316
317/// Reversely sort a slice by a `str` key.
318#[inline]
319pub fn sort_slice_rev_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
320    slice: &mut [A],
321    mut f: F,
322) {
323    slice.sort_by(|a, b| compare_str(f(b), f(a)));
324}
325
326// TODO -----------
327
328/// Sort a `str` slice.
329#[inline]
330pub fn sort_str_slice<S: AsRef<str>>(slice: &mut [S]) {
331    slice.sort_unstable_by(|a, b| compare_str(a, b));
332}
333
334/// Reversely sort a `str` slice.
335#[inline]
336pub fn sort_str_slice_rev<S: AsRef<str>>(slice: &mut [S]) {
337    slice.sort_unstable_by(|a, b| compare_str(b, a));
338}