alphanumeric_sort/
lib.rs

1/*!
2# Alphanumeric Sort
3
4This crate can help you sort order for files and folders whose names contain numerals.
5
6## Motives and Examples
7
8With the Rust native `sort` method, strings and paths are arranged into lexicographical order. In some cases, it is not so intuitive. For example, there are screen snap shots named by **shot-%N** like **shot-2**, **shot-1**, **shot-11**. After a lexicographical sorting, they will be ordered into **shot-1**, **shot-11**, **shot-2**. However, we would prefer **shot-1**, **shot-2**, **shot-11** mostly.
9
10```rust
11let mut names = ["shot-2", "shot-1", "shot-11"];
12
13names.sort();
14
15assert_eq!(["shot-1", "shot-11", "shot-2"], names);
16```
17
18Thus, in this kind of case, an alphanumeric sort might come in handy.
19
20```rust
21let mut names = ["shot-2", "shot-1", "shot-11"];
22
23alphanumeric_sort::sort_str_slice(&mut names);
24
25assert_eq!(["shot-1", "shot-2", "shot-11"], names);
26```
27
28```rust
29# #[cfg(feature = "std")] {
30use std::path::Path;
31
32let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
33
34alphanumeric_sort::sort_path_slice(&mut paths);
35
36assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
37# }
38```
39
40## About the `compare_*` Functions and the `sort_*` Functions
41
42To sort a slice, the code can also be written like,
43
44```rust
45# #[cfg(feature = "std")] {
46use std::path::Path;
47
48let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
49
50paths.sort_by(|a, b| alphanumeric_sort::compare_path(a, b));
51
52assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
53# }
54```
55
56But it is not recommended because the `compare_*` functions try to convert data (e.g `Path`, `CStr`) to `&str` every time in its execution and thus they are slower than the `sort_*` functions when sorting a slice.
57
58## Version `1.3` to `1.4`
59
60No breaking change in API is made, though the order has some changes.
61
62* `"0001"` is greater than `"001"` instead of being equal.
63* `"中"` is greater than `"1"` instead of being less. `"第1章"` is still less than `"第1-2章"`, even though `"章"` is greater than `"-"`.
64
65## No Std
66
67Disable the default features to compile this crate without std.
68
69```toml
70[dependencies.alphanumeric-sort]
71version = "*"
72default-features = false
73```
74
75## Benchmark
76
77```bash
78cargo bench
79```
80 */
81
82#![cfg_attr(not(feature = "std"), no_std)]
83#![cfg_attr(docsrs, feature(doc_cfg))]
84
85extern crate alloc; // used for sorting
86
87#[cfg(feature = "std")]
88mod std_functions;
89
90use core::{cmp::Ordering, str::Chars};
91
92#[cfg(feature = "std")]
93pub use std_functions::*;
94
95/// Compare two strings.
96pub fn compare_str<A: AsRef<str>, B: AsRef<str>>(a: A, b: B) -> Ordering {
97    let mut c1 = a.as_ref().chars();
98    let mut c2 = b.as_ref().chars();
99
100    // this flag is to handle something like "1點" < "1-1點"
101    let mut last_is_number = false;
102
103    // this flag is to handle something like "1a" > "01"
104    let mut pre_anwser = Ordering::Equal;
105
106    let mut v1: Option<char> = None;
107    let mut v2: Option<char> = None;
108
109    loop {
110        let mut ca = {
111            match v1.take() {
112                Some(c) => c,
113                None => match c1.next() {
114                    Some(c) => c,
115                    None => {
116                        if v2.take().is_some() || c2.next().is_some() {
117                            return Ordering::Less;
118                        } else {
119                            return pre_anwser;
120                        }
121                    },
122                },
123            }
124        };
125
126        let mut cb = {
127            match v2.take() {
128                Some(c) => c,
129                None => match c2.next() {
130                    Some(c) => c,
131                    None => {
132                        return Ordering::Greater;
133                    },
134                },
135            }
136        };
137
138        if ca.is_ascii_digit() && cb.is_ascii_digit() {
139            // count the digit length, but ignore the leading zeros and the following same part (prefix)
140            let mut la = 1usize;
141            let mut lb = 1usize;
142
143            // this counter is to handle something like "001" > "01"
144            let mut lc = 0isize;
145
146            // find the first non-zero digit in c1
147            while ca == '0' {
148                lc += 1;
149                if let Some(c) = c1.next() {
150                    if c.is_ascii_digit() {
151                        ca = c;
152                    } else {
153                        v1 = Some(c);
154                        la = 0;
155                        break;
156                    }
157                } else {
158                    la = 0;
159                    break;
160                }
161            }
162
163            // find the first non-zero digit in c2
164            while cb == '0' {
165                lc -= 1;
166                if let Some(c) = c2.next() {
167                    if c.is_ascii_digit() {
168                        cb = c;
169                    } else {
170                        v2 = Some(c);
171                        lb = 0;
172                        break;
173                    }
174                } else {
175                    lb = 0;
176                    break;
177                }
178            }
179
180            // consume the remaining ascii digit
181            let consume_ascii_digit = |chars: &mut Chars, store: &mut Option<char>| {
182                let mut counter = 0;
183
184                for c in chars.by_ref() {
185                    if c.is_ascii_digit() {
186                        counter += 1;
187                    } else {
188                        *store = Some(c);
189                        break;
190                    }
191                }
192
193                counter
194            };
195
196            let mut ordering = Ordering::Equal;
197
198            if la == 0 {
199                if lb == 0 {
200                    // e.g. 000 vs 000, 000 vs 0000, 0000 vs 000
201                } else {
202                    // e.g. 0000 vs 001
203
204                    return Ordering::Less;
205                }
206            } else if lb == 0 {
207                // e.g. 001 vs 0000
208
209                return Ordering::Greater;
210            } else {
211                // e.g. 1 vs 12, 001 vs 0012
212
213                // skip the same prefix and compare the next ascii digit
214                loop {
215                    ordering = ca.cmp(&cb);
216
217                    if ordering == Ordering::Equal {
218                        if let Some(c) = c1.next() {
219                            if c.is_ascii_digit() {
220                                if let Some(cc) = c2.next() {
221                                    if cc.is_ascii_digit() {
222                                        ca = c;
223                                        cb = cc;
224                                    } else {
225                                        return Ordering::Greater;
226                                    }
227                                } else {
228                                    return Ordering::Greater;
229                                }
230                            } else {
231                                let n = consume_ascii_digit(&mut c2, &mut v2);
232                                v1 = Some(c);
233
234                                if n > 0 {
235                                    return Ordering::Less;
236                                }
237
238                                break;
239                            }
240                        } else if c2.next().is_some() {
241                            return Ordering::Less;
242                        } else {
243                            break;
244                        }
245                    } else {
246                        la += consume_ascii_digit(&mut c1, &mut v1);
247                        lb += consume_ascii_digit(&mut c2, &mut v2);
248
249                        if la != lb {
250                            ordering = la.cmp(&lb);
251                        }
252
253                        break;
254                    }
255                }
256            }
257
258            if ordering == Ordering::Equal {
259                match lc.cmp(&0) {
260                    Ordering::Equal => {
261                        last_is_number = true;
262                    },
263                    Ordering::Greater => {
264                        if pre_anwser == Ordering::Equal {
265                            pre_anwser = Ordering::Greater;
266                        } else {
267                            // ignore
268                        }
269                    },
270                    Ordering::Less => {
271                        if pre_anwser == Ordering::Equal {
272                            pre_anwser = Ordering::Less;
273                        } else {
274                            // ignore
275                        }
276                    },
277                }
278            } else {
279                return ordering;
280            }
281        } else {
282            match ca.cmp(&cb) {
283                Ordering::Equal => last_is_number = false,
284                Ordering::Greater => {
285                    return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
286                        Ordering::Less
287                    } else {
288                        Ordering::Greater
289                    };
290                },
291                Ordering::Less => {
292                    return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
293                        Ordering::Greater
294                    } else {
295                        Ordering::Less
296                    };
297                },
298            }
299        }
300    }
301}
302
303// TODO -----------
304
305/// Sort a slice by a `str` key, but may not preserve the order of equal elements.
306#[inline]
307pub fn sort_slice_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
308    slice: &mut [A],
309    mut f: F,
310) {
311    slice.sort_unstable_by(|a, b| compare_str(f(a), f(b)));
312}
313
314/// Sort a slice by a `str` key.
315#[inline]
316pub fn sort_slice_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
317    slice: &mut [A],
318    mut f: F,
319) {
320    slice.sort_by(|a, b| compare_str(f(a), f(b)));
321}
322
323/// Reversely sort a slice by a `str` key, but may not preserve the order of equal elements.
324#[inline]
325pub fn sort_slice_rev_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
326    slice: &mut [A],
327    mut f: F,
328) {
329    slice.sort_unstable_by(|a, b| compare_str(f(b), f(a)));
330}
331
332/// Reversely sort a slice by a `str` key.
333#[inline]
334pub fn sort_slice_rev_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
335    slice: &mut [A],
336    mut f: F,
337) {
338    slice.sort_by(|a, b| compare_str(f(b), f(a)));
339}
340
341// TODO -----------
342
343/// Sort a `str` slice.
344#[inline]
345pub fn sort_str_slice<S: AsRef<str>>(slice: &mut [S]) {
346    slice.sort_unstable_by(|a, b| compare_str(a, b));
347}
348
349/// Reversely sort a `str` slice.
350#[inline]
351pub fn sort_str_slice_rev<S: AsRef<str>>(slice: &mut [S]) {
352    slice.sort_unstable_by(|a, b| compare_str(b, a));
353}