Skip to main content

alphanumeric_sort/
lib.rs

1/*!
2# Alphanumeric Sort
3
4This crate can help you sort order for files and folders whose names contain numerals.
5
6## Motives and Examples
7
8With the Rust native `sort` method, strings and paths are arranged into lexicographical order. In some cases, it is not so intuitive. For example, there are screen snap shots named by **shot-%N** like **shot-2**, **shot-1**, **shot-11**. After a lexicographical sorting, they will be ordered into **shot-1**, **shot-11**, **shot-2**. However, we would prefer **shot-1**, **shot-2**, **shot-11** mostly.
9
10```rust
11let mut names = ["shot-2", "shot-1", "shot-11"];
12
13names.sort();
14
15assert_eq!(["shot-1", "shot-11", "shot-2"], names);
16```
17
18Thus, in this kind of case, an alphanumeric sort might come in handy.
19
20```rust
21let mut names = ["shot-2", "shot-1", "shot-11"];
22
23alphanumeric_sort::sort_str_slice(&mut names);
24
25assert_eq!(["shot-1", "shot-2", "shot-11"], names);
26```
27
28```rust
29# #[cfg(feature = "std")] {
30use std::path::Path;
31
32let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
33
34alphanumeric_sort::sort_path_slice(&mut paths);
35
36assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
37# }
38```
39
40## Sorting Rules
41
42ASCII digit sequences are compared by their numeric values instead of their lexicographical order. When two digit sequences have the same numeric value, leading zeros are used as a tie-breaker, so `"0001"` is greater than `"001"`.
43
44This crate is not a locale-aware collation library. Non-numeric characters are compared by their Unicode scalar values, except after equal digit sequences: if the next different characters are on different sides of U+00FF, their ordering is reversed. This keeps cases like `"第1章"` less than `"第1-2章"`, while `"1"` is still less than `"中"`.
45
46## About the `compare_*` Functions and the `sort_*` Functions
47
48To sort a slice, the code can also be written like,
49
50```rust
51# #[cfg(feature = "std")] {
52use std::path::Path;
53
54let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
55
56paths.sort_by(|a, b| alphanumeric_sort::compare_path(a, b));
57
58assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
59# }
60```
61
62But it is not recommended because the `compare_*` functions try to convert data (e.g `Path`, `CStr`) to `&str` every time in its execution and thus they are slower than the `sort_*` functions when sorting a slice.
63
64## Version `1.3` to `1.4`
65
66No breaking change in API is made, though the order has some changes.
67
68* `"0001"` is greater than `"001"` instead of being equal.
69* `"中"` is greater than `"1"` instead of being less. `"第1章"` is still less than `"第1-2章"`, even though `"章"` is greater than `"-"`.
70
71## No Std
72
73Disable the default features to compile this crate without std.
74
75```toml
76[dependencies.alphanumeric-sort]
77version = "*"
78default-features = false
79```
80
81## Benchmark
82
83```bash
84cargo bench
85```
86 */
87
88#![cfg_attr(not(feature = "std"), no_std)]
89#![cfg_attr(docsrs, feature(doc_cfg))]
90
91extern crate alloc; // used for sorting
92
93#[cfg(feature = "std")]
94mod std_functions;
95
96use core::{cmp::Ordering, str::Chars};
97
98#[cfg(feature = "std")]
99pub use std_functions::*;
100
101/// Compare two strings.
102pub fn compare_str<A: AsRef<str>, B: AsRef<str>>(a: A, b: B) -> Ordering {
103    let mut c1 = a.as_ref().chars();
104    let mut c2 = b.as_ref().chars();
105
106    // this flag is to handle something like "1點" < "1-1點"
107    let mut last_is_number = false;
108
109    // this flag is to handle something like "1a" > "01"
110    let mut pre_answer = Ordering::Equal;
111
112    let mut v1: Option<char> = None;
113    let mut v2: Option<char> = None;
114
115    loop {
116        let mut ca = {
117            match v1.take() {
118                Some(c) => c,
119                None => match c1.next() {
120                    Some(c) => c,
121                    None => {
122                        if v2.take().is_some() || c2.next().is_some() {
123                            return Ordering::Less;
124                        } else {
125                            return pre_answer;
126                        }
127                    },
128                },
129            }
130        };
131
132        let mut cb = {
133            match v2.take() {
134                Some(c) => c,
135                None => match c2.next() {
136                    Some(c) => c,
137                    None => {
138                        return Ordering::Greater;
139                    },
140                },
141            }
142        };
143
144        if ca.is_ascii_digit() && cb.is_ascii_digit() {
145            // count the digit length, but ignore the leading zeros and the following same part (prefix)
146            let mut la = 1usize;
147            let mut lb = 1usize;
148
149            // this counter is to handle something like "001" > "01"
150            let mut lc = 0isize;
151
152            // find the first non-zero digit in c1
153            while ca == '0' {
154                lc += 1;
155                if let Some(c) = c1.next() {
156                    if c.is_ascii_digit() {
157                        ca = c;
158                    } else {
159                        v1 = Some(c);
160                        la = 0;
161                        break;
162                    }
163                } else {
164                    la = 0;
165                    break;
166                }
167            }
168
169            // find the first non-zero digit in c2
170            while cb == '0' {
171                lc -= 1;
172                if let Some(c) = c2.next() {
173                    if c.is_ascii_digit() {
174                        cb = c;
175                    } else {
176                        v2 = Some(c);
177                        lb = 0;
178                        break;
179                    }
180                } else {
181                    lb = 0;
182                    break;
183                }
184            }
185
186            // consume the remaining ascii digit
187            let consume_ascii_digit = |chars: &mut Chars, store: &mut Option<char>| {
188                let mut counter = 0;
189
190                for c in chars.by_ref() {
191                    if c.is_ascii_digit() {
192                        counter += 1;
193                    } else {
194                        *store = Some(c);
195                        break;
196                    }
197                }
198
199                counter
200            };
201
202            let mut ordering = Ordering::Equal;
203
204            if la == 0 {
205                if lb == 0 {
206                    // e.g. 000 vs 000, 000 vs 0000, 0000 vs 000
207                } else {
208                    // e.g. 0000 vs 001
209
210                    return Ordering::Less;
211                }
212            } else if lb == 0 {
213                // e.g. 001 vs 0000
214
215                return Ordering::Greater;
216            } else {
217                // e.g. 1 vs 12, 001 vs 0012
218
219                // skip the same prefix and compare the next ascii digit
220                loop {
221                    ordering = ca.cmp(&cb);
222
223                    if ordering == Ordering::Equal {
224                        if let Some(c) = c1.next() {
225                            if c.is_ascii_digit() {
226                                if let Some(cc) = c2.next() {
227                                    if cc.is_ascii_digit() {
228                                        ca = c;
229                                        cb = cc;
230                                    } else {
231                                        return Ordering::Greater;
232                                    }
233                                } else {
234                                    return Ordering::Greater;
235                                }
236                            } else {
237                                let n = consume_ascii_digit(&mut c2, &mut v2);
238                                v1 = Some(c);
239
240                                if n > 0 {
241                                    return Ordering::Less;
242                                }
243
244                                break;
245                            }
246                        } else if c2.next().is_some() {
247                            return Ordering::Less;
248                        } else {
249                            break;
250                        }
251                    } else {
252                        la += consume_ascii_digit(&mut c1, &mut v1);
253                        lb += consume_ascii_digit(&mut c2, &mut v2);
254
255                        if la != lb {
256                            ordering = la.cmp(&lb);
257                        }
258
259                        break;
260                    }
261                }
262            }
263
264            if ordering == Ordering::Equal {
265                match lc.cmp(&0) {
266                    Ordering::Equal => {
267                        last_is_number = true;
268                    },
269                    Ordering::Greater => {
270                        if pre_answer == Ordering::Equal {
271                            pre_answer = Ordering::Greater;
272                        } else {
273                            // ignore
274                        }
275                    },
276                    Ordering::Less => {
277                        if pre_answer == Ordering::Equal {
278                            pre_answer = Ordering::Less;
279                        } else {
280                            // ignore
281                        }
282                    },
283                }
284            } else {
285                return ordering;
286            }
287        } else {
288            match ca.cmp(&cb) {
289                Ordering::Equal => last_is_number = false,
290                Ordering::Greater => {
291                    return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
292                        Ordering::Less
293                    } else {
294                        Ordering::Greater
295                    };
296                },
297                Ordering::Less => {
298                    return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
299                        Ordering::Greater
300                    } else {
301                        Ordering::Less
302                    };
303                },
304            }
305        }
306    }
307}
308
309// String-key sorting
310
311/// Sort a slice by a `str` key, but may not preserve the order of equal elements.
312#[inline]
313pub fn sort_slice_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
314    slice: &mut [A],
315    mut f: F,
316) {
317    slice.sort_unstable_by(|a, b| compare_str(f(a), f(b)));
318}
319
320/// Sort a slice by a `str` key.
321#[inline]
322pub fn sort_slice_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
323    slice: &mut [A],
324    mut f: F,
325) {
326    slice.sort_by(|a, b| compare_str(f(a), f(b)));
327}
328
329/// Reversely sort a slice by a `str` key, but may not preserve the order of equal elements.
330#[inline]
331pub fn sort_slice_rev_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
332    slice: &mut [A],
333    mut f: F,
334) {
335    slice.sort_unstable_by(|a, b| compare_str(f(b), f(a)));
336}
337
338/// Reversely sort a slice by a `str` key.
339#[inline]
340pub fn sort_slice_rev_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
341    slice: &mut [A],
342    mut f: F,
343) {
344    slice.sort_by(|a, b| compare_str(f(b), f(a)));
345}
346
347// Direct string slice sorting
348
349/// Sort a `str` slice.
350#[inline]
351pub fn sort_str_slice<S: AsRef<str>>(slice: &mut [S]) {
352    slice.sort_unstable_by(|a, b| compare_str(a, b));
353}
354
355/// Reversely sort a `str` slice.
356#[inline]
357pub fn sort_str_slice_rev<S: AsRef<str>>(slice: &mut [S]) {
358    slice.sort_unstable_by(|a, b| compare_str(b, a));
359}