alphanumeric_sort/lib.rs
1/*!
2# Alphanumeric Sort
3
4This crate can help you sort order for files and folders whose names contain numerals.
5
6## Motives and Examples
7
8With the Rust native `sort` method, strings and paths are arranged into lexicographical order. In some cases, it is not so intuitive. For example, there are screen snap shots named by **shot-%N** like **shot-2**, **shot-1**, **shot-11**. After a lexicographical sorting, they will be ordered into **shot-1**, **shot-11**, **shot-2**. However, we would prefer **shot-1**, **shot-2**, **shot-11** mostly.
9
10```rust
11let mut names = ["shot-2", "shot-1", "shot-11"];
12
13names.sort();
14
15assert_eq!(["shot-1", "shot-11", "shot-2"], names);
16```
17
18Thus, in this kind of case, an alphanumeric sort might come in handy.
19
20```rust
21let mut names = ["shot-2", "shot-1", "shot-11"];
22
23alphanumeric_sort::sort_str_slice(&mut names);
24
25assert_eq!(["shot-1", "shot-2", "shot-11"], names);
26```
27
28```rust
29# #[cfg(feature = "std")] {
30use std::path::Path;
31
32let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
33
34alphanumeric_sort::sort_path_slice(&mut paths);
35
36assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
37# }
38```
39
40## Sorting Rules
41
42ASCII digit sequences are compared by their numeric values instead of their lexicographical order. When two digit sequences have the same numeric value, leading zeros are used as a tie-breaker, so `"0001"` is greater than `"001"`.
43
44This crate is not a locale-aware collation library. Non-numeric characters are compared by their Unicode scalar values, except after equal digit sequences: if the next different characters are on different sides of U+00FF, their ordering is reversed. This keeps cases like `"第1章"` less than `"第1-2章"`, while `"1"` is still less than `"中"`.
45
46## About the `compare_*` Functions and the `sort_*` Functions
47
48To sort a slice, the code can also be written like,
49
50```rust
51# #[cfg(feature = "std")] {
52use std::path::Path;
53
54let mut paths = [Path::new("shot-2"), Path::new("shot-1"), Path::new("shot-11")];
55
56paths.sort_by(|a, b| alphanumeric_sort::compare_path(a, b));
57
58assert_eq!([Path::new("shot-1"), Path::new("shot-2"), Path::new("shot-11")], paths);
59# }
60```
61
62But it is not recommended because the `compare_*` functions try to convert data (e.g `Path`, `CStr`) to `&str` every time in its execution and thus they are slower than the `sort_*` functions when sorting a slice.
63
64## Version `1.3` to `1.4`
65
66No breaking change in API is made, though the order has some changes.
67
68* `"0001"` is greater than `"001"` instead of being equal.
69* `"中"` is greater than `"1"` instead of being less. `"第1章"` is still less than `"第1-2章"`, even though `"章"` is greater than `"-"`.
70
71## No Std
72
73Disable the default features to compile this crate without std.
74
75```toml
76[dependencies.alphanumeric-sort]
77version = "*"
78default-features = false
79```
80
81## Benchmark
82
83```bash
84cargo bench
85```
86 */
87
88#![cfg_attr(not(feature = "std"), no_std)]
89#![cfg_attr(docsrs, feature(doc_cfg))]
90
91extern crate alloc; // used for sorting
92
93#[cfg(feature = "std")]
94mod std_functions;
95
96use core::{cmp::Ordering, str::Chars};
97
98#[cfg(feature = "std")]
99pub use std_functions::*;
100
101/// Compare two strings.
102pub fn compare_str<A: AsRef<str>, B: AsRef<str>>(a: A, b: B) -> Ordering {
103 let mut c1 = a.as_ref().chars();
104 let mut c2 = b.as_ref().chars();
105
106 // this flag is to handle something like "1點" < "1-1點"
107 let mut last_is_number = false;
108
109 // this flag is to handle something like "1a" > "01"
110 let mut pre_answer = Ordering::Equal;
111
112 let mut v1: Option<char> = None;
113 let mut v2: Option<char> = None;
114
115 loop {
116 let mut ca = {
117 match v1.take() {
118 Some(c) => c,
119 None => match c1.next() {
120 Some(c) => c,
121 None => {
122 if v2.take().is_some() || c2.next().is_some() {
123 return Ordering::Less;
124 } else {
125 return pre_answer;
126 }
127 },
128 },
129 }
130 };
131
132 let mut cb = {
133 match v2.take() {
134 Some(c) => c,
135 None => match c2.next() {
136 Some(c) => c,
137 None => {
138 return Ordering::Greater;
139 },
140 },
141 }
142 };
143
144 if ca.is_ascii_digit() && cb.is_ascii_digit() {
145 // count the digit length, but ignore the leading zeros and the following same part (prefix)
146 let mut la = 1usize;
147 let mut lb = 1usize;
148
149 // this counter is to handle something like "001" > "01"
150 let mut lc = 0isize;
151
152 // find the first non-zero digit in c1
153 while ca == '0' {
154 lc += 1;
155 if let Some(c) = c1.next() {
156 if c.is_ascii_digit() {
157 ca = c;
158 } else {
159 v1 = Some(c);
160 la = 0;
161 break;
162 }
163 } else {
164 la = 0;
165 break;
166 }
167 }
168
169 // find the first non-zero digit in c2
170 while cb == '0' {
171 lc -= 1;
172 if let Some(c) = c2.next() {
173 if c.is_ascii_digit() {
174 cb = c;
175 } else {
176 v2 = Some(c);
177 lb = 0;
178 break;
179 }
180 } else {
181 lb = 0;
182 break;
183 }
184 }
185
186 // consume the remaining ascii digit
187 let consume_ascii_digit = |chars: &mut Chars, store: &mut Option<char>| {
188 let mut counter = 0;
189
190 for c in chars.by_ref() {
191 if c.is_ascii_digit() {
192 counter += 1;
193 } else {
194 *store = Some(c);
195 break;
196 }
197 }
198
199 counter
200 };
201
202 let mut ordering = Ordering::Equal;
203
204 if la == 0 {
205 if lb == 0 {
206 // e.g. 000 vs 000, 000 vs 0000, 0000 vs 000
207 } else {
208 // e.g. 0000 vs 001
209
210 return Ordering::Less;
211 }
212 } else if lb == 0 {
213 // e.g. 001 vs 0000
214
215 return Ordering::Greater;
216 } else {
217 // e.g. 1 vs 12, 001 vs 0012
218
219 // skip the same prefix and compare the next ascii digit
220 loop {
221 ordering = ca.cmp(&cb);
222
223 if ordering == Ordering::Equal {
224 if let Some(c) = c1.next() {
225 if c.is_ascii_digit() {
226 if let Some(cc) = c2.next() {
227 if cc.is_ascii_digit() {
228 ca = c;
229 cb = cc;
230 } else {
231 return Ordering::Greater;
232 }
233 } else {
234 return Ordering::Greater;
235 }
236 } else {
237 let n = consume_ascii_digit(&mut c2, &mut v2);
238 v1 = Some(c);
239
240 if n > 0 {
241 return Ordering::Less;
242 }
243
244 break;
245 }
246 } else if c2.next().is_some() {
247 return Ordering::Less;
248 } else {
249 break;
250 }
251 } else {
252 la += consume_ascii_digit(&mut c1, &mut v1);
253 lb += consume_ascii_digit(&mut c2, &mut v2);
254
255 if la != lb {
256 ordering = la.cmp(&lb);
257 }
258
259 break;
260 }
261 }
262 }
263
264 if ordering == Ordering::Equal {
265 match lc.cmp(&0) {
266 Ordering::Equal => {
267 last_is_number = true;
268 },
269 Ordering::Greater => {
270 if pre_answer == Ordering::Equal {
271 pre_answer = Ordering::Greater;
272 } else {
273 // ignore
274 }
275 },
276 Ordering::Less => {
277 if pre_answer == Ordering::Equal {
278 pre_answer = Ordering::Less;
279 } else {
280 // ignore
281 }
282 },
283 }
284 } else {
285 return ordering;
286 }
287 } else {
288 match ca.cmp(&cb) {
289 Ordering::Equal => last_is_number = false,
290 Ordering::Greater => {
291 return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
292 Ordering::Less
293 } else {
294 Ordering::Greater
295 };
296 },
297 Ordering::Less => {
298 return if last_is_number && (ca > (255 as char)) ^ (cb > (255 as char)) {
299 Ordering::Greater
300 } else {
301 Ordering::Less
302 };
303 },
304 }
305 }
306 }
307}
308
309// String-key sorting
310
311/// Sort a slice by a `str` key, but may not preserve the order of equal elements.
312#[inline]
313pub fn sort_slice_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
314 slice: &mut [A],
315 mut f: F,
316) {
317 slice.sort_unstable_by(|a, b| compare_str(f(a), f(b)));
318}
319
320/// Sort a slice by a `str` key.
321#[inline]
322pub fn sort_slice_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
323 slice: &mut [A],
324 mut f: F,
325) {
326 slice.sort_by(|a, b| compare_str(f(a), f(b)));
327}
328
329/// Reversely sort a slice by a `str` key, but may not preserve the order of equal elements.
330#[inline]
331pub fn sort_slice_rev_unstable_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
332 slice: &mut [A],
333 mut f: F,
334) {
335 slice.sort_unstable_by(|a, b| compare_str(f(b), f(a)));
336}
337
338/// Reversely sort a slice by a `str` key.
339#[inline]
340pub fn sort_slice_rev_by_str_key<A, T: ?Sized + AsRef<str>, F: FnMut(&A) -> &T>(
341 slice: &mut [A],
342 mut f: F,
343) {
344 slice.sort_by(|a, b| compare_str(f(b), f(a)));
345}
346
347// Direct string slice sorting
348
349/// Sort a `str` slice.
350#[inline]
351pub fn sort_str_slice<S: AsRef<str>>(slice: &mut [S]) {
352 slice.sort_unstable_by(|a, b| compare_str(a, b));
353}
354
355/// Reversely sort a `str` slice.
356#[inline]
357pub fn sort_str_slice_rev<S: AsRef<str>>(slice: &mut [S]) {
358 slice.sort_unstable_by(|a, b| compare_str(b, a));
359}