lexicmp/lib.rs
1//! This is a library to compare and sort strings (or file paths) **lexicographically**. This
2//! means that non-ASCII characters such as `á` or `ß` are treated like their closest ASCII
3//! character: `á` is treated as `a`, `ß` is treated as `ss`, etc.
4//!
5//! Lexical comparisons are case-insensitive. Alphanumeric characters are sorted after all other
6//! characters (punctuation, whitespace, special characters, emojis, ...).
7//!
8//! It is possible to enable **natural sorting**, which also handles ASCII numbers. For example,
9//! `50` is less than `100` with natural sorting turned on. It's also possible to skip
10//! characters that aren't alphanumeric, so e.g. `f-5` is next to `f5`.
11//!
12//! If different strings have the same ASCII representation (e.g. `"Foo"` and `"fóò"`), it
13//! falls back to the default method from the standard library, so sorting is deterministic.
14//!
15//! <table><tr><td>
16//! <b>NOTE</b>: This crate doesn't attempt to be correct for every locale, but it should work
17//! reasonably well for a wide range of locales, while providing excellent performance.
18//! </td></tr></table>
19//!
20//! ## Usage
21//!
22//! To sort strings or paths, you can use the `StringSort` or `PathSort` trait:
23//!
24//! ```rust
25//! use lexicmp::{StringSort, natural_lexical_cmp};
26//!
27//! let mut strings = vec!["ß", "é", "100", "hello", "world", "50", ".", "B!"];
28//! strings.string_sort_unstable(natural_lexical_cmp);
29//!
30//! assert_eq!(&strings, &[".", "50", "100", "B!", "é", "hello", "ß", "world"]);
31//! ```
32//!
33//! There are eight comparison functions:
34//!
35//! | Function | lexicographical | natural | skips non-alphanumeric chars |
36//! | -------------------------------- |:---------------:|:-------:|:----------------------------:|
37//! | `cmp` | | | |
38//! | `only_alnum_cmp` | | | yes |
39//! | `lexical_cmp` | yes | | |
40//! | `lexical_only_alnum_cmp` | yes | | yes |
41//! | `natural_cmp` | | yes | |
42//! | `natural_only_alnum_cmp` | | yes | yes |
43//! | `natural_lexical_cmp` | yes | yes | |
44//! | `natural_lexical_only_alnum_cmp` | yes | yes | yes |
45//!
46//! Note that only the functions that sort lexicographically are case insensitive.
47
48mod cmp;
49pub mod iter;
50
51pub use cmp::{
52 cmp, lexical_cmp, lexical_only_alnum_cmp, natural_cmp, natural_lexical_cmp,
53 natural_lexical_only_alnum_cmp, natural_only_alnum_cmp, only_alnum_cmp,
54};
55
56use core::cmp::Ordering;
57
58/// A trait to sort strings. This is a convenient wrapper for the standard library sort functions.
59///
60/// This trait is implemented for all slices whose inner type implements `AsRef<str>`.
61///
62/// ## Example
63///
64/// ```rust
65/// use lexicmp::StringSort;
66///
67/// let slice = &mut ["Hello", " world", "!"];
68/// slice.string_sort_unstable(lexicmp::natural_lexical_cmp);
69///
70/// // or trim the strings before comparing:
71/// slice.string_sort_unstable_by(lexicmp::natural_lexical_cmp, str::trim_start);
72/// ```
73///
74/// If you want to sort file paths or OsStrings, use the `PathSort` trait instead.
75pub trait StringSort {
76 /// Sorts the items using the provided comparison function.
77 ///
78 /// **This is a stable sort, which is often not required**.
79 /// You can use `string_sort_unstable` instead.
80 ///
81 /// ## Example
82 ///
83 /// ```rust
84 /// use lexicmp::StringSort;
85 ///
86 /// let slice = &mut ["Lorem", "ipsum", "dolor", "sit", "amet"];
87 /// slice.string_sort(lexicmp::natural_lexical_cmp);
88 ///
89 /// assert_eq!(slice, &["amet", "dolor", "ipsum", "Lorem", "sit"]);
90 /// ```
91 fn string_sort(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
92
93 /// Sorts the items using the provided comparison function.
94 ///
95 /// This sort is unstable: The original order of equal strings is not preserved.
96 /// It is slightly more efficient than the stable alternative.
97 ///
98 /// ## Example
99 ///
100 /// ```rust
101 /// use lexicmp::StringSort;
102 ///
103 /// let slice = &mut ["The", "quick", "brown", "fox"];
104 /// slice.string_sort_unstable(lexicmp::natural_lexical_cmp);
105 ///
106 /// assert_eq!(slice, &["brown", "fox", "quick", "The"]);
107 /// ```
108 fn string_sort_unstable(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
109
110 /// Sorts the items using the provided comparison function and another function that is
111 /// applied to each string before the comparison. This can be used to trim the strings.
112 ///
113 /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
114 /// In this case you should use `[_]::sort_by()` directly.
115 ///
116 /// **This is a stable sort, which is often not required**.
117 /// You can use `string_sort_unstable` instead.
118 ///
119 /// ## Example
120 ///
121 /// ```rust
122 /// use lexicmp::StringSort;
123 ///
124 /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
125 /// slice.string_sort_by(lexicmp::natural_lexical_cmp, str::trim_start);
126 ///
127 /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
128 /// ```
129 fn string_sort_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
130 where
131 Cmp: FnMut(&str, &str) -> Ordering,
132 Map: FnMut(&str) -> &str;
133
134 /// Sorts the items using the provided comparison function and another function that is
135 /// applied to each string before the comparison. This can be used to trim the strings.
136 ///
137 /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
138 /// In this case you should use `[_]::sort_by()` directly.
139 ///
140 /// This sort is unstable: The original order of equal strings is not preserved.
141 /// It is slightly more efficient than the stable alternative.
142 ///
143 /// ## Example
144 ///
145 /// ```rust
146 /// use lexicmp::StringSort;
147 ///
148 /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
149 /// slice.string_sort_unstable_by(lexicmp::natural_lexical_cmp, str::trim_start);
150 ///
151 /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
152 /// ```
153 fn string_sort_unstable_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
154 where
155 Cmp: FnMut(&str, &str) -> Ordering,
156 Map: FnMut(&str) -> &str;
157}
158
159impl<A: AsRef<str>> StringSort for [A] {
160 fn string_sort(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
161 self.sort_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
162 }
163
164 fn string_sort_unstable(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
165 self.sort_unstable_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
166 }
167
168 fn string_sort_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
169 where
170 Cmp: FnMut(&str, &str) -> Ordering,
171 Map: FnMut(&str) -> &str,
172 {
173 self.sort_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
174 }
175
176 fn string_sort_unstable_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
177 where
178 Cmp: FnMut(&str, &str) -> Ordering,
179 Map: FnMut(&str) -> &str,
180 {
181 self.sort_unstable_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
182 }
183}
184
185#[test]
186fn test_sort() {
187 macro_rules! assert_lexically_sorted {
188 ($T:ident, $array:expr, natural = $natural:expr) => {{
189 let mut sorted = $array.clone();
190 if $natural {
191 sorted.$T(natural_lexical_cmp);
192 } else {
193 sorted.$T(lexical_cmp);
194 }
195
196 assert_eq!($array, sorted);
197 }};
198 }
199
200 let strings = [
201 "-", "-$", "-a", "100", "50", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
202 ];
203 let strings_nat = [
204 "-", "-$", "-a", "50", "100", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
205 ];
206
207 assert_lexically_sorted!(string_sort, strings, natural = false);
208 assert_lexically_sorted!(string_sort, strings_nat, natural = true);
209}