lexicmp/
lib.rs

1//! This is a library to compare and sort strings (or file paths) **lexicographically**. This
2//! means that non-ASCII characters such as `á` or `ß` are treated like their closest ASCII
3//! character: `á` is treated as `a`, `ß` is treated as `ss`, etc.
4//!
5//! Lexical comparisons are case-insensitive. Alphanumeric characters are sorted after all other
6//! characters (punctuation, whitespace, special characters, emojis, ...).
7//!
8//! It is possible to enable **natural sorting**, which also handles ASCII numbers. For example,
9//! `50` is less than `100` with natural sorting turned on. It's also possible to skip
10//! characters that aren't alphanumeric, so e.g. `f-5` is next to `f5`.
11//!
12//! If different strings have the same ASCII representation (e.g. `"Foo"` and `"fóò"`), it
13//! falls back to the default method from the standard library, so sorting is deterministic.
14//!
15//! <table><tr><td>
16//! <b>NOTE</b>: This crate doesn't attempt to be correct for every locale, but it should work
17//! reasonably well for a wide range of locales, while providing excellent performance.
18//! </td></tr></table>
19//!
20//! ## Usage
21//!
22//! To sort strings or paths, you can use the `StringSort` or `PathSort` trait:
23//!
24//! ```rust
25//! use lexicmp::{StringSort, natural_lexical_cmp};
26//!
27//! let mut strings = vec!["ß", "é", "100", "hello", "world", "50", ".", "B!"];
28//! strings.string_sort_unstable(natural_lexical_cmp);
29//!
30//! assert_eq!(&strings, &[".", "50", "100", "B!", "é", "hello", "ß", "world"]);
31//! ```
32//!
33//! There are eight comparison functions:
34//!
35//! | Function                         | lexico­graphical | natural | skips non-alphanumeric chars |
36//! | -------------------------------- |:---------------:|:-------:|:----------------------------:|
37//! | `cmp`                            |                 |         |                              |
38//! | `only_alnum_cmp`                 |                 |         | yes                          |
39//! | `lexical_cmp`                    | yes             |         |                              |
40//! | `lexical_only_alnum_cmp`         | yes             |         | yes                          |
41//! | `natural_cmp`                    |                 | yes     |                              |
42//! | `natural_only_alnum_cmp`         |                 | yes     | yes                          |
43//! | `natural_lexical_cmp`            | yes             | yes     |                              |
44//! | `natural_lexical_­only_alnum_cmp` | yes             | yes     | yes                          |
45//!
46//! Note that only the functions that sort lexicographically are case insensitive.
47
48mod cmp;
49pub mod iter;
50
51pub use cmp::{
52    cmp, lexical_cmp, lexical_only_alnum_cmp, natural_cmp, natural_lexical_cmp,
53    natural_lexical_only_alnum_cmp, natural_only_alnum_cmp, only_alnum_cmp,
54};
55
56use core::cmp::Ordering;
57
58/// A trait to sort strings. This is a convenient wrapper for the standard library sort functions.
59///
60/// This trait is implemented for all slices whose inner type implements `AsRef<str>`.
61///
62/// ## Example
63///
64/// ```rust
65/// use lexicmp::StringSort;
66///
67/// let slice = &mut ["Hello", " world", "!"];
68/// slice.string_sort_unstable(lexicmp::natural_lexical_cmp);
69///
70/// // or trim the strings before comparing:
71/// slice.string_sort_unstable_by(lexicmp::natural_lexical_cmp, str::trim_start);
72/// ```
73///
74/// If you want to sort file paths or OsStrings, use the `PathSort` trait instead.
75pub trait StringSort {
76    /// Sorts the items using the provided comparison function.
77    ///
78    /// **This is a stable sort, which is often not required**.
79    /// You can use `string_sort_unstable` instead.
80    ///
81    /// ## Example
82    ///
83    /// ```rust
84    /// use lexicmp::StringSort;
85    ///
86    /// let slice = &mut ["Lorem", "ipsum", "dolor", "sit", "amet"];
87    /// slice.string_sort(lexicmp::natural_lexical_cmp);
88    ///
89    /// assert_eq!(slice, &["amet", "dolor", "ipsum", "Lorem", "sit"]);
90    /// ```
91    fn string_sort(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
92
93    /// Sorts the items using the provided comparison function.
94    ///
95    /// This sort is unstable: The original order of equal strings is not preserved.
96    /// It is slightly more efficient than the stable alternative.
97    ///
98    /// ## Example
99    ///
100    /// ```rust
101    /// use lexicmp::StringSort;
102    ///
103    /// let slice = &mut ["The", "quick", "brown", "fox"];
104    /// slice.string_sort_unstable(lexicmp::natural_lexical_cmp);
105    ///
106    /// assert_eq!(slice, &["brown", "fox", "quick", "The"]);
107    /// ```
108    fn string_sort_unstable(&mut self, cmp: impl FnMut(&str, &str) -> Ordering);
109
110    /// Sorts the items using the provided comparison function and another function that is
111    /// applied to each string before the comparison. This can be used to trim the strings.
112    ///
113    /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
114    /// In this case you should use `[_]::sort_by()` directly.
115    ///
116    /// **This is a stable sort, which is often not required**.
117    /// You can use `string_sort_unstable` instead.
118    ///
119    /// ## Example
120    ///
121    /// ```rust
122    /// use lexicmp::StringSort;
123    ///
124    /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
125    /// slice.string_sort_by(lexicmp::natural_lexical_cmp, str::trim_start);
126    ///
127    /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
128    /// ```
129    fn string_sort_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
130    where
131        Cmp: FnMut(&str, &str) -> Ordering,
132        Map: FnMut(&str) -> &str;
133
134    /// Sorts the items using the provided comparison function and another function that is
135    /// applied to each string before the comparison. This can be used to trim the strings.
136    ///
137    /// If you do anything more complicated than trimming, you'll likely run into lifetime problems.
138    /// In this case you should use `[_]::sort_by()` directly.
139    ///
140    /// This sort is unstable: The original order of equal strings is not preserved.
141    /// It is slightly more efficient than the stable alternative.
142    ///
143    /// ## Example
144    ///
145    /// ```rust
146    /// use lexicmp::StringSort;
147    ///
148    /// let slice = &mut ["Eeny", " meeny", " miny", " moe"];
149    /// slice.string_sort_unstable_by(lexicmp::natural_lexical_cmp, str::trim_start);
150    ///
151    /// assert_eq!(slice, &["Eeny", " meeny", " miny", " moe"]);
152    /// ```
153    fn string_sort_unstable_by<Cmp, Map>(&mut self, cmp: Cmp, map: Map)
154    where
155        Cmp: FnMut(&str, &str) -> Ordering,
156        Map: FnMut(&str) -> &str;
157}
158
159impl<A: AsRef<str>> StringSort for [A] {
160    fn string_sort(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
161        self.sort_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
162    }
163
164    fn string_sort_unstable(&mut self, mut cmp: impl FnMut(&str, &str) -> Ordering) {
165        self.sort_unstable_by(|lhs, rhs| cmp(lhs.as_ref(), rhs.as_ref()));
166    }
167
168    fn string_sort_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
169    where
170        Cmp: FnMut(&str, &str) -> Ordering,
171        Map: FnMut(&str) -> &str,
172    {
173        self.sort_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
174    }
175
176    fn string_sort_unstable_by<Cmp, Map>(&mut self, mut cmp: Cmp, mut map: Map)
177    where
178        Cmp: FnMut(&str, &str) -> Ordering,
179        Map: FnMut(&str) -> &str,
180    {
181        self.sort_unstable_by(|lhs, rhs| cmp(map(lhs.as_ref()), map(rhs.as_ref())));
182    }
183}
184
185#[test]
186fn test_sort() {
187    macro_rules! assert_lexically_sorted {
188        ($T:ident, $array:expr, natural = $natural:expr) => {{
189            let mut sorted = $array.clone();
190            if $natural {
191                sorted.$T(natural_lexical_cmp);
192            } else {
193                sorted.$T(lexical_cmp);
194            }
195
196            assert_eq!($array, sorted);
197        }};
198    }
199
200    let strings = [
201        "-", "-$", "-a", "100", "50", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
202    ];
203    let strings_nat = [
204        "-", "-$", "-a", "50", "100", "a", "ä", "aa", "áa", "AB", "Ab", "ab", "AE", "ae", "æ", "af",
205    ];
206
207    assert_lexically_sorted!(string_sort, strings, natural = false);
208    assert_lexically_sorted!(string_sort, strings_nat, natural = true);
209}