1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
//! A crate for sorting the way humans would.
//!
//! This crate aims to provide the sorting behavior a human might expect.
//! Say you have a directory of files all called "Something-" with a sequential number appended.
//! With traditional sorting by character the file "Something-11" would occur after the file
//! "Something-2".
//! Often this is not the desired behavior. This crate implements a more human-compatible ordering
//! by treating each run of consecutive digits as a single number when sorting.
//!
//! The crate implements the trait `HumaneOrder` for common types (currently only most string types) and `HumaneSortable` for slices of
//! `HumaneOrder` types.
//!
//! The API is very simple to use:
//!
//! ```
//! use humanesort::prelude::*;
//! let mut sort_me = vec!["something-11", "something-1", "something-2"];
//! sort_me.humane_sort();
//! assert_eq!(vec!["something-1", "something-2", "something-11"], sort_me);
//! ```
//!
//! ## Details on String Sorting
//!
//! For sorting, a string is split into numeric and non-numeric sections.
//! The comparison starts at the first section of each string; if neither section is (by any of
//! the rules) larger than the other, the comparison moves on to the next section. For comparison
//! of sections the following rules are used.
//!
//! * Any non-numbers are compared using their usual compare methods
//! * Numbers are always smaller than non-numbers
//! * Numeric sequences are ordered by their numeric value
//! * Empty sequences are always smaller than non-empty ones
//!
//!
//! These examples should give you some idea of how this works out in practice:
//!
//! ```
//! use humanesort::HumaneSortable;
//! let mut a = ["lol-1", "lal-2"];
//! a.humane_sort();
//! assert_eq!(a, ["lal-2", "lol-1"])
//! ```
//!
//! ```
//! use humanesort::HumaneSortable;
//! let mut a = ["13-zzzz", "1-ffff", "12-aaaa"];
//! a.humane_sort();
//! assert_eq!(a, ["1-ffff", "12-aaaa", "13-zzzz"])
//! ```
extern crate unicode_segmentation;
pub mod prelude;
use std::iter::Peekable;
use unicode_segmentation::{GraphemeIndices, UnicodeSegmentation};
use std::cmp::Ordering;

#[cfg(test)]
mod tests {
    // Checks that the tokenizer splits its input where the classification changes.
    #[test]
    fn sorting_test() {
        use ::SortingType;
        let input = "11LOL";
        // A piece counts as numeric only when every char in it is numeric.
        let classify = &|piece: &str| -> SortingType {
            match piece.chars().all(char::is_numeric) {
                true => SortingType::Numeric,
                false => SortingType::NonNumeric,
            }
        };
        let mut tokens = ::TokenIterator::new(input, classify);
        assert_eq!(tokens.next().unwrap().0, "11");
        assert_eq!(tokens.next().unwrap().0, "LOL");
    }

    // Checks the end-to-end ordering produced by `humane_sort` on vectors of strings.
    #[test]
    fn sort_slice() {
        use HumaneSortable;

        // Pure numbers are ordered by value and come before letters.
        let mut mixed = vec!["11", "2", "a", "1"];
        mixed.humane_sort();
        assert_eq!(vec!["1", "2", "11", "a"], mixed);

        // A shared prefix followed by a number is ordered by that number.
        let mut suffixed = vec!["something-11", "something-1", "something-2"];
        suffixed.humane_sort();
        assert_eq!(vec!["something-1", "something-2", "something-11"], suffixed);
    }
}

/// Classifies a piece of text as numeric or non-numeric.
///
/// The text is `Numeric` exactly when it parses as a `u64`; anything else
/// (including the empty string) is `NonNumeric`.
fn sorting_type(x: &str) -> SortingType {
    if x.parse::<u64>().is_ok() {
        SortingType::Numeric
    } else {
        SortingType::NonNumeric
    }
}

/// Trait for collections of `HumaneOrder` types.
///
/// This file implements it for slices `[T]` where `T: HumaneOrder`.
pub trait HumaneSortable {
    /// Sorts the collection in place.
    ///
    /// The slice implementation in this file orders elements with
    /// `HumaneOrder::humane_cmp`.
    fn humane_sort(&mut self);
}

impl<T: HumaneOrder> HumaneSortable for [T] {
    /// Sorts the slice in place, ordering elements with `HumaneOrder::humane_cmp`.
    fn humane_sort(&mut self) {
        // Hand the comparison method straight to `sort_by` as the comparator.
        self.sort_by(T::humane_cmp);
    }
}

/// Trait for types that can be ordered in a human friendly way.
///
/// This file provides a blanket implementation for every `T: AsRef<str>`.
pub trait HumaneOrder {
    /// Compares `self` with `other` and returns how `self` is ordered relative to `other`.
    fn humane_cmp(&self, other: &Self) -> Ordering;
}

impl<T> HumaneOrder for T where T: AsRef<str> {
    /// Compares two strings token by token in a human friendly order.
    ///
    /// Both strings are split by `TokenIterator` (classifying with `sorting_type`)
    /// into runs of numeric and non-numeric text, and the runs are compared pairwise:
    ///
    /// * a string that runs out of tokens first is `Less`;
    /// * a numeric token is `Less` than a non-numeric token;
    /// * two numeric tokens are ordered by their numeric value, ignoring leading zeros;
    /// * two non-numeric tokens are ordered by the usual `str` comparison.
    ///
    /// The first pair that does not compare `Equal` decides the result.
    ///
    /// Numeric tokens are compared as digit strings instead of being parsed into an
    /// integer, so a digit run too long to fit in a `usize` cannot cause a panic.
    fn humane_cmp(&self, other: &Self) -> Ordering {
        // Strips leading zeros so that the remaining length reflects the magnitude.
        fn significant_digits(digits: &str) -> &str {
            let start = digits.find(|c: char| c != '0').unwrap_or(digits.len());
            &digits[start..]
        }

        let sorting_type_function = &sorting_type;
        let mut self_tokens = TokenIterator::new(self.as_ref(), sorting_type_function);
        let mut other_tokens = TokenIterator::new(other.as_ref(), sorting_type_function);
        loop {
            let (ours, theirs) = match (self_tokens.next(), other_tokens.next()) {
                (None, None) => return Ordering::Equal,
                (None, Some(_)) => return Ordering::Less,
                (Some(_), None) => return Ordering::Greater,
                (Some(ours), Some(theirs)) => (ours, theirs),
            };
            let cmp = match (ours.1, theirs.1) {
                (SortingType::Numeric, SortingType::NonNumeric) => return Ordering::Less,
                (SortingType::NonNumeric, SortingType::Numeric) => return Ordering::Greater,
                (SortingType::Numeric, SortingType::Numeric) => {
                    // `sorting_type` only accepts graphemes that parse as an integer, so a
                    // numeric token is a run of ASCII digits. Without leading zeros, more
                    // digits means a larger value, and equal-length runs order
                    // lexicographically exactly as they do numerically.
                    let our_digits = significant_digits(ours.0);
                    let their_digits = significant_digits(theirs.0);
                    match our_digits.len().cmp(&their_digits.len()) {
                        Ordering::Equal => our_digits.cmp(their_digits),
                        unequal => unequal,
                    }
                }
                (SortingType::NonNumeric, SortingType::NonNumeric) => ours.0.cmp(theirs.0),
            };
            // Equal tokens decide nothing; move on to the next pair.
            if cmp != Ordering::Equal {
                return cmp;
            }
        }
    }
}

/// Classification of a piece of text, used to decide how tokens are compared.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum SortingType {
    /// Text that `sorting_type` could parse as a `u64`.
    Numeric,
    /// Any text that is not classified as `Numeric`.
    NonNumeric
}

/// Iterator that splits a string into tokens, where each token is a run of
/// consecutive graphemes for which the classifier returns equal values.
///
/// Each item is the token's slice of the original string paired with its classification.
struct TokenIterator<'a, T> where T: Eq + Copy + 'a {
    /// Classifier applied to individual graphemes; a token ends where its result changes.
    token_type: &'a Fn(&str) -> T,
    /// The string being tokenized; yielded tokens are slices of it.
    string: &'a str,
    /// Grapheme cursor over `string`, peekable so the next grapheme can be
    /// inspected before deciding whether it belongs to the current token.
    grapheme_iterator: Peekable<GraphemeIndices<'a>>
}

impl<'a, T> TokenIterator<'a, T> where T: Eq + Copy {
    fn new(s: &'a str, func: &'a Fn(&str) -> T) -> Self {
        TokenIterator {
            token_type: func,
            string: s,
            grapheme_iterator: UnicodeSegmentation::grapheme_indices(&s[..], true).peekable()
        }
    }
}

impl<'a, T> Iterator for TokenIterator<'a, T> where T: Eq + Copy {
    type Item = (&'a str, T);

    /// Returns the next token together with its classification.
    ///
    /// A token starts at the next unread grapheme and extends over all following
    /// graphemes whose classification compares equal to it. Returns `None` once
    /// the whole string has been consumed.
    ///
    /// Each grapheme is classified exactly once.
    fn next(&mut self) -> Option<(&'a str, T)> {
        let (first_index, first_grapheme) = match self.grapheme_iterator.next() {
            Some(pair) => pair,
            None => return None, // The string is exhausted: no more tokens.
        };
        let mut current_type = (self.token_type)(first_grapheme);
        loop {
            // Look ahead without consuming: a grapheme of a different type must
            // remain in the iterator so it can start the following token.
            let (next_index, next_grapheme) = match self.grapheme_iterator.peek().cloned() {
                Some(pair) => pair,
                // End of input: the token runs to the end of the string.
                None => return Some((&self.string[first_index..], current_type)),
            };
            let next_type = (self.token_type)(next_grapheme);
            if current_type != next_type {
                return Some((&self.string[first_index..next_index], current_type));
            }
            // Same classification: absorb the peeked grapheme into the current token.
            self.grapheme_iterator.next();
            current_type = next_type;
        }
    }
}