slugify/
lib.rs

//! **A utility macro for flexible slug generation that handles Unicode.**
2//! 
//! The `slugify!` macro implements a flexible slug generator, allowing for stop words, a custom separator
//! and maximum length options. The macro provides a simple interface with sane default parameters
//! but also allows the parameters to be overridden when needed.
6//! 
7//! Features:
8//!
9//!* Unicode strings support (phonetic conversion).
10//!* Support for custom slug separator.
11//!* Stop words filtering.
12//!* Slug maximum length support.
13//!
14//! 
15//!# Usage
16//! 
17//! This crate is on crates.io and can be used by adding `slugify` to the dependencies in your project's
18//! `Cargo.toml`
19//! 
20//! ```toml
21//! [dependencies]
22//! slugify = "0.1.0"
23//! ```
24//! 
25//!  and this to your crate root:
26//! 
27//!```rust,ignore
28//! #[macro_use] extern crate slugify;
29//!use slugify::slugify;
30//!```
31//! 
32//!# Examples
33//!
34//!## Basic slug generation
35//! 
36//!```rust
37//! # #[macro_use] extern crate slugify;
38//! # use slugify::slugify;
39//! # fn main() {
40//!assert_eq!(slugify!("hello world"), "hello-world");
41//! # }
42//!```
43//! 
44//!## Using a custom separator
45//! 
46//! ```rust
47//! # #[macro_use] extern crate slugify;
48//! # use slugify::slugify;
49//! # fn main() {
50//!assert_eq!(slugify!("hello world", separator = "."), "hello.world");
51//!assert_eq!(slugify!("hello world", separator = " "), "hello world");
52//! # }
53//! ```
54//! 
55//!## Stop words filtering
56//! 
57//!```rust
58//! # #[macro_use] extern crate slugify;
59//! # use slugify::slugify;
60//! # fn main() {
61//!assert_eq!(slugify!("the quick brown fox jumps over the lazy dog", stop_words = "the,fox"), "quick-brown-jumps-over-lazy-dog");
62//! # }
63//!```
64//! 
65//!## Maximum length
66//! 
67//!```rust
68//! # #[macro_use] extern crate slugify;
69//! # use slugify::slugify;
70//! # fn main() {
71//!assert_eq!(slugify!("hello world", max_length = 5), "hello");
72//!assert_eq!(slugify!("the hello world", stop_words = "the", max_length = 5), "hello");
73//! # }
74//!```
75//! 
76//!## Phonetic Conversion and accented text
77//! 
78//!```rust
79//! # #[macro_use] extern crate slugify;
80//! # use slugify::slugify;
81//! # fn main() {
82//!assert_eq!(slugify!("影師嗎"), "ying-shi-ma");
83//!assert_eq!(slugify!("Æúű--cool?"), "aeuu-cool");
84//!assert_eq!(slugify!("Nín hǎo. Wǒ shì zhōng guó rén"), "nin-hao-wo-shi-zhong-guo-ren");
85//! # }
86//!```
87//! 
88//!## Passing multiple optional parameters.
89//!
//! **NOTE:** the order of optional parameters matters: **stop_words**, **separator**
//! and then **max_length**. All of them are optional; however, when specifying more than one optional parameter, this
//! order must be adhered to.
93//! 
94//!```rust
95//! # #[macro_use] extern crate slugify;
96//! # use slugify::slugify;
97//! # fn main() {
98//!assert_eq!(slugify!("the hello world", stop_words = "the", separator = "-"), "hello-world");
99//!assert_eq!(slugify!("the hello world", separator = ".", max_length = 10), "the.hello");
100//!assert_eq!(slugify!("the hello world", stop_words = "the", max_length = 5), "hello");
101//!assert_eq!(slugify!("the hello world", stop_words = "the", separator = "-", max_length = 20), "hello-world");
102//! # }
103//!```
104//!
105extern crate unidecode;
106use unidecode::unidecode;
107
108
/// Builds a slug from `$text`, optionally overriding the default parameters.
///
/// Every form expands to a call to this crate's `slugify` function. Parameters
/// that are not given fall back to these defaults:
///
/// * `stop_words` – `""` (no stop words),
/// * `separator` – `"-"`,
/// * `max_length` – `None` (no limit).
///
/// When more than one optional parameter is passed they must appear in the
/// order `stop_words`, `separator`, `max_length`.
///
/// The expansion goes through `$crate::slugify`, so the macro works even when
/// the calling scope has not imported the `slugify` function itself.
#[macro_export]
macro_rules! slugify {
    // Text only: all defaults.
    ($text:expr) => {
        $crate::slugify($text, "", "-", None)
    };

    // Exactly one optional parameter.
    ($text:expr, stop_words=$stopwords:expr) => {
        $crate::slugify($text, $stopwords, "-", None)
    };

    ($text:expr, separator=$sep:expr) => {
        $crate::slugify($text, "", $sep, None)
    };

    ($text:expr, max_length=$len:expr) => {
        $crate::slugify($text, "", "-", Some($len))
    };

    // Two optional parameters, in canonical order.
    ($text:expr, stop_words=$stopwords:expr, separator=$sep:expr) => {
        $crate::slugify($text, $stopwords, $sep, None)
    };

    ($text:expr, stop_words=$stopwords:expr, max_length=$len:expr) => {
        $crate::slugify($text, $stopwords, "-", Some($len))
    };

    ($text:expr, separator=$sep:expr, max_length=$len:expr) => {
        $crate::slugify($text, "", $sep, Some($len))
    };

    // All three optional parameters.
    ($text:expr, stop_words=$stopwords:expr, separator=$sep:expr, max_length=$len:expr) => {
        $crate::slugify($text, $stopwords, $sep, Some($len))
    };
}
161
162pub fn slugify(string: &str, stop_words: &str, sep: &str, max_length: Option<usize>) -> String {
163    let char_vec: Vec<char> = sep.chars().collect();
164    let mut string: String = unidecode(string.into())
165        .to_lowercase()
166        .trim()
167        .trim_matches(char_vec[0])
168        .replace(' ', &sep.to_string());
169
170    // remove stop words
171    for word in stop_words.split(",") {
172        if !word.is_empty() {
173            string = string.replace(word, &sep.to_string());
174        }
175
176    }
177
178    let mut slug = Vec::with_capacity(string.len());
179
180    let mut is_sep = true;
181
182    for x in string.chars() {
183        match x {
184            'a'...'z' | '0'...'9' => {
185                is_sep = false;
186                slug.push(x as u8);
187            }
188            _ => {
189                if !is_sep {
190                    is_sep = true;
191                    slug.push(char_vec[0] as u8);
192                } else {
193                }
194            }
195        }
196    }
197
198    if slug.last() == Some(&(char_vec[0] as u8)) {
199        slug.pop();
200    }
201
202    let mut s = String::from_utf8(slug).unwrap();
203
204    match max_length {
205        Some(x) => {
206            s.truncate(x);
207            s = s.trim_right_matches(char_vec[0]).to_string();
208        }
209        None => {}
210    }
211
212    s
213
214}
215
216
217
/// Unit tests for both the `slugify` function and the `slugify!` macro.
#[cfg(test)]
mod tests {
    // Import relative to the parent module so the path resolves the same way
    // under every Rust edition.
    use super::slugify;

    #[test]
    fn basic() {
        assert_eq!(slugify("hello world", "", "-", None), "hello-world");
        // Trailing separators and whitespace must not leak into the slug.
        assert_eq!(slugify("hello world-", "", "-", None), "hello-world");
        assert_eq!(slugify("hello world ", "", "-", None), "hello-world");
    }

    #[test]
    fn test_email() {
        assert_eq!(slugify!("alice@bob.com"), "alice-bob-com");
    }

    #[test]
    fn test_starts_with_number() {
        assert_eq!(slugify!("10 amazing secrets"), "10-amazing-secrets");
    }

    #[test]
    fn test_contains_numbers() {
        assert_eq!(slugify!("the 101 dalmatians"), "the-101-dalmatians");
    }

    #[test]
    fn test_ends_with_number() {
        assert_eq!(slugify!("lucky number 7"), "lucky-number-7");
    }

    #[test]
    fn test_numbers_only() {
        assert_eq!(slugify!("101"), "101");
    }

    #[test]
    fn test_numbers_and_symbols() {
        assert_eq!(
            slugify!("1000 reasons you are #1"),
            "1000-reasons-you-are-1"
        );
    }

    #[test]
    fn test_stop_words() {
        assert_eq!(slugify("hello world", "world", "-", None), "hello");
        assert_eq!(slugify!("hello world", stop_words = "world"), "hello");
    }

    #[test]
    fn test_differently_cased_stopword_match() {
        // The input is lowercased before stop words are applied.
        assert_eq!(slugify("Foo A FOO B foo C", "foo", "-", None), "a-b-c");
    }

    #[test]
    fn test_multiple_stop_words() {
        assert_eq!(
            slugify(
                "the quick brown fox jumps over the lazy dog",
                "the",
                "-",
                None
            ),
            "quick-brown-fox-jumps-over-lazy-dog"
        );
        assert_eq!(
            slugify(
                "the quick brown fox jumps over the lazy dog",
                "the,fox",
                "-",
                None
            ),
            "quick-brown-jumps-over-lazy-dog"
        );
        assert_eq!(
            slugify!(
                "the quick brown fox jumps over the lazy dog",
                stop_words = "the,fox"
            ),
            "quick-brown-jumps-over-lazy-dog"
        );
    }

    #[test]
    fn test_stopwords_with_different_separator() {
        assert_eq!(
            slugify(
                "the quick brown fox jumps over the lazy dog",
                "the",
                " ",
                None
            ),
            "quick brown fox jumps over lazy dog"
        );
        assert_eq!(
            slugify!(
                "the quick brown fox jumps over the lazy dog",
                stop_words = "the",
                separator = " "
            ),
            "quick brown fox jumps over lazy dog"
        );
    }

    #[test]
    fn test_separator() {
        assert_eq!(slugify("hello world", "", ".", None), "hello.world");
        assert_eq!(slugify("hello world", "", "_", None), "hello_world");
        assert_eq!(slugify!("hello world", separator = "_"), "hello_world");
    }

    #[test]
    fn test_phonetic_conversion() {
        assert_eq!(slugify("影師嗎", "", "-", None), "ying-shi-ma");
    }

    #[test]
    fn test_accented_text() {
        assert_eq!(slugify("Æúű--cool?", "", "-", None), "aeuu-cool");
        assert_eq!(
            slugify("Nín hǎo. Wǒ shì zhōng guó rén", "", "-", None),
            "nin-hao-wo-shi-zhong-guo-ren"
        );
    }

    #[test]
    fn test_accented_text_non_word_chars() {
        assert_eq!(slugify!("jaja---lol-méméméoo--a"), "jaja-lol-mememeoo-a");
    }

    #[test]
    fn test_cyrillic_text() {
        assert_eq!(slugify!("Компьютер"), "komp-iuter");
    }

    #[test]
    fn test_macro() {
        assert_eq!(slugify!("Компьютер"), "komp-iuter");
        assert_eq!(slugify!("hello world", separator = "-"), "hello-world");
        assert_eq!(slugify!("hello world", separator = " "), "hello world");
        assert_eq!(slugify!("hello world", max_length = 5), "hello");
        // A separator left dangling by truncation is removed.
        assert_eq!(slugify!("hello world", max_length = 6), "hello");
        assert_eq!(
            slugify!("hello world", separator = " ", max_length = 8),
            "hello wo"
        );
        assert_eq!(
            slugify!("hello world", separator = "x", max_length = 8),
            "helloxwo"
        );
        assert_eq!(
            slugify!("the hello world", stop_words = "the", separator = "-"),
            "hello-world"
        );
        assert_eq!(
            slugify!("the hello world", stop_words = "the", max_length = 5),
            "hello"
        );
        assert_eq!(
            slugify!(
                "the hello world",
                stop_words = "the",
                separator = "-",
                max_length = 10
            ),
            "hello-worl"
        );
        assert_eq!(
            slugify!(
                "the hello world",
                stop_words = "the",
                separator = "-",
                max_length = 20
            ),
            "hello-world"
        );
    }
}