slugify_rs/
lib.rs

//! **A utility macro for flexible slug generation that handles Unicode.**
2//!
3//! The `slugify!` macro implements a flexible slug generator, allowing for stop words, custom separator, randomness
4//! and maximum length options. The macro provides both a simple interface with sane default parameters
//! but also allows the parameters to be overridden when needed.
6//!
7//! Features:
8//!
9//!* Unicode strings support (phonetic conversion).
10//!* Support for custom slug separator.
11//!* Stop words filtering.
12//!* Slug maximum length support.
13//!
14//!
15//!# Usage
16//!
17//! This crate is on crates.io and can be used by adding `slugify-rs` to the dependencies in your project's
18//! `Cargo.toml`
19//!
20//! ```toml
21//! [dependencies]
22//! slugify-rs = "0.0.2"
23//! ```
24//!
25//!  and this to your crate root:
26//!
27//!```rust,ignore
//!use slugify_rs::slugify;
29//!```
30//!
31//!# Examples
32//!
33//!## Basic slug generation
34//!
35//!```rust
//! # use slugify_rs::slugify;
37//! # fn main() {
38//!assert_eq!(slugify!("hello world"), "hello-world");
39//! # }
40//!```
41//!
42//!## Using a custom separator
43//!
44//! ```rust
45//! # use slugify_rs::slugify;
46//! # fn main() {
47//!assert_eq!(slugify!("hello world", separator = "."), "hello.world");
48//!assert_eq!(slugify!("hello world", separator = " "), "hello world");
49//! assert_eq!(slugify!("hello world", separator = ""), "helloworld");
50//! # }
51//! ```
52//!
53//!## Stop words filtering
54//!
55//!```rust
56//! # use slugify_rs::slugify;
57//! # fn main() {
58//!assert_eq!(slugify!("the quick brown fox jumps over the lazy dog", stop_words = "the,fox"), "quick-brown-jumps-over-lazy-dog");
59//! # }
60//!```
61//!
62//!## Maximum length
63//!
64//!```rust
65//! # use slugify_rs::slugify;
66//! # fn main() {
67//!assert_eq!(slugify!("hello world", max_length = 5), "hello");
68//!assert_eq!(slugify!("the hello world", stop_words = "the", max_length = 5), "hello");
69//! # }
70//!```
71//!
72//!## Random values added to string through nanoid
73//! ```rust
74//! # use slugify_rs::slugify;
75//! # // Default randomness is of 5 characters
76//! # fn main() {
77//! assert_eq!(slugify!("hello world", randomness=true).len(), "hello-world".len()+5);
78//! # }
79//! ```
80//! ```rust
81//! # use slugify_rs::slugify;
82//! # // You can also add custom length to the randomness
83//! # fn main() {
84//! assert_eq!(slugify!("hello world", randomness=true,randomness_length=8).len(), "hello-world".len()+8);
85//! # }
86//! ```
87//!## Phonetic Conversion and accented text
88//!
89//!```rust
90//! # use slugify_rs::slugify;
91//! # fn main() {
92//!assert_eq!(slugify!("影師嗎"), "ying-shi-ma");
93//!assert_eq!(slugify!("Æúű--cool?"), "aeuu-cool");
94//!assert_eq!(slugify!("Nín hǎo. Wǒ shì zhōng guó rén"), "nin-hao-wo-shi-zhong-guo-ren");
95//! # }
96//!```
97//!
98//!## Passing multiple optional parameters.
99//!
//! **NOTE:** the order of optional parameters matters: **stop_words**, **separator**,
//! **max_length**, **randomness** and then **randomness_length**. All of them are optional; however, when
//! specifying more than one optional parameter, this order must be adhered to. **randomness_length** may
//! only be given directly after **randomness**.
103//!
104//!```rust
105//! # use slugify_rs::slugify;
106//! # fn main() {
107//!assert_eq!(slugify!("the hello world", stop_words = "the", separator = "-"), "hello-world");
108//!assert_eq!(slugify!("the hello world", separator = ".", max_length = 10), "the.hello");
109//!assert_eq!(slugify!("the hello world", stop_words = "the", max_length = 5), "hello");
110//!assert_eq!(slugify!("the hello world", stop_words = "the", separator = "-", max_length = 20), "hello-world");
111//! # }
112//!```
113//!
114use deunicode::deunicode;
115
/// Generates a slug from `$text`, with optional keyword-style parameters.
///
/// Every form expands to a call to this crate's `slugify` function. The call is made through
/// `$crate`, so the macro works even when the caller has not imported the function itself.
///
/// Optional parameters must appear in this order; any of them may be left out:
///
/// * `stop_words = <expr>` - comma-separated words to drop (default `""`).
/// * `separator = <expr>` - string placed between words (default `"-"`).
/// * `max_length = <expr>` - maximum slug length (default: no limit).
/// * `randomness = <expr>` - whether to append a random suffix (default `false`).
/// * `randomness_length = <expr>` - size of that suffix (default `5`); only accepted directly
///   after `randomness`.
#[macro_export]
macro_rules! slugify {
    ($text:expr) => {
        $crate::slugify($text, "", "-", None, false, 5)
    };
    ($text:expr, randomness = $rand:expr) => {
        $crate::slugify($text, "", "-", None, $rand, 5)
    };
    ($text:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, "", "-", None, $rand, $rand_len)
    };

    ($text:expr, stop_words = $stop:expr) => {
        $crate::slugify($text, $stop, "-", None, false, 5)
    };
    ($text:expr, stop_words = $stop:expr, randomness = $rand:expr) => {
        $crate::slugify($text, $stop, "-", None, $rand, 5)
    };
    ($text:expr, stop_words = $stop:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, $stop, "-", None, $rand, $rand_len)
    };

    ($text:expr, separator = $sep:expr) => {
        $crate::slugify($text, "", $sep, None, false, 5)
    };
    ($text:expr, separator = $sep:expr, randomness = $rand:expr) => {
        $crate::slugify($text, "", $sep, None, $rand, 5)
    };
    ($text:expr, separator = $sep:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, "", $sep, None, $rand, $rand_len)
    };

    ($text:expr, max_length = $len:expr) => {
        $crate::slugify($text, "", "-", Some($len), false, 5)
    };
    ($text:expr, max_length = $len:expr, randomness = $rand:expr) => {
        $crate::slugify($text, "", "-", Some($len), $rand, 5)
    };
    ($text:expr, max_length = $len:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, "", "-", Some($len), $rand, $rand_len)
    };

    ($text:expr, stop_words = $stop:expr, separator = $sep:expr) => {
        $crate::slugify($text, $stop, $sep, None, false, 5)
    };
    ($text:expr, stop_words = $stop:expr, separator = $sep:expr, randomness = $rand:expr) => {
        $crate::slugify($text, $stop, $sep, None, $rand, 5)
    };
    ($text:expr, stop_words = $stop:expr, separator = $sep:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, $stop, $sep, None, $rand, $rand_len)
    };

    ($text:expr, stop_words = $stop:expr, max_length = $len:expr) => {
        $crate::slugify($text, $stop, "-", Some($len), false, 5)
    };
    ($text:expr, stop_words = $stop:expr, max_length = $len:expr, randomness = $rand:expr) => {
        $crate::slugify($text, $stop, "-", Some($len), $rand, 5)
    };
    ($text:expr, stop_words = $stop:expr, max_length = $len:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, $stop, "-", Some($len), $rand, $rand_len)
    };

    ($text:expr, separator = $sep:expr, max_length = $len:expr) => {
        $crate::slugify($text, "", $sep, Some($len), false, 5)
    };
    ($text:expr, separator = $sep:expr, max_length = $len:expr, randomness = $rand:expr) => {
        $crate::slugify($text, "", $sep, Some($len), $rand, 5)
    };
    ($text:expr, separator = $sep:expr, max_length = $len:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, "", $sep, Some($len), $rand, $rand_len)
    };

    ($text:expr, stop_words = $stop:expr, separator = $sep:expr, max_length = $len:expr) => {
        $crate::slugify($text, $stop, $sep, Some($len), false, 5)
    };
    ($text:expr, stop_words = $stop:expr, separator = $sep:expr, max_length = $len:expr, randomness = $rand:expr) => {
        $crate::slugify($text, $stop, $sep, Some($len), $rand, 5)
    };
    ($text:expr, stop_words = $stop:expr, separator = $sep:expr, max_length = $len:expr, randomness = $rand:expr, randomness_length = $rand_len:expr) => {
        $crate::slugify($text, $stop, $sep, Some($len), $rand, $rand_len)
    };
}
213
214pub fn slugify(
215    string: &str,
216    stop_words: &str,
217    sep: &str,
218    max_length: Option<usize>,
219    randomness: bool,
220    randomness_length: usize,
221) -> String {
222    let char_vec: Vec<char> = sep.chars().collect();
223    let mut string: String = deunicode(string.into())
224        .to_lowercase()
225        .trim()
226        .trim_matches(match char_vec.get(0) {
227            Some(a) => a.to_owned(),
228            None => ' ',
229        })
230        .replace(' ', &sep.to_string());
231
232    // remove stop words
233    for word in stop_words.split(",") {
234        if !word.is_empty() {
235            string = string.replace(word, &sep.to_string());
236        }
237    }
238
239    let mut slug = Vec::with_capacity(string.len());
240
241    let mut is_sep = true;
242
243    for x in string.chars() {
244        match x {
245            'a'..='z' | '0'..='9' => {
246                is_sep = false;
247                slug.push(x as u8);
248            }
249            _ => {
250                if !is_sep {
251                    is_sep = true;
252                    slug.push(char_vec[0] as u8);
253                } else {
254                }
255            }
256        }
257    }
258
259    if char_vec.len() > 0 && slug.last() == Some(&(char_vec[0] as u8)) {
260        slug.pop();
261    }
262
263    let mut s = String::from_utf8(slug).unwrap();
264
265    match max_length {
266        Some(x) => {
267            s.truncate(x);
268            s = s.trim_end_matches(char_vec[0]).to_string();
269        }
270        None => {}
271    }
272
273    // if randomness is true, generate a nanoid with of size 5 and append it to s
274    if randomness {
275        // Decrease one from randomness_length
276        let randomness_length = randomness_length - 1;
277        let nanoid = nanoid::nanoid!(randomness_length);
278        // change letters to lowercase
279        let nanoid = nanoid.to_lowercase();
280        // append separator to infront of nanoid
281        s.push_str(&sep);
282        s.push_str(&nanoid);
283    }
284    s
285}
286
#[cfg(test)]
mod tests {
    use crate::slugify;

    /// Sentence reused by the stop-word tests.
    const FOX: &str = "the quick brown fox jumps over the lazy dog";

    /// Slug of `text` with no length limit and no random suffix.
    fn plain(text: &str, stop_words: &str, sep: &str) -> String {
        slugify(text, stop_words, sep, None, false, 5)
    }

    #[test]
    fn basic() {
        // Trailing separators and whitespace must not leak into the slug.
        for &input in &["hello world", "hello world-", "hello world "] {
            assert_eq!(plain(input, "", "-"), "hello-world");
        }

        let randomized = slugify("hello world ", "", "-", None, true, 5);
        assert_eq!(randomized.len(), "hello-world".len() + 5);

        assert_eq!(plain("hello world ", "", ""), "helloworld");
    }

    #[test]
    fn test_email() {
        let expected = "alice-bob-com";
        // Checked twice on purpose: the same input must yield the same slug on every call.
        assert_eq!(slugify!("alice@bob.com"), expected);
        assert_eq!(slugify!("alice@bob.com"), expected);
    }

    // The default random suffix adds five characters.
    #[test]
    fn test_randomness() {
        let randomized = slugify!("hello world", randomness = true);
        assert_eq!(randomized.len(), "hello-world".len() + 5);
    }

    #[test]
    fn test_starts_with_number() {
        assert_eq!(slugify!("10 amazing secrets"), "10-amazing-secrets");
    }

    #[test]
    fn test_contains_numbers() {
        let expected = "the-101-dalmatians";
        assert_eq!(slugify!("the 101 dalmatians"), expected);

        let randomized = slugify!("the 101 dalmatians", randomness = true);
        assert_eq!(randomized.len(), expected.len() + 5);
    }

    #[test]
    fn test_ends_with_number() {
        assert_eq!(slugify!("lucky number 7"), "lucky-number-7");
    }

    #[test]
    fn test_numbers_only() {
        assert_eq!(slugify!("101"), "101");
    }

    #[test]
    fn test_numbers_and_symbols() {
        let slug = slugify!("1000 reasons you are #1");
        assert_eq!(slug, "1000-reasons-you-are-1");
    }

    #[test]
    fn test_stop_words() {
        assert_eq!(plain("hello world", "world", "-"), "hello");
        assert_eq!(slugify!("hello world", stop_words = "world"), "hello");

        let randomized = slugify!("hello world", stop_words = "world", randomness = true);
        assert_eq!(randomized.len(), "hello".len() + 5);
    }

    #[test]
    fn test_differently_cased_stopword_match() {
        assert_eq!(plain("Foo A FOO B foo C", "foo", "-"), "a-b-c");
    }

    #[test]
    fn test_multiple_stop_words() {
        assert_eq!(plain(FOX, "the", "-"), "quick-brown-fox-jumps-over-lazy-dog");

        let without_the_and_fox = "quick-brown-jumps-over-lazy-dog";
        assert_eq!(plain(FOX, "the,fox", "-"), without_the_and_fox);
        assert_eq!(slugify!(FOX, stop_words = "the,fox"), without_the_and_fox);
    }

    #[test]
    fn test_stopwords_with_different_separator() {
        let expected = "quick brown fox jumps over lazy dog";

        assert_eq!(plain(FOX, "the", " "), expected);
        assert_eq!(
            slugify(FOX, "the", " ", None, true, 8).len(),
            expected.len() + 8
        );

        assert_eq!(slugify!(FOX, stop_words = "the", separator = " "), expected);
        assert_eq!(
            slugify!(FOX, stop_words = "the", separator = " ", randomness = true).len(),
            expected.len() + 5
        );
        assert_eq!(
            slugify!(
                FOX,
                stop_words = "the",
                separator = " ",
                randomness = true,
                randomness_length = 10
            )
            .len(),
            expected.len() + 10
        );
    }

    #[test]
    fn test_separator() {
        assert_eq!(plain("hello world", "", "."), "hello.world");
        assert_eq!(plain("hello world", "", "_"), "hello_world");
        assert_eq!(slugify!("hello world", separator = "_"), "hello_world");

        let randomized = slugify!("hello world-", separator = "_", randomness = true);
        assert_eq!(randomized.len(), "hello_world".len() + 5);
    }

    #[test]
    fn test_phonetic_conversion() {
        assert_eq!(plain("影師嗎", "", "-"), "ying-shi-ma");
    }

    #[test]
    fn test_accented_text() {
        assert_eq!(plain("Æúű--cool?", "", "-"), "aeuu-cool");
        assert_eq!(
            plain("Nín hǎo. Wǒ shì zhōng guó rén", "", "-"),
            "nin-hao-wo-shi-zhong-guo-ren"
        );
    }

    #[test]
    fn test_accented_text_non_word_chars() {
        let slug = slugify!("jaja---lol-méméméoo--a");
        assert_eq!(slug, "jaja-lol-mememeoo-a");
    }

    #[test]
    fn test_cyrillic_text() {
        assert_eq!(slugify!("Компьютер"), "komp-iuter");
    }

    #[test]
    fn test_macro() {
        assert_eq!(slugify!("Компьютер"), "komp-iuter");

        // Single optional parameter.
        let greeting = "hello world";
        assert_eq!(slugify!(greeting, separator = "-"), "hello-world");
        assert_eq!(slugify!(greeting, separator = " "), "hello world");
        assert_eq!(slugify!(greeting, max_length = 5), "hello");
        assert_eq!(slugify!(greeting, max_length = 6), "hello");

        // Separator combined with a length limit.
        assert_eq!(slugify!(greeting, separator = " ", max_length = 8), "hello wo");
        assert_eq!(slugify!(greeting, separator = "x", max_length = 8), "helloxwo");

        // Stop words combined with the other parameters.
        let text = "the hello world";
        assert_eq!(
            slugify!(text, stop_words = "the", separator = "-"),
            "hello-world"
        );
        assert_eq!(slugify!(text, stop_words = "the", max_length = 5), "hello");
        assert_eq!(
            slugify!(text, stop_words = "the", separator = "-", max_length = 10),
            "hello-worl"
        );
        assert_eq!(
            slugify!(text, stop_words = "the", separator = "-", max_length = 20),
            "hello-world"
        );
    }
}