slugify_core/
lib.rs

1use unicode_normalization::UnicodeNormalization;
2use unicode_segmentation::UnicodeSegmentation;
3use std::ffi::{CStr, CString};
4use std::os::raw::c_char;
5
/// Configuration controlling how [`slugify`] turns text into a slug.
///
/// Values compare field by field, which makes option sets easy to assert on
/// in tests and to deduplicate in callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SlugOptions {
    /// Character inserted between consecutive words of the slug.
    pub separator: char,
    /// Maximum slug length, counted in characters; `None` disables truncation.
    pub max_length: Option<usize>,
    /// Lowercase the slug when `true`.
    pub lowercase: bool,
    /// Drop common English stopwords (matched case-insensitively) when `true`.
    pub remove_stopwords: bool,
    /// When `true`, keep only ASCII letters and digits; a small set of
    /// accented Latin letters is transliterated and everything else is dropped.
    pub ascii_only: bool,
}
14
15impl Default for SlugOptions {
16    fn default() -> Self {
17        Self {
18            separator: '-',
19            max_length: None,
20            lowercase: true,
21            remove_stopwords: false,
22            ascii_only: false,
23        }
24    }
25}
26
/// Lowercase English stopwords removed from slugs when
/// `SlugOptions::remove_stopwords` is enabled. Each word appears exactly once.
const COMMON_STOPWORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
    "has", "he", "in", "is", "it", "its", "of", "on", "that", "the",
    "to", "was", "will", "with", "this", "but", "they", "have",
];
32
33pub fn slugify(input: &str, options: &SlugOptions) -> String {
34    if input.trim().is_empty() {
35        return String::new();
36    }
37
38    let mut result = input
39        .nfc()
40        .collect::<String>()
41        .unicode_words()
42        .filter_map(|word| {
43            let word = word.trim();
44            if word.is_empty() {
45                return None;
46            }
47            
48            if options.remove_stopwords && COMMON_STOPWORDS.contains(&word.to_lowercase().as_str()) {
49                return None;
50            }
51            
52            let processed = if options.ascii_only {
53                word.chars()
54                    .filter_map(|c| {
55                        if c.is_ascii_alphanumeric() {
56                            Some(c)
57                        } else if c.is_alphabetic() {
58                            transliterate_char(c)
59                        } else {
60                            None
61                        }
62                    })
63                    .collect::<String>()
64            } else {
65                word.chars()
66                    .filter(|c| c.is_alphanumeric() || c.is_alphabetic())
67                    .collect::<String>()
68            };
69            
70            if processed.is_empty() {
71                None
72            } else {
73                Some(processed)
74            }
75        })
76        .collect::<Vec<_>>()
77        .join(&options.separator.to_string());
78
79    if options.lowercase {
80        result = result.to_lowercase();
81    }
82
83    if let Some(max_len) = options.max_length {
84        if result.len() > max_len {
85            if let Some((idx, _)) = result.char_indices().nth(max_len) {
86                result.truncate(idx);
87            }
88            
89            while result.ends_with(options.separator) {
90                result.pop();
91            }
92        }
93    }
94
95    result
96}
97
/// Maps an accented Latin-1 letter to its unaccented ASCII base letter.
///
/// The case of the input is preserved (`'É'` becomes `'E'`, `'é'` becomes
/// `'e'`), so callers that do not lowercase their output keep the original
/// capitalisation. Returns `None` for any character not covered by the table.
fn transliterate_char(c: char) -> Option<char> {
    let ascii = match c {
        'à'..='å' => 'a',
        'À'..='Å' => 'A',
        'ç' => 'c',
        'Ç' => 'C',
        'è'..='ë' => 'e',
        'È'..='Ë' => 'E',
        'ì'..='ï' => 'i',
        'Ì'..='Ï' => 'I',
        'ñ' => 'n',
        'Ñ' => 'N',
        'ò'..='ö' => 'o',
        'Ò'..='Ö' => 'O',
        'ù'..='ü' => 'u',
        'Ù'..='Ü' => 'U',
        'ý' | 'ÿ' => 'y',
        'Ý' => 'Y',
        // The return type allows a single character only, so sharp s is
        // approximated by one `s`.
        'ß' => 's',
        _ => return None,
    };
    Some(ascii)
}
112
113#[no_mangle]
114pub extern "C" fn slugify_simple(input: *const c_char) -> *mut c_char {
115    if input.is_null() {
116        return std::ptr::null_mut();
117    }
118    
119    let c_str = unsafe { CStr::from_ptr(input) };
120    let input_str = match c_str.to_str() {
121        Ok(s) => s,
122        Err(_) => return std::ptr::null_mut(),
123    };
124    
125    let options = SlugOptions::default();
126    let result = slugify(input_str, &options);
127    
128    match CString::new(result) {
129        Ok(c_string) => c_string.into_raw(),
130        Err(_) => std::ptr::null_mut(),
131    }
132}
133
134#[no_mangle]
135pub extern "C" fn slugify_with_options(
136    input: *const c_char,
137    separator: c_char,
138    max_length: i32,
139    lowercase: bool,
140    remove_stopwords: bool,
141    ascii_only: bool,
142) -> *mut c_char {
143    if input.is_null() {
144        return std::ptr::null_mut();
145    }
146    
147    let c_str = unsafe { CStr::from_ptr(input) };
148    let input_str = match c_str.to_str() {
149        Ok(s) => s,
150        Err(_) => return std::ptr::null_mut(),
151    };
152    
153    let options = SlugOptions {
154        separator: separator as u8 as char,
155        max_length: if max_length > 0 { Some(max_length as usize) } else { None },
156        lowercase,
157        remove_stopwords,
158        ascii_only,
159    };
160    
161    let result = slugify(input_str, &options);
162    
163    match CString::new(result) {
164        Ok(c_string) => c_string.into_raw(),
165        Err(_) => std::ptr::null_mut(),
166    }
167}
168
/// C entry point: releases a string previously returned by this library.
///
/// A null `ptr` is accepted and ignored.
///
/// # Safety
///
/// Although not declared `unsafe`, this function takes ownership of `ptr`:
/// a non-null `ptr` must have been obtained from `CString::into_raw` (as the
/// `slugify_*` entry points in this file do) and must not be used or freed
/// again afterwards.
#[no_mangle]
pub extern "C" fn free_string(ptr: *mut c_char) {
    if ptr.is_null() {
        return;
    }

    // SAFETY: `ptr` is non-null and, per the contract above, came from
    // `CString::into_raw`; rebuilding the `CString` and dropping it frees it.
    drop(unsafe { CString::from_raw(ptr) });
}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Slugifies `input` with the default options.
    fn default_slug(input: &str) -> String {
        slugify(input, &SlugOptions::default())
    }

    /// Plain ASCII words are lowercased and joined with `-`.
    #[test]
    fn test_basic_slugify() {
        assert_eq!(default_slug("Hello World"), "hello-world");
        assert_eq!(default_slug("Test 123"), "test-123");
    }

    /// Accented letters are kept by default and transliterated in ASCII mode.
    #[test]
    fn test_unicode_handling() {
        assert_eq!(default_slug("Café münü"), "café-münü");

        let ascii_options = SlugOptions {
            ascii_only: true,
            ..SlugOptions::default()
        };
        assert_eq!(slugify("Café münü", &ascii_options), "cafe-munu");
    }

    /// The configured separator replaces the default `-`.
    #[test]
    fn test_custom_separator() {
        let underscore = SlugOptions {
            separator: '_',
            ..SlugOptions::default()
        };
        assert_eq!(slugify("Hello World", &underscore), "hello_world");
    }

    /// Truncation respects `max_length` and strips a dangling separator.
    #[test]
    fn test_max_length() {
        let limited = SlugOptions {
            max_length: Some(10),
            ..SlugOptions::default()
        };
        assert_eq!(slugify("This is a very long title", &limited), "this-is-a");
    }

    /// Stopwords are removed regardless of their case in the input.
    #[test]
    fn test_stopwords() {
        let no_stopwords = SlugOptions {
            remove_stopwords: true,
            ..SlugOptions::default()
        };
        assert_eq!(slugify("The quick brown fox", &no_stopwords), "quick-brown-fox");
    }

    /// Empty and whitespace-only inputs produce an empty slug.
    #[test]
    fn test_empty_input() {
        for blank in ["", "   "].iter() {
            assert_eq!(default_slug(blank), "");
        }
    }

    /// Punctuation and symbols never reach the slug.
    #[test]
    fn test_special_characters() {
        assert_eq!(default_slug("Hello, World! @#$%"), "hello-world");
    }
}