1use unicode_normalization::UnicodeNormalization;
2use unicode_segmentation::UnicodeSegmentation;
3use std::ffi::{CStr, CString};
4use std::os::raw::c_char;
5
/// Configuration controlling how [`slugify`] turns text into a slug.
///
/// Every field is public, so callers can build a value directly or start
/// from `SlugOptions::default()` and override individual fields with
/// struct-update syntax.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SlugOptions {
    /// Character inserted between the words of the slug.
    pub separator: char,
    /// Maximum slug length, counted in characters; `None` means no limit.
    pub max_length: Option<usize>,
    /// Whether the slug is converted to lowercase.
    pub lowercase: bool,
    /// Whether common English stopwords are dropped from the slug.
    pub remove_stopwords: bool,
    /// Whether the slug is restricted to ASCII letters and digits, with a
    /// small set of accented Latin letters transliterated instead of dropped.
    pub ascii_only: bool,
}
14
15impl Default for SlugOptions {
16 fn default() -> Self {
17 Self {
18 separator: '-',
19 max_length: None,
20 lowercase: true,
21 remove_stopwords: false,
22 ascii_only: false,
23 }
24 }
25}
26
/// English stopwords that are dropped from a slug when stopword removal is
/// enabled.
///
/// Entries are lowercase because lookups are done against the lowercased
/// word, and each word is listed exactly once.
const COMMON_STOPWORDS: &[&str] = &[
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
    "has", "he", "in", "is", "it", "its", "of", "on", "that", "the",
    "to", "was", "will", "with", "this", "but", "they", "have",
];
32
33pub fn slugify(input: &str, options: &SlugOptions) -> String {
34 if input.trim().is_empty() {
35 return String::new();
36 }
37
38 let mut result = input
39 .nfc()
40 .collect::<String>()
41 .unicode_words()
42 .filter_map(|word| {
43 let word = word.trim();
44 if word.is_empty() {
45 return None;
46 }
47
48 if options.remove_stopwords && COMMON_STOPWORDS.contains(&word.to_lowercase().as_str()) {
49 return None;
50 }
51
52 let processed = if options.ascii_only {
53 word.chars()
54 .filter_map(|c| {
55 if c.is_ascii_alphanumeric() {
56 Some(c)
57 } else if c.is_alphabetic() {
58 transliterate_char(c)
59 } else {
60 None
61 }
62 })
63 .collect::<String>()
64 } else {
65 word.chars()
66 .filter(|c| c.is_alphanumeric() || c.is_alphabetic())
67 .collect::<String>()
68 };
69
70 if processed.is_empty() {
71 None
72 } else {
73 Some(processed)
74 }
75 })
76 .collect::<Vec<_>>()
77 .join(&options.separator.to_string());
78
79 if options.lowercase {
80 result = result.to_lowercase();
81 }
82
83 if let Some(max_len) = options.max_length {
84 if result.len() > max_len {
85 if let Some((idx, _)) = result.char_indices().nth(max_len) {
86 result.truncate(idx);
87 }
88
89 while result.ends_with(options.separator) {
90 result.pop();
91 }
92 }
93 }
94
95 result
96}
97
/// Maps an accented Latin-1 letter to its unaccented ASCII counterpart.
///
/// The case of the input is preserved (`'É'` becomes `'E'`, `'é'` becomes
/// `'e'`), so callers that do not lowercase their output keep the original
/// capitalization. `'ß'` maps to a single `'s'` because the result is one
/// `char`.
///
/// Returns `None` for any character that has no entry in the table.
fn transliterate_char(c: char) -> Option<char> {
    let ascii = match c {
        'à'..='å' => 'a',
        'À'..='Å' => 'A',
        'ç' => 'c',
        'Ç' => 'C',
        'è'..='ë' => 'e',
        'È'..='Ë' => 'E',
        'ì'..='ï' => 'i',
        'Ì'..='Ï' => 'I',
        'ñ' => 'n',
        'Ñ' => 'N',
        'ò'..='ö' => 'o',
        'Ò'..='Ö' => 'O',
        'ù'..='ü' => 'u',
        'Ù'..='Ü' => 'U',
        'ý' | 'ÿ' => 'y',
        'Ý' => 'Y',
        'ß' => 's',
        _ => return None,
    };
    Some(ascii)
}
112
113#[no_mangle]
114pub extern "C" fn slugify_simple(input: *const c_char) -> *mut c_char {
115 if input.is_null() {
116 return std::ptr::null_mut();
117 }
118
119 let c_str = unsafe { CStr::from_ptr(input) };
120 let input_str = match c_str.to_str() {
121 Ok(s) => s,
122 Err(_) => return std::ptr::null_mut(),
123 };
124
125 let options = SlugOptions::default();
126 let result = slugify(input_str, &options);
127
128 match CString::new(result) {
129 Ok(c_string) => c_string.into_raw(),
130 Err(_) => std::ptr::null_mut(),
131 }
132}
133
134#[no_mangle]
135pub extern "C" fn slugify_with_options(
136 input: *const c_char,
137 separator: c_char,
138 max_length: i32,
139 lowercase: bool,
140 remove_stopwords: bool,
141 ascii_only: bool,
142) -> *mut c_char {
143 if input.is_null() {
144 return std::ptr::null_mut();
145 }
146
147 let c_str = unsafe { CStr::from_ptr(input) };
148 let input_str = match c_str.to_str() {
149 Ok(s) => s,
150 Err(_) => return std::ptr::null_mut(),
151 };
152
153 let options = SlugOptions {
154 separator: separator as u8 as char,
155 max_length: if max_length > 0 { Some(max_length as usize) } else { None },
156 lowercase,
157 remove_stopwords,
158 ascii_only,
159 };
160
161 let result = slugify(input_str, &options);
162
163 match CString::new(result) {
164 Ok(c_string) => c_string.into_raw(),
165 Err(_) => std::ptr::null_mut(),
166 }
167}
168
/// Releases a string that was handed out through `CString::into_raw`, such
/// as the pointers returned by [`slugify_simple`] and
/// [`slugify_with_options`].
///
/// Passing a null pointer is a no-op. Any other pointer must have been
/// produced by `CString::into_raw` and must not be used or freed again after
/// this call.
#[no_mangle]
pub extern "C" fn free_string(ptr: *mut c_char) {
    if ptr.is_null() {
        return;
    }

    // SAFETY: `ptr` is non-null; the caller guarantees it came from
    // `CString::into_raw` and has not been freed yet, so rebuilding the
    // `CString` and dropping it releases the allocation exactly once.
    drop(unsafe { CString::from_raw(ptr) });
}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Slugifies `input` with the default options.
    fn default_slug(input: &str) -> String {
        slugify(input, &SlugOptions::default())
    }

    #[test]
    fn test_basic_slugify() {
        let cases = [("Hello World", "hello-world"), ("Test 123", "test-123")];
        for &(input, expected) in cases.iter() {
            assert_eq!(default_slug(input), expected);
        }
    }

    #[test]
    fn test_unicode_handling() {
        // Non-ASCII letters are kept by default...
        assert_eq!(default_slug("Café münü"), "café-münü");

        // ...and transliterated when ASCII-only output is requested.
        let ascii = SlugOptions {
            ascii_only: true,
            ..Default::default()
        };
        assert_eq!(slugify("Café münü", &ascii), "cafe-munu");
    }

    #[test]
    fn test_custom_separator() {
        let underscore = SlugOptions {
            separator: '_',
            ..Default::default()
        };
        assert_eq!(slugify("Hello World", &underscore), "hello_world");
    }

    #[test]
    fn test_max_length() {
        // The cut lands right after a separator, which must be trimmed away.
        let capped = SlugOptions {
            max_length: Some(10),
            ..Default::default()
        };
        assert_eq!(slugify("This is a very long title", &capped), "this-is-a");
    }

    #[test]
    fn test_stopwords() {
        let no_stopwords = SlugOptions {
            remove_stopwords: true,
            ..Default::default()
        };
        assert_eq!(
            slugify("The quick brown fox", &no_stopwords),
            "quick-brown-fox"
        );
    }

    #[test]
    fn test_empty_input() {
        for &blank in ["", " "].iter() {
            assert_eq!(default_slug(blank), "");
        }
    }

    #[test]
    fn test_special_characters() {
        assert_eq!(default_slug("Hello, World! @#$%"), "hello-world");
    }
}