use deunicode::deunicode;
use regex::Regex;
use unicode_categories::UnicodeCategories;
use crate::options::Options;
/// Reusable slug generator that carries its own [`Options`].
///
/// Create one with [`Slugifier::new`] or [`Slugifier::with_options`], adjust it
/// through the builder-style setters, then call [`Slugifier::slugify`].
#[derive(Debug, Clone)]
pub struct Slugifier {
/// Configuration handed to the slug routine on every `slugify` call.
options: Options,
}
impl Slugifier {
    /// Creates a slugifier configured with `Options::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            options: Options::default(),
        }
    }

    /// Creates a slugifier that uses the supplied `options` as-is.
    #[must_use]
    pub fn with_options(options: Options) -> Self {
        Self { options }
    }

    /// Returns a shared reference to the current options.
    #[must_use]
    pub fn options(&self) -> &Options {
        &self.options
    }

    /// Returns a mutable reference to the options for in-place adjustment.
    pub fn options_mut(&mut self) -> &mut Options {
        &mut self.options
    }

    /// Sets the string emitted in place of each run of non-alphanumeric
    /// characters.
    ///
    /// Like every by-value setter here, this consumes `self` and returns the
    /// updated value; the result must be kept or the change is lost.
    #[must_use]
    pub fn separator(mut self, separator: impl Into<String>) -> Self {
        self.options.separator = separator.into();
        self
    }

    /// Sets the optional locale. When present, its `apply` method is run over
    /// the text before characters are converted; `None` skips that step.
    #[must_use]
    pub fn locale(mut self, locale: Option<crate::Locale>) -> Self {
        self.options.locale = locale;
        self
    }

    /// Sets the optional regex whose matches are deleted from the input before
    /// any other processing; `None` disables removal.
    #[must_use]
    pub fn remove(mut self, remove: Option<Regex>) -> Self {
        self.options.remove = remove;
        self
    }

    /// Sets whether the finished slug is lowercased.
    #[must_use]
    pub fn lowercase(mut self, lowercase: bool) -> Self {
        self.options.lowercase = lowercase;
        self
    }

    /// Sets whether leading and trailing separators are stripped from the slug.
    #[must_use]
    pub fn trim(mut self, trim: bool) -> Self {
        self.options.trim = trim;
        self
    }

    /// Sets the byte budget for the generated slug; `None` means unlimited.
    #[must_use]
    pub fn max_length(mut self, max_length: Option<usize>) -> Self {
        self.options.max_length = max_length;
        self
    }

    /// Generates a slug from `input` using this slugifier's options.
    #[must_use]
    pub fn slugify(&self, input: &str) -> String {
        slugify_impl(input, &self.options)
    }
}
impl Default for Slugifier {
/// Equivalent to [`Slugifier::new`].
fn default() -> Self {
Self::new()
}
}
/// Generates a slug from `input` using the given `options`.
///
/// Free-function entry point for callers that already hold an [`Options`]
/// value and do not need a [`Slugifier`].
pub fn slugify_with_options(input: &str, options: &Options) -> String {
slugify_impl(input, options)
}
fn slugify_impl(input: &str, options: &Options) -> String {
let mut preprocessed = input;
let owned_removed;
if let Some(ref remove_regex) = options.remove {
owned_removed = remove_regex.replace_all(input, "").into_owned();
preprocessed = &owned_removed;
}
let localized = if let Some(locale) = options.locale {
locale.apply(preprocessed)
} else {
preprocessed.to_string()
};
let mut builder = String::with_capacity(localized.len());
let mut prev_was_sep = false;
let mut bytes_so_far = 0usize;
let sep = options.separator.as_str();
let push_sep = |out: &mut String, prev_flag: &mut bool| {
if !*prev_flag {
out.push_str(sep);
*prev_flag = true;
}
};
for ch in localized.chars() {
if options.drop_emoji && !(ch.is_letter() || ch.is_number() || ch.is_whitespace()) {
if ch == '-' || ch == '_' {
let add_len = sep.len();
if options
.max_length
.map(|max_len| bytes_so_far + add_len > max_len)
.unwrap_or(false)
{
break;
}
push_sep(&mut builder, &mut prev_was_sep);
bytes_so_far += add_len;
}
continue;
}
if options.drop_apostrophes && (ch == '\'' || ch == '’') {
continue;
}
if ch.is_alphanumeric() {
if ch.is_ascii() {
if options
.max_length
.map(|max_len| bytes_so_far + 1 > max_len)
.unwrap_or(false)
{
break;
}
builder.push(ch);
prev_was_sep = false;
bytes_so_far += 1;
} else {
let mut buf = [0u8; 4];
let s = ch.encode_utf8(&mut buf);
let ascii = deunicode(s);
if ascii.is_empty() {
continue;
}
let add_len = ascii.len();
if options
.max_length
.map(|max_len| bytes_so_far + add_len > max_len)
.unwrap_or(false)
{
break;
}
builder.push_str(&ascii);
prev_was_sep = false;
bytes_so_far += add_len;
}
} else {
let add_len = sep.len();
if options
.max_length
.map(|max_len| bytes_so_far + add_len > max_len)
.unwrap_or(false)
{
break;
}
push_sep(&mut builder, &mut prev_was_sep);
bytes_so_far += add_len;
}
}
let mut text = builder;
if options.trim {
while text.starts_with(sep) {
text.drain(..sep.len());
}
while text.ends_with(sep) {
let new_len = text.len() - sep.len();
text.truncate(new_len);
}
}
if options.lowercase {
text = text.to_lowercase();
}
text
}