1use std::collections::BTreeSet;
4use std::ffi::OsStr;
5
6use chrono::DateTime;
7use chrono::Utc;
8use deunicode::deunicode;
9use once_cell::sync::Lazy;
10use regex::Regex;
11use serde::Serialize;
12
13use super::result::Result;
14use crate::render::engine::RenderEngine;
15
/// Matches a single tag: a `#`, then one ASCII letter, then one or more characters that are
/// neither whitespace nor `#`, optionally followed by one whitespace character.
///
/// A tag therefore has at least two characters after the `#`, and `#1` is not a tag because the
/// first character after `#` must be a letter.
16static RE_TAG: Lazy<Regex> = Lazy::new(|| Regex::new(r"#[a-zA-Z][^\s#]+\s?").unwrap());
19
/// Matches a run of three or more consecutive newline characters.
20static RE_BLOCKS: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n{3,}").unwrap());
22
/// Returns a copy of `string` with every character that appears in `chars` removed.
///
/// # Arguments
///
/// * `string` - The text to filter.
/// * `chars` - The set of characters to drop, given as a string.
#[must_use]
pub fn strip(string: &str, chars: &str) -> String {
    string
        .chars()
        .filter(|&candidate| !chars.contains(candidate))
        .collect()
}
37
38#[must_use]
44pub fn sanitize(string: &str) -> String {
45 let remove = &['\n', '\r', '\0'];
47 let replace = &['/', ':'];
48
49 let sanitized: String = string
50 .chars()
51 .filter(|c| !remove.contains(c))
52 .map(|c| if replace.contains(&c) { '_' } else { c })
53 .collect();
54
55 let sanitized = OsStr::new(&sanitized);
56 let sanitized = sanitized.to_string_lossy().to_string();
57
58 if sanitized != string {
59 log::warn!("the string '{}' contained invalid characters", string);
60 };
61
62 sanitized
63}
64
65#[must_use]
75pub fn to_slug(string: &str, lowercase: bool) -> String {
76 let mut slug = String::with_capacity(string.len());
77
78 let mut prev_is_dash = true;
80
81 {
82 let mut push_char = |mut char: u8| match char {
83 b'a'..=b'z' | b'0'..=b'9' => {
84 prev_is_dash = false;
85 slug.push(char.into());
86 }
87 b'A'..=b'Z' => {
88 prev_is_dash = false;
89
90 char = if lowercase { char - b'A' + b'a' } else { char };
91
92 slug.push(char.into());
93 }
94 _ => {
95 if !prev_is_dash {
96 slug.push('-');
97 prev_is_dash = true;
98 }
99 }
100 };
101
102 for char in string.chars() {
103 if char.is_ascii() {
104 (push_char)(char as u8);
105 } else {
106 for &byte in deunicode::deunicode_char(char).unwrap_or("-").as_bytes() {
107 (push_char)(byte);
108 }
109 }
110 }
111 }
112
113 if slug.ends_with('-') {
114 slug.pop();
115 }
116
117 slug.shrink_to_fit();
118
119 slug
120}
121
122#[must_use]
128pub fn to_slug_date(date: &DateTime<Utc>) -> String {
129 date.format(crate::defaults::DATE_FORMAT_SLUG).to_string()
130}
131
132pub fn render_and_sanitize<C>(template: &str, context: C) -> Result<String>
138where
139 C: Serialize,
140{
141 let string = RenderEngine::default().render_str(template, context)?;
142
143 Ok(sanitize(&string))
144}
145
146#[must_use]
157pub fn build_filename_and_sanitize(file_stem: &str, extension: &str) -> String {
158 let filename = format!("{file_stem}.{extension}");
159
160 sanitize(&filename)
161}
162
/// Normalizes whitespace so that every non-blank line is trimmed and separated from the next by
/// exactly one blank line.
///
/// Lines are trimmed *before* blank lines are discarded, so a line containing only spaces or
/// tabs is dropped instead of turning into an extra blank line in the output.
///
/// # Arguments
///
/// * `string` - The text to normalize.
#[must_use]
pub fn normalize_whitespace(string: &str) -> String {
    string
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n\n")
}
178
179#[must_use]
185pub fn extract_tags(string: &str) -> BTreeSet<String> {
186 let mut tags = RE_TAG
187 .find_iter(string)
188 .map(|t| t.as_str())
189 .map(str::trim)
190 .map(ToOwned::to_owned)
191 .collect::<Vec<String>>();
192
193 tags.sort();
194
195 BTreeSet::from_iter(tags)
196}
197
198#[must_use]
204pub fn remove_tags(string: &str) -> String {
205 RE_TAG.replace_all(string, "").trim().to_owned()
206}
207
/// Converts all characters of a string to ASCII.
///
/// This is a thin wrapper that delegates to `deunicode::deunicode`; the transliteration rules
/// are those of the `deunicode` crate.
///
/// # Arguments
///
/// * `string` - The text to convert.
208#[must_use]
214pub fn convert_all_to_ascii(string: &str) -> String {
215 deunicode(string)
216}
217
218#[must_use]
228pub fn convert_symbols_to_ascii(string: &str) -> String {
229 let mut string = string.to_owned();
230
231 for (from, to) in &*crate::defaults::UNICODE_TO_ASCII_SYMBOLS {
232 string = string.replace(*from, to);
233 }
234
235 string
236}
237
238#[must_use]
252pub fn trim_blocks(string: &str) -> String {
253 let string = RE_BLOCKS.replace_all(string, "\n\n");
254 let mut string = string.trim_end().to_string();
255
256 string.push('\n');
257
258 string
259}
260
#[cfg(test)]
mod test {

    use super::*;

    /// `strip` removes exactly the listed characters and leaves everything else, including
    /// non-ASCII characters, untouched.
    #[test]
    fn strip() {
        let cases = [
            (
                "Lorem ipsum. Aedipisicing culpa!?",
                " .!?",
                "LoremipsumAedipisicingculpa",
            ),
            (
                "Lorem ipsum.\n Aedipisicing culpa!?",
                " .!?\n",
                "LoremipsumAedipisicingculpa",
            ),
            (
                "--Lorem--ipsum. Aedipisicing -culpa-",
                " .-",
                "LoremipsumAedipisicingculpa",
            ),
            (
                "Lorem & Ipsúm. Ædipisicing culpa!?",
                " &.!?",
                "LoremIpsúmÆdipisicingculpa",
            ),
        ];

        for (input, chars, expected) in cases {
            assert_eq!(super::strip(input, chars), expected, "input: {input:?}");
        }
    }

    /// With `lowercase = true` the slug is fully lowercased.
    #[test]
    fn slugify_lowercase() {
        let cases = [
            (
                "Lorem ipsum. Aedipisicing culpa!?",
                "lorem-ipsum-aedipisicing-culpa",
            ),
            (
                "Lorem ipsum.\n Aedipisicing culpa!?",
                "lorem-ipsum-aedipisicing-culpa",
            ),
            (
                "--Lorem--ipsum. Aedipisicing -culpa-",
                "lorem-ipsum-aedipisicing-culpa",
            ),
            (
                "Lorem & Ipsúm. Ædipisicing culpa!?",
                "lorem-ipsum-aedipisicing-culpa",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(super::to_slug(input, true), expected, "input: {input:?}");
        }
    }

    /// With `lowercase = false` the original letter case is preserved.
    #[test]
    fn slugify_preserve_case() {
        let cases = [
            (
                "Lorem ipsum. Aedipisicing culpa!?",
                "Lorem-ipsum-Aedipisicing-culpa",
            ),
            (
                "Lorem ipsum.\n Aedipisicing culpa!?",
                "Lorem-ipsum-Aedipisicing-culpa",
            ),
            (
                "--Lorem--ipsum. Aedipisicing -culpa-",
                "Lorem-ipsum-Aedipisicing-culpa",
            ),
            (
                "Lorem & Ipsúm. Ædipisicing culpa!?",
                "Lorem-Ipsum-AEdipisicing-culpa",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(super::to_slug(input, false), expected, "input: {input:?}");
        }
    }

    // Generates one `#[test]` per entry. Each entry is
    // `name: (input, expected text after tag removal, expected tags)`, and the generated test
    // checks `extract_tags` and `remove_tags` against the same input.
    macro_rules! remove_and_extract_tags {
        ($($name:ident: ($input:tt, $tags_removed_expected:tt, $tags_expected:tt),)*) => {
            $(
                #[test]
                fn $name() {
                    let tags_extracted = super::extract_tags($input);
                    let tags_expected: BTreeSet<String> = $tags_expected
                        .into_iter()
                        .map(|t: &str| t.to_string())
                        .collect();

                    let tags_removed = super::remove_tags($input);

                    assert_eq!(tags_extracted, tags_expected);
                    assert_eq!(tags_removed, $tags_removed_expected.to_string());
                }
            )*
        }
    }

    remove_and_extract_tags! {
        // No tags at all.
        process_tags_00: (
            "Lorem ipsum.",
            "Lorem ipsum.",
            []
        ),
        // Tags at the end.
        process_tags_01: (
            "Lorem ipsum. #tag01 #tag02",
            "Lorem ipsum.",
            ["#tag01", "#tag02"]
        ),
        // Tags in the middle.
        process_tags_02: (
            "Lorem ipsum. #tag01 #tag02 Adipisicing culpa.",
            "Lorem ipsum. Adipisicing culpa.",
            ["#tag01", "#tag02"]
        ),
        // Tags at the start.
        process_tags_03: (
            "#tag01 #tag02 Lorem ipsum. Adipisicing culpa.",
            "Lorem ipsum. Adipisicing culpa.",
            ["#tag01", "#tag02"]
        ),
        // Tags followed by trailing whitespace.
        process_tags_04: (
            "Lorem ipsum. #tag01 #tag02 ",
            "Lorem ipsum.",
            ["#tag01", "#tag02"]
        ),
        // Tags with no whitespace around them.
        process_tags_05: (
            "Lorem ipsum.#tag01#tag02",
            "Lorem ipsum.",
            ["#tag01", "#tag02"]
        ),
        // Tags are case-sensitive and must start with a letter.
        process_tags_06: (
            "#tag01 #TAG01 #Tag01 #1 #999",
            "#1 #999",
            ["#tag01", "#TAG01", "#Tag01"]
        ),
        // Input consisting only of tags.
        process_tags_07: (
            "#tag01 #tag02",
            "",
            ["#tag01", "#tag02"]
        ),
        // Duplicate tags collapse into one.
        process_tags_08: (
            "#tag01 #tag01 #tag01",
            "",
            ["#tag01"]
        ),
        // Extra `#` characters are not part of a tag.
        process_tags_09: (
            "###tag01##tag02",
            "###",
            ["#tag01", "#tag02"]
        ),
    }
}