intelli_shell/utils/
string.rs

1use std::sync::LazyLock;
2
3use regex::Regex;
4use unidecode::unidecode;
5
/// Converts all types of newline sequences (`\r`, `\n`, `\r\n`) in a string to a single newline character (`\n`).
///
/// A `\r\n` pair is treated as one line break and produces a single `\n`; a lone `\r` is turned into `\n` as well.
/// Every other character is copied through unchanged.
///
/// This is useful for normalizing text input that might come from different operating systems or sources with
/// inconsistent line endings.
///
/// # Examples
///
/// ```rust
/// # use intelli_shell::utils::unify_newlines;
/// let text = "Hello\r\nWorld\nAnother\rLine";
/// let unified = unify_newlines(text);
/// assert_eq!(unified, "Hello\nWorld\nAnother\nLine");
/// ```
pub fn unify_newlines(str: impl AsRef<str>) -> String {
    let input = str.as_ref();

    // The output is never longer than the input, so a single allocation is enough
    let mut unified = String::with_capacity(input.len());

    let mut chars = input.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\r' => {
                // Swallow the `\n` of a `\r\n` pair so the pair yields exactly one line break
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                unified.push('\n');
            }
            other => unified.push(other),
        }
    }

    unified
}
25
26/// Removes newline sequences and any surrounding whitespace, replacing them with a single space.
27///
28/// This function is useful for converting multi-line text into a single line while preserving word separation.
29/// It collapses multiple lines and adjacent whitespace into one space.
30///
31/// # Examples
32///
33/// ```rust
34/// # use intelli_shell::utils::remove_newlines;
35/// let text = "Line 1\n  Line 2 \r\n\tLine 3";
36/// let single_line = remove_newlines(text);
37/// assert_eq!(single_line, "Line 1 Line 2 Line 3");
38///
39/// // Example with potentially escaped newline
40/// let text_escaped = "Line A \\\n Line B";
41/// let single_line_escaped = remove_newlines(text_escaped);
42/// assert_eq!(single_line_escaped, "Line A Line B");
43/// ```
44pub fn remove_newlines(str: impl AsRef<str>) -> String {
45    /// Regex to match newline sequences potentially surrounded by whitespace.
46    ///
47    /// It also handles an optional backslash (`\`) preceding the newline, which might indicate an escaped newline in
48    /// shell contexts.
49    static NEW_LINE_AND_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\s*(\\)?(\r\n|\r|\n)\s*"#).unwrap());
50
51    NEW_LINE_AND_SPACES.replace_all(str.as_ref(), " ").to_string()
52}
53
54/// Normalizes a string by performing ASCII transliteration and converting to lowercase.
55///
56/// This uses the [unidecode] crate to approximate non-ASCII characters with their closest ASCII equivalents, and then
57/// converts the entire string to lowercase. Then, remove any non-alphanumeric character and consecutive whitespaces,
58/// returning the trimmed string.
59///
60/// # Examples
61///
62/// ```rust
63/// # use intelli_shell::utils::flatten_str;
64/// let text = "Héllö Wörld! (-123) ";
65/// let flattened = flatten_str(text);
66/// assert_eq!(flattened, "hello world -123");
67/// ```
68pub fn flatten_str(s: impl AsRef<str>) -> String {
69    /// Regex to match any non-allowed character on the flattened version
70    static FLAT_STRING_FORBIDDEN_CHARS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[^a-z0-9\s-]").unwrap());
71
72    flatten(s, &FLAT_STRING_FORBIDDEN_CHARS)
73}
74
75/// Normalizes a variable name string by performing ASCII transliteration and converting to lowercase.
76///
77/// This uses the [unidecode] crate to approximate non-ASCII characters with their closest ASCII equivalents, and then
78/// converts the entire string to lowercase. Then, remove any non-allowed character and consecutive whitespaces,
79/// returning the trimmed string.
80///
81/// # Examples
82///
83/// ```rust
84/// # use intelli_shell::utils::flatten_variable_name;
85/// let variable = "  SÉCOND Part ";
86/// let flattened = flatten_variable_name(variable);
87/// assert_eq!(flattened, "second part");
88/// ```
89pub fn flatten_variable_name(variable_name: impl AsRef<str>) -> String {
90    /// Regex to match any non-allowed character on the flattened version of a variable
91    static VARIABLE_FORBIDDEN_CHARS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[^a-z0-9\s_:-]").unwrap());
92
93    flatten(variable_name, &VARIABLE_FORBIDDEN_CHARS)
94}
95
96fn flatten(s: impl AsRef<str>, forbidden_chars: &Regex) -> String {
97    // Unidecode and lowercase
98    let decoded = unidecode(s.as_ref()).to_lowercase();
99
100    // Keep only allowed characters
101    let flattened = forbidden_chars.replace_all(&decoded, "");
102
103    /// Regex to match consecutive whitespaces
104    static FLATTEN_COLLAPSE_WHITESPACE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
105
106    // Remove consecutive whitespaces
107    FLATTEN_COLLAPSE_WHITESPACE_REGEX
108        .replace_all(&flattened, " ")
109        .trim()
110        .to_string()
111}