intelli_shell/utils/
string.rs

1use std::sync::LazyLock;
2
3use itertools::Itertools;
4use regex::Regex;
5use unidecode::unidecode;
6
7/// Regex to match various newline sequences (`\r`, `\n`, `\r\n`)
8static NEW_LINES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\r\n|\r|\n"#).unwrap());
9
10/// Converts all types of newline sequences (`\r`, `\n`, `\r\n`) in a string to a single newline character (`\n`).
11///
12/// This is useful for normalizing text input that might come from different operating systems or sources with
13/// inconsistent line endings.
14///
15/// # Examples
16///
17/// ```rust
18/// # use intelli_shell::utils::unify_newlines;
19/// let text = "Hello\r\nWorld\nAnother\rLine";
20/// let unified = unify_newlines(text);
21/// assert_eq!(unified, "Hello\nWorld\nAnother\nLine");
22/// ```
23pub fn unify_newlines(str: impl AsRef<str>) -> String {
24    NEW_LINES.replace_all(str.as_ref(), "\n").to_string()
25}
26
27/// Regex to match newline sequences potentially surrounded by whitespace.
28///
29/// It also handles an optional backslash (`\`) preceding the newline, which might indicate an escaped newline in shell
30/// contexts.
31static NEW_LINE_AND_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"\s*(\\)?(\r\n|\r|\n)\s*"#).unwrap());
32
33/// Removes newline sequences and any surrounding whitespace, replacing them with a single space.
34///
35/// This function is useful for converting multi-line text into a single line while preserving word separation.
36/// It collapses multiple lines and adjacent whitespace into one space.
37///
38/// # Examples
39///
40/// ```rust
41/// # use intelli_shell::utils::remove_newlines;
42/// let text = "Line 1\n  Line 2 \r\n\tLine 3";
43/// let single_line = remove_newlines(text);
44/// assert_eq!(single_line, "Line 1 Line 2 Line 3");
45///
46/// // Example with potentially escaped newline
47/// let text_escaped = "Line A \\\n Line B";
48/// let single_line_escaped = remove_newlines(text_escaped);
49/// assert_eq!(single_line_escaped, "Line A Line B");
50/// ```
51pub fn remove_newlines(str: impl AsRef<str>) -> String {
52    NEW_LINE_AND_SPACES.replace_all(str.as_ref(), " ").to_string()
53}
54
55/// Regex to match any non-allowed character on the flattened version
56static FLATTEN_KEEP_CHARS_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[^a-z0-9\s-]").unwrap());
57/// Regex to match consecutive whitespaces
58static FLATTEN_COLLAPSE_WHITESPACE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+").unwrap());
59
60/// Normalizes a string by performing ASCII transliteration and converting to lowercase.
61///
62/// This uses the [unidecode] crate to approximate non-ASCII characters with their closest ASCII equivalents, and then
63/// converts the entire string to lowercase. Then, remove any non-alphanumeric character and consecutive whitespaces,
64/// returning the trimmed string.
65///
66/// # Examples
67///
68/// ```rust
69/// # use intelli_shell::utils::flatten_str;
70/// let text = "Héllö Wörld! (-123) ";
71/// let flattened = flatten_str(text);
72/// assert_eq!(flattened, "hello world -123");
73/// ```
74pub fn flatten_str(s: impl AsRef<str>) -> String {
75    // Unidecode and lowercase
76    let decoded = unidecode(s.as_ref()).to_lowercase();
77
78    // Keep only alphanumeric characters and whitespace.
79    let flattened = FLATTEN_KEEP_CHARS_REGEX.replace_all(&decoded, "");
80
81    // Remove consecutive whitespaces
82    FLATTEN_COLLAPSE_WHITESPACE_REGEX
83        .replace_all(&flattened, " ")
84        .trim()
85        .to_string()
86}
87
88/// Normalizes a variable name string that may contain multiple segments separated by `|`.
89///
90/// Each segment is individually processed by [`flatten_str`].
91///
92/// After processing, any segments that become empty are removed. The remaining non-empty,
93/// flattened segments are then joined back together with `|`.
94///
95/// # Examples
96///
97/// ```rust
98/// # use intelli_shell::utils::flatten_variable;
99/// let variable = "  First Segment | SÉCOND Part |  | Last One! || ";
100/// let flattened = flatten_variable(variable);
101/// assert_eq!(flattened, "first segment|second part|last one");
102/// ```
103pub fn flatten_variable(variable: impl AsRef<str>) -> String {
104    variable
105        .as_ref()
106        .split('|')
107        .map(str::trim)
108        .map(flatten_str)
109        .filter(|s| !s.is_empty())
110        .join("|")
111}