// r3bl_rs_utils_core/calc_str_len.rs

/*
 *   Copyright (c) 2024 R3BL LLC
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

use std::collections::{hash_map::Entry, HashMap};

use sha2::{Digest, Sha256};
use unicode_width::UnicodeWidthStr;

/// Strategy used by [StringLength::calculate] to measure a string.
///
/// The textual names of the variants, as accepted by [std::str::FromStr] and produced
/// by [std::fmt::Display], are `strip_ansi` and `unicode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StringLength {
    /// Strip ANSI escape sequences from the input, then measure what remains with
    /// [UnicodeWidthStr::width]. Results are memoized in the [MemoizedLenMap] that is
    /// passed to [StringLength::calculate].
    StripAnsi,
    /// Measure the input as-is with [UnicodeWidthStr::width]. Results are not memoized.
    Unicode,
}

/// Cache handed to [StringLength::calculate]. The key is the exact input string that
/// was measured and the value is the length that was computed for it.
pub type MemoizedLenMap = HashMap<String, u16>;

31mod to_from_string_impl {
32    use super::*;
33
34    impl std::str::FromStr for StringLength {
35        type Err = String;
36
37        fn from_str(s: &str) -> Result<Self, Self::Err> {
38            match s {
39                "strip_ansi" => Ok(Self::StripAnsi),
40                "unicode" => Ok(Self::Unicode),
41                _ => Err(format!("Invalid StringLength variant: {}", s)),
42            }
43        }
44    }
45
46    impl std::fmt::Display for StringLength {
47        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48            match self {
49                Self::StripAnsi => write!(f, "strip_ansi"),
50                Self::Unicode => write!(f, "unicode"),
51            }
52        }
53    }
54}
55
56impl StringLength {
57    /// If the input can't be found in the memoized map, calculate the length and store
58    /// it. Otherwise return the stored length.
59    ///
60    /// # Memoization
61    ///
62    /// The key is the [String] that needs to be measured using the variants. The value is
63    /// the length.
64    ///
65    /// # Speedup, even for small strings
66    ///
67    /// | Variant                   | Cached | Speedup |
68    /// |---------------------------|--------|---------|
69    /// | [StringLength::Unicode]   | No     | None    |
70    /// | [StringLength::StripAnsi] | Yes    | 70x     |
71    ///
72    /* cspell:disable-next-line  */
73    /// Eg: For input: `"\u{1b}[31mfoo\u{1b}[0m";` on a 13th Gen Intel® Core™ i5-13600K
74    /// machine with 64GB of RAM running Ubuntu 24.04, the execution times are:
75    /// - Uncached time is 700µs.
76    /// - Cached time is 10µs.
77    pub fn calculate(&self, input: &str, memoized_len_map: &mut MemoizedLenMap) -> u16 {
78        match self {
79            // Do not memoize (slower to do this).
80            StringLength::Unicode => UnicodeWidthStr::width(input) as u16,
81
82            // Memoize (faster to do this).
83            StringLength::StripAnsi => match memoized_len_map.entry(input.to_string()) {
84                Entry::Occupied(entry) => *entry.get(),
85                Entry::Vacant(entry) => {
86                    let stripped_input = strip_ansi::strip_ansi(input);
87                    let stripped_input: &str = stripped_input.as_ref();
88                    let length = UnicodeWidthStr::width(stripped_input) as u16;
89                    entry.insert(length);
90                    length
91                }
92            },
93        }
94    }
95
96    /// [SHA256](sha2) produces a 256-bit (32-byte) hash value, typically rendered as a
97    /// hexadecimal number. However, here we are converting it to a u32. Here's an example
98    /// of how long it takes to run on `foo`: 25.695µs. To provide some perspective of how
99    /// long this is, it takes about the same time to run [StringLength::Unicode] on the
100    /// same input, on a 13th Gen Intel® Core™ i5-13600K machine with 64GB of RAM running
101    /// Ubuntu 24.04.
102    pub fn calculate_sha256(text: &str) -> u32 {
103        let mut hasher = Sha256::new();
104        hasher.update(text);
105        let result = hasher.finalize();
106        let mut bytes = [0u8; 4];
107        bytes.copy_from_slice(&result.as_slice()[..4]);
108        u32::from_le_bytes(bytes)
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;
    use crate::timed;

    /// Known-answer check for [StringLength::calculate_sha256]. The expected value is
    /// `0x6bb4_262c`, i.e. the bytes `2c 26 b4 6b` read as a little-endian `u32`.
    #[test]
    fn test_sha256() {
        let input = "foo";
        // Only the call itself is timed; the assertion runs afterwards.
        let (hash, duration) = timed!({ StringLength::calculate_sha256(input) });
        println!("Execution time - string_length(Sha256): {:?}", duration);
        assert_eq!(hash, 1806968364);
    }

    /// [StringLength::StripAnsi] stores its result on the first call and returns the
    /// same answer on the second. Both timings are printed for inspection only; they
    /// are not compared, because wall-clock assertions would make the test flaky.
    #[test]
    fn test_strip_ansi_esc_seq_len_cache_speedup() {
        /* cspell: disable-next-line */
        let input = "\u{1b}[31mfoo\u{1b}[0m";
        let memoized_len_map = &mut MemoizedLenMap::new();

        assert!(!memoized_len_map.contains_key(input));

        // First call: nothing is stored yet, so the length is computed and stored.
        let (len_uncached, duration_uncached) =
            timed!({ StringLength::StripAnsi.calculate(input, memoized_len_map) });
        assert_eq!(len_uncached, 3);
        assert_eq!(memoized_len_map.get(input), Some(&3));
        println!(
            "Execution time - U string_length(StripAnsi): {:?}",
            duration_uncached
        );

        // Second call: the stored length is returned and no entry is added.
        let (len_cached, duration_cached) =
            timed!({ StringLength::StripAnsi.calculate(input, memoized_len_map) });
        assert_eq!(len_cached, 3);
        assert_eq!(memoized_len_map.get(input), Some(&3));
        assert_eq!(memoized_len_map.len(), 1);
        println!(
            "Execution time - C string_length(StripAnsi): {:?}",
            duration_cached
        );
    }

    /// [StringLength::Unicode] must leave the map untouched.
    #[test]
    fn test_unicode_string_len_no_cache() {
        let input = "foo";
        let memoized_len_map = &mut MemoizedLenMap::new();

        assert!(memoized_len_map.is_empty());

        let (len, duration_uncached) =
            timed!({ StringLength::Unicode.calculate(input, memoized_len_map) });
        assert_eq!(len, 3);
        assert!(memoized_len_map.is_empty());
        println!(
            "Execution time - U string_length(Unicode): {:?}",
            duration_uncached
        );
    }
}