gear_core/
str.rs

1// This file is part of Gear.
2
3// Copyright (C) 2023-2025 Gear Technologies Inc.
4// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this program. If not, see <https://www.gnu.org/licenses/>.
18
19//! String with limited length implementation
20
21use alloc::{borrow::Cow, string::String};
22use parity_scale_codec::{Decode, Encode};
23use scale_info::TypeInfo;
24
/// Max amount of bytes allowed to be thrown as string explanation of the error.
///
/// This is the upper bound on the byte length of a [`LimitedStr`] built in this
/// module: longer input is rejected by `LimitedStr::from_small_str` and
/// `TryFrom<&str>`, and truncated by `From<String>`.
pub const TRIMMED_MAX_LEN: usize = 1024;
27
/// Shortens `s` in place so that it occupies at most `max_bytes` bytes.
///
/// The cut never lands inside a multi-byte character: when `max_bytes` is not
/// a char boundary, the string is cut at the closest boundary below it.
/// A string that already fits is left untouched.
fn smart_truncate(s: &mut String, max_bytes: usize) {
    if s.len() <= max_bytes {
        return;
    }

    // Walk down from `max_bytes` to the nearest char boundary. Index 0 is
    // always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    s.truncate(cut);
}
39
/// Wrapped string to fit [`TRIMMED_MAX_LEN`] amount of bytes.
///
/// The `Cow` is used to avoid allocating a new `String` when the `LimitedStr` is
/// created from a `&str`.
///
/// Plain `str` is not used because it can't be properly encoded/decoded via scale codec.
//
// The constructors in this file uphold the length limit in two ways:
// `from_small_str` and `TryFrom<&str>` refuse longer input, while
// `From<String>` truncates it on a char boundary.
//
// NOTE(review): the derived `Decode` does not appear to re-check the limit, so
// a decoded value may be longer than `TRIMMED_MAX_LEN` — confirm whether
// decoded input is expected to be trusted.
#[derive(
    TypeInfo, Encode, Decode, Debug, Clone, derive_more::Display, PartialEq, Eq, PartialOrd, Ord,
)]
pub struct LimitedStr<'a>(Cow<'a, str>);
50
51impl<'a> LimitedStr<'a> {
52    const INIT_ERROR_MSG: &'static str = concat!(
53        "String must be less than ",
54        stringify!(TRIMMED_MAX_LEN),
55        " bytes."
56    );
57
58    /// Convert from `&str` in compile-time.
59    #[track_caller]
60    pub const fn from_small_str(s: &'a str) -> Self {
61        if s.len() > TRIMMED_MAX_LEN {
62            panic!("{}", Self::INIT_ERROR_MSG)
63        }
64
65        Self(Cow::Borrowed(s))
66    }
67
68    /// Return string slice.
69    pub fn as_str(&self) -> &str {
70        self.0.as_ref()
71    }
72
73    /// Return inner value.
74    pub fn into_inner(self) -> Cow<'a, str> {
75        self.0
76    }
77}
78
79/// The error type returned when a conversion from `&str` to [`LimitedStr`] fails.
80#[derive(Clone, Debug, derive_more::Display)]
81#[display("String must be less than {TRIMMED_MAX_LEN} bytes")]
82pub struct LimitedStrTryFromError;
83
84impl<'a> TryFrom<&'a str> for LimitedStr<'a> {
85    type Error = LimitedStrTryFromError;
86
87    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
88        if s.len() > TRIMMED_MAX_LEN {
89            return Err(LimitedStrTryFromError);
90        }
91
92        Ok(Self(Cow::from(s)))
93    }
94}
95
96impl From<String> for LimitedStr<'_> {
97    fn from(mut s: String) -> Self {
98        smart_truncate(&mut s, TRIMMED_MAX_LEN);
99        Self(Cow::from(s))
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{Rng, distributions::Standard};

    /// Truncates a copy of `string` to `max_bytes` and compares it with `expectation`.
    fn assert_result(string: &'static str, max_bytes: usize, expectation: &'static str) {
        let mut truncated = String::from(string);
        smart_truncate(&mut truncated, max_bytes);
        assert_eq!(truncated, expectation);
    }

    /// Runs `smart_truncate` on `initial_string` for every limit in
    /// `0..=upper_boundary`; the test fails if any of the calls panics.
    fn check_panicking(initial_string: &'static str, upper_boundary: usize) {
        for max_bytes in 0..=upper_boundary {
            let mut truncated = String::from(initial_string);
            smart_truncate(&mut truncated, max_bytes);

            // Extra sanity check: a limit covering the whole input changes nothing.
            if max_bytes >= initial_string.len() {
                assert_eq!(truncated, initial_string);
            }
        }
    }

    #[test]
    fn truncate_test() {
        // Single-byte characters only: byte length equals char count.
        let utf_8 = "hello";
        assert_eq!(utf_8.len(), 5);
        assert_eq!(utf_8.chars().count(), 5);

        // No limit in `0..=len * 2` may make `smart_truncate` panic.
        check_panicking(utf_8, utf_8.len().saturating_mul(2));

        for (max_bytes, expectation) in [
            (0, ""),
            (1, "h"),
            (2, "he"),
            (3, "hel"),
            (4, "hell"),
            (5, "hello"),
            (6, "hello"),
        ] {
            assert_result(utf_8, max_bytes, expectation);
        }

        // Three-byte characters only: 3 chars take 9 bytes.
        let cjk = "你好吗";
        assert_eq!(cjk.len(), 9);
        assert_eq!(cjk.chars().count(), 3);

        // No limit in `0..=len * 2` may make `smart_truncate` panic.
        check_panicking(cjk, cjk.len().saturating_mul(2));

        for (max_bytes, expectation) in [
            (0, ""),
            (1, ""),
            (2, ""),
            (3, "你"),
            (4, "你"),
            (5, "你"),
            (6, "你好"),
            (7, "你好"),
            (8, "你好"),
            (9, "你好吗"),
            (10, "你好吗"),
        ] {
            assert_result(cjk, max_bytes, expectation);
        }

        // The characters of "hello" and "你好吗" interleaved, so one-byte and
        // three-byte characters alternate.
        let mix = "你he好l吗lo";
        // Length in bytes.
        assert_eq!(mix.len(), utf_8.len() + cjk.len());
        assert_eq!(mix.len(), 14);
        // Length in chars.
        assert_eq!(
            mix.chars().count(),
            utf_8.chars().count() + cjk.chars().count()
        );
        assert_eq!(mix.chars().count(), 8);

        // No limit in `0..=len * 2` may make `smart_truncate` panic.
        check_panicking(mix, mix.len().saturating_mul(2));

        for (max_bytes, expectation) in [
            (0, ""),
            (1, ""),
            (2, ""),
            (3, "你"),
            (4, "你h"),
            (5, "你he"),
            (6, "你he"),
            (7, "你he"),
            (8, "你he好"),
            (9, "你he好l"),
            (10, "你he好l"),
            (11, "你he好l"),
            (12, "你he好l吗"),
            (13, "你he好l吗l"),
            (14, "你he好l吗lo"),
            (15, "你he好l吗lo"),
        ] {
            assert_result(mix, max_bytes, expectation);
        }
    }

    #[test]
    fn truncate_test_fuzz() {
        for _ in 0..50 {
            let mut rng = rand::thread_rng();

            // `rand_len` is a number of chars, so the generated string is at
            // least `rand_len` bytes long and `max_bytes` never exceeds it.
            let rand_len = rng.gen_range(0..=100_000);
            let max_bytes = rng.gen_range(0..=rand_len);
            let mut string: String = rng
                .sample_iter::<char, _>(Standard)
                .take(rand_len)
                .collect();

            smart_truncate(&mut string, max_bytes);

            // The one property that must always hold: the result fits the limit.
            assert!(
                string.len() <= max_bytes,
                "String '{}' input invalidated algorithms property",
                string
            );
        }
    }
}