gear_core/limited/
str.rs

1// This file is part of Gear.
2
3// Copyright (C) 2023-2025 Gear Technologies Inc.
4// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5
6// This program is free software: you can redistribute it and/or modify
7// it under the terms of the GNU General Public License as published by
8// the Free Software Foundation, either version 3 of the License, or
9// (at your option) any later version.
10
11// This program is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this program. If not, see <https://www.gnu.org/licenses/>.
18
19//! This module provides type for string with limited length.
20
21use alloc::{borrow::Cow, string::String};
22use derive_more::{AsRef, Deref, Display, Into};
23use parity_scale_codec::{Decode, Encode};
24use scale_decode::DecodeAsType;
25use scale_encode::EncodeAsType;
26use scale_info::TypeInfo;
27
/// Wrapped string to fit given amount of bytes.
///
/// The limit `N` is expressed in bytes of the UTF-8 representation
/// (what [`str::len`] reports), not in characters, and defaults to `1024`.
///
/// The [`Cow`] is used to avoid allocating a new `String` when
/// the `LimitedStr` is created from a `&str`.
///
/// Plain [`str`] is not used because it can't be properly
/// encoded/decoded via scale codec.
// NOTE(review): `Decode`/`DecodeAsType` are derived for the newtype, so the
// decoding path presumably does not check the length against `N` — confirm
// whether that is intended.
#[derive(
    Debug,
    Display,
    Clone,
    Default,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Decode,
    DecodeAsType,
    Encode,
    EncodeAsType,
    Hash,
    TypeInfo,
    AsRef,
    Deref,
    Into,
)]
#[as_ref(forward)]
#[deref(forward)]
pub struct LimitedStr<'a, const N: usize = 1024>(Cow<'a, str>);
57
/// Returns the greatest byte index that is not larger than `pos`
/// (nor than `s.len()`) and lies on a UTF-8 character boundary of `s`.
///
/// The returned index is always safe to use for slicing or truncating `s`.
fn nearest_char_boundary(s: &str, pos: usize) -> usize {
    // Never look past the end of the string.
    let mut index = pos.min(s.len());

    // Walk left until a boundary is hit. Index 0 is always a boundary,
    // so the loop terminates before the subtraction could underflow.
    while !s.is_char_boundary(index) {
        index -= 1;
    }

    index
}
66
67impl<'a, const N: usize> LimitedStr<'a, N> {
68    /// Maximum length of the string.
69    pub const MAX_LEN: usize = N;
70
71    /// Constructs a limited string from a string.
72    ///
73    /// Checks the size of the string.
74    pub fn try_new<S: Into<Cow<'a, str>>>(s: S) -> Result<Self, LimitedStrError> {
75        let s = s.into();
76
77        if s.len() > Self::MAX_LEN {
78            Err(LimitedStrError)
79        } else {
80            Ok(Self(s))
81        }
82    }
83
84    /// Constructs a limited string from a string
85    /// truncating it if it's too long.
86    pub fn truncated<S: Into<Cow<'a, str>>>(s: S) -> Self {
87        let s = s.into();
88        let truncation_pos = nearest_char_boundary(&s, Self::MAX_LEN);
89
90        match s {
91            Cow::Borrowed(s) => Self(s[..truncation_pos].into()),
92            Cow::Owned(mut s) => {
93                s.truncate(truncation_pos);
94                Self(s.into())
95            }
96        }
97    }
98
99    /// Constructs a limited string from a static
100    /// string literal small enough to fit the limit.
101    ///
102    /// Should be used only with static string literals.
103    /// In that case it can check the string length
104    /// in compile time.
105    ///
106    /// # Panics
107    ///
108    /// Can panic in runtime if the passed string is
109    /// not a static string literal and is too long.
110    #[track_caller]
111    pub const fn from_small_str(s: &'static str) -> Self {
112        if s.len() > Self::MAX_LEN {
113            panic!("{}", LimitedStrError::MESSAGE)
114        }
115
116        Self(Cow::Borrowed(s))
117    }
118
119    /// Return string slice.
120    pub fn as_str(&self) -> &str {
121        self.as_ref()
122    }
123
124    /// Return inner value.
125    pub fn into_inner(self) -> Cow<'a, str> {
126        self.0
127    }
128}
129
130impl<'a> TryFrom<&'a str> for LimitedStr<'a> {
131    type Error = LimitedStrError;
132
133    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
134        Self::try_new(value)
135    }
136}
137
138impl<'a> TryFrom<String> for LimitedStr<'a> {
139    type Error = LimitedStrError;
140
141    fn try_from(value: String) -> Result<Self, Self::Error> {
142        Self::try_new(value)
143    }
144}
145
/// The error type returned when a string is too long to fit into a [`LimitedStr`].
///
/// It is produced by [`LimitedStr::try_new`] and by the `TryFrom<&str>` /
/// `TryFrom<String>` conversions. Its `Display` output is
/// [`LimitedStrError::MESSAGE`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Display)]
#[display("{}", Self::MESSAGE)]
pub struct LimitedStrError;
150
151impl LimitedStrError {
152    /// Static error message.
153    pub const MESSAGE: &str = "string length limit is exceeded";
154
155    /// Converts the error into a static error message.
156    pub const fn as_str(&self) -> &'static str {
157        Self::MESSAGE
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{Rng, distributions::Standard};

    /// Cuts `string` at the boundary found by `nearest_char_boundary` for
    /// `max_bytes` and checks that the remaining prefix equals `expectation`.
    fn assert_result(string: &'static str, max_bytes: usize, expectation: &'static str) {
        let string = &string[..nearest_char_boundary(string, max_bytes)];
        assert_eq!(string, expectation);
    }

    /// Slices `initial_string` at the boundary found for every `max_bytes`
    /// in `0..=upper_boundary`.
    ///
    /// Slicing a `str` at a non-boundary index panics, so completing the
    /// loop shows that every returned index is a valid boundary.
    fn check_panicking(initial_string: &'static str, upper_boundary: usize) {
        let initial_size = initial_string.len();

        for max_bytes in 0..=upper_boundary {
            let string = &initial_string[..nearest_char_boundary(initial_string, max_bytes)];

            // Extra check just for confidence: a limit that covers the whole
            // string must leave it untouched.
            if max_bytes >= initial_size {
                assert_eq!(string, initial_string);
            }
        }
    }

    #[test]
    fn truncate_test() {
        // ASCII-only string: every char takes exactly one byte.
        let utf_8 = "hello";
        // Length in bytes.
        assert_eq!(utf_8.len(), 5);
        // Length in chars.
        assert_eq!(utf_8.chars().count(), 5);

        // Check that slicing at `nearest_char_boundary` never panics.
        //
        // It calls `nearest_char_boundary` with `pos` in 0..= len * 2.
        check_panicking(utf_8, utf_8.len().saturating_mul(2));

        // Asserting results.
        assert_result(utf_8, 0, "");
        assert_result(utf_8, 1, "h");
        assert_result(utf_8, 2, "he");
        assert_result(utf_8, 3, "hel");
        assert_result(utf_8, 4, "hell");
        assert_result(utf_8, 5, "hello");
        assert_result(utf_8, 6, "hello");

        // CJK string: every char takes three bytes.
        let cjk = "你好吗";
        // Length in bytes.
        assert_eq!(cjk.len(), 9);
        // Length in chars.
        assert_eq!(cjk.chars().count(), 3);
        // Byte length of each char.
        assert!(cjk.chars().all(|c| c.len_utf8() == 3));

        // Check that slicing at `nearest_char_boundary` never panics.
        //
        // It calls `nearest_char_boundary` with `pos` in 0..= len * 2.
        check_panicking(cjk, cjk.len().saturating_mul(2));

        // Asserting results.
        assert_result(cjk, 0, "");
        assert_result(cjk, 1, "");
        assert_result(cjk, 2, "");
        assert_result(cjk, 3, "你");
        assert_result(cjk, 4, "你");
        assert_result(cjk, 5, "你");
        assert_result(cjk, 6, "你好");
        assert_result(cjk, 7, "你好");
        assert_result(cjk, 8, "你好");
        assert_result(cjk, 9, "你好吗");
        assert_result(cjk, 10, "你好吗");

        // Mixed string with one-byte and three-byte chars:
        // an interleaving of "hello" and "你好吗".
        let mix = "你he好l吗lo";
        // Length in bytes.
        assert_eq!(mix.len(), utf_8.len() + cjk.len());
        assert_eq!(mix.len(), 14);
        // Length in chars.
        assert_eq!(
            mix.chars().count(),
            utf_8.chars().count() + cjk.chars().count()
        );
        assert_eq!(mix.chars().count(), 8);

        // Check that slicing at `nearest_char_boundary` never panics.
        //
        // It calls `nearest_char_boundary` with `pos` in 0..= len * 2.
        check_panicking(mix, mix.len().saturating_mul(2));

        // Asserting results.
        assert_result(mix, 0, "");
        assert_result(mix, 1, "");
        assert_result(mix, 2, "");
        assert_result(mix, 3, "你");
        assert_result(mix, 4, "你h");
        assert_result(mix, 5, "你he");
        assert_result(mix, 6, "你he");
        assert_result(mix, 7, "你he");
        assert_result(mix, 8, "你he好");
        assert_result(mix, 9, "你he好l");
        assert_result(mix, 10, "你he好l");
        assert_result(mix, 11, "你he好l");
        assert_result(mix, 12, "你he好l吗");
        assert_result(mix, 13, "你he好l吗l");
        assert_result(mix, 14, "你he好l吗lo");
        assert_result(mix, 15, "你he好l吗lo");

        // The same truncation through the public constructor,
        // for both owned and borrowed input.
        assert_eq!(LimitedStr::<1>::truncated(String::from(mix)).as_str(), "");
        assert_eq!(LimitedStr::<5>::truncated(mix).as_str(), "你he");
        assert_eq!(
            LimitedStr::<9>::truncated(String::from(mix)).as_str(),
            "你he好l"
        );
        assert_eq!(LimitedStr::<13>::truncated(mix).as_str(), "你he好l吗l");
    }

    #[test]
    fn truncate_test_fuzz() {
        for _ in 0..50 {
            let mut thread_rng = rand::thread_rng();

            // `rand_len` is the number of random chars generated,
            // not the byte length of the resulting string.
            let rand_len = thread_rng.gen_range(0..=100_000);
            let max_bytes = thread_rng.gen_range(0..=rand_len);
            let mut string = thread_rng
                .sample_iter::<char, _>(Standard)
                .take(rand_len)
                .collect::<String>();
            // `String::truncate` panics on a non-boundary index, so this
            // line also checks that the returned position is a boundary.
            string.truncate(nearest_char_boundary(&string, max_bytes));

            // The truncated string must never exceed the requested limit.
            if string.len() > max_bytes {
                panic!("String '{}' input invalidated algorithms property", string);
            }
        }
    }
}