iceoryx2_bb_container/
semantic_string.rs

1// Copyright (c) 2023 Contributors to the Eclipse Foundation
2//
3// See the NOTICE file(s) distributed with this work for additional
4// information regarding copyright ownership.
5//
6// This program and the accompanying materials are made available under the
7// terms of the Apache Software License 2.0 which is available at
8// https://www.apache.org/licenses/LICENSE-2.0, or the MIT license
9// which is available at https://opensource.org/licenses/MIT.
10//
11// SPDX-License-Identifier: Apache-2.0 OR MIT
12
13//! The [`SemanticString`](crate::semantic_string::SemanticString) is a trait for
14//! [`FixedSizeByteString`](crate::byte_string::FixedSizeByteString) to create
15//! strong string types with semantic content contracts. They can be created
16//! with the help of the [`semantic_string`](crate::semantic_string!) macro.
17//!
18//! # Example, create a string that can contain a posix group name
19//!
20//! ```
21//! pub use iceoryx2_bb_container::semantic_string::SemanticString;
22//!
23//! use core::hash::{Hash, Hasher};
24//! use iceoryx2_bb_container::semantic_string;
25//!
26//! const GROUP_NAME_LENGTH: usize = 31;
27//! semantic_string! {
28//!   // Name of the type
29//!   name: GroupName,
30//!   // The underlying capacity of the FixedSizeByteString
31//!   capacity: GROUP_NAME_LENGTH,
32//!   // Callable that shall return true when the provided string contains invalid content
33//!   invalid_content: |string: &[u8]| {
34//!     if string.is_empty() {
35//!         // group names are not allowed to be empty
36//!         return true;
37//!     }
38//!
39//!     // group names are not allowed to start with a number or -
40//!     matches!(string[0], b'-' | b'0'..=b'9')
41//!   },
42//!   // Callable that shall return true when the provided string contains invalid characters
43//!   invalid_characters: |string: &[u8]| {
44//!     for value in string {
45//!         match value {
//!             // only lowercase letters, digits and - are allowed
47//!             b'a'..=b'z' | b'0'..=b'9' | b'-' => (),
48//!             _ => return true,
49//!         }
50//!     }
51//!
52//!     false
53//!   },
54//!   // When a SemanticString has multiple representations of the same semantic content, this
55//!   // callable shall convert the content to a uniform representation.
56//!   // Example: The path to `/tmp` can be also expressed as `/tmp/` or `////tmp////`
57//!   normalize: |this: &GroupName| {
58//!       *this
59//!   }
60//! }
61//! ```
62
63use crate::byte_string::FixedSizeByteStringModificationError;
64use crate::byte_string::{as_escaped_string, strnlen, FixedSizeByteString};
65use iceoryx2_bb_log::fail;
66use std::fmt::{Debug, Display};
67use std::hash::Hash;
68use std::ops::Deref;
69
/// Describes why creating or modifying a [`SemanticString`] failed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SemanticStringError {
    /// The resulting content would violate the content rules of the [`SemanticString`].
    InvalidContent,
    /// The resulting content would not fit into the capacity of the [`SemanticString`].
    ExceedsMaximumLength,
}
78
79impl From<FixedSizeByteStringModificationError> for SemanticStringError {
80    fn from(_value: FixedSizeByteStringModificationError) -> Self {
81        SemanticStringError::ExceedsMaximumLength
82    }
83}
84
85impl std::fmt::Display for SemanticStringError {
86    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87        std::write!(f, "SemanticStringError::{:?}", self)
88    }
89}
90
// Marker implementation: all methods of `std::error::Error` keep their defaults, the error
// is described solely by its `Debug` and `Display` implementations.
impl std::error::Error for SemanticStringError {}
92
#[doc(hidden)]
pub mod internal {
    use super::*;

    /// Low-level hooks the default methods of [`SemanticString`] are built on. The
    /// [`crate::semantic_string!`] macro generates the implementation; it is hidden from the
    /// documentation and not meant to be called by users directly.
    pub trait SemanticStringAccessor<const CAPACITY: usize> {
        /// Creates an instance without any content.
        ///
        /// # Safety
        ///
        ///   * No content validation is performed. The caller must make sure that the
        ///     instance holds valid content before it is handed out.
        unsafe fn new_empty() -> Self;

        /// Returns a mutable reference to the underlying [`FixedSizeByteString`].
        ///
        /// # Safety
        ///
        ///   * Modifications through the returned reference are not validated. The caller
        ///     must make sure that the content is valid after the modification.
        unsafe fn get_mut_string(&mut self) -> &mut FixedSizeByteString<CAPACITY>;

        /// Returns true when `string` is not acceptable as content of the implementing type.
        fn is_invalid_content(string: &[u8]) -> bool;

        /// Returns true when `string` contains characters the implementing type does not allow.
        fn does_contain_invalid_characters(string: &[u8]) -> bool;
    }
}
104
105/// Trait that defines the methods a [`FixedSizeByteString`] with context semantics, a
106/// [`SemanticString`] shares. A new [`SemanticString`] can be created with the [`crate::semantic_string!`]
107/// macro. For the usage, see [`mod@crate::semantic_string`].
108pub trait SemanticString<const CAPACITY: usize>:
109    internal::SemanticStringAccessor<CAPACITY>
110    + Debug
111    + Display
112    + Sized
113    + Deref<Target = [u8]>
114    + PartialEq
115    + Eq
116    + Hash
117{
118    /// Returns a reference to the underlying [`FixedSizeByteString`]
119    fn as_string(&self) -> &FixedSizeByteString<CAPACITY>;
120
121    /// Creates a new content. If it contains invalid characters or exceeds the maximum supported
122    /// length of the system or contains illegal strings it fails.
123    fn new(value: &[u8]) -> Result<Self, SemanticStringError> {
124        let msg = "Unable to create SemanticString";
125        let origin = "SemanticString::new()";
126
127        let mut new_self =
128            unsafe { <Self as internal::SemanticStringAccessor<CAPACITY>>::new_empty() };
129        fail!(from origin, when new_self.push_bytes(value),
130            "{} due to an invalid value \"{}\".", msg, as_escaped_string(value));
131
132        Ok(new_self)
133    }
134
135    /// Creates a new content but does not verify that it does not contain invalid characters.
136    ///
137    /// # Safety
138    ///
139    ///   * The slice must contain only valid characters.
140    ///   * The slice must have a length that is less or equal CAPACITY
141    ///   * The slice must not contain invalid UTF-8 characters
142    ///
143    unsafe fn new_unchecked(bytes: &[u8]) -> Self;
144
145    /// Creates a new content from a given ptr. The user has to ensure that it is null-terminated.
146    ///
147    /// # Safety
148    ///
149    ///   * The pointer must be '\0' (null) terminated
150    ///   * The pointer must be valid and non-null
151    ///   * The contents must have a length that is less or equal CAPACITY
152    ///   * The contents must not contain invalid UTF-8 characters
153    ///
154    unsafe fn from_c_str(ptr: *const std::ffi::c_char) -> Result<Self, SemanticStringError> {
155        Self::new(std::slice::from_raw_parts(
156            ptr.cast(),
157            strnlen(ptr, CAPACITY + 1),
158        ))
159    }
160
161    /// Returns the contents as a slice
162    fn as_bytes(&self) -> &[u8] {
163        self.as_string().as_bytes()
164    }
165
166    /// Returns a zero terminated slice of the underlying bytes
167    fn as_c_str(&self) -> *const std::ffi::c_char {
168        self.as_string().as_c_str()
169    }
170
171    /// Returns the capacity of the file system type
172    fn capacity(&self) -> usize {
173        self.as_string().capacity()
174    }
175
176    /// Finds the first occurrence of a  byte string in the given string. If the byte string was
177    /// found the start position of the byte string is returned, otherwise [`None`].
178    fn find(&self, bytes: &[u8]) -> Option<usize> {
179        self.as_string().find(bytes)
180    }
181
182    /// Finds the last occurrence of a byte string in the given string. If the byte string was
183    /// found the start position of the byte string is returned, otherwise [`None`].
184    fn rfind(&self, bytes: &[u8]) -> Option<usize> {
185        self.as_string().find(bytes)
186    }
187
188    /// Returns true when the string is full, otherwise false
189    fn is_full(&self) -> bool {
190        self.as_string().is_full()
191    }
192
193    /// Returns true when the string is empty, otherwise false
194    fn is_empty(&self) -> bool {
195        self.as_string().is_empty()
196    }
197
198    /// Returns the length of the string
199    fn len(&self) -> usize {
200        self.as_string().len()
201    }
202
203    /// Inserts a single byte at a specific position. When the capacity is exceeded, the byte is an
204    /// illegal character or the content would result in an illegal content it fails.
205    fn insert(&mut self, idx: usize, byte: u8) -> Result<(), SemanticStringError> {
206        self.insert_bytes(idx, &[byte; 1])
207    }
208
209    /// Inserts a byte slice at a specific position. When the capacity is exceeded, the byte slice contains
210    /// illegal characters or the content would result in an illegal content it fails.
211    fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) -> Result<(), SemanticStringError> {
212        let msg = "Unable to insert byte string";
213        fail!(from self, when unsafe { self.get_mut_string().insert_bytes(idx, bytes) },
214                with SemanticStringError::ExceedsMaximumLength,
215                    "{} \"{}\" since it would exceed the maximum allowed length of {}.",
216                        msg, as_escaped_string(bytes), CAPACITY);
217
218        if Self::is_invalid_content(self.as_bytes()) {
219            unsafe { self.get_mut_string().remove_range(idx, bytes.len()) };
220            fail!(from self, with SemanticStringError::InvalidContent,
221                "{} \"{}\" since it would result in an illegal content.",
222                msg, as_escaped_string(bytes));
223        }
224
225        Ok(())
226    }
227
228    /// Adds bytes to the string without checking if they only contain valid characters or
229    /// would result in a valid result.
230    ///
231    /// # Safety
232    ///
233    ///   * The user must ensure that the bytes contain only valid characters.
234    ///   * The user must ensure that the result, after the bytes were added, is valid.
235    ///   * The slice must have a length that is less or equal CAPACITY
236    ///   * The slice is not contain invalid UTF-8 characters
237    ///
238    unsafe fn insert_bytes_unchecked(&mut self, idx: usize, bytes: &[u8]);
239
240    /// Normalizes the string. This function is used as basis for [`core::hash::Hash`] and
241    /// [`PartialEq`]. Normalizing a [`SemanticString`] means to bring it to some format so that it
242    /// contains still the same semantic content but in an uniform way so that strings, with the
243    /// same semantic content but different representation compare as equal.
244    fn normalize(&self) -> Self;
245
246    /// Removes the last character. If the string is empty it returns [`None`].
247    /// If the removal would create an illegal content it fails.
248    fn pop(&mut self) -> Result<Option<u8>, SemanticStringError> {
249        if self.len() == 0 {
250            return Ok(None);
251        }
252
253        Ok(Some(self.remove(self.len() - 1)?))
254    }
255
256    /// Adds a single byte at the end. When the capacity is exceeded, the byte is an
257    /// illegal character or the content would result in an illegal content it fails.
258    fn push(&mut self, byte: u8) -> Result<(), SemanticStringError> {
259        self.insert(self.len(), byte)
260    }
261
262    /// Adds a byte slice at the end. When the capacity is exceeded, the byte slice contains
263    /// illegal characters or the content would result in an illegal content it fails.
264    fn push_bytes(&mut self, bytes: &[u8]) -> Result<(), SemanticStringError> {
265        self.insert_bytes(self.len(), bytes)
266    }
267
268    /// Removes a byte at a specific position and returns it.
269    /// If the removal would create an illegal content it fails.
270    fn remove(&mut self, idx: usize) -> Result<u8, SemanticStringError> {
271        let value = unsafe { self.get_mut_string().remove(idx) };
272
273        if Self::is_invalid_content(self.as_bytes()) {
274            unsafe { self.get_mut_string().insert(idx, value).unwrap() };
275            fail!(from self, with SemanticStringError::InvalidContent,
276                "Unable to remove character at position {} since it would result in an illegal content.",
277                idx);
278        }
279
280        Ok(value)
281    }
282
283    /// Removes a range.
284    /// If the removal would create an illegal content it fails.
285    fn remove_range(&mut self, idx: usize, len: usize) -> Result<(), SemanticStringError> {
286        let mut temp = *self.as_string();
287        temp.remove_range(idx, len);
288        if Self::is_invalid_content(temp.as_bytes()) {
289            fail!(from self, with SemanticStringError::InvalidContent,
290                "Unable to remove range from {} with lenght {} since it would result in the illegal content \"{}\".",
291                    idx, len, temp);
292        }
293
294        unsafe { self.get_mut_string().remove_range(idx, len) };
295        Ok(())
296    }
297
298    /// Removes all bytes which satisfy the provided clojure f.
299    /// If the removal would create an illegal content it fails.
300    fn retain<F: FnMut(u8) -> bool>(&mut self, f: F) -> Result<(), SemanticStringError> {
301        let mut temp = *self.as_string();
302        let f = temp.retain_impl(f);
303
304        if Self::is_invalid_content(temp.as_bytes()) {
305            fail!(from self, with SemanticStringError::InvalidContent,
306                "Unable to retain characters from string since it would result in the illegal content \"{}\".",
307                temp);
308        }
309
310        unsafe { self.get_mut_string().retain(f) };
311        Ok(())
312    }
313
314    /// Removes a prefix. If the prefix does not exist it returns false. If the removal would lead
315    /// to an invalid string content it fails and returns [`SemanticStringError::InvalidContent`].
316    /// After a successful removal it returns true.
317    fn strip_prefix(&mut self, bytes: &[u8]) -> Result<bool, SemanticStringError> {
318        let mut temp = *self.as_string();
319        if !temp.strip_prefix(bytes) {
320            return Ok(false);
321        }
322
323        if Self::is_invalid_content(temp.as_bytes()) {
324            let mut prefix = FixedSizeByteString::<123>::new();
325            unsafe { prefix.insert_bytes_unchecked(0, bytes) };
326            fail!(from self, with SemanticStringError::InvalidContent,
327                "Unable to strip prefix \"{}\" from string since it would result in the illegal content \"{}\".",
328                prefix, temp);
329        }
330
331        unsafe { self.get_mut_string().strip_prefix(bytes) };
332
333        Ok(true)
334    }
335
336    /// Removes a suffix. If the suffix does not exist it returns false. If the removal would lead
337    /// to an invalid string content it fails and returns [`SemanticStringError::InvalidContent`].
338    /// After a successful removal it returns true.
339    fn strip_suffix(&mut self, bytes: &[u8]) -> Result<bool, SemanticStringError> {
340        let mut temp = *self.as_string();
341        if !temp.strip_suffix(bytes) {
342            return Ok(false);
343        }
344
345        if Self::is_invalid_content(temp.as_bytes()) {
346            let mut prefix = FixedSizeByteString::<123>::new();
347            unsafe { prefix.insert_bytes_unchecked(0, bytes) };
348            fail!(from self, with SemanticStringError::InvalidContent,
349                "Unable to strip prefix \"{}\" from string since it would result in the illegal content \"{}\".",
350                prefix, temp);
351        }
352
353        unsafe { self.get_mut_string().strip_suffix(bytes) };
354
355        Ok(true)
356    }
357
358    /// Truncates the string to new_len.
359    fn truncate(&mut self, new_len: usize) -> Result<(), SemanticStringError> {
360        let mut temp = *self.as_string();
361        temp.truncate(new_len);
362
363        if Self::is_invalid_content(temp.as_bytes()) {
364            fail!(from self, with SemanticStringError::InvalidContent,
365                "Unable to truncate characters to {} since it would result in the illegal content \"{}\".",
366                new_len, temp);
367        }
368
369        unsafe { self.get_mut_string().truncate(new_len) };
370        Ok(())
371    }
372}
373
/// Helper macro to create a new [`SemanticString`]. Usage example can be found here:
/// [`mod@crate::semantic_string`].
///
/// The generated code refers to `Hash` and `Hasher` by their unqualified names and calls
/// methods of the [`SemanticString`] trait, therefore the invoking module has to bring
/// `core::hash::{Hash, Hasher}` and [`SemanticString`] into scope. It also implements the
/// `serde` traits `Serialize` and `Deserialize`, so the `serde` crate must be available to
/// the invoking crate.
///
/// Equality and hashing of the generated type are both based on the normalized content.
#[macro_export(local_inner_macros)]
macro_rules! semantic_string {
    {$(#[$documentation:meta])*
     /// Name of the struct
     name: $string_name:ident,
     /// Capacity of the underlying FixedSizeByteString
     capacity: $capacity:expr,
     /// Callable that gets a [`&[u8]`] as input and shall return true when the slice contains
     /// invalid content.
     invalid_content: $invalid_content:expr,
     /// Callable that gets a [`&[u8]`] as input and shall return true when the slice contains
     /// invalid characters.
     invalid_characters: $invalid_characters:expr,
     /// Normalizes the content. Required when the same semantical content has multiple
     /// representations like paths for instance (`/tmp` == `/tmp/`)
     normalize: $normalize:expr} => {
        $(#[$documentation])*
        #[derive(Debug, Clone, Copy, Eq)]
        pub struct $string_name {
            value: iceoryx2_bb_container::byte_string::FixedSizeByteString<$capacity>
        }

        // BEGIN: serde
        pub(crate) mod VisitorType {
            pub(crate) struct $string_name;
        }

        impl<'de> serde::de::Visitor<'de> for VisitorType::$string_name {
            type Value = $string_name;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                // name the generated type, the macro is not restricted to service names
                std::write!(formatter, "a string containing a valid {}", std::stringify!($string_name))
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                match $string_name::new(v.as_bytes()) {
                    Ok(v) => Ok(v),
                    Err(v) => Err(E::custom(std::format!("invalid {} provided {:?}.", std::stringify!($string_name), v))),
                }
            }
        }

        impl<'de> serde::Deserialize<'de> for $string_name {
            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
            where
                D: serde::Deserializer<'de>,
            {
                deserializer.deserialize_str(VisitorType::$string_name)
            }
        }

        impl serde::Serialize for $string_name {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                // The content can only be invalid UTF-8 when it was created with one of the
                // unchecked functions in violation of their safety contract. Report this as
                // serialization error instead of panicking.
                match std::str::from_utf8(self.as_bytes()) {
                    Ok(value) => serializer.serialize_str(value),
                    Err(e) => Err(<S::Error as serde::ser::Error>::custom(std::format!(
                        "unable to serialize {} since it contains invalid UTF-8 ({:?}).",
                        std::stringify!($string_name), e))),
                }
            }
        }
        // END: serde

        impl iceoryx2_bb_container::semantic_string::SemanticString<$capacity> for $string_name {
            fn as_string(&self) -> &iceoryx2_bb_container::byte_string::FixedSizeByteString<$capacity> {
                &self.value
            }

            fn normalize(&self) -> Self {
                $normalize(self)
            }

            unsafe fn new_unchecked(bytes: &[u8]) -> Self {
                Self {
                    value: iceoryx2_bb_container::byte_string::FixedSizeByteString::new_unchecked(bytes),
                }
            }

            unsafe fn insert_bytes_unchecked(&mut self, idx: usize, bytes: &[u8]) {
                self.value.insert_bytes_unchecked(idx, bytes);
            }
        }

        impl $string_name {
            /// Returns the maximum length of the string, which is equal to its capacity
            pub const fn max_len() -> usize {
                $capacity
            }
        }

        impl std::fmt::Display for $string_name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::write!(f, "{}", self.value)
            }
        }

        // Hashing uses the normalized content so that it is consistent with `PartialEq`
        impl Hash for $string_name {
            fn hash<H: Hasher>(&self, state: &mut H) {
                self.normalize().as_bytes().hash(state)
            }
        }

        impl From<$string_name> for String {
            fn from(value: $string_name) -> String {
                // SAFETY: It is ensured that the semantic string contains only valid utf-8 strings
                unsafe { String::from_utf8_unchecked(value.as_bytes().to_vec()) }
            }
        }

        impl From<&$string_name> for String {
            fn from(value: &$string_name) -> String {
                // SAFETY: It is ensured that the semantic string contains only valid utf-8 strings
                unsafe { String::from_utf8_unchecked(value.as_bytes().to_vec()) }
            }
        }

        impl std::convert::TryFrom<&str> for $string_name {
            type Error = iceoryx2_bb_container::semantic_string::SemanticStringError;

            fn try_from(value: &str) -> Result<Self, Self::Error> {
                Self::new(value.as_bytes())
            }
        }

        // Two strings are equal when their normalized contents are equal
        impl PartialEq<$string_name> for $string_name {
            fn eq(&self, other: &$string_name) -> bool {
                *self.normalize().as_bytes() == *other.normalize().as_bytes()
            }
        }

        // Comparisons with raw bytes first convert the bytes into the semantic type; bytes
        // that do not form valid content never compare as equal.
        impl PartialEq<&[u8]> for $string_name {
            fn eq(&self, other: &&[u8]) -> bool {
                let other = match $string_name::new(other) {
                    Ok(other) => other,
                    Err(_) => return false,
                };

                *self == other
            }
        }

        impl PartialEq<&[u8]> for &$string_name {
            fn eq(&self, other: &&[u8]) -> bool {
                let other = match $string_name::new(other) {
                    Ok(other) => other,
                    Err(_) => return false,
                };

                **self == other
            }
        }

        impl<const CAPACITY: usize> PartialEq<[u8; CAPACITY]> for $string_name {
            fn eq(&self, other: &[u8; CAPACITY]) -> bool {
                let other = match $string_name::new(other) {
                    Ok(other) => other,
                    Err(_) => return false,
                };

                *self == other
            }
        }

        impl<const CAPACITY: usize> PartialEq<&[u8; CAPACITY]> for $string_name {
            fn eq(&self, other: &&[u8; CAPACITY]) -> bool {
                let other = match $string_name::new(*other) {
                    Ok(other) => other,
                    Err(_) => return false,
                };

                *self == other
            }
        }

        impl std::ops::Deref for $string_name {
            type Target = [u8];

            fn deref(&self) -> &Self::Target {
                self.value.as_bytes()
            }
        }

        impl iceoryx2_bb_container::semantic_string::internal::SemanticStringAccessor<$capacity> for $string_name {
            unsafe fn new_empty() -> Self {
                Self {
                    value: iceoryx2_bb_container::byte_string::FixedSizeByteString::new(),
                }
            }

            unsafe fn get_mut_string(&mut self) -> &mut iceoryx2_bb_container::byte_string::FixedSizeByteString<$capacity> {
                &mut self.value
            }

            // Content is invalid when it contains invalid characters or when the user
            // provided `invalid_content` callable rejects it.
            fn is_invalid_content(string: &[u8]) -> bool {
                if Self::does_contain_invalid_characters(string) {
                    return true;
                }

                $invalid_content(string)
            }

            // Bytes that are not valid UTF-8 are always rejected, everything else is decided
            // by the user provided `invalid_characters` callable.
            fn does_contain_invalid_characters(string: &[u8]) -> bool {
                if core::str::from_utf8(string).is_err() {
                    return true;
                }

                $invalid_characters(string)
            }
        }

    };
}