Skip to main content

rasn/types/strings/
constrained.rs

1use core::fmt;
2
3use alloc::collections::BTreeMap;
4use num_traits::{AsPrimitive, FromPrimitive, PrimInt, ToPrimitive, Unsigned};
5
6use crate::error::strings::{InvalidRestrictedString, PermittedAlphabetError};
7use alloc::{boxed::Box, vec::Vec};
8use bitvec::prelude::*;
9
10use crate::types;
/// Identifies which restricted character string type a permitted alphabet
/// belongs to.
///
/// The value is used to pick the matching error variant and to name the
/// string type in error messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum CharacterSetName {
    /// `BMPString`
    Bmp,
    /// `GeneralString`
    General,
    /// `GraphicString`
    Graphic,
    /// `IA5String`
    IA5,
    /// `NumericString`
    Numeric,
    /// `PrintableString`
    Printable,
    /// `TeletexString`
    Teletex,
    /// `VisibleString`
    Visible,
}
21impl fmt::Display for CharacterSetName {
22    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23        match self {
24            Self::Bmp => write!(f, "BMPString"),
25            Self::General => write!(f, "GeneralString"),
26            Self::Graphic => write!(f, "GraphicString"),
27            Self::IA5 => write!(f, "IA5String"),
28            Self::Numeric => write!(f, "NumericString"),
29            Self::Printable => write!(f, "PrintableString"),
30            Self::Teletex => write!(f, "TeletexString"),
31            Self::Visible => write!(f, "VisibleString"),
32        }
33    }
34}
35
36pub(crate) trait StaticPermittedAlphabet: Sized + Default {
37    type T: PrimInt
38        + Unsigned
39        + ToPrimitive
40        + FromPrimitive
41        + AsPrimitive<u8>
42        + AsPrimitive<u16>
43        + AsPrimitive<u32>;
44    const CHARACTER_SET: &'static [u32];
45    /// Bits needed to represent a character in the character set so that every character can be represented
46    /// Encoding specific requirement
47    const CHARACTER_SET_WIDTH: usize = crate::num::log2(Self::CHARACTER_SET.len() as i128) as usize;
48    const CHARACTER_SET_NAME: CharacterSetName;
49
50    fn push_char(&mut self, ch: u32);
51    fn chars(&self) -> impl Iterator<Item = u32> + '_;
52    fn contains_char(ch: u32) -> bool {
53        Self::CHARACTER_SET.contains(&ch)
54    }
55    fn invalid_restricted_string(ch: u32) -> InvalidRestrictedString {
56        match Self::CHARACTER_SET_NAME {
57            CharacterSetName::Bmp => InvalidRestrictedString::InvalidBmpString(ch.into()),
58            CharacterSetName::General => InvalidRestrictedString::InvalidGeneralString(ch.into()),
59            CharacterSetName::Graphic => InvalidRestrictedString::InvalidGraphicString(ch.into()),
60            CharacterSetName::IA5 => InvalidRestrictedString::InvalidIA5String(ch.into()),
61            CharacterSetName::Numeric => InvalidRestrictedString::InvalidNumericString(ch.into()),
62            CharacterSetName::Printable => {
63                InvalidRestrictedString::InvalidPrintableString(ch.into())
64            }
65            CharacterSetName::Teletex => InvalidRestrictedString::InvalidTeletexString(ch.into()),
66            CharacterSetName::Visible => InvalidRestrictedString::InvalidVisibleString(ch.into()),
67        }
68    }
69    fn try_from_slice(input: impl AsRef<[u8]>) -> Result<Vec<Self::T>, PermittedAlphabetError> {
70        Self::try_from_slice_with_width(input, core::mem::size_of::<Self::T>())
71    }
72    fn try_from_slice_with_width(
73        input: impl AsRef<[u8]>,
74        width: usize,
75    ) -> Result<Vec<Self::T>, PermittedAlphabetError> {
76        let input = input.as_ref();
77        // We currently only support character widths up to 4 bytes on error logic
78        // Width can be larger than 4 only if we create new types with larger character widths
79        debug_assert!(width <= 4);
80        if width == 0 {
81            return Err(PermittedAlphabetError::Other {
82                message: alloc::format!(
83                    "Character set width set to zero when parsing string {}",
84                    Self::CHARACTER_SET_NAME
85                ),
86            });
87        }
88        // Input must be aligned with character encoding width to be valid input
89        if input.len() % width != 0 {
90            return Err(PermittedAlphabetError::InvalidData {
91                length: input.len(),
92                width,
93            });
94        }
95        let num_elements = input.len() / width;
96        let mut vec = Vec::with_capacity(num_elements);
97        // Character width can be more than 1 byte, and combined bytes define the character encoding width
98        let process_chunk: fn(&[u8]) -> Option<Self::T> = match width {
99            1 => |chunk: &[u8]| Self::T::from_u8(chunk[0]),
100            2 => |chunk: &[u8]| {
101                Self::T::from_u16(u16::from_be_bytes(chunk.try_into().unwrap_or_default()))
102            },
103            3 | 4 => |chunk: &[u8]| {
104                Self::T::from_u32(u32::from_be_bytes(chunk.try_into().unwrap_or_default()))
105            },
106            _ => unreachable!(),
107        };
108
109        for chunk in input.chunks_exact(width) {
110            if let Some(character) = process_chunk(chunk) {
111                if Self::contains_char(character.as_()) {
112                    vec.push(character);
113                } else {
114                    return Err(PermittedAlphabetError::InvalidRestrictedString {
115                        source: Self::invalid_restricted_string(
116                            character.to_u32().unwrap_or_default(),
117                        ),
118                    });
119                }
120            }
121        }
122        Ok(vec)
123    }
124    fn index_map() -> &'static alloc::collections::BTreeMap<u32, u32>;
125    fn character_map() -> &'static alloc::collections::BTreeMap<u32, u32>;
126    fn char_range_to_bit_range(mut range: core::ops::Range<usize>) -> core::ops::Range<usize> {
127        let width = Self::CHARACTER_SET_WIDTH;
128        range.start *= width;
129        range.end *= width;
130        range
131    }
132
133    fn to_index_or_value_bitstring(&self) -> types::BitString {
134        if should_be_indexed(Self::CHARACTER_SET_WIDTH as u32, Self::CHARACTER_SET) {
135            self.to_index_string()
136        } else {
137            self.to_bit_string()
138        }
139    }
140
141    fn to_index_string(&self) -> types::BitString {
142        let index_map = Self::index_map();
143        let mut index_string = types::BitString::new();
144        let width = Self::CHARACTER_SET_WIDTH;
145        for ch in self.chars() {
146            let index = index_map.get(&ch).unwrap();
147            index_string
148                .extend_from_bitslice(&index.view_bits::<Msb0>()[(u32::BITS as usize - width)..]);
149        }
150        index_string
151    }
152
153    fn to_octet_aligned_index_string(&self) -> Vec<u8> {
154        let index_map = Self::index_map();
155        let mut index_string = types::BitString::new();
156        let width = Self::CHARACTER_SET_WIDTH;
157        let new_width = self.octet_aligned_char_width();
158
159        for ch in self.chars() {
160            let ch = &index_map[&ch].view_bits::<Msb0>()[(u32::BITS as usize - width)..];
161            let mut padding = types::BitString::new();
162            for _ in 0..(new_width - width) {
163                padding.push(false);
164            }
165            padding.extend_from_bitslice(ch);
166            index_string.extend(padding);
167        }
168        index_string.as_raw_slice().to_vec()
169    }
170
171    fn octet_aligned_char_width(&self) -> usize {
172        if Self::CHARACTER_SET_WIDTH.is_power_of_two() {
173            Self::CHARACTER_SET_WIDTH
174        } else {
175            Self::CHARACTER_SET_WIDTH.next_power_of_two()
176        }
177    }
178
179    fn to_bit_string(&self) -> types::BitString {
180        let mut octet_string = types::BitString::new();
181        let width = Self::CHARACTER_SET_WIDTH;
182
183        for ch in self.chars() {
184            octet_string
185                .extend_from_bitslice(&ch.view_bits::<Msb0>()[(u32::BITS as usize - width)..]);
186        }
187        octet_string
188    }
189
190    fn to_octet_aligned_string(&self) -> Vec<u8> {
191        let mut octet_string = types::BitString::new();
192        let width = self.octet_aligned_char_width();
193
194        for ch in self.chars() {
195            octet_string
196                .extend_from_bitslice(&ch.view_bits::<Msb0>()[(u32::BITS as usize - width)..]);
197        }
198        octet_string.as_raw_slice().to_vec()
199    }
200
201    fn character_width() -> u32 {
202        crate::num::log2(Self::CHARACTER_SET.len() as i128)
203    }
204
205    fn len(&self) -> usize {
206        self.chars().count()
207    }
208
209    #[allow(clippy::box_collection)]
210    fn build_index_map() -> Box<alloc::collections::BTreeMap<u32, u32>> {
211        Box::new(
212            Self::CHARACTER_SET
213                .iter()
214                .copied()
215                .enumerate()
216                .map(|(i, e)| (e, u32::from_usize(i).unwrap_or_default()))
217                .collect(),
218        )
219    }
220
221    #[allow(clippy::box_collection)]
222    fn build_character_map() -> Box<alloc::collections::BTreeMap<u32, u32>> {
223        Box::new(
224            Self::CHARACTER_SET
225                .iter()
226                .copied()
227                .enumerate()
228                .map(|(i, e)| (u32::from_usize(i).unwrap_or_default(), e))
229                .collect(),
230        )
231    }
232
233    fn try_from_permitted_alphabet(
234        input: crate::types::BitString,
235        alphabet: Option<&BTreeMap<u32, u32>>,
236    ) -> Result<Self, PermittedAlphabetError> {
237        let alphabet = alphabet.unwrap_or_else(|| Self::character_map());
238        try_from_permitted_alphabet(input, alphabet)
239    }
240
241    #[track_caller]
242    fn try_from_bits(
243        bits: crate::types::BitString,
244        character_width: usize,
245    ) -> Result<Self, PermittedAlphabetError> {
246        let mut string = Self::default();
247        if character_width == 0 || !bits.len().is_multiple_of(character_width) {
248            return Err(PermittedAlphabetError::InvalidData {
249                length: bits.len(),
250                width: character_width,
251            });
252        }
253        for ch in bits.chunks_exact(character_width) {
254            let ch = ch.load_be::<u32>();
255            if Self::contains_char(ch) {
256                string.push_char(ch);
257            } else {
258                return Err(PermittedAlphabetError::InvalidRestrictedString {
259                    source: Self::invalid_restricted_string(ch),
260                });
261            }
262        }
263        Ok(string)
264    }
265}
266
267pub(crate) fn try_from_permitted_alphabet<S: StaticPermittedAlphabet>(
268    input: crate::types::BitString,
269    alphabet: &BTreeMap<u32, u32>,
270) -> Result<S, PermittedAlphabetError> {
271    let mut string = S::default();
272    let permitted_alphabet_char_width = crate::num::log2(alphabet.len() as i128) as usize;
273    // Alphabet should be always indexed key-alphabetvalue pairs at this point
274    let values_only = alphabet.values().copied().collect::<Vec<u32>>();
275    if should_be_indexed(permitted_alphabet_char_width as u32, &values_only) {
276        for ch in input.chunks_exact(permitted_alphabet_char_width as usize) {
277            let index = ch.load_be::<u32>();
278            string.push_char(*alphabet.get(&index).ok_or(
279                PermittedAlphabetError::IndexNotFound {
280                    index: index.to_usize().unwrap_or_default(),
281                },
282            )?);
283        }
284    } else {
285        string = S::try_from_bits(input, permitted_alphabet_char_width)?
286    }
287    Ok(string)
288}
/// Decides whether characters must be encoded as indices into the character
/// set rather than as their own values.
///
/// Returns `true` when the largest value in `character_set` is at least
/// `2^width`. An empty `character_set` is treated as having a largest value
/// of zero.
///
/// A `width` of 32 bits or more can hold every `u32` value, so the result is
/// `false` in that case instead of overflowing while computing `2^width`.
pub(crate) fn should_be_indexed(width: u32, character_set: &[u32]) -> bool {
    let largest_value = character_set.iter().copied().max().unwrap_or_default();
    2u32.checked_pow(width)
        .is_some_and(|capacity| capacity <= largest_value)
}
293
294#[derive(Debug, Default, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
295pub struct DynConstrainedCharacterString {
296    character_set: BTreeMap<u32, u32>,
297    buffer: types::BitString,
298}
299
300impl DynConstrainedCharacterString {
301    pub fn from_bits(
302        data: impl Iterator<Item = u32>,
303        character_set: &[u32],
304    ) -> Result<Self, PermittedAlphabetError> {
305        let mut buffer = types::BitString::new();
306        let char_width = crate::num::log2(character_set.len() as i128);
307        let indexed = should_be_indexed(char_width, character_set);
308        let alphabet: BTreeMap<u32, u32>;
309        if indexed {
310            alphabet = character_set
311                .iter()
312                .enumerate()
313                .map(|(i, a)| (*a, i as u32))
314                .collect::<BTreeMap<_, _>>();
315            for ch in data {
316                let Some(index) = alphabet.get(&ch).copied() else {
317                    return Err(PermittedAlphabetError::CharacterNotFound { character: ch });
318                };
319                let range = ((u32::BITS - char_width) as usize)..(u32::BITS as usize);
320                let bit_ch = &index.view_bits::<Msb0>()[range];
321                buffer.extend_from_bitslice(bit_ch);
322            }
323        } else {
324            alphabet = character_set
325                .iter()
326                .enumerate()
327                .map(|(i, a)| (i as u32, *a))
328                .collect::<BTreeMap<_, _>>();
329            for ch in data {
330                let range = ((u32::BITS - char_width) as usize)..(u32::BITS as usize);
331                let bit_ch = &ch.view_bits::<Msb0>()[range];
332                buffer.extend_from_bitslice(bit_ch);
333            }
334        }
335
336        Ok(Self {
337            character_set: alphabet,
338            buffer,
339        })
340    }
341
342    pub fn character_width(&self) -> usize {
343        crate::num::log2(self.character_set.len() as i128) as usize
344    }
345
346    #[allow(unused)]
347    pub fn is_empty(&self) -> bool {
348        self.len() == 0
349    }
350
351    #[allow(unused)]
352    pub fn len(&self) -> usize {
353        self.buffer.len() / self.character_width()
354    }
355
356    #[allow(unused)]
357    fn as_bitstr(&self) -> &types::BitStr {
358        &self.buffer
359    }
360
361    #[allow(unused)]
362    fn iter(&self) -> impl Iterator<Item = &types::BitStr> + '_ {
363        self.buffer.chunks_exact(self.character_width())
364    }
365}
366
367impl core::ops::Index<usize> for DynConstrainedCharacterString {
368    type Output = types::BitStr;
369
370    fn index(&self, index: usize) -> &Self::Output {
371        &self.buffer[index..index * self.character_width()]
372    }
373}
374
375impl core::ops::Index<core::ops::Range<usize>> for DynConstrainedCharacterString {
376    type Output = types::BitStr;
377
378    fn index(&self, index: core::ops::Range<usize>) -> &Self::Output {
379        let width = self.character_width();
380        &self.buffer[index.start * width..index.end * width]
381    }
382}