rusmpp_extra/encoding/
ucs2.rs

1//! Ucs2 encoding/decoding support.
2
3mod errors;
4pub use errors::{Ucs2ConcatenateError, Ucs2EncodeError};
5use rusmpp_core::values::DataCoding;
6
/// Codec for the UCS-2 data coding.
///
/// The only configuration it carries is whether a message that has to be
/// split into several parts may be cut in the middle of a character.
#[derive(Debug)]
pub struct Ucs2 {
    /// When `true`, a part boundary may fall inside a character;
    /// when `false`, characters are kept whole within a part.
    allow_split_character: bool,
}
13
14impl Default for Ucs2 {
15    fn default() -> Self {
16        Self::new()
17    }
18}
19
20impl Ucs2 {
21    /// Creates a new [`Ucs2`] codec.
22    ///
23    /// # Defaults
24    ///
25    /// - `allow_split_character`: `false`
26    pub const fn new() -> Self {
27        Self {
28            allow_split_character: false,
29        }
30    }
31
32    /// Returns whether splitting characters is allowed.
33    pub const fn allow_split_character(&self) -> bool {
34        self.allow_split_character
35    }
36
37    /// Sets whether to allow splitting characters across message parts.
38    pub const fn with_allow_split_character(mut self, allow: bool) -> Self {
39        self.allow_split_character = allow;
40        self
41    }
42
43    /// Returns the associated [`DataCoding`].
44    pub const fn data_coding(&self) -> DataCoding {
45        DataCoding::Ucs2
46    }
47}
48
49#[cfg(any(test, feature = "alloc"))]
50#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
51mod impl_owned {
52    use alloc::vec::Vec;
53
54    use crate::{
55        concatenation::{
56            MAX_PARTS,
57            owned::{Concatenation, Concatenator},
58        },
59        encoding::owned::Encoder,
60    };
61
62    use super::*;
63
64    impl Ucs2 {
65        /// Encodes the given message into a vector of bytes.
66        pub fn encode_to_vec(&self, input: &str) -> Result<Vec<u8>, Ucs2EncodeError> {
67            // Maximum possible UCS-2 units = number of chars
68            let char_count = input.chars().count();
69            let mut buffer = alloc::vec![0u16; char_count];
70
71            match ucs2::encode(input, &mut buffer) {
72                Ok(len) => {
73                    let mut encoded = Vec::with_capacity(len * 2);
74
75                    for &code_unit in &buffer[..len] {
76                        encoded.push((code_unit >> 8) as u8);
77                        encoded.push((code_unit & 0xFF) as u8);
78                    }
79
80                    Ok(encoded)
81                }
82                Err(err) => match err {
83                    ucs2::Error::BufferOverflow => {
84                        unreachable!("We allocated more than enough space")
85                    }
86                    ucs2::Error::MultiByte => Err(Ucs2EncodeError::UnencodableCharacter),
87                },
88            }
89        }
90    }
91
92    impl Encoder for Ucs2 {
93        type Error = Ucs2EncodeError;
94
95        fn encode(&self, message: &str) -> Result<(Vec<u8>, DataCoding), Self::Error> {
96            self.encode_to_vec(message)
97                .map(|vec| (vec, self.data_coding()))
98        }
99    }
100
101    impl Concatenator for Ucs2 {
102        type Error = Ucs2ConcatenateError;
103
104        fn concatenate(
105            &self,
106            message: &str,
107            max_message_size: usize,
108            part_header_size: usize,
109        ) -> Result<(Concatenation, DataCoding), Self::Error> {
110            let encoded = self.encode_to_vec(message)?;
111
112            let total = encoded.len();
113
114            if total <= max_message_size {
115                return Ok((Concatenation::single(encoded), self.data_coding()));
116            }
117
118            let part_payload_size = max_message_size.saturating_sub(part_header_size);
119
120            if part_payload_size == 0 {
121                return Err(Ucs2ConcatenateError::PartCapacityExceeded);
122            }
123
124            let mut parts: Vec<Vec<u8>> = Vec::new();
125            let mut i = 0;
126
127            while i < total {
128                let mut end = (i + part_payload_size).min(total);
129
130                if !self.allow_split_character {
131                    // If not at the end and our cut is *not* on a 2-byte boundary,
132                    // shrink the part by 1 byte to align to even boundary.
133                    if end < total && (end % 2 != 0) {
134                        end -= 1;
135
136                        // If shrinking removed the entire part -> impossible
137                        if end == i {
138                            return Err(Ucs2ConcatenateError::InvalidBoundary);
139                        }
140                    }
141                }
142
143                // If allow_split_character == true, we accept uneven boundaries as-is.
144
145                parts.push(encoded[i..end].to_vec());
146                i = end;
147            }
148
149            if parts.len() > MAX_PARTS {
150                return Err(Ucs2ConcatenateError::parts_count_exceeded(parts.len()));
151            }
152
153            Ok((Concatenation::concatenated(parts), self.data_coding()))
154        }
155    }
156}
157
158#[cfg(test)]
159mod tests;