sms_pdu_decoder/
codecs.rs

1use crate::{PDUError, Result};
2use lazy_static::lazy_static;
3use std::collections::HashMap;
4
5/// --- UCS2 Codec ---
6pub struct UCS2;
7
8impl UCS2 {
9    /// Returns an encoded PDU string.
10    pub fn encode(data: &str) -> String {
11        hex::encode_upper(
12            data.encode_utf16()
13                .flat_map(|u| u.to_be_bytes())
14                .collect::<Vec<u8>>(),
15        )
16    }
17
18    /// Returns decoded message from PDU string.
19    pub fn decode(data: &str) -> Result<String> {
20        let bytes = hex::decode(data)?;
21        let mut chars = Vec::with_capacity(bytes.len() / 2);
22        for chunk in bytes.chunks(2) {
23            if chunk.len() == 2 {
24                chars.push(u16::from_be_bytes([chunk[0], chunk[1]]));
25            }
26        }
27        Ok(String::from_utf16_lossy(&chars))
28    }
29
30    /// Decode bytes directly (used internally)
31    pub fn decode_bytes(bytes: &[u8]) -> Result<String> {
32        let mut chars = Vec::with_capacity(bytes.len() / 2);
33        for chunk in bytes.chunks(2) {
34            if chunk.len() == 2 {
35                chars.push(u16::from_be_bytes([chunk[0], chunk[1]]));
36            }
37        }
38        Ok(String::from_utf16_lossy(&chars))
39    }
40}
41
42/// --- GSM 7-bit Codec ---
43pub struct GSM;
44
45lazy_static! {
46    static ref GSM_ALPHABET: String = "@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1BÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà".to_string();
47    static ref GSM_EXT_ALPHABET: HashMap<u8, char> = {
48        let mut m = HashMap::new();
49        m.insert(10, '\x0C'); // Form Feed '\f'
50        m.insert(20, '^');
51        m.insert(40, '{');
52        m.insert(41, '}');
53        m.insert(47, '\\');
54        m.insert(60, '[');
55        m.insert(61, '~');
56        m.insert(62, ']');
57        m.insert(64, '|');
58        m.insert(101, '€');
59        m
60    };
61    static ref GSM_EXT_ALPHABET_INV: HashMap<char, u8> = {
62        let mut m = HashMap::new();
63        for (k, v) in GSM_EXT_ALPHABET.iter() {
64            m.insert(*v, *k);
65        }
66        m
67    };
68}
69
70const CHAR_EXT: u8 = 0x1B; // Escape character septet
71
72impl GSM {
73    /// Reverses octets in a PDU string.
74    fn reversed_octets(data: &str) -> String {
75        data.as_bytes()
76            .chunks(2)
77            .rev()
78            .map(|chunk| std::str::from_utf8(chunk).unwrap_or(""))
79            .collect()
80    }
81
82    /// Returns decoded message from PDU string.
83    pub fn decode(data: &str, strip_padding: bool) -> Result<String> {
84        if data.is_empty() {
85            return Ok(String::new());
86        }
87
88        // Reverse octets and convert to binary string
89        let reversed = Self::reversed_octets(data);
90        let bytes = hex::decode(&reversed)?;
91
92        // Convert bytes to binary string
93        let mut bin_str = String::new();
94        for byte in bytes {
95            bin_str.push_str(&format!("{:08b}", byte));
96        }
97
98        // Extract septets from right to left (7 bits at a time)
99        let mut septets = Vec::new();
100        let len = bin_str.len();
101        if len >= 7 {
102            let mut k = len - 7;
103            loop {
104                let septet_str = &bin_str[k..k + 7];
105                if let Ok(septet) = u8::from_str_radix(septet_str, 2) {
106                    septets.push(septet);
107                }
108                if k < 7 {
109                    break;
110                }
111                k -= 7;
112            }
113        }
114
115        let mut res = String::new();
116        let mut is_extended = false;
117        for &char_index in septets.iter() {
118            if char_index == CHAR_EXT {
119                is_extended = true;
120                continue;
121            }
122            if is_extended {
123                is_extended = false;
124                res.push(*GSM_EXT_ALPHABET.get(&char_index).unwrap_or(&' '));
125            } else {
126                res.push(GSM_ALPHABET.chars().nth(char_index as usize).unwrap_or(' '));
127            }
128        }
129
130        let total_septets = septets.len();
131        if strip_padding && total_septets % 8 == 0 && res.ends_with('\r') {
132            return Ok(res[0..res.len() - 1].to_string());
133        }
134        Ok(res)
135    }
136
137    /// Decode with specified length (used for alphanumeric addresses)
138    pub fn decode_with_length(bytes: &[u8], length: usize) -> Result<String> {
139        // Convert bytes to binary string
140        let mut bin_str = String::new();
141        for byte in bytes {
142            bin_str.push_str(&format!("{:08b}", byte));
143        }
144
145        // Extract septets from left to right (7 bits at a time), up to length
146        let mut septets = Vec::new();
147        for i in 0..length {
148            let start = i * 7;
149            if start + 7 <= bin_str.len() {
150                let septet_str = &bin_str[start..start + 7];
151                if let Ok(septet) = u8::from_str_radix(septet_str, 2) {
152                    septets.push(septet);
153                }
154            }
155        }
156
157        let mut res = String::new();
158        let mut is_extended = false;
159        for &char_index in septets.iter() {
160            if char_index == CHAR_EXT {
161                is_extended = true;
162                continue;
163            }
164            if is_extended {
165                is_extended = false;
166                res.push(*GSM_EXT_ALPHABET.get(&char_index).unwrap_or(&' '));
167            } else {
168                res.push(GSM_ALPHABET.chars().nth(char_index as usize).unwrap_or(' '));
169            }
170        }
171
172        Ok(res)
173    }
174
175    /// Returns an encoded PDU string.
176    pub fn encode(data: &str, with_padding: bool) -> Result<String> {
177        if data.is_empty() {
178            return Ok(String::new());
179        }
180
181        let mut chars = Vec::new();
182        for char in data.chars() {
183            // Find character position in GSM alphabet (not byte position!)
184            if let Some(index) = GSM_ALPHABET.chars().position(|c| c == char) {
185                chars.push(index as u8);
186            } else if let Some(&index) = GSM_EXT_ALPHABET_INV.get(&char) {
187                chars.push(CHAR_EXT);
188                chars.push(index);
189            } else {
190                return Err(PDUError::InvalidGsmChar(char));
191            }
192        }
193
194        if with_padding {
195            let cr_index = GSM_ALPHABET.chars().position(|c| c == '\r').unwrap() as u8;
196
197            // If the total number of characters is 8n, and the last char is CR, add another CR.
198            if chars.len() % 8 == 0 && data.ends_with('\r') {
199                chars.push(cr_index);
200            }
201            // If the total number of characters is 8n + 7, add a CR padding character.
202            if chars.len() % 8 == 7 {
203                chars.push(cr_index);
204            }
205        }
206
207        // Convert chars to binary string (reversed, 7 bits each)
208        let mut bin_str = String::new();
209        for &char in chars.iter().rev() {
210            bin_str.push_str(&format!("{:07b}", char));
211        }
212
213        // Pad with zeros on the left to make it a multiple of 8
214        let padding = (8 - (bin_str.len() % 8)) % 8;
215        let padded = "0".repeat(padding) + &bin_str;
216
217        // Convert binary string to hex
218        let mut hex_str = String::new();
219        for chunk in padded.as_bytes().chunks(8) {
220            let byte_str = std::str::from_utf8(chunk).unwrap_or("00000000");
221            if let Ok(byte) = u8::from_str_radix(byte_str, 2) {
222                hex_str.push_str(&format!("{:02X}", byte));
223            }
224        }
225
226        // Reverse octets
227        Ok(Self::reversed_octets(&hex_str))
228    }
229}
230
// Helper functions for BCD encoding (used in parser/fields)

/// Decodes swapped-nibble BCD octets into a string.
///
/// For every octet the low nibble is emitted before the high nibble. Nibbles
/// 0-9 become the digits `'0'`-`'9'`, nibble `0xF` is padding and is skipped,
/// and nibbles `0xA`-`0xE` become `'a'`-`'e'`.
pub fn decode_bcd(data: &[u8]) -> String {
    /// Character for one nibble, or `None` for the `0xF` padding nibble.
    fn nibble_to_char(nibble: u8) -> Option<char> {
        match nibble {
            0..=9 => Some(char::from(b'0' + nibble)),
            0x0F => None,
            _ => Some(char::from(b'a' + (nibble - 10))),
        }
    }

    data.iter()
        .flat_map(|&octet| [octet & 0x0F, octet >> 4])
        .filter_map(nibble_to_char)
        .collect()
}
256
257pub fn hex_to_bytes(hex_str: &str) -> Result<Vec<u8>> {
258    hex::decode(hex_str.replace(' ', "")).map_err(PDUError::InvalidHex)
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    // --- GSM Tests (from test_codecs.py & codecs.py doctests) ---

    #[test]
    fn test_gsm_decode_basic() -> Result<()> {
        assert_eq!(
            GSM::decode("C8F71D14969741F977FD07", false)?,
            "How are you?"
        );
        Ok(())
    }

    #[test]
    fn test_gsm_decode_extended_char() -> Result<()> {
        assert_eq!(
            GSM::decode("32D0A60C8287E5A0F63B3D07", false)?,
            "2 € par mois"
        );
        Ok(())
    }

    #[test]
    fn test_gsm_decode_with_padding_no_strip() -> Result<()> {
        assert_eq!(GSM::decode("AA58ACA6AA8D1A", false)?, "*115*5#\r");
        Ok(())
    }

    #[test]
    fn test_gsm_decode_with_padding_and_strip() -> Result<()> {
        assert_eq!(GSM::decode("AA58ACA6AA8D1A", true)?, "*115*5#");
        Ok(())
    }

    #[test]
    fn test_gsm_encode_basic() -> Result<()> {
        assert_eq!(GSM::encode("hellohello", false)?, "E8329BFD4697D9EC37");
        Ok(())
    }

    #[test]
    fn test_gsm_encode_extended() -> Result<()> {
        assert_eq!(
            GSM::encode("2 € par mois", false)?,
            "32D0A60C8287E5A0F63B3D07"
        );
        Ok(())
    }

    #[test]
    fn test_gsm_encode_7_chars_no_padding() -> Result<()> {
        assert_eq!(GSM::encode("1234567", false)?, "31D98C56B3DD00");
        Ok(())
    }

    #[test]
    fn test_gsm_encode_7_chars_with_padding() -> Result<()> {
        assert_eq!(GSM::encode("1234567", true)?, "31D98C56B3DD1A");
        Ok(())
    }

    #[test]
    fn test_gsm_encode_rejects_unknown_char() {
        // The character is in neither the base nor the extension table.
        assert!(GSM::encode("\u{1F600}", false).is_err());
    }

    // --- test_codecs.py tests ---

    #[test]
    fn test_hello() -> Result<()> {
        assert_eq!(GSM::encode("hello", false)?, "E8329BFD06");
        assert_eq!(GSM::decode("E8329BFD06", false)?, "hello");
        Ok(())
    }

    #[test]
    fn test_8chars() -> Result<()> {
        assert_eq!(GSM::encode("12345678", false)?, "31D98C56B3DD70");
        assert_eq!(GSM::decode("31D98C56B3DD70", false)?, "12345678");
        Ok(())
    }

    #[test]
    fn test_extended() -> Result<()> {
        assert_eq!(GSM::encode("[10€]", false)?, "1B5E0CB6296F7C");
        assert_eq!(GSM::decode("1B5E0CB6296F7C", false)?, "[10€]");
        Ok(())
    }

    #[test]
    fn test_empty() -> Result<()> {
        assert_eq!(GSM::encode("", false)?, "");
        assert_eq!(GSM::decode("", false)?, "");
        assert_eq!(GSM::encode("", true)?, "");
        assert_eq!(GSM::decode("", true)?, "");
        Ok(())
    }

    #[test]
    fn test_long() -> Result<()> {
        let data_decoded = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.";
        let data_encoded = "CCB7BCDC06A5E1F37A1B447EB3DF72D03C4D0785DB653A0B347EBBE7E531BD4CAFCB4161721A9E9E8FD3EE33A8CC4ED359A079990C22BF41E5747DDE7E9341F4721BFE9683D2EE719A9C26D7DD74509D0E6287C56F791954A683C86FF65B5E06B5C36777181466A7E3F5B00B";
        assert_eq!(GSM::encode(data_decoded, false)?, data_encoded);
        assert_eq!(GSM::decode(data_encoded, false)?, data_decoded);
        Ok(())
    }

    #[test]
    fn test_ext_alphabet() -> Result<()> {
        // Both inputs occupy 9 septets (the euro sign takes two), so no padding is added.
        assert_eq!(GSM::encode("123456€\r", true)?, "31D98C56B36DCA0D");
        assert_eq!(GSM::encode("12345678\r", true)?, "31D98C56B3DD700D");
        Ok(())
    }

    #[test]
    fn test_double_cr() -> Result<()> {
        // "1234567\r": 8 septets ending in CR, so padding appends a second CR.
        // The 9 resulting septets are not a multiple of 8: nothing is stripped.
        assert_eq!(
            GSM::decode(&GSM::encode("1234567\r", true)?, true)?,
            "1234567\r\r"
        );
        assert_eq!(
            GSM::decode(&GSM::encode("1234567\r", true)?, false)?,
            "1234567\r\r"
        );

        // "1234567\r" encoded without padding: 8 septets ending in CR, which
        // strip_padding cannot tell apart from padding and therefore removes.
        assert_eq!(
            GSM::decode(&GSM::encode("1234567\r", false)?, true)?,
            "1234567"
        );
        assert_eq!(
            GSM::decode(&GSM::encode("1234567\r", false)?, false)?,
            "1234567\r"
        );

        // "123456\r": 7 septets, so padding appends a CR. 8 septets decode to
        // "123456\r\r" and strip_padding removes the last CR.
        assert_eq!(
            GSM::decode(&GSM::encode("123456\r", true)?, true)?,
            "123456\r"
        );
        assert_eq!(
            GSM::decode(&GSM::encode("123456\r", true)?, false)?,
            "123456\r\r"
        );

        // "12345\r": 6 septets, so no padding is added and nothing is stripped.
        assert_eq!(
            GSM::decode(&GSM::encode("12345\r", true)?, true)?,
            "12345\r"
        );
        assert_eq!(
            GSM::decode(&GSM::encode("12345\r", true)?, false)?,
            "12345\r"
        );

        // "123456\r\r": 8 septets ending in CR, so padding appends another CR.
        // The 9 resulting septets are not a multiple of 8: nothing is stripped.
        assert_eq!(
            GSM::decode(&GSM::encode("123456\r\r", true)?, true)?,
            "123456\r\r\r"
        );
        assert_eq!(
            GSM::decode(&GSM::encode("123456\r\r", true)?, false)?,
            "123456\r\r\r"
        );

        Ok(())
    }

    #[test]
    fn test_8n_1_encode() -> Result<()> {
        assert_eq!(GSM::encode("1234567", true)?, "31D98C56B3DD1A");
        assert_eq!(
            GSM::encode("0123456789ABCDE", true)?,
            "B0986C46ABD96EB85C503824161B"
        );

        assert_eq!(
            GSM::decode(&GSM::encode("1234567", true)?, true)?,
            "1234567"
        );
        // "12345^" takes 7 septets because '^' is an escape sequence, so a CR is appended.
        assert_eq!(GSM::decode(&GSM::encode("12345^", true)?, true)?, "12345^");
        assert_eq!(
            GSM::decode(&GSM::encode("12345^", true)?, false)?,
            "12345^\r"
        );
        // "123456^" takes exactly 8 septets and does not end in CR: no padding.
        assert_eq!(
            GSM::decode(&GSM::encode("123456^", true)?, true)?,
            "123456^"
        );
        assert_eq!(
            GSM::decode(&GSM::encode("123456^", true)?, false)?,
            "123456^"
        );
        Ok(())
    }

    // --- UCS2 Tests (from codecs.py doctests) ---

    #[test]
    fn test_ucs2_encode() {
        assert_eq!(UCS2::encode("Je pompe donc je suis."), "004A006500200070006F006D0070006500200064006F006E00630020006A006500200073007500690073002E");
    }

    #[test]
    fn test_ucs2_decode() -> Result<()> {
        assert_eq!(
            UCS2::decode("004C006F00720065006D00200049007000730075006D")?,
            "Lorem Ipsum"
        );
        Ok(())
    }

    #[test]
    fn test_ucs2_decode_bytes() -> Result<()> {
        assert_eq!(UCS2::decode_bytes(&[0x00, 0x4C, 0x00, 0x6F])?, "Lo");
        // A trailing unpaired octet is ignored.
        assert_eq!(UCS2::decode_bytes(&[0x00, 0x41, 0x00])?, "A");
        Ok(())
    }

    // --- Helper tests ---

    #[test]
    fn test_decode_bcd() {
        assert_eq!(decode_bcd(&[]), "");
        // Low nibble first; 0xF is padding.
        assert_eq!(decode_bcd(&[0x21, 0x43, 0xF5]), "12345");
    }

    #[test]
    fn test_hex_to_bytes_ignores_spaces() -> Result<()> {
        assert_eq!(hex_to_bytes("31 D9 8c")?, vec![0x31, 0xD9, 0x8C]);
        assert!(hex_to_bytes("3").is_err());
        Ok(())
    }
}