Skip to main content

smpp_codec/
encoding.rs

1// src/encoding.rs
2use std::collections::HashMap;
3
/// GSM 03.38 default alphabet, laid out so that the position of each character
/// in the string is its 7-bit code (0x00..=0x7F).
///
/// Position 0x1B holds the escape character that introduces the extension table.
/// Position 0x5F is the section sign `§`, as the default alphabet defines it; the
/// backtick is not part of the alphabet.
const GSM_BASIC_CHARSET: &str = "@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ§¿abcdefghijklmnopqrstuvwxyzäöñüà";
5
6/// Encodes text into GSM 7-bit packed format (unpacked representation).
7///
8/// Returns an error if the text contains characters not supported by GSM 03.38.
9///
10/// # Examples
11///
12/// ```
13/// use smpp_codec::encoding::gsm_7bit_encode;
14/// let encoded = gsm_7bit_encode("Hello").unwrap();
15/// assert_eq!(encoded.len(), 5);
16/// ```
17pub fn gsm_7bit_encode(text: &str) -> Result<Vec<u8>, String> {
18    let mut encoded_text = Vec::new();
19    let mut gsm_extended_charset = HashMap::new();
20    gsm_extended_charset.insert('^', 20);
21    gsm_extended_charset.insert('{', 40);
22    gsm_extended_charset.insert('}', 41);
23    gsm_extended_charset.insert('\\', 47);
24    gsm_extended_charset.insert('[', 60);
25    gsm_extended_charset.insert('~', 61);
26    gsm_extended_charset.insert(']', 62);
27    gsm_extended_charset.insert('|', 64);
28    gsm_extended_charset.insert('€', 101);
29
30    for char in text.chars() {
31        if let Some(index) = GSM_BASIC_CHARSET.chars().position(|c| c == char) {
32            encoded_text.push(index as u8);
33        } else if let Some(&code) = gsm_extended_charset.get(&char) {
34            encoded_text.push(0x1B);
35            encoded_text.push(code);
36        } else {
37            return Err(format!("Character '{}' not supported in GSM 03.38", char));
38        }
39    }
40    Ok(encoded_text)
41}
42
/// Encodes text as single bytes, one per character.
///
/// Characters with a code point of at most U+00FF are written as that byte;
/// every other character is replaced with `?`.
pub fn encode_8bit(text: &str) -> Vec<u8> {
    let mut octets = Vec::new();
    for symbol in text.chars() {
        let octet = match symbol {
            '\u{00}'..='\u{FF}' => symbol as u8,
            _ => b'?',
        };
        octets.push(octet);
    }
    octets
}
48
/// Encodes text as big-endian UTF-16 code units, two bytes per unit.
///
/// Characters outside the Basic Multilingual Plane produce a surrogate pair
/// (four bytes).
pub fn encode_16bit(text: &str) -> Vec<u8> {
    let mut octets = Vec::new();
    for unit in text.encode_utf16() {
        // High byte first.
        octets.extend_from_slice(&unit.to_be_bytes());
    }
    octets
}
52
53/// Decodes GSM 7-bit data (unpacked) into a String.
54///
55/// # Examples
56///
57/// ```
58/// use smpp_codec::encoding::gsm_7bit_decode;
59/// let decoded = gsm_7bit_decode(&[0x48, 0x65, 0x6C, 0x6C, 0x6F]);
60/// assert_eq!(decoded, "Hello");
61/// ```
62pub fn gsm_7bit_decode(bytes: &[u8]) -> String {
63    let basic_chars: Vec<char> = GSM_BASIC_CHARSET.chars().collect();
64    let mut result = String::new();
65    let mut i = 0;
66
67    while i < bytes.len() {
68        let byte = bytes[i];
69        if byte == 0x1B {
70            // Handle Extended Character
71            if i + 1 < bytes.len() {
72                let next_byte = bytes[i + 1];
73                let decoded_char = match next_byte {
74                    20 => '^',
75                    40 => '{',
76                    41 => '}',
77                    47 => '\\',
78                    60 => '[',
79                    61 => '~',
80                    62 => ']',
81                    64 => '|',
82                    101 => '€',
83                    _ => '?', // Unknown extended char
84                };
85                result.push(decoded_char);
86                i += 2; // Skip escape + char
87            } else {
88                i += 1; // Trailing escape, ignore
89            }
90        } else {
91            // Handle Basic Character
92            if (byte as usize) < basic_chars.len() {
93                result.push(basic_chars[byte as usize]);
94            } else {
95                result.push('?');
96            }
97            i += 1;
98        }
99    }
100    result
101}
102
/// Decodes bytes as Latin-1 (ISO-8859-1) text.
///
/// Each byte becomes the Unicode character with the same code point, so the
/// conversion cannot fail.
pub fn decode_8bit(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &octet in bytes {
        text.push(char::from(octet));
    }
    text
}
107
/// Decodes big-endian 16-bit code units (UCS-2 / UTF-16BE) into a `String`.
///
/// Bytes are consumed in pairs; a trailing unpaired byte is ignored. Code units
/// that do not form valid UTF-16 are replaced with U+FFFD.
pub fn decode_16bit(bytes: &[u8]) -> String {
    let mut units = Vec::with_capacity(bytes.len() / 2);
    for pair in bytes.chunks_exact(2) {
        // High byte first.
        units.push((u16::from(pair[0]) << 8) | u16::from(pair[1]));
    }
    String::from_utf16_lossy(&units)
}
116
117// --- SMSC HELPER ---
118
/// Result of interpreting a message body.
#[derive(Clone, Debug, PartialEq)]
pub enum MessageBody {
    /// The payload was decoded as text (GSM 7-bit, UCS-2, or Latin-1).
    Text(String),
    /// The payload is kept as raw bytes (Class 2, 8-bit data, or unknown coding).
    Binary(Vec<u8>),
}
124
/// Payload encoding selected from a data coding scheme value.
#[derive(Clone, Copy, Debug)]
enum RawEncoding {
    /// GSM 7-bit default alphabet.
    Gsm7Bit,
    /// Latin-1 (ISO-8859-1) text.
    Latin1,
    /// UCS-2 text.
    Ucs2,
    /// Pure 8-bit data (Class 2, OTA, etc.).
    Binary8Bit,
}
132
133/// distinguishing Latin1 (0x03) from Binary (0x04, Class 2)
134fn detect_raw_encoding(dcs: u8) -> RawEncoding {
135    match dcs {
136        // Standard "Safe" Values
137        0x00 | 0x01 => RawEncoding::Gsm7Bit,
138        0x03 => RawEncoding::Latin1, // Explicit Latin-1
139        0x08 => RawEncoding::Ucs2,
140        0x02 | 0x04 => RawEncoding::Binary8Bit, // Explicit 8-bit Data
141
142        // Bitmask / Classes logic
143        _ => {
144            let group = dcs >> 4;
145            match group {
146                // Group 00xx: General Data Coding
147                0x00..=0x03 => {
148                    match (dcs & 0x0C) >> 2 {
149                        0x02 => RawEncoding::Ucs2,
150                        0x01 => RawEncoding::Binary8Bit, // 8-bit data
151                        _ => RawEncoding::Gsm7Bit,
152                    }
153                }
154                // Group 1111: Data Coding / Message Class (OTA often lives here)
155                0x0F => {
156                    if (dcs & 0x04) != 0 {
157                        RawEncoding::Binary8Bit // 8-bit Data
158                    } else {
159                        RawEncoding::Gsm7Bit
160                    }
161                }
162                _ => RawEncoding::Binary8Bit, // Treat unknown as binary to be safe
163            }
164        }
165    }
166}
167
168// The Public Helper
169pub fn process_body(body: &[u8], dcs: u8, udhi: bool) -> MessageBody {
170    // 1. Strip UDH if present
171    let payload = if udhi && !body.is_empty() {
172        let udh_len = body[0] as usize;
173        if body.len() > udh_len + 1 {
174            &body[udh_len + 1..]
175        } else {
176            // Malformed UDH? Return raw bytes to be safe.
177            return MessageBody::Binary(body.to_vec());
178        }
179    } else {
180        body
181    };
182
183    // 2. Decode based on detected type
184    match detect_raw_encoding(dcs) {
185        RawEncoding::Gsm7Bit => MessageBody::Text(gsm_7bit_decode(payload)),
186        RawEncoding::Latin1 => MessageBody::Text(decode_8bit(payload)),
187        RawEncoding::Ucs2 => MessageBody::Text(decode_16bit(payload)),
188        RawEncoding::Binary8Bit => MessageBody::Binary(payload.to_vec()),
189    }
190}