Skip to main content

smpp_codec/
encoding.rs

1//! # Content Encoding/Decoding Module
2//!
3//! This module contains encoding/decoding logic for message content
4use std::collections::HashMap;
5
/// GSM 03.38 default (basic) alphabet.
///
/// The string is laid out so that the position of each character equals its
/// 7-bit code: `@` is 0x00, `A` is 0x41, `à` is 0x7F. Position 0x1B holds the
/// ESC character, which introduces the two-code extension-table sequences, and
/// position 0x5F holds the section sign `§` as the standard requires (the
/// backtick is not part of this alphabet).
const GSM_BASIC_CHARSET: &str = "@£$¥èéùìòÇ\nØø\rÅåΔ_ΦΓΛΩΠΨΣΘΞ\x1bÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ§¿abcdefghijklmnopqrstuvwxyzäöñüà";
7
8/// Encodes text into GSM 7-bit packed format (unpacked representation).
9///
10/// Returns an error if the text contains characters not supported by GSM 03.38.
11///
12/// # Examples
13///
14/// ```
15/// use smpp_codec::encoding::gsm_7bit_encode;
16/// let encoded = gsm_7bit_encode("Hello").unwrap();
17/// assert_eq!(encoded.len(), 5);
18/// ```
19pub fn gsm_7bit_encode(text: &str) -> Result<Vec<u8>, String> {
20    let mut encoded_text = Vec::new();
21    let mut gsm_extended_charset = HashMap::new();
22    gsm_extended_charset.insert('^', 20);
23    gsm_extended_charset.insert('{', 40);
24    gsm_extended_charset.insert('}', 41);
25    gsm_extended_charset.insert('\\', 47);
26    gsm_extended_charset.insert('[', 60);
27    gsm_extended_charset.insert('~', 61);
28    gsm_extended_charset.insert(']', 62);
29    gsm_extended_charset.insert('|', 64);
30    gsm_extended_charset.insert('€', 101);
31
32    for char in text.chars() {
33        if let Some(index) = GSM_BASIC_CHARSET.chars().position(|c| c == char) {
34            encoded_text.push(index as u8);
35        } else if let Some(&code) = gsm_extended_charset.get(&char) {
36            encoded_text.push(0x1B);
37            encoded_text.push(code);
38        } else {
39            return Err(format!("Character '{}' not supported in GSM 03.38", char));
40        }
41    }
42    Ok(encoded_text)
43}
44
/// Encodes text as 8-bit Latin-1 (ISO-8859-1), one byte per character.
///
/// Characters with a code point above U+00FF have no Latin-1 representation
/// and are written as `?`.
pub fn encode_8bit(text: &str) -> Vec<u8> {
    let mut latin1 = Vec::new();
    for ch in text.chars() {
        let code_point = u32::from(ch);
        latin1.push(match code_point {
            // Latin-1 bytes coincide with the first 256 Unicode code points.
            0x00..=0xFF => code_point as u8,
            _ => b'?',
        });
    }
    latin1
}
52
/// Encodes text as 16-bit big-endian code units (UCS-2 style).
///
/// Each UTF-16 code unit of `text` is written high byte first, so characters
/// outside the Basic Multilingual Plane occupy four bytes (a surrogate pair).
pub fn encode_16bit(text: &str) -> Vec<u8> {
    let mut encoded = Vec::new();
    for unit in text.encode_utf16() {
        encoded.extend_from_slice(&unit.to_be_bytes());
    }
    encoded
}
57
58/// Decodes GSM 7-bit data (unpacked) into a String.
59///
60/// # Examples
61///
62/// ```
63/// use smpp_codec::encoding::gsm_7bit_decode;
64/// let decoded = gsm_7bit_decode(&[0x48, 0x65, 0x6C, 0x6C, 0x6F]);
65/// assert_eq!(decoded, "Hello");
66/// ```
67pub fn gsm_7bit_decode(bytes: &[u8]) -> String {
68    let basic_chars: Vec<char> = GSM_BASIC_CHARSET.chars().collect();
69    let mut result = String::new();
70    let mut i = 0;
71
72    while i < bytes.len() {
73        let byte = bytes[i];
74        if byte == 0x1B {
75            // Handle Extended Character
76            if i + 1 < bytes.len() {
77                let next_byte = bytes[i + 1];
78                let decoded_char = match next_byte {
79                    20 => '^',
80                    40 => '{',
81                    41 => '}',
82                    47 => '\\',
83                    60 => '[',
84                    61 => '~',
85                    62 => ']',
86                    64 => '|',
87                    101 => '€',
88                    _ => '?', // Unknown extended char
89                };
90                result.push(decoded_char);
91                i += 2; // Skip escape + char
92            } else {
93                i += 1; // Trailing escape, ignore
94            }
95        } else {
96            // Handle Basic Character
97            if (byte as usize) < basic_chars.len() {
98                result.push(basic_chars[byte as usize]);
99            } else {
100                result.push('?');
101            }
102            i += 1;
103        }
104    }
105    result
106}
107
/// Decodes 8-bit Latin-1 (ISO-8859-1) data into a `String`.
///
/// Every byte maps directly to the Unicode code point with the same value, so
/// decoding cannot fail.
pub fn decode_8bit(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(char::from(byte));
    }
    text
}
113
/// Decodes 16-bit big-endian (UCS-2) data into a `String`.
///
/// Bytes are consumed in pairs, high byte first. A trailing unpaired byte is
/// ignored, and code units that do not form valid UTF-16 are replaced with
/// U+FFFD.
pub fn decode_16bit(bytes: &[u8]) -> String {
    let mut units = Vec::with_capacity(bytes.len() / 2);
    for pair in bytes.chunks_exact(2) {
        // Big-endian: first byte is the high half of the code unit.
        units.push((u16::from(pair[0]) << 8) | u16::from(pair[1]));
    }
    String::from_utf16_lossy(&units)
}
123
124// --- SMSC HELPER ---
125
/// Content of a short message after processing.
///
/// A body is either text that has been decoded into a `String`, or raw bytes
/// that are handed back without interpretation.
#[derive(Clone, Debug, PartialEq)]
pub enum MessageBody {
    /// Text decoded from a GSM 7-bit, Latin-1 or UCS-2 payload.
    Text(String),
    /// Raw payload bytes (8-bit data, Class 2, or an unrecognized coding).
    Binary(Vec<u8>),
}
134
/// How a payload should be interpreted, as selected from the data coding
/// scheme byte.
#[derive(Clone, Copy, Debug)]
enum RawEncoding {
    /// GSM 03.38 7-bit text.
    Gsm7Bit,
    /// ISO-8859-1 text.
    Latin1,
    /// Big-endian UCS-2 text.
    Ucs2,
    /// Pure 8-bit data (Class 2, OTA, etc.); never decoded as text.
    Binary8Bit,
}
142
143/// distinguishing Latin1 (0x03) from Binary (0x04, Class 2)
144fn detect_raw_encoding(dcs: u8) -> RawEncoding {
145    match dcs {
146        // Standard "Safe" Values
147        0x00 | 0x01 => RawEncoding::Gsm7Bit,
148        0x03 => RawEncoding::Latin1, // Explicit Latin-1
149        0x08 => RawEncoding::Ucs2,
150        0x02 | 0x04 => RawEncoding::Binary8Bit, // Explicit 8-bit Data
151
152        // Bitmask / Classes logic
153        _ => {
154            let group = dcs >> 4;
155            match group {
156                // Group 00xx: General Data Coding
157                0x00..=0x03 => {
158                    match (dcs & 0x0C) >> 2 {
159                        0x02 => RawEncoding::Ucs2,
160                        0x01 => RawEncoding::Binary8Bit, // 8-bit data
161                        _ => RawEncoding::Gsm7Bit,
162                    }
163                }
164                // Group 1111: Data Coding / Message Class (OTA often lives here)
165                0x0F => {
166                    if (dcs & 0x04) != 0 {
167                        RawEncoding::Binary8Bit // 8-bit Data
168                    } else {
169                        RawEncoding::Gsm7Bit
170                    }
171                }
172                _ => RawEncoding::Binary8Bit, // Treat unknown as binary to be safe
173            }
174        }
175    }
176}
177
178// The Public Helper
179/// Processes the raw message body based on Data Coding Scheme (DCS) and UDHI flag.
180/// Returns a `MessageBody` which is either `Text` (if decodable) or `Binary`.
181pub fn process_body(body: &[u8], dcs: u8, udhi: bool) -> MessageBody {
182    // 1. Strip UDH if present
183    let payload = if udhi && !body.is_empty() {
184        let udh_len = body[0] as usize;
185        if body.len() > udh_len + 1 {
186            &body[udh_len + 1..]
187        } else {
188            // Malformed UDH? Return raw bytes to be safe.
189            return MessageBody::Binary(body.to_vec());
190        }
191    } else {
192        body
193    };
194
195    // 2. Decode based on detected type
196    match detect_raw_encoding(dcs) {
197        RawEncoding::Gsm7Bit => MessageBody::Text(gsm_7bit_decode(payload)),
198        RawEncoding::Latin1 => MessageBody::Text(decode_8bit(payload)),
199        RawEncoding::Ucs2 => MessageBody::Text(decode_16bit(payload)),
200        RawEncoding::Binary8Bit => MessageBody::Binary(payload.to_vec()),
201    }
202}