use encoding_rs::GBK;
use std::sync::atomic::{AtomicU32, Ordering};
/// `msg_fmt` byte reported for ASCII-encoded content.
pub const MSG_FMT_ASCII: u8 = 0;
/// `msg_fmt` byte reported for UCS-2 content (big-endian 16-bit code units).
pub const MSG_FMT_UCS2: u8 = 8;
/// `msg_fmt` byte reported for GBK-encoded content.
pub const MSG_FMT_GBK: u8 = 15;
/// Largest encoded body, in bytes, that `split_content` emits as one
/// header-less segment.
const SINGLE_MAX_BYTES: usize = 140;
/// Largest encoded payload, in bytes, carried by each segment of a multi-part
/// message; `split_content` prepends a 6-byte header to every such payload.
const MULTIPART_MAX_BYTES: usize = 134;
/// Process-wide counter; `split_content` uses its low 8 bits as the reference
/// byte shared by the segments of one multi-part message.
static UDH_REF_COUNTER: AtomicU32 = AtomicU32::new(0);
/// Character encodings this module can apply to SMS content.
///
/// Each variant corresponds to one of the `MSG_FMT_*` bytes; see
/// `SmsEncoding::msg_fmt`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SmsEncoding {
    /// One byte per character.
    Ascii,
    /// Big-endian UTF-16 code units, two bytes each.
    Ucs2,
    /// GBK bytes produced through `encoding_rs::GBK`.
    Gbk,
}
impl SmsEncoding {
    /// Returns the `msg_fmt` byte that identifies this encoding
    /// (`MSG_FMT_ASCII`, `MSG_FMT_UCS2` or `MSG_FMT_GBK`).
    pub fn msg_fmt(self) -> u8 {
        match self {
            Self::Ascii => MSG_FMT_ASCII,
            Self::Ucs2 => MSG_FMT_UCS2,
            Self::Gbk => MSG_FMT_GBK,
        }
    }
}
/// One outgoing message segment as produced by `split_content`.
#[derive(Debug, Clone)]
pub struct SmsSegment {
    /// Encoding identifier for `content` (one of the `MSG_FMT_*` bytes).
    pub msg_fmt: u8,
    /// `1` when `content` begins with a user data header, `0` otherwise.
    pub tp_udhi: u8,
    /// Number of segments in the message this segment belongs to.
    pub pk_total: u8,
    /// 1-based position of this segment within the message.
    pub pk_number: u8,
    /// Raw bytes to send: the header (when `tp_udhi == 1`) followed by the
    /// encoded text.
    pub content: Vec<u8>,
}
/// Encodes a single character into the bytes used by `enc`.
///
/// * `Ascii` – exactly one byte. A character outside the ASCII range becomes
///   `?` instead of being truncated to its low eight bits.
/// * `Ucs2` – the character's UTF-16 code units in big-endian order: two
///   bytes, or four when the character needs a surrogate pair.
/// * `Gbk` – the GBK bytes from `encoding_rs`. A character GBK cannot
///   represent becomes `?`; without this check the encoder would substitute a
///   textual `&#NNNN;` numeric character reference.
fn encode_char(c: char, enc: SmsEncoding) -> Vec<u8> {
    // Stand-in for characters the target encoding cannot represent.
    const REPLACEMENT: u8 = b'?';

    match enc {
        SmsEncoding::Ascii => {
            let byte = if c.is_ascii() { c as u8 } else { REPLACEMENT };
            vec![byte]
        }
        SmsEncoding::Ucs2 => {
            let mut buf = [0u16; 2];
            let units = c.encode_utf16(&mut buf);
            let mut out = Vec::with_capacity(units.len() * 2);
            for unit in units.iter() {
                out.extend_from_slice(&unit.to_be_bytes());
            }
            out
        }
        SmsEncoding::Gbk => {
            let mut tmp = [0u8; 4];
            let s = c.encode_utf8(&mut tmp);
            // The third tuple element reports whether the encoder had to
            // replace an unmappable character.
            let (bytes, _, had_unmappable) = GBK.encode(s);
            if had_unmappable {
                vec![REPLACEMENT]
            } else {
                bytes.into_owned()
            }
        }
    }
}
/// Picks the encoding used to send `content`.
///
/// Returns `SmsEncoding::Ascii` when every character is ASCII (including the
/// empty string) and `SmsEncoding::Ucs2` otherwise. `SmsEncoding::Gbk` is
/// never selected here.
pub fn choose_encoding(content: &str) -> SmsEncoding {
    if !content.is_ascii() {
        return SmsEncoding::Ucs2;
    }
    SmsEncoding::Ascii
}
/// Encodes `content` with `enc`, character by character, and returns the
/// concatenated bytes.
///
/// No header is added and no length limit is applied; an empty string yields
/// an empty vector.
pub fn encode_content(content: &str, enc: SmsEncoding) -> Vec<u8> {
    content
        .chars()
        .flat_map(|ch| encode_char(ch, enc))
        .collect()
}
/// Decodes a received message body into a `String`.
///
/// # Parameters
/// * `msg_fmt` – encoding identifier (`MSG_FMT_ASCII`, `MSG_FMT_UCS2`,
///   `MSG_FMT_GBK`); any other value is decoded as lossy UTF-8.
/// * `tp_udhi` – `1` when `content` starts with a user data header.
/// * `content` – raw body bytes, header included when present.
///
/// # Header handling
/// When `tp_udhi == 1` the first byte is read as the header length (UDHL),
/// i.e. the number of header bytes that follow it, and `UDHL + 1` bytes are
/// skipped. This handles both the 6-byte header written by `split_content`
/// (`05 00 03 ..`) and longer headers such as the 7-byte 16-bit-reference
/// form, and it yields an empty string for a header-only body. If the
/// declared length overruns `content`, the body is decoded unmodified as a
/// best effort.
///
/// # Decoding
/// Invalid sequences never fail; they are replaced lossily. A UCS-2 payload
/// with an odd byte count cannot be split into 16-bit units and is decoded as
/// lossy UTF-8 instead.
pub fn decode_msg_content(msg_fmt: u8, tp_udhi: u8, content: &[u8]) -> String {
    let payload = if tp_udhi == 1 {
        content
            .first()
            // `get` returns `None` when the declared header is longer than
            // the body, and an empty slice when the body is header-only.
            .and_then(|&udhl| content.get(usize::from(udhl) + 1..))
            .unwrap_or(content)
    } else {
        content
    };

    match msg_fmt {
        MSG_FMT_ASCII => String::from_utf8_lossy(payload).into_owned(),
        MSG_FMT_UCS2 => {
            if payload.len() % 2 != 0 {
                return String::from_utf8_lossy(payload).into_owned();
            }
            let units: Vec<u16> = payload
                .chunks_exact(2)
                .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
                .collect();
            String::from_utf16_lossy(&units)
        }
        MSG_FMT_GBK => {
            let (text, _, _) = GBK.decode(payload);
            text.into_owned()
        }
        _ => String::from_utf8_lossy(payload).into_owned(),
    }
}
/// Encodes `content` and splits it into sendable segments.
///
/// The encoding comes from `choose_encoding`. If the encoded text is at most
/// `SINGLE_MAX_BYTES` long, one segment without a header is returned
/// (`tp_udhi == 0`, `pk_total == 1`, `pk_number == 1`); this includes empty
/// input.
///
/// Otherwise the text is cut into payloads of at most `MULTIPART_MAX_BYTES`
/// bytes. Characters are appended whole, so a multi-byte character (for
/// example a UTF-16 surrogate pair) is never divided between two segments.
/// Each payload is prefixed with the 6-byte header
/// `05 00 03 <ref> <total> <seq>` and returned with `tp_udhi == 1`.
///
/// `<total>` and `<seq>` are single bytes, so one concatenated message holds
/// at most 255 parts. Longer input is emitted as several consecutive
/// concatenated messages, each with its own reference byte and its own
/// `1..=total` numbering, rather than letting the part number wrap around.
///
/// Side effect: advances `UDH_REF_COUNTER` once per concatenated message.
pub fn split_content(content: &str) -> Vec<SmsSegment> {
    // Upper bound imposed by the one-byte total/sequence header fields.
    const MAX_PARTS: usize = u8::MAX as usize;

    let enc = choose_encoding(content);
    let msg_fmt = enc.msg_fmt();
    let units: Vec<Vec<u8>> = content.chars().map(|c| encode_char(c, enc)).collect();
    let total_bytes: usize = units.iter().map(Vec::len).sum();

    if total_bytes <= SINGLE_MAX_BYTES {
        return vec![SmsSegment {
            msg_fmt,
            tp_udhi: 0,
            pk_total: 1,
            pk_number: 1,
            content: units.concat(),
        }];
    }

    // Greedily pack whole characters into payloads of the multi-part size.
    let mut chunks: Vec<Vec<u8>> = Vec::new();
    let mut current: Vec<u8> = Vec::new();
    for unit in &units {
        if !current.is_empty() && current.len() + unit.len() > MULTIPART_MAX_BYTES {
            chunks.push(std::mem::take(&mut current));
        }
        current.extend_from_slice(unit);
    }
    if !current.is_empty() {
        chunks.push(current);
    }

    let mut segments = Vec::with_capacity(chunks.len());
    for group in chunks.chunks(MAX_PARTS) {
        // `chunks(MAX_PARTS)` guarantees group.len() <= 255, so this cast
        // cannot truncate.
        let total = group.len() as u8;
        let ref_num = (UDH_REF_COUNTER.fetch_add(1, Ordering::Relaxed) & 0xFF) as u8;
        // Zipping with 1..=255 yields the 1-based part number directly as u8.
        for (seq, chunk) in (1..=u8::MAX).zip(group) {
            let mut body = Vec::with_capacity(6 + chunk.len());
            body.extend_from_slice(&[0x05, 0x00, 0x03, ref_num, total, seq]);
            body.extend_from_slice(chunk);
            segments.push(SmsSegment {
                msg_fmt,
                tp_udhi: 1,
                pk_total: total,
                pk_number: seq,
                content: body,
            });
        }
    }
    segments
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Short ASCII text is returned as one header-less segment.
    #[test]
    fn ascii_single_segment() {
        let segments = split_content("hello world");
        assert_eq!(segments.len(), 1);

        let only = &segments[0];
        assert_eq!(only.msg_fmt, MSG_FMT_ASCII);
        assert_eq!(only.tp_udhi, 0);
        assert_eq!(only.content, b"hello world");
    }

    /// Short non-ASCII text is sent as big-endian UCS-2 in one segment.
    #[test]
    fn chinese_single_segment_ucs2() {
        let segments = split_content("你好");
        assert_eq!(segments.len(), 1);

        let only = &segments[0];
        assert_eq!(only.msg_fmt, MSG_FMT_UCS2);
        assert_eq!(only.content, vec![0x4f, 0x60, 0x59, 0x7d]);
    }

    /// 71 two-byte characters exceed the single-segment limit, so every
    /// segment must carry a consistent header and stay within 140 bytes.
    #[test]
    fn long_chinese_splits_with_udh() {
        let text = "中".repeat(71);
        let segments = split_content(&text);
        assert!(segments.len() >= 2);

        let expected_total = segments[0].pk_total;
        for (expected_number, segment) in (1usize..).zip(&segments) {
            assert_eq!(segment.tp_udhi, 1);
            assert_eq!(segment.pk_total, expected_total);
            assert_eq!(usize::from(segment.pk_number), expected_number);
            assert!(segment.content.starts_with(&[0x05, 0x00, 0x03]));
            assert!(segment.content.len() <= SINGLE_MAX_BYTES);
        }
    }

    /// Each emoji encodes to a 4-byte surrogate pair; payload lengths that
    /// are multiples of four show that no pair was cut in half.
    #[test]
    fn surrogate_pair_not_split() {
        let text = "\u{1F600}".repeat(40);
        let segments = split_content(&text);
        assert!(segments.len() >= 2);

        for segment in &segments {
            let payload_len = segment.content.len() - 6;
            assert_eq!(payload_len % 4, 0);
        }
    }
}