Skip to main content

dxcode/
lib.rs

1//! dxcode - 带有 `dx` 前缀的自定义编码算法
2//!
3//! Rust 实现 - 带 CRC16 校验和
4//!
5//! # 示例
6//!
7//! ```
8//! use dxcode::{encode, decode, encode_str, decode_str};
9//!
10//! // 编码字符串
11//! let encoded = encode_str("你好,Dogxi!");
12//! println!("{}", encoded); // dxXXXX...
13//!
14//! // 解码(自动验证校验和)
15//! let decoded = decode_str(&encoded).unwrap();
16//! println!("{}", decoded); // 你好,Dogxi!
17//!
18//! // 验证完整性
19//! use dxcode::verify;
20//! assert!(verify(&encoded).unwrap());
21//! ```
22//!
23//! # 作者
24//!
25//! Dogxi
26//!
27//! # 版本
28//!
29//! 2.0.0
30//!
31//! # 许可证
32//!
33//! MIT
34
35use std::collections::HashMap;
36use std::error::Error;
37use std::fmt;
38use std::sync::LazyLock;
39
/// The 64-symbol DX alphabet.
///
/// It opens with the signature `DXdx`, followed by the digits, the remaining
/// upper- and lower-case ASCII letters, and finally `-` and `_`.
pub const CHARSET: &str = "DXdx0123456789ABCEFGHIJKLMNOPQRSTUVWYZabcefghijklmnopqrstuvwyz-_";

/// Magic value XOR-ed into every 6-bit group: the ASCII code of `'D'` (0x44).
pub const MAGIC: u8 = b'D';

/// Prefix that every DX-encoded string starts with.
pub const PREFIX: &str = "dx";

/// Character used to pad the final 4-symbol group of an encoding.
pub const PADDING: char = '=';

/// Size, in raw bytes, of the header stored in front of the payload: a 2-byte CRC16.
const HEADER_SIZE: usize = 2;
54
55/// 字符集字节数组
56static CHARSET_BYTES: LazyLock<Vec<u8>> = LazyLock::new(|| CHARSET.as_bytes().to_vec());
57
58/// 反向查找表
59static DECODE_MAP: LazyLock<HashMap<u8, u8>> = LazyLock::new(|| {
60    let mut map = HashMap::new();
61    for (i, &byte) in CHARSET_BYTES.iter().enumerate() {
62        map.insert(byte, i as u8);
63    }
64    map
65});
66
/// Byte-wise CRC16 lookup table for the polynomial 0x1021 (CRC-16-CCITT).
///
/// Entry `i` is the result of shifting `i << 8` left eight times, XOR-ing in
/// the polynomial whenever the top bit was set before the shift.
static CRC16_TABLE: LazyLock<[u16; 256]> = LazyLock::new(|| {
    let mut table = [0u16; 256];
    for (value, slot) in table.iter_mut().enumerate() {
        *slot = (0..8).fold((value as u16) << 8, |crc, _| {
            let shifted = crc << 1;
            if crc & 0x8000 != 0 {
                shifted ^ 0x1021
            } else {
                shifted
            }
        });
    }
    table
});
83
/// Errors that can occur while decoding or inspecting a DX-encoded string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DxError {
    /// The input does not start with the `dx` prefix.
    InvalidPrefix,
    /// The encoded body has an incorrect length.
    InvalidLength,
    /// The encoded body contains the given character, which is not allowed.
    InvalidCharacter(char),
    /// The decoded payload is not valid UTF-8; carries the underlying message.
    Utf8Error(String),
    /// The stored checksum (`expected`) differs from the one computed over
    /// the payload (`actual`).
    ChecksumMismatch { expected: u16, actual: u16 },
    /// The decoded data is too short to contain the header.
    InvalidHeader,
}

impl fmt::Display for DxError {
    /// Writes the user-facing message for the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidPrefix => f.write_str("无效的 DX 编码:缺少 dx 前缀"),
            Self::InvalidLength => f.write_str("无效的 DX 编码:长度不正确"),
            Self::InvalidCharacter(ch) => write!(f, "无效的 DX 编码:包含非法字符 '{}'", ch),
            Self::Utf8Error(message) => write!(f, "UTF-8 解码错误:{}", message),
            Self::ChecksumMismatch { expected, actual } => {
                write!(f, "校验和不匹配:期望 0x{:04X},实际 0x{:04X}", expected, actual)
            }
            Self::InvalidHeader => f.write_str("无效的格式头部"),
        }
    }
}

impl Error for DxError {}

/// Result type used throughout the crate, with [`DxError`] as the error.
pub type Result<T> = std::result::Result<T, DxError>;
120
121/// 计算 CRC16-CCITT 校验和
122pub fn crc16(data: &[u8]) -> u16 {
123    let mut crc: u16 = 0xFFFF;
124    for &byte in data {
125        let index = ((crc >> 8) ^ (byte as u16)) as usize;
126        crc = (crc << 8) ^ CRC16_TABLE[index];
127    }
128    crc
129}
130
131/// 内部编码函数(不带前缀)
132fn encode_raw(data: &[u8]) -> String {
133    if data.is_empty() {
134        return String::new();
135    }
136
137    let mut result = String::with_capacity((data.len() + 2) / 3 * 4);
138    let charset = &*CHARSET_BYTES;
139
140    // 每 3 字节处理一组
141    for chunk in data.chunks(3) {
142        let b0 = chunk[0];
143        let b1 = chunk.get(1).copied().unwrap_or(0);
144        let b2 = chunk.get(2).copied().unwrap_or(0);
145
146        // 将 3 字节(24位)分成 4 个 6 位组
147        let v0 = (b0 >> 2) & 0x3F;
148        let v1 = ((b0 & 0x03) << 4 | (b1 >> 4)) & 0x3F;
149        let v2 = ((b1 & 0x0F) << 2 | (b2 >> 6)) & 0x3F;
150        let v3 = b2 & 0x3F;
151
152        // XOR 变换并映射到字符
153        result.push(charset[((v0 ^ MAGIC) & 0x3F) as usize] as char);
154        result.push(charset[((v1 ^ MAGIC) & 0x3F) as usize] as char);
155
156        if chunk.len() > 1 {
157            result.push(charset[((v2 ^ MAGIC) & 0x3F) as usize] as char);
158        } else {
159            result.push(PADDING);
160        }
161
162        if chunk.len() > 2 {
163            result.push(charset[((v3 ^ MAGIC) & 0x3F) as usize] as char);
164        } else {
165            result.push(PADDING);
166        }
167    }
168
169    result
170}
171
172/// 内部解码函数(不带前缀验证)
173fn decode_raw(data: &str) -> Result<Vec<u8>> {
174    if data.is_empty() {
175        return Ok(Vec::new());
176    }
177
178    // 验证长度
179    if data.len() % 4 != 0 {
180        return Err(DxError::InvalidLength);
181    }
182
183    // 计算填充数量
184    let padding_count = if data.ends_with("==") {
185        2
186    } else if data.ends_with('=') {
187        1
188    } else {
189        0
190    };
191
192    // 计算输出长度
193    let output_len = (data.len() / 4) * 3 - padding_count;
194    let mut result = Vec::with_capacity(output_len);
195
196    let decode_map = &*DECODE_MAP;
197    let data_bytes = data.as_bytes();
198
199    // 每 4 字符处理一组
200    for chunk in data_bytes.chunks(4) {
201        let c0 = chunk[0];
202        let c1 = chunk[1];
203        let c2 = chunk[2];
204        let c3 = chunk[3];
205
206        // 字符转索引
207        let i0 = *decode_map
208            .get(&c0)
209            .ok_or_else(|| DxError::InvalidCharacter(c0 as char))?;
210        let i1 = *decode_map
211            .get(&c1)
212            .ok_or_else(|| DxError::InvalidCharacter(c1 as char))?;
213
214        let i2 = if c2 == PADDING as u8 {
215            0
216        } else {
217            *decode_map
218                .get(&c2)
219                .ok_or_else(|| DxError::InvalidCharacter(c2 as char))?
220        };
221
222        let i3 = if c3 == PADDING as u8 {
223            0
224        } else {
225            *decode_map
226                .get(&c3)
227                .ok_or_else(|| DxError::InvalidCharacter(c3 as char))?
228        };
229
230        // XOR 逆变换
231        let v0 = (i0 ^ MAGIC) & 0x3F;
232        let v1 = (i1 ^ MAGIC) & 0x3F;
233        let v2 = (i2 ^ MAGIC) & 0x3F;
234        let v3 = (i3 ^ MAGIC) & 0x3F;
235
236        // 重建字节
237        let b0 = (v0 << 2) | (v1 >> 4);
238        let b1 = ((v1 & 0x0F) << 4) | (v2 >> 2);
239        let b2 = ((v2 & 0x03) << 6) | v3;
240
241        if result.len() < output_len {
242            result.push(b0);
243        }
244        if result.len() < output_len {
245            result.push(b1);
246        }
247        if result.len() < output_len {
248            result.push(b2);
249        }
250    }
251
252    Ok(result)
253}
254
255/// 将字节切片编码为 DX 格式(带 CRC16 校验和)
256///
257/// # 参数
258///
259/// * `data` - 要编码的字节数据
260///
261/// # 返回值
262///
263/// 以 'dx' 为前缀、包含 CRC16 校验和的编码字符串
264///
265/// # 示例
266///
267/// ```
268/// use dxcode::encode;
269///
270/// let encoded = encode(b"Hello, Dogxi!");
271/// assert!(encoded.starts_with("dx"));
272/// ```
273pub fn encode(data: &[u8]) -> String {
274    // 计算 CRC16
275    let checksum = crc16(data);
276
277    // 构建头部(2字节 CRC16,大端序)
278    let header = [(checksum >> 8) as u8, (checksum & 0xFF) as u8];
279
280    // 合并头部和数据
281    let mut combined = Vec::with_capacity(HEADER_SIZE + data.len());
282    combined.extend_from_slice(&header);
283    combined.extend_from_slice(data);
284
285    // 编码
286    let mut result = String::with_capacity(PREFIX.len() + (combined.len() + 2) / 3 * 4);
287    result.push_str(PREFIX);
288    result.push_str(&encode_raw(&combined));
289    result
290}
291
292/// 将字符串编码为 DX 格式(带 CRC16 校验和)
293///
294/// # 参数
295///
296/// * `s` - 要编码的字符串
297///
298/// # 返回值
299///
300/// 以 'dx' 为前缀、包含 CRC16 校验和的编码字符串
301///
302/// # 示例
303///
304/// ```
305/// use dxcode::encode_str;
306///
307/// let encoded = encode_str("你好,Dogxi!");
308/// assert!(encoded.starts_with("dx"));
309/// ```
310pub fn encode_str(s: &str) -> String {
311    encode(s.as_bytes())
312}
313
314/// 将 DX 编码的字符串解码为字节向量(带校验和验证)
315///
316/// # 参数
317///
318/// * `encoded` - DX 编码的字符串(必须以 'dx' 开头)
319///
320/// # 返回值
321///
322/// 解码后的字节向量,如果输入无效或校验和不匹配则返回错误
323///
324/// # 示例
325///
326/// ```
327/// use dxcode::{encode, decode};
328///
329/// let encoded = encode(b"Hello");
330/// let decoded = decode(&encoded).unwrap();
331/// assert_eq!(decoded, b"Hello");
332/// ```
333pub fn decode(encoded: &str) -> Result<Vec<u8>> {
334    // 验证前缀
335    if !encoded.starts_with(PREFIX) {
336        return Err(DxError::InvalidPrefix);
337    }
338
339    // 移除前缀
340    let data = &encoded[PREFIX.len()..];
341
342    // 解码
343    let combined = decode_raw(data)?;
344
345    // 验证长度
346    if combined.len() < HEADER_SIZE {
347        return Err(DxError::InvalidHeader);
348    }
349
350    // 提取头部
351    let expected_checksum = ((combined[0] as u16) << 8) | (combined[1] as u16);
352
353    // 提取数据
354    let payload = &combined[HEADER_SIZE..];
355
356    // 验证校验和
357    let actual_checksum = crc16(payload);
358    if expected_checksum != actual_checksum {
359        return Err(DxError::ChecksumMismatch {
360            expected: expected_checksum,
361            actual: actual_checksum,
362        });
363    }
364
365    Ok(payload.to_vec())
366}
367
368/// 将 DX 编码的字符串解码为字符串(带校验和验证)
369///
370/// # 参数
371///
372/// * `encoded` - DX 编码的字符串
373///
374/// # 返回值
375///
376/// 解码后的字符串,如果输入无效、校验和不匹配或不是有效的 UTF-8 则返回错误
377///
378/// # 示例
379///
380/// ```
381/// use dxcode::{encode_str, decode_str};
382///
383/// let encoded = encode_str("你好,Dogxi!");
384/// let decoded = decode_str(&encoded).unwrap();
385/// assert_eq!(decoded, "你好,Dogxi!");
386/// ```
387pub fn decode_str(encoded: &str) -> Result<String> {
388    let bytes = decode(encoded)?;
389    String::from_utf8(bytes).map_err(|e| DxError::Utf8Error(e.to_string()))
390}
391
392/// 检查字符串是否为有效的 DX 编码
393///
394/// # 参数
395///
396/// * `s` - 要检查的字符串
397///
398/// # 返回值
399///
400/// 如果是有效的 DX 编码返回 `true`,否则返回 `false`
401///
402/// # 示例
403///
404/// ```
405/// use dxcode::{encode_str, is_encoded};
406///
407/// let encoded = encode_str("Hello");
408/// assert!(is_encoded(&encoded));
409/// assert!(!is_encoded("hello"));
410/// ```
411pub fn is_encoded(s: &str) -> bool {
412    if !s.starts_with(PREFIX) {
413        return false;
414    }
415
416    let data = &s[PREFIX.len()..];
417
418    // 检查长度(至少需要头部)
419    if data.is_empty() || data.len() % 4 != 0 {
420        return false;
421    }
422
423    let decode_map = &*DECODE_MAP;
424
425    // 检查字符
426    for (i, c) in data.bytes().enumerate() {
427        if c == PADDING as u8 {
428            // 填充只能在末尾
429            if i < data.len() - 2 {
430                return false;
431            }
432        } else if !decode_map.contains_key(&c) {
433            return false;
434        }
435    }
436
437    true
438}
439
440/// 验证 DX 编码的校验和(不返回解码数据)
441///
442/// # 参数
443///
444/// * `encoded` - DX 编码的字符串
445///
446/// # 返回值
447///
448/// 如果校验和匹配返回 `Ok(true)`,不匹配返回 `Ok(false)`,格式无效返回错误
449///
450/// # 示例
451///
452/// ```
453/// use dxcode::{encode_str, verify};
454///
455/// let encoded = encode_str("Hello");
456/// assert!(verify(&encoded).unwrap());
457/// ```
458pub fn verify(encoded: &str) -> Result<bool> {
459    match decode(encoded) {
460        Ok(_) => Ok(true),
461        Err(DxError::ChecksumMismatch { .. }) => Ok(false),
462        Err(e) => Err(e),
463    }
464}
465
466/// 获取 DX 编码的校验和信息
467///
468/// # 参数
469///
470/// * `encoded` - DX 编码的字符串
471///
472/// # 返回值
473///
474/// 返回 `(存储的校验和, 实际计算的校验和)`
475///
476/// # 示例
477///
478/// ```
479/// use dxcode::{encode_str, get_checksum};
480///
481/// let encoded = encode_str("Hello");
482/// let (stored, computed) = get_checksum(&encoded).unwrap();
483/// assert_eq!(stored, computed);
484/// ```
485pub fn get_checksum(encoded: &str) -> Result<(u16, u16)> {
486    // 验证前缀
487    if !encoded.starts_with(PREFIX) {
488        return Err(DxError::InvalidPrefix);
489    }
490
491    // 移除前缀
492    let data = &encoded[PREFIX.len()..];
493
494    // 解码
495    let combined = decode_raw(data)?;
496
497    // 验证长度
498    if combined.len() < HEADER_SIZE {
499        return Err(DxError::InvalidHeader);
500    }
501
502    // 提取校验和
503    let stored = ((combined[0] as u16) << 8) | (combined[1] as u16);
504    let payload = &combined[HEADER_SIZE..];
505    let computed = crc16(payload);
506
507    Ok((stored, computed))
508}
509
/// Descriptive metadata about the DX encoding.
#[derive(Debug, Clone)]
pub struct Info {
    /// Name of the encoding.
    pub name: &'static str,
    /// Version string.
    pub version: &'static str,
    /// Author name.
    pub author: &'static str,
    /// Alphabet used for the encoded symbols.
    pub charset: &'static str,
    /// Prefix that starts every encoded string.
    pub prefix: &'static str,
    /// Magic value used in the XOR transform.
    pub magic: u8,
    /// Padding character.
    pub padding: char,
    /// Name of the checksum algorithm.
    pub checksum: &'static str,
}
522
523/// 获取 DX 编码的信息
524///
525/// # 返回值
526///
527/// 包含版本、作者、字符集等信息的 `Info` 结构体
528///
529/// # 示例
530///
531/// ```
532/// use dxcode::get_info;
533///
534/// let info = get_info();
535/// println!("名称: {}", info.name);
536/// println!("作者: {}", info.author);
537/// ```
538pub fn get_info() -> Info {
539    Info {
540        name: "DX Encoding",
541        version: "2.0.0",
542        author: "Dogxi",
543        charset: CHARSET,
544        prefix: PREFIX,
545        magic: MAGIC,
546        padding: PADDING,
547        checksum: "CRC16-CCITT",
548    }
549}
550
/// Unit tests: round trips, input validation, checksum handling and metadata.
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes then decodes `original` and checks that the text survives.
    fn assert_str_roundtrip(original: &str) {
        let encoded = encode_str(original);
        assert!(encoded.starts_with("dx"));
        assert_eq!(decode_str(&encoded).unwrap(), original);
    }

    #[test]
    fn test_simple_string() {
        assert_str_roundtrip("Hello");
    }

    #[test]
    fn test_chinese_string() {
        assert_str_roundtrip("你好,世界!");
    }

    #[test]
    fn test_emoji() {
        assert_str_roundtrip("🎉🚀✨");
    }

    #[test]
    fn test_empty_string() {
        assert_str_roundtrip("");
    }

    #[test]
    fn test_binary_data() {
        let original: Vec<u8> = vec![0x00, 0x01, 0x02, 0xFE, 0xFF];
        let encoded = encode(&original);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_all_byte_values() {
        let original: Vec<u8> = (0..=255).collect();
        let encoded = encode(&original);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_is_encoded() {
        let encoded = encode_str("Hello");
        assert!(is_encoded(&encoded));
        assert!(!is_encoded("hello"));
        assert!(!is_encoded(""));
        assert!(!is_encoded("dxABC")); // body length is not a multiple of four
    }

    #[test]
    fn test_decode_invalid_prefix() {
        let result = decode("invalid");
        assert!(matches!(result, Err(DxError::InvalidPrefix)));
    }

    #[test]
    fn test_decode_invalid_length() {
        let result = decode("dxABC");
        assert!(matches!(result, Err(DxError::InvalidLength)));
    }

    #[test]
    fn test_checksum_verification() {
        let encoded = encode_str("Hello");
        assert!(verify(&encoded).unwrap());

        let (stored, computed) = get_checksum(&encoded).unwrap();
        assert_eq!(stored, computed);
    }

    #[test]
    fn test_checksum_mismatch() {
        let encoded = encode_str("Hello World Test Data");
        let mut chars: Vec<char> = encoded.chars().collect();

        // Index 10 is body symbol 8, the first symbol of the third group. It
        // carries the top six bits of payload byte 4, which is past the
        // 2-byte checksum header, so only payload data is tampered with.
        let pos = 10;
        assert!(chars.len() > pos, "encoded string is too short to tamper with");

        // Both replacements are alphabet symbols, so the tampered string
        // stays well-formed and only the checksum can catch the change.
        chars[pos] = if chars[pos] == 'A' { 'B' } else { 'A' };
        let modified: String = chars.into_iter().collect();

        assert!(matches!(
            decode(&modified),
            Err(DxError::ChecksumMismatch { .. })
        ));
        assert_eq!(verify(&modified), Ok(false));
    }

    #[test]
    fn test_get_info() {
        let info = get_info();
        assert_eq!(info.name, "DX Encoding");
        assert_eq!(info.author, "Dogxi");
        assert_eq!(info.prefix, "dx");
        assert_eq!(info.magic, 0x44);
        assert_eq!(info.charset.len(), 64);
        assert_eq!(info.version, "2.0.0");
        assert_eq!(info.checksum, "CRC16-CCITT");
    }

    #[test]
    fn test_various_lengths() {
        // Covers every padding case (0, 1 and 2 padding characters) many times.
        for length in 0..100 {
            let original: Vec<u8> = (0..length).map(|i| (i % 256) as u8).collect();
            let encoded = encode(&original);
            let decoded = decode(&encoded).unwrap();
            assert_eq!(decoded, original, "长度 {} 失败", length);
        }
    }

    #[test]
    fn test_crc16() {
        // Empty input leaves the initial register value untouched.
        assert_eq!(crc16(&[]), 0xFFFF);

        // Known check value: CRC-16-CCITT of "123456789" is 0x29B1.
        let data = b"123456789";
        let crc = crc16(data);
        assert_eq!(crc, 0x29B1);
    }

    #[test]
    fn test_crc16_deterministic() {
        let data = b"Hello, World!";
        let crc1 = crc16(data);
        let crc2 = crc16(data);
        assert_eq!(crc1, crc2);
    }

    #[test]
    fn test_verify_function() {
        let encoded = encode_str("Test data for verification");
        assert!(verify(&encoded).unwrap());
    }
}