1use std::fmt::Display;
2
3use crate::constants::{UTF16BE_BOM, UTF16LE_BOM, UTF16_BUFFER_SIZE, UTF8_BOM};
4
/// Text encodings known to this module.
///
/// Every variant is a plain unit value, so the type is cheap to copy, compare and hash.
/// Its `Display` form is a short human-readable label (see the per-variant notes).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// Displayed as `UTF-8`.
    Utf8,
    /// Displayed as `UTF-8-BOM`.
    Utf8Bom,
    /// Displayed as `UTF-16-BE`.
    Utf16Be,
    /// Displayed as `UTF-16-LE`.
    Utf16Le,
}
13
14impl From<Encoding> for String {
15 fn from(encoding: Encoding) -> Self {
16 encoding.to_string()
17 }
18}
19
20impl Display for Encoding {
21 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22 match self {
23 Encoding::Utf8 => write!(f, "UTF-8"),
24 Encoding::Utf8Bom => write!(f, "UTF-8-BOM"),
25 Encoding::Utf16Be => write!(f, "UTF-16-BE"),
26 Encoding::Utf16Le => write!(f, "UTF-16-LE"),
27 }
28 }
29}
30
31pub fn to_utf8_bom(s: &String) -> Vec<u8> {
33 [UTF8_BOM, s.as_bytes()].concat()
34}
35
36pub fn to_utf16_be(s: &str) -> Vec<u8> {
38 let mut bytes = UTF16BE_BOM.to_vec();
39 let mut buffer = [0u16; UTF16_BUFFER_SIZE];
40 for c in s.chars() {
41 for u16_unit in c.encode_utf16(&mut buffer) {
42 bytes.extend_from_slice(u16_unit.to_be_bytes().as_slice())
43 }
44 }
45
46 bytes
47}
48
49pub fn to_utf16_le(s: &str) -> Vec<u8> {
51 let mut bytes = UTF16LE_BOM.to_vec();
52 let mut buffer = [0u16; UTF16_BUFFER_SIZE];
53 for c in s.chars() {
54 for u16_unit in c.encode_utf16(&mut buffer) {
55 bytes.extend_from_slice(u16_unit.to_le_bytes().as_slice())
56 }
57 }
58
59 bytes
60}
61
#[cfg(test)]
mod tests {
    use test_case::test_case;

    use super::{to_utf16_be, to_utf16_le, to_utf8_bom};

    // UTF-8: the output is always the 3-byte BOM followed by the input's own UTF-8 bytes.
    #[test_case("", b"\xEF\xBB\xBF"; "no chars")]
    #[test_case("Hello!", b"\xEF\xBB\xBF\x48\x65\x6C\x6C\x6F\x21"; "ascii chars (8-bit chars)")]
    #[test_case("éüñç", b"\xEF\xBB\xBF\xC3\xA9\xC3\xBC\xC3\xB1\xC3\xA7"; "latin-1 chars (16-bit chars)")]
    #[test_case("你好", b"\xEF\xBB\xBF\xE4\xBD\xA0\xE5\xA5\xBD"; "mandarin chars (24-bit chars)")]
    #[test_case("🌍🚀", b"\xEF\xBB\xBF\xF0\x9F\x8C\x8D\xF0\x9F\x9A\x80"; "Supplementary Multilingual Plane chars (32-bit chars)")]
    fn test_to_utf8_bom(input: &str, expected_bytes: &[u8]) {
        let bytes = to_utf8_bom(&String::from(input));
        assert_eq!(bytes, expected_bytes);
    }

    // UTF-16 BE: BOM `FE FF`, then each code unit high byte first.
    #[test_case("", b"\xFE\xFF"; "no chars")]
    #[test_case("Hello!", b"\xFE\xFF\x00\x48\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x21"; "16-bit chars")]
    #[test_case("🌍🚀", b"\xFE\xFF\xD8\x3C\xDF\x0D\xD8\x3D\xDE\x80"; "32-bit chars with BE BOM")]
    #[test_case("Hello! 😊", b"\xFE\xFF\x00\x48\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x21\x00\x20\xD8\x3D\xDE\x0A"; "mixed-length chars with BE BOM")]
    fn test_to_utf16_be(input: &str, expected_bytes: &[u8]) {
        let bytes = to_utf16_be(input);
        assert_eq!(bytes, expected_bytes);
    }

    // UTF-16 LE: BOM `FF FE`, then each code unit low byte first.
    #[test_case("", b"\xFF\xFE"; "no chars")]
    #[test_case("Hello!", b"\xFF\xFE\x48\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x21\x00"; "16-bit chars")]
    #[test_case("🌍🚀", b"\xFF\xFE\x3C\xD8\x0D\xDF\x3D\xD8\x80\xDE"; "32-bit chars with LE BOM")]
    #[test_case("Hello! 😊", b"\xFF\xFE\x48\x00\x65\x00\x6C\x00\x6C\x00\x6F\x00\x21\x00\x20\x00\x3D\xD8\x0A\xDE"; "mixed-length chars with LE BOM")]
    fn test_to_utf16_le(input: &str, expected_bytes: &[u8]) {
        let bytes = to_utf16_le(input);
        assert_eq!(bytes, expected_bytes);
    }
}