/// Returns the low byte of `c`'s Unicode scalar value.
///
/// Characters up to U+00FF map to their code point. Anything above that is
/// truncated to its least-significant eight bits by the cast.
pub fn to_char_code(c: char) -> u8 {
    let scalar = u32::from(c);
    // Truncating cast: only the lowest eight bits survive.
    scalar as u8
}
/// Renders `num` as exactly two uppercase hexadecimal digits (`0` -> `"00"`,
/// `255` -> `"FF"`).
pub fn to_hex_string(num: u8) -> String {
    const HEX_DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
    ];

    let mut out = String::with_capacity(2);
    // High nibble first, then low nibble.
    out.push(HEX_DIGITS[usize::from(num >> 4)]);
    out.push(HEX_DIGITS[usize::from(num & 0x0F)]);
    out
}
/// Renders `num` in uppercase hexadecimal, left-padded with `'0'` until the
/// result is at least `min_length` characters long.
///
/// The output is never truncated: if the digits already reach `min_length`
/// they are returned unchanged.
pub fn to_hex_string_of_min_length(num: u16, min_length: usize) -> String {
    let digits = format!("{:X}", num);
    // Zero when the digits are already long enough.
    let missing = min_length.saturating_sub(digits.len());

    let mut padded = String::with_capacity(missing + digits.len());
    padded.extend(std::iter::repeat('0').take(missing));
    padded.push_str(&digits);
    padded
}
/// Parses `hex` as a base-16 `u8` and returns the character with that code
/// point (U+0000 through U+00FF).
///
/// Input that does not parse as a `u8` (empty, non-hex digits, or a value
/// above `FF`) yields the NUL character `'\0'`.
pub fn char_from_hex_code(hex: &str) -> char {
    match u8::from_str_radix(hex, 16) {
        Ok(code) => char::from(code),
        // Parse failures fall back to NUL rather than reporting an error.
        Err(_) => '\0',
    }
}
/// Copies the UTF-8 bytes of `s` into `buffer`, starting at index `offset`,
/// and returns the number of bytes written.
///
/// # Panics
///
/// Panics if `buffer` has fewer than `s.len()` bytes available from `offset`
/// onwards.
pub fn copy_string_into_buffer(s: &str, buffer: &mut [u8], offset: usize) -> usize {
    let src = s.as_bytes();
    // Narrow to the exact destination window; out-of-range windows panic here.
    let dest = &mut buffer[offset..][..src.len()];
    dest.copy_from_slice(src);
    src.len()
}
/// Formats `value` as a plain decimal string without exponent notation.
///
/// * Integral values whose magnitude fits in an `i64` are printed as exact
///   integers (so `21.0` becomes `"21"` and `-0.0` becomes `"0"`).
/// * Every other value is printed via [`format_no_exponent`], which yields
///   the shortest decimal that round-trips to the same `f64`.
/// * NaN and the infinities are rendered as `"NaN"`, `"inf"` and `"-inf"`.
pub fn number_to_string(value: f64) -> String {
    // 2^63: every integral f64 strictly below this magnitude fits in an i64.
    // Above it, `as i64` would saturate and print the wrong digits.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    if value.fract() == 0.0 && value.abs() < I64_LIMIT {
        // The integer path also normalizes negative zero to "0".
        (value as i64).to_string()
    } else {
        format_no_exponent(value)
    }
}

/// Renders `value` in positional decimal notation.
///
/// `f64`'s `Display` implementation never switches to exponent notation and
/// emits the shortest digit sequence that parses back to the same value, so
/// no fixed precision or trailing-zero trimming is needed.
fn format_no_exponent(value: f64) -> String {
    value.to_string()
}
/// Returns an owned copy of the UTF-8 bytes of `s`.
pub fn typed_array_for(s: &str) -> Vec<u8> {
    s.as_bytes().to_vec()
}
/// Builds a string by treating every byte as the Unicode code point of the
/// same value (U+0000 through U+00FF).
///
/// The bytes are not decoded as UTF-8.
pub fn array_as_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(char::from(byte));
    }
    text
}
/// Concatenates all slices in `parts`, in order, into one newly allocated
/// vector.
pub fn merge_into_typed_array(parts: &[&[u8]]) -> Vec<u8> {
    parts.concat()
}
/// Reports whether `bytes` begins with a UTF-16 byte-order mark, either
/// big-endian (`FE FF`) or little-endian (`FF FE`).
pub fn has_utf16_bom(bytes: &[u8]) -> bool {
    matches!(bytes, [0xFE, 0xFF, ..] | [0xFF, 0xFE, ..])
}
/// Decodes UTF-16 data whose first two bytes are a byte-order mark.
///
/// * Inputs shorter than two bytes decode to the empty string.
/// * The first two bytes are always consumed as the marker: `FE FF` selects
///   big-endian, and any other pair is treated as little-endian.
/// * A trailing odd byte is ignored.
/// * Unpaired surrogates are replaced with U+FFFD.
pub fn utf16_decode(bytes: &[u8]) -> String {
    let (marker, payload) = match bytes {
        [first, second, rest @ ..] => ([*first, *second], rest),
        _ => return String::new(),
    };
    let is_big_endian = marker == [0xFE, 0xFF];

    // `chunks_exact` yields only complete pairs, dropping a dangling byte.
    let units: Vec<u16> = payload
        .chunks_exact(2)
        .map(|pair| {
            let raw = [pair[0], pair[1]];
            if is_big_endian {
                u16::from_be_bytes(raw)
            } else {
                u16::from_le_bytes(raw)
            }
        })
        .collect();

    String::from_utf16_lossy(&units)
}
/// Encodes `text` as UTF-16 code units, prefixed with the byte-order mark
/// code unit `0xFEFF`.
///
/// Characters outside the Basic Multilingual Plane are emitted as surrogate
/// pairs.
pub fn utf16_encode(text: &str) -> Vec<u16> {
    std::iter::once(0xFEFF_u16)
        .chain(text.encode_utf16())
        .collect()
}
/// Decodes a PDFDocEncoding byte string into Unicode text.
///
/// Each byte maps to exactly one character:
///
/// * `0x00..=0x17` and `0x20..=0x7E` map to the code point of the same value.
/// * `0x18..=0x1F` map to spacing diacritics (breve, caron, ...).
/// * `0x80..=0x9E` map to typographic symbols and extra Latin letters.
/// * `0xA0` maps to the euro sign.
/// * `0xA1..=0xFF` map to the code point of the same value, except `0xAD`.
/// * `0x7F`, `0x9F` and `0xAD` are undefined in the encoding and decode to
///   U+FFFD REPLACEMENT CHARACTER.
pub fn pdf_doc_encoding_decode(bytes: &[u8]) -> String {
    bytes
        .iter()
        .map(|&b| match b {
            // Code points shared with ASCII / Latin-1.
            0x00..=0x17 | 0x20..=0x7E | 0xA1..=0xAC | 0xAE..=0xFF => char::from(b),
            // Codes with no assigned character.
            0x7F | 0x9F | 0xAD => '\u{FFFD}',
            0x18 => '\u{02D8}', // breve
            0x19 => '\u{02C7}', // caron
            0x1A => '\u{02C6}', // modifier letter circumflex accent
            0x1B => '\u{02D9}', // dot above
            0x1C => '\u{02DD}', // double acute accent
            0x1D => '\u{02DB}', // ogonek
            0x1E => '\u{02DA}', // ring above
            0x1F => '\u{02DC}', // small tilde
            0x80 => '\u{2022}', // bullet
            0x81 => '\u{2020}', // dagger
            0x82 => '\u{2021}', // double dagger
            0x83 => '\u{2026}', // horizontal ellipsis
            0x84 => '\u{2014}', // em dash
            0x85 => '\u{2013}', // en dash
            0x86 => '\u{0192}', // latin small letter f with hook
            0x87 => '\u{2044}', // fraction slash
            0x88 => '\u{2039}', // single left-pointing angle quotation mark
            0x89 => '\u{203A}', // single right-pointing angle quotation mark
            0x8A => '\u{2212}', // minus sign
            0x8B => '\u{2030}', // per mille sign
            0x8C => '\u{201E}', // double low-9 quotation mark
            0x8D => '\u{201C}', // left double quotation mark
            0x8E => '\u{201D}', // right double quotation mark
            0x8F => '\u{2018}', // left single quotation mark
            0x90 => '\u{2019}', // right single quotation mark
            0x91 => '\u{201A}', // single low-9 quotation mark
            0x92 => '\u{2122}', // trade mark sign
            0x93 => '\u{FB01}', // latin small ligature fi
            0x94 => '\u{FB02}', // latin small ligature fl
            0x95 => '\u{0141}', // latin capital letter L with stroke
            0x96 => '\u{0152}', // latin capital ligature OE
            0x97 => '\u{0160}', // latin capital letter S with caron
            0x98 => '\u{0178}', // latin capital letter Y with diaeresis
            0x99 => '\u{017D}', // latin capital letter Z with caron
            0x9A => '\u{0131}', // latin small letter dotless i
            0x9B => '\u{0142}', // latin small letter l with stroke
            0x9C => '\u{0153}', // latin small ligature oe
            0x9D => '\u{0161}', // latin small letter s with caron
            0x9E => '\u{017E}', // latin small letter z with caron
            0xA0 => '\u{20AC}', // euro sign
        })
        .collect()
}
#[cfg(test)]
mod tests {
    //! Unit tests for the string, byte and number helpers in the parent module.

    use super::*;

    #[test]
    fn test_to_char_code() {
        assert_eq!(to_char_code('A'), 65);
        assert_eq!(to_char_code(' '), 32);
        assert_eq!(to_char_code('\n'), 10);
    }

    #[test]
    fn test_to_hex_string() {
        assert_eq!(to_hex_string(0), "00");
        assert_eq!(to_hex_string(255), "FF");
        assert_eq!(to_hex_string(16), "10");
        assert_eq!(to_hex_string(9), "09");
    }

    #[test]
    fn test_char_from_hex_code() {
        assert_eq!(char_from_hex_code("20"), ' ');
        assert_eq!(char_from_hex_code("41"), 'A');
        assert_eq!(char_from_hex_code("42"), 'B');
    }

    #[test]
    fn test_copy_string_into_buffer() {
        let mut buf = vec![b' '; 10];
        let written = copy_string_into_buffer("hello", &mut buf, 2);
        assert_eq!(written, 5);
        // The buffer keeps its length of 10: two untouched spaces before the
        // payload and three after it.
        assert_eq!(&buf, b"  hello   ");
    }

    #[test]
    fn test_number_to_string_integers() {
        assert_eq!(number_to_string(21.0), "21");
        assert_eq!(number_to_string(-43.0), "-43");
        assert_eq!(number_to_string(0.0), "0");
    }

    #[test]
    fn test_typed_array_for() {
        assert_eq!(typed_array_for("ABC"), vec![65, 66, 67]);
        // Three spaces in, three 0x20 bytes out.
        assert_eq!(typed_array_for("   "), vec![32, 32, 32]);
    }

    #[test]
    fn test_has_utf16_bom() {
        assert!(has_utf16_bom(&[0xFE, 0xFF, 0x00, 0x41]));
        assert!(has_utf16_bom(&[0xFF, 0xFE, 0x41, 0x00]));
        assert!(!has_utf16_bom(&[0x41, 0x42]));
        assert!(!has_utf16_bom(&[0xFE]));
    }

    #[test]
    fn test_utf16_decode_be() {
        let bytes = vec![0xFE, 0xFF, 0x00, 0x45, 0x00, 0x67, 0x00, 0x67, 0x00, 0x20];
        assert_eq!(utf16_decode(&bytes), "Egg ");
    }

    #[test]
    fn test_utf16_decode_le() {
        let bytes = vec![0xFF, 0xFE, 0x45, 0x00, 0x67, 0x00, 0x67, 0x00, 0x20, 0x00];
        assert_eq!(utf16_decode(&bytes), "Egg ");
    }

    #[test]
    fn test_utf16_encode() {
        let encoded = utf16_encode("");
        assert_eq!(encoded, vec![0xFEFF]);
        let encoded = utf16_encode("A");
        assert_eq!(encoded, vec![0xFEFF, 0x0041]);
    }

    #[test]
    fn test_pdf_doc_encoding_decode_ascii() {
        let bytes = vec![0x61, 0x45, 0x62, 0x73];
        assert_eq!(pdf_doc_encoding_decode(&bytes), "aEbs");
    }
}