/// Decodes the raw bytes of a PDF text string into a Rust `String`.
///
/// The bytes are wrapped in a literal `lopdf` string object and handed to
/// `lopdf::decode_text_string`. If that call reports an error, the bytes are
/// interpreted as UTF-8 instead, with invalid sequences replaced by U+FFFD,
/// so this function always produces a string.
pub(crate) fn decode_pdf_text_bytes(bytes: &[u8]) -> String {
    let literal = lopdf::Object::String(bytes.to_vec(), lopdf::StringFormat::Literal);
    match lopdf::decode_text_string(&literal) {
        Ok(decoded) => decoded,
        // Best-effort fallback: never fail, degrade to lossy UTF-8.
        Err(_) => String::from_utf8_lossy(bytes).into_owned(),
    }
}
/// Converts raw name bytes into a `String` without ever failing.
///
/// Valid UTF-8 input is returned unchanged. Anything else is decoded one
/// byte at a time, each byte becoming the Unicode code point with the same
/// numeric value (i.e. a Latin-1 interpretation).
pub(crate) fn decode_name_bytes(bytes: &[u8]) -> String {
    if let Ok(text) = std::str::from_utf8(bytes) {
        return text.to_owned();
    }
    // Not UTF-8: fall back to a byte-to-code-point mapping.
    bytes.iter().copied().map(char::from).collect()
}
/// Encodes `text` as single-byte WinAnsi, one output byte per input character.
///
/// Returns `None` as soon as a character is found that `winansi_byte` cannot
/// map; otherwise returns the encoded bytes in input order.
pub(crate) fn encode_winansi(text: &str) -> Option<Vec<u8>> {
    // Every character yields exactly one byte, so the UTF-8 length is an
    // upper bound on the output size.
    let mut encoded = Vec::with_capacity(text.len());
    text.chars()
        .try_for_each(|ch| winansi_byte(ch).map(|byte| encoded.push(byte)))?;
    Some(encoded)
}
/// Maps a single character to its WinAnsi byte, or `None` if it has none.
///
/// Code points U+0000..=U+007F and U+00A0..=U+00FF map to the byte with the
/// same value. The bytes in 0x80..=0x9F are reached only through the explicit
/// table below; 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry, and the code
/// points U+0080..=U+009F themselves are not mappable.
fn winansi_byte(ch: char) -> Option<u8> {
    /// Characters assigned to the 0x80..=0x9F byte range.
    const HIGH_BLOCK: &[(char, u8)] = &[
        ('\u{20AC}', 0x80),
        ('\u{201A}', 0x82),
        ('\u{0192}', 0x83),
        ('\u{201E}', 0x84),
        ('\u{2026}', 0x85),
        ('\u{2020}', 0x86),
        ('\u{2021}', 0x87),
        ('\u{02C6}', 0x88),
        ('\u{2030}', 0x89),
        ('\u{0160}', 0x8A),
        ('\u{2039}', 0x8B),
        ('\u{0152}', 0x8C),
        ('\u{017D}', 0x8E),
        ('\u{2018}', 0x91),
        ('\u{2019}', 0x92),
        ('\u{201C}', 0x93),
        ('\u{201D}', 0x94),
        ('\u{2022}', 0x95),
        ('\u{2013}', 0x96),
        ('\u{2014}', 0x97),
        ('\u{02DC}', 0x98),
        ('\u{2122}', 0x99),
        ('\u{0161}', 0x9A),
        ('\u{203A}', 0x9B),
        ('\u{0153}', 0x9C),
        ('\u{017E}', 0x9E),
        ('\u{0178}', 0x9F),
    ];

    match ch {
        // Both ranges fit in one byte, so the cast cannot truncate.
        '\u{00}'..='\u{7F}' | '\u{A0}'..='\u{FF}' => Some(ch as u8),
        _ => HIGH_BLOCK
            .iter()
            .find(|&&(mapped, _)| mapped == ch)
            .map(|&(_, byte)| byte),
    }
}
/// Backslash-escapes the bytes that are special inside a parenthesised
/// string literal.
///
/// `(`, `)` and `\` are prefixed with a backslash; carriage return and line
/// feed are written as the two-byte sequences `\r` and `\n`. All other
/// bytes, including those above 0x7F, are copied through unchanged.
pub(crate) fn escape_string_bytes(bytes: &[u8]) -> Vec<u8> {
    let mut escaped = Vec::with_capacity(bytes.len() + 4);
    for &byte in bytes {
        // Pick the byte that follows the backslash, or copy verbatim.
        let after_backslash = match byte {
            b'\r' => b'r',
            b'\n' => b'n',
            b'(' | b')' | b'\\' => byte,
            _ => {
                escaped.push(byte);
                continue;
            }
        };
        escaped.extend_from_slice(&[b'\\', after_backslash]);
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain ASCII text encodes to the identical bytes.
    #[test]
    fn ascii_passthrough() {
        assert_eq!(encode_winansi("Hello"), Some(b"Hello".to_vec()));
    }

    /// Latin-1 letters keep their code point value; the euro sign maps to 0x80.
    #[test]
    fn latin1_and_euro() {
        let expected = b"Caf\xE9 Z\xFCrich \x80 \xF1".to_vec();
        assert_eq!(encode_winansi("Café Zürich € ñ"), Some(expected));
    }

    /// Characters that live in the 0x80..=0x9F byte range are table-mapped.
    #[test]
    fn windows_1252_block() {
        let cases = [("\u{2019}", 0x92_u8), ("\u{2013}", 0x96), ("\u{0153}", 0x9C)];
        for &(input, byte) in cases.iter() {
            assert_eq!(encode_winansi(input), Some(vec![byte]));
        }
    }

    /// A single unmappable character makes the whole encoding fail.
    #[test]
    fn unmappable_returns_none() {
        for input in ["Привет", "日本語", "a\u{0101}b"].iter() {
            assert!(encode_winansi(input).is_none());
        }
    }

    /// Delimiters and backslashes are escaped; high bytes pass through.
    #[test]
    fn escape_metachars_after_encoding() {
        let escaped = escape_string_bytes(b"a(b)c\\");
        assert_eq!(escaped, b"a\\(b\\)c\\\\".to_vec());

        let high_byte = escape_string_bytes(&[0xE9]);
        assert_eq!(high_byte, vec![0xE9_u8]);
    }
}