#![warn(missing_docs)]
use std::borrow::Cow;
use std::cell::Cell;
use std::{fmt, ops, str};
mod base64;
mod hex;
pub use base64::FormatBase64;
pub use hex::{DEFAULT_HEX, FormatHex};
use std::fmt::Write;
/// Either a borrowed reference to a `T` or a `T` stored by value.
///
/// `DisplayBytes` stores its configuration through this type, so the same adapter can
/// be built from a borrowed configuration or from one it owns.
#[derive(Clone, Debug)]
enum MaybeOwned<'a, T: 'a> {
/// A value borrowed for the lifetime `'a`.
Borrowed(&'a T),
/// A value held inline.
Owned(T),
}
/// Gives shared access to the wrapped value no matter how it is held.
impl<'a, T: 'a> ops::Deref for MaybeOwned<'a, T> {
    type Target = T;

    /// Returns a reference to the borrowed or owned value.
    fn deref(&self) -> &T {
        match self {
            MaybeOwned::Borrowed(borrowed) => *borrowed,
            MaybeOwned::Owned(owned) => owned,
        }
    }
}
/// Wraps a reference without copying the referent.
impl<'a, T: 'a> From<&'a T> for MaybeOwned<'a, T> {
    /// Produces the `Borrowed` variant.
    fn from(borrowed: &'a T) -> Self {
        MaybeOwned::Borrowed(borrowed)
    }
}
/// Takes ownership of a value.
impl<'a, T: 'a> From<T> for MaybeOwned<'a, T> {
    /// Produces the `Owned` variant.
    fn from(value: T) -> Self {
        MaybeOwned::Owned(value)
    }
}
/// Preset that prints runs of ASCII bytes as text and formats every other byte with
/// `DEFAULT_HEX`.
///
/// Formatted byte runs are wrapped in the ` {{ ` and ` }} ` delimiters. Text runs in the
/// middle of the input must be at least 6 bytes long to be printed as text; runs at the
/// start or end are printed regardless of length. Control characters are not escaped.
pub const HEX_ASCII: DisplayBytesConfig<'static, FormatHex<'static>> = DisplayBytesConfig {
delim: [" {{ ", " }} "],
ascii_only: true,
min_str_len: 6,
print_terms: true,
invert_delims: false,
escape_ctl: false,
byte_format: DEFAULT_HEX,
};
/// Preset that prints runs of valid UTF-8 as text and formats every other byte with
/// `DEFAULT_HEX`.
///
/// Formatted byte runs are wrapped in the ` {{ ` and ` }} ` delimiters. Text runs in the
/// middle of the input must be at least 6 bytes long to be printed as text; runs at the
/// start or end are printed regardless of length. Control characters are not escaped.
/// This is the value returned by `Default` for the hexadecimal configuration.
pub const HEX_UTF8: DisplayBytesConfig<'static, FormatHex<'static>> = DisplayBytesConfig {
delim: [" {{ ", " }} "],
ascii_only: false,
min_str_len: 6,
print_terms: true,
invert_delims: false,
escape_ctl: false,
byte_format: DEFAULT_HEX
};
/// Preset that prints runs of ASCII bytes as text and formats every other byte with
/// `FormatBase64`.
///
/// Formatted byte runs are wrapped in the ` {{ ` and ` }} ` delimiters. Text runs in the
/// middle of the input must be at least 6 bytes long to be printed as text; runs at the
/// start or end are printed regardless of length. Control characters are not escaped.
pub const BASE64_ASCII: DisplayBytesConfig<'static, FormatBase64> = DisplayBytesConfig {
delim: [" {{ ", " }} "],
ascii_only: true,
min_str_len: 6,
print_terms: true,
invert_delims: false,
escape_ctl: false,
byte_format: FormatBase64,
};
/// Preset that prints runs of valid UTF-8 as text and formats every other byte with
/// `FormatBase64`.
///
/// Formatted byte runs are wrapped in the ` {{ ` and ` }} ` delimiters. Text runs in the
/// middle of the input must be at least 6 bytes long to be printed as text; runs at the
/// start or end are printed regardless of length. Control characters are not escaped.
/// This is the value returned by `Default` for the Base64 configuration.
pub const BASE64_UTF8: DisplayBytesConfig<'static, FormatBase64> = DisplayBytesConfig {
delim: [" {{ ", " }} "],
ascii_only: false,
min_str_len: 6,
print_terms: true,
invert_delims: false,
escape_ctl: false,
byte_format: FormatBase64,
};
/// Settings that control how a byte slice is rendered as a mix of readable text and
/// formatted byte runs.
///
/// `'d` is the lifetime of the delimiter strings and `F` is the formatter used for the
/// bytes that are not printed as text.
#[derive(Clone, Debug)]
pub struct DisplayBytesConfig<'d, F> {
/// Opening (`[0]`) and closing (`[1]`) delimiter. They wrap formatted byte runs, or
/// text runs instead when `invert_delims` is set.
delim: [&'d str; 2],
/// When `true`, only ASCII bytes count as text; otherwise any valid UTF-8 does.
ascii_only: bool,
/// Minimum length, in bytes, for a text run to be printed as text.
min_str_len: usize,
/// When `true`, a text run at the very start or very end of the input is printed as
/// text even if it is shorter than `min_str_len`.
print_terms: bool,
/// When `true`, the delimiters wrap text runs instead of formatted byte runs.
invert_delims: bool,
/// When `true`, ASCII control characters in text runs are written in their
/// `char::escape_default` form.
escape_ctl: bool,
/// Formatter used for the bytes that are not printed as text.
byte_format: F
}
/// The default hexadecimal configuration is the `HEX_UTF8` preset.
impl Default for DisplayBytesConfig<'static, FormatHex<'static>> {
    /// Returns a fresh copy of `HEX_UTF8`.
    fn default() -> Self {
        // A `const` is instantiated anew at every use site, so no explicit clone is needed.
        HEX_UTF8
    }
}
/// The default Base64 configuration is the `BASE64_UTF8` preset.
impl Default for DisplayBytesConfig<'static, FormatBase64> {
    /// Returns a fresh copy of `BASE64_UTF8`.
    fn default() -> Self {
        // A `const` is instantiated anew at every use site, so no explicit clone is needed.
        BASE64_UTF8
    }
}
/// Builder-style setters and mutable accessors, available for any byte format type.
impl<'d, F> DisplayBytesConfig<'d, F> {
    /// Replaces the byte formatter, keeping every other setting.
    ///
    /// The configuration's type changes to carry the new formatter type `F_`.
    pub fn byte_format<F_: ByteFormat>(self, format: F_) -> DisplayBytesConfig<'d, F_> {
        let DisplayBytesConfig {
            delim,
            ascii_only,
            min_str_len,
            print_terms,
            invert_delims,
            escape_ctl,
            ..
        } = self;

        DisplayBytesConfig {
            delim,
            ascii_only,
            min_str_len,
            print_terms,
            invert_delims,
            escape_ctl,
            byte_format: format,
        }
    }

    /// Returns a mutable reference to the byte formatter.
    pub fn byte_format_mut(&mut self) -> &mut F {
        let format = &mut self.byte_format;
        format
    }

    /// Replaces the opening and closing delimiters, keeping every other setting.
    ///
    /// The returned configuration is bound to the lifetime of the new delimiters.
    pub fn delimiters<'d_>(self, delimiters: [&'d_ str; 2]) -> DisplayBytesConfig<'d_, F> {
        let DisplayBytesConfig {
            ascii_only,
            min_str_len,
            print_terms,
            invert_delims,
            escape_ctl,
            byte_format,
            ..
        } = self;

        DisplayBytesConfig {
            delim: delimiters,
            ascii_only,
            min_str_len,
            print_terms,
            invert_delims,
            escape_ctl,
            byte_format,
        }
    }

    /// Returns a mutable reference to the `[opening, closing]` delimiter pair.
    pub fn delimiters_mut(&mut self) -> &mut [&'d str; 2] {
        let delimiters = &mut self.delim;
        delimiters
    }

    /// Sets whether only ASCII (`true`) or any valid UTF-8 (`false`) is printed as text.
    pub fn ascii_only(mut self, ascii_only: bool) -> Self {
        self.ascii_only = ascii_only;
        self
    }

    /// Sets the minimum length, in bytes, a text run needs to be printed as text.
    pub fn min_str_len(mut self, min_str_len: usize) -> Self {
        self.min_str_len = min_str_len;
        self
    }

    /// Sets whether text runs at the very start or end of the input are printed as text
    /// even when they are shorter than the minimum length.
    pub fn print_terminators(mut self, print_terminators: bool) -> Self {
        self.print_terms = print_terminators;
        self
    }

    /// Sets whether ASCII control characters in text runs are written escaped.
    pub fn escape_control(mut self, escape_ctl: bool) -> Self {
        self.escape_ctl = escape_ctl;
        self
    }

    /// Sets whether the delimiters wrap text runs (`true`) or formatted byte runs (`false`).
    pub fn invert_delimiters(mut self, invert_delimiters: bool) -> Self {
        self.invert_delims = invert_delimiters;
        self
    }
}
impl<'d, F: ByteFormat> DisplayBytesConfig<'d, F> {
fn valid_subseq<'b>(&self, bytes: &'b [u8]) -> Option<(&'b str, &'b [u8])> {
match self.try_convert(bytes) {
Ok(all_good) => Some((all_good, &[])),
Err(valid_end) if valid_end > 0 =>
Some((assume_utf8(&bytes[..valid_end]), &bytes[valid_end..])),
_ => None,
}
}
fn try_convert<'b>(&self, bytes: &'b [u8]) -> Result<&'b str, usize> {
if self.ascii_only {
if bytes.is_ascii() {
Ok(assume_utf8(bytes))
} else {
Err(bytes.iter().position(|b| !b.is_ascii()).unwrap_or(0))
}
} else {
str::from_utf8(bytes).map_err(|e| e.valid_up_to())
}
}
fn next_valid_idx(&self, bytes: &[u8]) -> Option<usize> {
if self.ascii_only {
bytes.iter().position(u8::is_ascii)
} else {
next_valid_idx(bytes)
}
}
fn next_valid_subseq<'b>(&self, bytes: &'b [u8]) -> Option<(&'b [u8], &'b str, &'b [u8])> {
let mut start = 0;
while let Some(next_valid) = self.next_valid_idx(&bytes[start..]) {
start += next_valid;
if let Some((valid, after)) = self.valid_subseq(&bytes[start..]) {
if valid.len() >= self.min_str_len || (after.is_empty() && self.print_terms) {
return Some((&bytes[..start], valid, after));
}
}
start += 1;
}
None
}
pub fn display_bytes_string<'b>(&self, bytes: &'b [u8]) -> Cow<'b, str> where 'd: 'b, F: 'b {
match self.try_convert(bytes) {
Ok(s) => s.into(),
Err(valid_end) => DisplayBytes {
bytes, config: self.into(), valid_end: Some(valid_end).into(),
}.to_string().into(),
}
}
pub fn ref_display_bytes<'b>(&'b self, bytes: &'b [u8]) -> DisplayBytes<'b, F> {
DisplayBytes {
bytes,
valid_end: Cell::new(None),
config: self.into(),
}
}
}
impl<'d, F: ByteFormat> DisplayBytesConfig<'d, F> {
pub fn display_bytes<'b>(self, bytes: &'b [u8]) -> DisplayBytes<'b, F> where 'd: 'b {
DisplayBytes {
bytes,
valid_end: Cell::new(None),
config: self.into(),
}
}
}
/// Formats a run of raw bytes for display.
pub trait ByteFormat {
    /// Writes a representation of `bytes` to the formatter `f`.
    fn fmt_bytes(&self, bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Result;

    /// Collects the output of `fmt_bytes` for `bytes` into a `String`.
    fn bytes_to_string(&self, bytes: &[u8]) -> String {
        /// Pairs a formatter with its input so the two can be driven through `Display`.
        struct Adapter<'a, T: ?Sized + 'a> {
            format: &'a T,
            bytes: &'a [u8],
        }

        impl<'a, T: ByteFormat + ?Sized + 'a> fmt::Display for Adapter<'a, T> {
            fn fmt(&self, out: &mut fmt::Formatter) -> fmt::Result {
                self.format.fmt_bytes(self.bytes, out)
            }
        }

        Adapter { format: self, bytes }.to_string()
    }
}
/// Returns the index of the first byte in `bytes` at which at least one complete UTF-8
/// character begins, or `None` if there is no such byte.
///
/// The returned index is always relative to the start of `bytes`. (The previous
/// implementation returned an index relative to the last three bytes when the match
/// was found in the tail of a slice of four or more bytes.)
fn next_valid_idx(bytes: &[u8]) -> Option<usize> {
    // A UTF-8 character is at most four bytes long, so a four-byte look-ahead is enough
    // to decide whether a character starts at a given position. Near the end of the
    // slice the look-ahead simply shrinks to whatever is left.
    const MAX_CHAR_LEN: usize = 4;

    (0..bytes.len()).position(|start| {
        let end = bytes.len().min(start + MAX_CHAR_LEN);
        starts_valid(&bytes[start..end])
    })
}

/// Returns `true` if `bytes` is entirely valid UTF-8 or begins with at least one
/// complete UTF-8 character.
fn starts_valid(bytes: &[u8]) -> bool {
    match str::from_utf8(bytes) {
        Ok(_) => true,
        Err(e) => e.valid_up_to() > 0,
    }
}
/// Converts `bytes` to a string using the `HEX_UTF8` configuration.
///
/// The result borrows from `bytes` when the whole slice is valid UTF-8; otherwise a new
/// string is built in which the non-text bytes are rendered by the configured formatter.
pub fn display_bytes_string(bytes: &[u8]) -> Cow<str> {
HEX_UTF8.display_bytes_string(bytes)
}
pub fn display_bytes<'b>(bytes: &'b [u8]) -> impl fmt::Display + 'b {
HEX_ASCII.display_bytes(bytes)
}
/// A `Display` adapter that renders a byte slice according to a `DisplayBytesConfig`.
#[derive(Debug)]
pub struct DisplayBytes<'b, F: 'b> {
/// The bytes to render.
bytes: &'b [u8],
/// Memoized length of the leading text run, filled in on first render (or supplied
/// up front when already known) so that later renders can skip recomputing it.
valid_end: Cell<Option<usize>>,
/// The configuration to render with, either borrowed or owned.
config: MaybeOwned<'b, DisplayBytesConfig<'b, F>>,
}
/// Text output helper.
impl<'b, F> DisplayBytes<'b, F> {
    /// Writes `text` to `f`, escaping ASCII control characters when the configuration
    /// asks for it.
    fn maybe_escape(&self, text: &str, f: &mut fmt::Formatter) -> fmt::Result {
        if !self.config.escape_ctl {
            return f.write_str(text);
        }

        for ch in text.chars() {
            if !ch.is_ascii_control() {
                f.write_char(ch)?;
                continue;
            }

            // Control characters are written in their `char::escape_default` form.
            for escaped in ch.escape_default() {
                f.write_char(escaped)?;
            }
        }

        Ok(())
    }
}
/// Renders the byte slice as alternating text runs and formatted byte runs.
///
/// By default the configured delimiters wrap the formatted byte runs; with
/// `invert_delims` they wrap the text runs instead. The length of the leading text run
/// is memoized in `valid_end`, and rendering the same adapter repeatedly yields the
/// same output.
impl<'b, F: ByteFormat + 'b> fmt::Display for DisplayBytes<'b, F> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let config = &*self.config;

        // Leading text run: reuse the memoized split point if present, else compute it.
        let leading = match self.valid_end.get() {
            Some(valid_end) => {
                let (valid, rest) = self.bytes.split_at(valid_end);
                Some((assume_utf8(valid), rest))
            }
            None => config.valid_subseq(self.bytes),
        };

        let accept_start = |s: &str| config.print_terms || s.len() >= config.min_str_len;

        let mut rem = match leading {
            // A memoized (or caller-supplied) `valid_end` of 0 means "no leading text
            // run". It must not be mistaken for an accepted empty run, or inverted
            // delimiters would be printed around nothing on every render after the
            // first. An empty run is only accepted when the whole input is empty.
            Some((valid, rest))
                if (!valid.is_empty() || rest.is_empty()) && accept_start(valid) =>
            {
                self.valid_end.set(Some(valid.len()));

                if config.invert_delims {
                    f.write_str(config.delim[0])?;
                }
                self.maybe_escape(valid, f)?;
                if config.invert_delims {
                    f.write_str(config.delim[1])?;
                }

                rest
            }
            // No usable leading run: remember that and treat everything as unscanned.
            _ => {
                self.valid_end.set(Some(0));
                self.bytes
            }
        };

        // Alternate between a formatted byte run and the text run that follows it.
        while let Some((before, valid, after)) = config.next_valid_subseq(rem) {
            if !config.invert_delims {
                f.write_str(config.delim[0])?;
                config.byte_format.fmt_bytes(before, f)?;
                f.write_str(config.delim[1])?;
                self.maybe_escape(valid, f)?;
            } else {
                config.byte_format.fmt_bytes(before, f)?;
                f.write_str(config.delim[0])?;
                self.maybe_escape(valid, f)?;
                f.write_str(config.delim[1])?;
            }

            rem = after;
        }

        // Whatever is left contains no further printable run; format it as bytes.
        if !rem.is_empty() {
            if !config.invert_delims {
                f.write_str(config.delim[0])?;
            }
            config.byte_format.fmt_bytes(rem, f)?;
            if !config.invert_delims {
                f.write_str(config.delim[1])?;
            }
        }

        Ok(())
    }
}
/// Reinterprets `bytes` as a `&str` without copying.
///
/// Callers must only pass bytes they have already established to be valid UTF-8.
///
/// # Panics
///
/// In builds with debug assertions enabled the bytes are re-validated, and invalid
/// input panics with the UTF-8 error and a lossy rendering of the bytes.
fn assume_utf8(bytes: &[u8]) -> &str {
    // `debug_assertions` is the cfg the compiler actually sets for debug builds; a bare
    // `debug` cfg is never set, which left this check permanently disabled.
    if cfg!(debug_assertions) {
        str::from_utf8(bytes).unwrap_or_else(|e|
            panic!("{}; lossy conversion: {}", e, String::from_utf8_lossy(bytes))
        )
    } else {
        // SAFETY: every caller passes a slice that was just validated as ASCII or as
        // UTF-8 (or a prefix cut at a boundary reported by that validation).
        unsafe { str::from_utf8_unchecked(bytes) }
    }
}
// Fully valid input is returned untouched; invalid bytes are rendered as delimited hex.
#[test]
fn basic_test() {
    let cases: [(&[u8], &str); 4] = [
        (b"Hello, world!", "Hello, world!"),
        (b"Hello,\xAB\xCD\xEF", "Hello, {{ AB CD EF }} "),
        (b"\xF0o\xBAr", " {{ F0 6F BA }} r"),
        (
            b"\xF0o\xBAr foobar\xAB\xCD\xEF",
            " {{ F0 6F BA }} r foobar {{ AB CD EF }} ",
        ),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(HEX_UTF8.display_bytes_string(input), expected);
    }
}
// Rendering the same adapter twice must give the same output both times.
#[test]
fn test_memoization() {
    let expected = "Hello, {{ AB CD EF }} ";
    let adapter = HEX_UTF8.display_bytes(b"Hello,\xAB\xCD\xEF");

    for _ in 0..2 {
        assert_eq!(adapter.to_string(), expected);
    }
}
// Short runs at the start and end are printed as text only while `print_terms` is on.
#[test]
fn test_print_terminators() {
    let input = b"ab\xCD \xEFgh";

    let with_terminators = HEX_UTF8.display_bytes(input);
    assert_eq!(with_terminators.to_string(), "ab {{ CD 20 EF }} gh");

    let without_terminators = HEX_UTF8
        .clone()
        .print_terminators(false)
        .display_bytes(input);
    assert_eq!(without_terminators.to_string(), " {{ 61 62 CD 20 EF 67 68 }} ");
}
// With inverted delimiters the text run is wrapped instead of the byte runs.
#[test]
fn test_invert_delims() {
    let input = b"\x80\x90Hello, world!\xAB\xCD";

    let inverted = HEX_UTF8
        .clone()
        .invert_delimiters(true)
        .display_bytes(input);
    assert_eq!(inverted.to_string(), "80 90 {{ Hello, world! }} AB CD");

    let regular = HEX_UTF8.display_bytes(input);
    assert_eq!(regular.to_string(), " {{ 80 90 }} Hello, world! {{ AB CD }} ");
}