base256u
Just a simple Rust crate to map between bytes and Unicode glyphs. It includes
reference printable-ascii-preserved Unicode (papu) encoder and decoder
functions. The papu encoding leaves text that already consists only of
printable ASCII characters unchanged, and maps every other byte to a
single-codepoint, non-combining printable glyph, skipping odd characters
such as NBSP (no-break space) and SHY (soft hyphen).
You can find the documentation in the usual place.
Using this crate is as simple as writing `use base256u::{Decode, Encode};`
and then calling the `base256u()` method, or `base256u_papu()` to get the
default papu encoding.
use crate::{Decode, Encode};
/// Checks the papu encoder against the complete byte range and against a
/// sentence made only of printable ASCII.
#[test]
fn encoding() {
    // Encoding every byte value in ascending order must reproduce the glyph
    // table below: one glyph per byte, 256 glyphs in total.
    //
    // A few glyphs are spelled as `\u{…}` escapes on purpose. Text tooling
    // tends to rewrite them (U+0103..U+0105 get "repaired" as if they were
    // mojibake, and the U+0132/U+0133 ligatures get expanded to two ASCII
    // letters), which silently breaks this table.
    let encoded: String = (u8::MIN..=u8::MAX).base256u_papu().collect();
    assert_eq!(encoded, "°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~§ĀāĂ\u{103}\u{104}\u{105}ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİı\u{132}\u{133}ĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ");

    // Printable ASCII must come out of the encoder unchanged.
    let encoded: String = b"Pack my box with five dozen liquor jugs."
        .iter()
        .copied()
        .base256u_papu()
        .collect();
    assert_eq!(encoded, "Pack my box with five dozen liquor jugs.");
}
/// Checks the papu decoder against the complete glyph table, two glyphs that
/// are outside the table, and a sentence made only of printable ASCII.
#[test]
fn decoding() {
    // The input is the 256-glyph table in byte order, followed by two glyphs
    // that are expected to decode to `None`: `Ɲ` and U+0149.
    //
    // A few glyphs are spelled as `\u{…}` escapes on purpose. Text tooling
    // tends to rewrite them (U+0103..U+0105 get "repaired" as if they were
    // mojibake, while U+0132, U+0133 and U+0149 get expanded into two
    // characters each), which changes the number of decoded items.
    let decoded: Vec<Option<u8>> = "°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~§ĀāĂ\u{103}\u{104}\u{105}ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİı\u{132}\u{133}ĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƝ\u{149}".chars().base256u_papu().collect();

    // Expected result: every byte value once, in order, then one `None` for
    // each of the two trailing glyphs.
    let mut matcher: Vec<Option<u8>> = (u8::MIN..=u8::MAX).map(Some).collect();
    matcher.push(None);
    matcher.push(None);
    assert_eq!(decoded, matcher);

    // Printable ASCII must decode back to the very same bytes.
    let decoded: Vec<u8> = "Pack my box with five dozen liquor jugs."
        .chars()
        .base256u_papu()
        .map(|byte| byte.expect("printable ASCII must always decode to a byte"))
        .collect();
    assert_eq!(
        String::from_utf8(decoded).unwrap(),
        "Pack my box with five dozen liquor jugs."
    );
}