base256u 1.0.0

Simple mapping between bytes and Unicode codepoints
Documentation

base256u

Just a simple Rust crate to map between bytes and Unicode glyphs. It includes reference printable-ASCII-preserved Unicode (papu) encoder and decoder functions. The papu encoding preserves all text that already consists only of printable ASCII characters; every other byte maps to a single-codepoint, non-combining, printable glyph, skipping odd things like NBSP and SHY.

You can find the documentation in the usual place.

Using this crate is as simple as `use base256u::{Decode, Encode};` and then calling the `base256u()` method, or `base256u_papu()` to get the default papu encoding.

use crate::{Decode, Encode};

/// Checks the papu encoder against the complete 256-byte alphabet and
/// confirms that a printable-ASCII sentence passes through unchanged.
#[test]
fn encoding() {
    // Expected glyph for every byte value, in byte order. Code points that
    // are easily damaged by re-encoding or compatibility normalisation
    // (U+0103..=U+0105, U+0132, U+0133) are written as escapes so that the
    // literal always contains exactly one `char` per byte.
    let alphabet = concat!(
        // 0x00..=0x1F
        "°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ",
        // 0x20..=0x7E: printable ASCII is expected to map to itself.
        " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
        // 0x7F
        "§",
        // 0x80..=0x9F
        "ĀāĂ\u{0103}\u{0104}\u{0105}ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğ",
        // 0xA0..=0xBF
        "ĠġĢģĤĥĦħĨĩĪīĬĭĮįİı\u{0132}\u{0133}ĴĵĶķĸĹĺĻļĽľĿ",
        // 0xC0..=0xDF: `¤` stands in where U+0149 would otherwise fall (0xC9).
        "ŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞş",
        // 0xE0..=0xFF
        "ŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ",
    );

    let encoded: String = (u8::MIN..=u8::MAX).base256u_papu().collect();
    assert_eq!(encoded, alphabet);

    // Text that is already printable ASCII must round-trip byte-for-byte.
    let pangram = "Pack my box with five dozen liquor jugs.";
    let encoded: String = pangram.as_bytes().iter().copied().base256u_papu().collect();
    assert_eq!(encoded, pangram);
}

/// Checks the papu decoder against the complete 256-glyph alphabet, against
/// two glyphs that are not part of the alphabet, and against a
/// printable-ASCII sentence.
#[test]
fn decoding() {
    // The 256 alphabet glyphs in byte order, followed by two code points the
    // test expects to be rejected. Code points that are easily damaged by
    // re-encoding or compatibility normalisation are written as escapes so
    // the input always has exactly 258 `char`s.
    let input = concat!(
        // 0x00..=0x1F
        "°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ",
        // 0x20..=0x7E: printable ASCII.
        " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
        // 0x7F
        "§",
        // 0x80..=0x9F
        "ĀāĂ\u{0103}\u{0104}\u{0105}ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğ",
        // 0xA0..=0xBF
        "ĠġĢģĤĥĦħĨĩĪīĬĭĮįİı\u{0132}\u{0133}ĴĵĶķĸĹĺĻļĽľĿ",
        // 0xC0..=0xDF: `¤` stands in where U+0149 would otherwise fall (0xC9).
        "ŀŁłŃńŅņŇň¤ŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞş",
        // 0xE0..=0xFF
        "ŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ",
        // Not in the alphabet: U+019D and U+0149 must each decode to `None`.
        "Ɲ\u{0149}",
    );

    let decoded: Vec<Option<u8>> = input.chars().base256u_papu().collect();

    // Every byte value once, in order, then one `None` per rejected glyph.
    let mut matcher: Vec<Option<u8>> = (u8::MIN..=u8::MAX).map(Some).collect();
    matcher.push(None);
    matcher.push(None);
    assert_eq!(decoded, matcher);

    // Printable ASCII must decode to the same bytes it started as.
    let pangram = "Pack my box with five dozen liquor jugs.";
    let decoded: Vec<u8> = pangram
        .chars()
        .base256u_papu()
        .collect::<Option<Vec<u8>>>()
        .expect("every printable ASCII character must decode to a byte");
    assert_eq!(String::from_utf8(decoded).unwrap(), pangram);
}