#![doc = include_str!("../README.md")]
/// Number of payload bits carried by one consonant-vowel pair
/// (a 4-bit consonant index followed by a 2-bit vowel index).
pub const BITS_PER_PAIR: usize = 6;
/// Number of consonants in an alphabet; a consonant index selects one of these.
pub const NUM_CONSONANTS: usize = 16;
/// Number of vowels in an alphabet; a vowel index selects one of these.
pub const NUM_VOWELS: usize = 4;
/// Consonants of the default alphabet, listed in index order.
pub const DEFAULT_CONSONANTS: &[u8; NUM_CONSONANTS] = b"bcdfghjklmnprstv";
/// Vowels of the default alphabet, listed in index order.
pub const DEFAULT_VOWELS: &[u8; NUM_VOWELS] = b"aiou";
/// Number of pairs per group in the default configuration.
pub const DEFAULT_GROUP_SIZE: usize = 3;
/// Character written between groups in the default configuration.
pub const DEFAULT_SEPARATOR: char = '-';
/// Largest group size `ConfigBuilder::build` accepts when `Padding::GroupEnd` is selected.
pub const MAX_GROUP_SIZE: usize = 7;
// ASCII letters (either case) that may be used as alphabet consonants.
const VALID_CONSONANTS: &[u8] = b"bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ";
// ASCII letters (either case) that may be used as alphabet vowels.
const VALID_VOWELS: &[u8] = b"aeiouAEIOU";
/// Reasons a candidate alphabet is rejected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AlphabetError {
    /// A consonant slot holds a byte that is not an allowed consonant: `(position, byte)`.
    InvalidConsonant(usize, u8),
    /// A vowel slot holds a byte that is not an allowed vowel: `(position, byte)`.
    InvalidVowel(usize, u8),
    /// The given byte repeats a letter that was already used (compared case-insensitively).
    DuplicateLetter(u8),
}

impl std::fmt::Display for AlphabetError {
    /// Writes the variant name together with its labelled payload.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use AlphabetError::{DuplicateLetter, InvalidConsonant, InvalidVowel};

        match *self {
            InvalidConsonant(i, v) => {
                write!(f, "AlphabetError::InvalidConsonant(pos = {i}, hex = {v:#04x})")
            }
            InvalidVowel(i, v) => {
                write!(f, "AlphabetError::InvalidVowel(pos = {i}, hex = {v:#04x})")
            }
            DuplicateLetter(letter) => {
                write!(f, "AlphabetError::DuplicateLetter(char = {:?})", char::from(letter))
            }
        }
    }
}

impl std::error::Error for AlphabetError {}
/// The letters used to spell pairs: `NUM_CONSONANTS` consonants and `NUM_VOWELS` vowels.
///
/// A letter's position in its array is the value it encodes. The fields are
/// private; `Alphabet::new` is the checked way to build a custom alphabet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alphabet {
// Consonant letters, indexed by the 4-bit value they stand for.
consonants: [u8; NUM_CONSONANTS],
// Vowel letters, indexed by the 2-bit value they stand for.
vowels: [u8; NUM_VOWELS],
}
/// Alphabet made of `DEFAULT_CONSONANTS` and `DEFAULT_VOWELS`.
pub const DEFAULT_ALPHABET: Alphabet = Alphabet { consonants: *DEFAULT_CONSONANTS, vowels: *DEFAULT_VOWELS };
/// Reports whether `needle` occurs anywhere in `haystack`.
///
/// Written with a slice pattern rather than iterator adaptors so that it
/// stays usable in `const` contexts.
const fn slice_contains(haystack: &[u8], needle: u8) -> bool {
    let mut remaining = haystack;
    // Peel one element off the front per step until nothing is left.
    while let [head, tail @ ..] = remaining {
        if *head == needle {
            return true;
        }
        remaining = tail;
    }
    false
}
impl Alphabet {
    /// Builds an alphabet from explicit consonant and vowel tables.
    ///
    /// # Errors
    ///
    /// Returns the first problem found while scanning the consonants and then
    /// the vowels, in order: a byte that is not an allowed letter for its
    /// table, or a letter that was already used (ignoring ASCII case).
    pub const fn new(consonants: [u8; NUM_CONSONANTS], vowels: [u8; NUM_VOWELS]) -> Result<Self, AlphabetError> {
        let candidate = Self { consonants, vowels };
        // `?` is not available in a `const fn`, so the error is forwarded by hand.
        if let Err(problem) = candidate.validate() {
            return Err(problem);
        }
        Ok(candidate)
    }

    /// Maps an ASCII letter to its slot (0..26) in the "already used" table.
    const fn letter_slot(byte: u8) -> usize {
        (byte.to_ascii_lowercase() - b'a') as usize
    }

    /// Checks every consonant, then every vowel, stopping at the first error.
    const fn validate(&self) -> Result<(), AlphabetError> {
        // One flag per letter of the Latin alphabet, shared by both tables so
        // that a letter cannot appear twice even across tables or cases.
        let mut used = [false; 26];

        let mut at = 0;
        while at < NUM_CONSONANTS {
            let byte = self.consonants[at];
            if !slice_contains(VALID_CONSONANTS, byte) {
                return Err(AlphabetError::InvalidConsonant(at, byte));
            }
            // Safe to subtract b'a': membership above guarantees an ASCII letter.
            let slot = Self::letter_slot(byte);
            if used[slot] {
                return Err(AlphabetError::DuplicateLetter(byte));
            }
            used[slot] = true;
            at += 1;
        }

        let mut at = 0;
        while at < NUM_VOWELS {
            let byte = self.vowels[at];
            if !slice_contains(VALID_VOWELS, byte) {
                return Err(AlphabetError::InvalidVowel(at, byte));
            }
            let slot = Self::letter_slot(byte);
            if used[slot] {
                return Err(AlphabetError::DuplicateLetter(byte));
            }
            used[slot] = true;
            at += 1;
        }

        Ok(())
    }
}
/// Padding mode recorded in the metadata pair of an encoded string.
///
/// The discriminant is the value stored in the metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Padding {
    /// No padding pairs are appended after the data.
    PairEnd = 0,
    /// Padding pairs are appended so the final group is full.
    GroupEnd = 1,
}

impl Padding {
    /// Converts a stored mode value back into a `Padding`, or `None` if the
    /// value does not name a known mode.
    const fn from_bits(bits: u8) -> Option<Self> {
        if bits == Self::PairEnd as u8 {
            Some(Self::PairEnd)
        } else if bits == Self::GroupEnd as u8 {
            Some(Self::GroupEnd)
        } else {
            None
        }
    }
}
/// Reasons `ConfigBuilder::build` refuses a configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConfigError {
    /// The requested group size (carried in the variant) exceeds `MAX_GROUP_SIZE`.
    GroupSizeTooLarge(usize),
}

impl std::fmt::Display for ConfigError {
    /// Writes the variant name with the rejected size and the allowed maximum.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::GroupSizeTooLarge(n) => {
                write!(f, "ConfigError::GroupSizeTooLarge(group_size = {n}, max = {MAX_GROUP_SIZE})")
            }
        }
    }
}

impl std::error::Error for ConfigError {}
/// Encoder settings: which letters to use and how the output is grouped and padded.
///
/// Fields are private; values are obtained from `DEFAULT` or through `Config::builder`.
#[derive(Debug, Clone)]
pub struct Config {
// Letters used to spell each pair.
alphabet: Alphabet,
// Pairs per group; the encoder writes no separators when this is 0.
group_size: usize,
// Character written between groups.
separator: char,
// Whether the encoder appends padding pairs to fill the last group.
padding: Padding,
}
/// Default configuration: default alphabet, groups of `DEFAULT_GROUP_SIZE` pairs
/// separated by `DEFAULT_SEPARATOR`, and no group padding.
pub const DEFAULT: Config = Config {
alphabet: DEFAULT_ALPHABET,
group_size: DEFAULT_GROUP_SIZE,
separator: DEFAULT_SEPARATOR,
padding: Padding::PairEnd,
};
impl Config {
/// Starts a builder whose initial settings are those of `DEFAULT`.
pub fn builder() -> ConfigBuilder {
ConfigBuilder(DEFAULT)
}
}
#[derive(Debug)]
pub struct ConfigBuilder(Config);
impl ConfigBuilder {
pub fn alphabet(mut self, a: Alphabet) -> Self {
self.0.alphabet = a;
self
}
pub fn group_size(mut self, n: usize) -> Self {
self.0.group_size = n;
self
}
pub fn separator(mut self, c: char) -> Self {
self.0.separator = c;
self
}
pub fn padding(mut self, p: Padding) -> Self {
self.0.padding = p;
self
}
pub fn build(mut self) -> Result<Config, ConfigError> {
if self.0.padding == Padding::GroupEnd {
if self.0.group_size == 0 {
self.0.group_size = DEFAULT_GROUP_SIZE;
}
if self.0.group_size > MAX_GROUP_SIZE {
return Err(ConfigError::GroupSizeTooLarge(self.0.group_size));
}
}
Ok(self.0)
}
}
/// Reasons an encoded string is rejected by the decoder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// The checksum stored in the string does not match the one computed from its contents.
    ChecksumMismatch,
    /// An alphabet letter appeared in the wrong slot (a vowel where a consonant
    /// was expected, or the reverse); carries the byte offset of that letter.
    InvalidCharacter(usize),
    /// The string ends on an unpaired consonant, or holds fewer pairs than the fixed header needs.
    InvalidLength,
    /// The metadata pair has its reserved bit set or names an unknown padding mode.
    InvalidMetadata,
    /// The padding count in the metadata is inconsistent with the padding mode or the input length.
    InvalidPadding,
    /// The bits left over after the last whole byte are not valid filling.
    InvalidFilling,
}

impl std::fmt::Display for DecodeError {
    /// Writes the variant as `DecodeError::<Variant>`, with its labelled payload if any.
    ///
    /// Every variant carries the type prefix, so the output is uniform across
    /// variants and matches the style of the crate's other error types.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            Self::ChecksumMismatch => f.write_str("DecodeError::ChecksumMismatch"),
            Self::InvalidCharacter(pos) => write!(f, "DecodeError::InvalidCharacter(pos = {pos})"),
            Self::InvalidLength => f.write_str("DecodeError::InvalidLength"),
            Self::InvalidMetadata => f.write_str("DecodeError::InvalidMetadata"),
            Self::InvalidPadding => f.write_str("DecodeError::InvalidPadding"),
            Self::InvalidFilling => f.write_str("DecodeError::InvalidFilling"),
        }
    }
}

impl std::error::Error for DecodeError {}
/// Encodes `data` with the `DEFAULT` configuration.
///
/// Shorthand for `encode_with(data, &DEFAULT)`.
pub fn encode(data: &[u8]) -> String {
encode_with(data, &DEFAULT)
}
/// Decodes a string that was spelled with `DEFAULT_ALPHABET`.
///
/// Shorthand for `decode_with(encoded, &DEFAULT_ALPHABET)`; see that function
/// for the errors that can be returned.
pub fn decode(encoded: &str) -> Result<Vec<u8>, DecodeError> {
decode_with(encoded, &DEFAULT_ALPHABET)
}
/// Byte-indexed lookup table for the reflected CRC-32 polynomial `0xEDB88320`,
/// computed at compile time.
const CRC32_TABLE: [u32; 256] = {
    let mut entries = [0u32; 256];
    let mut index = 0usize;
    while index < entries.len() {
        let mut value = index as u32;
        let mut round = 0;
        // Eight shift steps, one per bit of the index byte.
        while round < 8 {
            let shifted = value >> 1;
            value = if value & 1 == 0 { shifted } else { shifted ^ 0xEDB8_8320 };
            round += 1;
        }
        entries[index] = value;
        index += 1;
    }
    entries
};

/// Computes a 12-bit checksum over a sequence of six-bit values.
///
/// The values (only their low six bits are used) are concatenated into a bit
/// stream, most significant bit first, and fed to a table-driven CRC-32 one
/// byte at a time. A trailing partial byte is zero-filled on the right. The
/// result is the low 12 bits of the finalized CRC.
fn crc32_l12(six_bit_values: &[u8]) -> u16 {
    /// Advances the CRC register by one input byte.
    fn feed(state: u32, octet: u8) -> u32 {
        CRC32_TABLE[((state ^ u32::from(octet)) & 0xFF) as usize] ^ (state >> 8)
    }

    let mut state = u32::MAX;
    // `window` holds the most recent bits; only its low `pending_bits` bits
    // are still waiting to be consumed.
    let mut window: usize = 0;
    let mut pending_bits: usize = 0;

    for &value in six_bit_values {
        window = (window << 6) | usize::from(value & 0x3F);
        pending_bits += 6;
        while pending_bits >= 8 {
            pending_bits -= 8;
            state = feed(state, ((window >> pending_bits) & 0xFF) as u8);
        }
    }

    if pending_bits > 0 {
        // Left-align the leftover bits inside one final byte.
        state = feed(state, ((window << (8 - pending_bits)) & 0xFF) as u8);
    }

    ((!state) as u16) & 0x0FFF
}
pub fn encode_with(data: &[u8], config: &Config) -> String {
let consonants = &config.alphabet.consonants;
let vowels = &config.alphabet.vowels;
let data_pair_count = (data.len() * 8).div_ceil(BITS_PER_PAIR);
let mut payload: Vec<u8> = Vec::with_capacity(1 + data_pair_count + MAX_GROUP_SIZE);
payload.push(0);
let mut bit_buf: usize = 0;
let mut buf_len: usize = 0;
for &byte in data {
bit_buf = (bit_buf << 8) | byte as usize;
buf_len += 8;
while buf_len >= BITS_PER_PAIR {
buf_len -= BITS_PER_PAIR;
payload.push(((bit_buf >> buf_len) & 0x3F) as u8);
}
}
if buf_len > 0 {
let fill = BITS_PER_PAIR - buf_len;
payload.push(((bit_buf << fill) & 0x3F) as u8);
}
let pad_count = match config.padding {
Padding::PairEnd => 0,
Padding::GroupEnd => {
let r = (3 + data_pair_count) % config.group_size;
if r == 0 { 0 } else { config.group_size - r }
}
};
let meta: u8 = ((config.padding as u8) << 4) | ((pad_count as u8) << 1);
payload[0] = meta;
if pad_count > 0 {
let base = payload.last().copied().unwrap_or(0);
for i in 1..=pad_count {
payload.push((base.wrapping_add(i as u8)) % 64);
}
}
let crc = crc32_l12(&payload);
let cksum_hi: u8 = ((crc >> 6) & 0x3F) as u8;
let cksum_lo: u8 = (crc & 0x3F) as u8;
let total_pairs = 2 + payload.len();
let sep_count = if config.group_size > 0 { (total_pairs - 1) / config.group_size } else { 0 };
let mut out = String::with_capacity(total_pairs * 2 + sep_count);
let mut push_pair = |pair_idx: usize, six_bits: u8| {
if config.group_size > 0 && pair_idx > 0 && pair_idx.is_multiple_of(config.group_size) {
out.push(config.separator);
}
out.push(consonants[(six_bits >> 2) as usize] as char);
out.push(vowels[(six_bits & 0x03) as usize] as char);
};
push_pair(0, cksum_hi);
push_pair(1, cksum_lo);
for (i, &v) in payload.iter().enumerate() {
push_pair(2 + i, v);
}
out
}
/// Decodes a consonant-vowel string spelled with `alphabet` back into bytes.
///
/// Characters that are not letters of `alphabet` (separators, whitespace,
/// non-ASCII text, letters of the other case, ...) are skipped. The remaining
/// letters must alternate consonant, vowel; each pair yields six bits. The
/// pairs are read as: two checksum pairs, one metadata pair, the data pairs,
/// then the padding pairs announced by the metadata.
///
/// # Errors
///
/// * `InvalidCharacter(pos)` - the alphabet letter at byte offset `pos` is a
///   vowel where a consonant was expected, or the reverse.
/// * `InvalidLength` - the input ends on an unpaired consonant or contains
///   fewer than three pairs.
/// * `ChecksumMismatch` - the stored checksum differs from the computed one.
/// * `InvalidMetadata` - the reserved metadata bit is set or the padding mode
///   is unknown.
/// * `InvalidPadding` - the padding count is non-zero without group padding,
///   or larger than the input allows.
/// * `InvalidFilling` - the bits after the last whole byte are not all zero,
///   or amount to an entire pair (which no encoder emits).
pub fn decode_with(encoded: &str, alphabet: &Alphabet) -> Result<Vec<u8>, DecodeError> {
    // Reverse lookup: ASCII code -> value encoded by that letter.
    let mut consonant_map: [Option<u8>; 128] = [None; 128];
    for (value, &letter) in alphabet.consonants.iter().enumerate() {
        consonant_map[usize::from(letter)] = Some(value as u8);
    }
    let mut vowel_map: [Option<u8>; 128] = [None; 128];
    for (value, &letter) in alphabet.vowels.iter().enumerate() {
        vowel_map[usize::from(letter)] = Some(value as u8);
    }

    let mut six_bits: Vec<u8> = Vec::with_capacity(encoded.len() / 2);
    // Value of a consonant that is still waiting for its vowel.
    let mut pending: Option<u8> = None;
    for (pos, ch) in encoded.char_indices() {
        let code = ch as usize;
        if code >= 128 {
            continue;
        }
        let (as_consonant, as_vowel) = (consonant_map[code], vowel_map[code]);
        if as_consonant.is_none() && as_vowel.is_none() {
            // Not part of the alphabet at all: ignored rather than rejected.
            continue;
        }
        match pending.take() {
            None => pending = Some(as_consonant.ok_or(DecodeError::InvalidCharacter(pos))?),
            Some(high) => {
                let low = as_vowel.ok_or(DecodeError::InvalidCharacter(pos))?;
                six_bits.push((high << 2) | low);
            }
        }
    }
    if pending.is_some() {
        return Err(DecodeError::InvalidLength);
    }

    // Two checksum pairs plus the metadata pair are always present.
    let num_pairs = six_bits.len();
    if num_pairs < 3 {
        return Err(DecodeError::InvalidLength);
    }
    let stored_crc = (u16::from(six_bits[0]) << 6) | u16::from(six_bits[1]);
    if stored_crc != crc32_l12(&six_bits[2..]) {
        return Err(DecodeError::ChecksumMismatch);
    }

    // Metadata layout: bits 4..=5 padding mode, bits 1..=3 padding count, bit 0 reserved.
    let meta = six_bits[2];
    if meta & 0x01 != 0 {
        return Err(DecodeError::InvalidMetadata);
    }
    let pad_count = usize::from((meta >> 1) & 0x07);
    let mode = Padding::from_bits((meta >> 4) & 0x03).ok_or(DecodeError::InvalidMetadata)?;
    let data_end = match mode {
        Padding::PairEnd if pad_count == 0 => num_pairs,
        Padding::PairEnd => return Err(DecodeError::InvalidPadding),
        Padding::GroupEnd if num_pairs >= 3 + pad_count => num_pairs - pad_count,
        Padding::GroupEnd => return Err(DecodeError::InvalidPadding),
    };

    let data_six_bits = &six_bits[3..data_end];
    let total_bits = data_six_bits.len() * BITS_PER_PAIR;
    let fill_bits = total_bits % 8;
    // A canonical encoding uses the fewest pairs that hold the data, so the
    // filling is always shorter than one pair. Accepting a whole filler pair
    // would give the same bytes a second valid spelling.
    if fill_bits >= BITS_PER_PAIR {
        return Err(DecodeError::InvalidFilling);
    }

    let mut bit_buf: usize = 0;
    let mut buf_len: usize = 0;
    let mut result: Vec<u8> = Vec::with_capacity(total_bits / 8);
    for &six in data_six_bits {
        bit_buf = (bit_buf << BITS_PER_PAIR) | usize::from(six);
        buf_len += BITS_PER_PAIR;
        while buf_len >= 8 {
            buf_len -= 8;
            result.push(((bit_buf >> buf_len) & 0xFF) as u8);
        }
    }
    // The low `fill_bits` bits of the buffer are the filling and must be zero.
    if fill_bits > 0 && (bit_buf & ((1usize << fill_bits) - 1)) != 0 {
        return Err(DecodeError::InvalidFilling);
    }
    Ok(result)
}
// Unit tests: known-answer vectors, round trips, and rejection of malformed
// or corrupted input. Property tests live in the nested `proptests` module
// and are only compiled when the `__proptest` feature is enabled.
#[cfg(test)]
mod tests {
use super::*;
// Default configuration with separators switched off (group size 0).
fn no_group() -> Config {
Config::builder().group_size(0).build().unwrap()
}
// (input bytes, expected encoding) pairs for the default configuration.
const VECTORS: &[(&[u8], &str)] = &[
(b"\xAB", "tojiba-nora"),
(b"\xCA\xFE", "bafaba-roputa"),
(b"\xCA\xFE\x42", "pasiba-roputi-bo"),
(b"\x00\x00\x00", "vakaba-bababa-ba"),
(b"\xFF\xFF\xFF", "kijoba-vuvuvu-vu"),
];
// Every vector must encode to the expected text and decode back to the input.
#[test]
fn encode_decode() {
for &(data, expected) in VECTORS {
assert_eq!(encode(data), expected);
assert_eq!(decode(expected).unwrap(), data);
}
}
// Known-answer test for a 32-byte input.
#[test]
fn realistic_key() {
const KEY: &[u8] = b"covo is consonant-vowel encoding";
const ENCODED: &str = "sifuba-jasovi-sojuro-binika-robilu-jusoti-rujuso-tiliju-mugapi-kimovi-sujiho-ralaji-hotilu-jusogi-nijumo-ka";
assert_eq!(encode(KEY), ENCODED);
assert_eq!(decode(ENCODED).unwrap(), KEY);
}
// An upper-case alphabet round-trips when the same alphabet is used to decode.
#[test]
fn custom_alphabet() {
let alpha = Alphabet::new(*b"BCDFGHJKLMNPRSTV", *b"AIOU").unwrap();
let config = Config::builder().alphabet(alpha.clone()).group_size(0).build().unwrap();
let enc = encode_with(&[0xFF, 0x00], &config);
assert_eq!(decode_with(&enc, &alpha).unwrap(), [0xFF, 0x00]);
}
// Group size 0 produces output without separators.
#[test]
fn no_grouping() {
let enc = encode_with(&[0xCA, 0xFE, 0x42], &no_group());
assert!(!enc.contains('-'));
assert_eq!(decode_with(&enc, &DEFAULT_ALPHABET).unwrap(), [0xCA, 0xFE, 0x42]);
}
// Characters outside the alphabet are skipped by the decoder.
#[test]
fn decode_ignores_non_alphabet_chars() {
let enc = encode(&[0xAB]);
let mangled = enc.replace('-', " --?? ");
assert_eq!(decode(&mangled).unwrap(), [0xAB]);
}
// A letter in the wrong slot is reported with its byte offset.
#[test]
fn decode_swapped_consonant_vowel() {
assert_eq!(decode("aababababa"), Err(DecodeError::InvalidCharacter(0)));
assert_eq!(decode("bbbabababa"), Err(DecodeError::InvalidCharacter(1)));
}
// A lone consonant is a length error; a vowel in a consonant slot is a character error.
#[test]
fn decode_odd_alphabet_chars() {
assert_eq!(decode("b"), Err(DecodeError::InvalidLength));
assert_eq!(decode("bai"), Err(DecodeError::InvalidCharacter(2)));
}
// Fewer than three pairs is a length error; exactly three reaches the checksum check.
#[test]
fn decode_too_short() {
assert_eq!(decode("baba"), Err(DecodeError::InvalidLength));
assert_eq!(decode("bababa"), Err(DecodeError::ChecksumMismatch));
}
// Overwriting the final pair must be caught by the checksum.
#[test]
fn crc_mismatch_detection() {
let enc = encode(&[0xCA, 0xFE, 0x42]);
let mut corrupted = enc.clone();
let len = corrupted.len();
corrupted.replace_range(len - 2..len, "vu");
assert_eq!(decode(&corrupted), Err(DecodeError::ChecksumMismatch));
}
// Overwriting the first checksum pair must be caught as well.
#[test]
fn crc_catches_checksum_corruption() {
let enc = encode(&[0xCA, 0xFE, 0x42]);
let mut corrupted = String::from("ba");
corrupted.push_str(&enc[2..]);
assert_eq!(decode(&corrupted), Err(DecodeError::ChecksumMismatch));
}
// Default configuration with group-end padding enabled.
fn group_end() -> Config {
Config::builder().padding(Padding::GroupEnd).build().unwrap()
}
// Known-answer tests for group-end padding follow.
#[test]
fn group_end_one_byte() {
let enc = encode_with(&[0xAB], &group_end());
assert_eq!(enc, "dujugo-norari");
assert_eq!(decode(&enc).unwrap(), [0xAB]);
}
#[test]
fn group_end_three_bytes() {
let enc = encode_with(&[0xCA, 0xFE, 0x42], &group_end());
assert_eq!(enc, "nudaha-roputi-bobuca");
assert_eq!(decode(&enc).unwrap(), [0xCA, 0xFE, 0x42]);
}
#[test]
fn group_end_all_zeros() {
let enc = encode_with(&[0x00, 0x00, 0x00], &group_end());
assert_eq!(enc, "lamiha-bababa-babibo");
assert_eq!(decode(&enc).unwrap(), [0x00, 0x00, 0x00]);
}
#[test]
fn group_end_all_ones() {
let enc = encode_with(&[0xFF, 0xFF, 0xFF], &group_end());
assert_eq!(enc, "coluha-vuvuvu-vubabi");
assert_eq!(decode(&enc).unwrap(), [0xFF, 0xFF, 0xFF]);
}
#[test]
fn group_end_32_byte_key() {
const KEY: &[u8] = b"covo is consonant-vowel encoding";
let enc = encode_with(KEY, &group_end());
assert_eq!(
enc,
"vikuha-jasovi-sojuro-binika-robilu-jusoti-rujuso-tiliju-mugapi-kimovi-sujiho-ralaji-hotilu-jusogi-nijumo-kakiko"
);
assert_eq!(decode(&enc).unwrap(), KEY);
}
// Overwriting the last (padding) pair must be caught by the checksum.
#[test]
fn group_end_crc_mismatch() {
let enc = encode_with(&[0xCA, 0xFE, 0x42], &group_end());
let mut corrupted = enc.clone();
let len = corrupted.len();
corrupted.replace_range(len - 2..len, "vu");
assert_eq!(decode(&corrupted), Err(DecodeError::ChecksumMismatch));
}
// `decode` needs no configuration: it handles both padding modes.
#[test]
fn decode_is_self_describing() {
let data = b"self-describing";
let enc = encode_with(data, &group_end());
assert_eq!(decode(&enc).unwrap(), data);
let enc2 = encode(data);
assert_eq!(decode(&enc2).unwrap(), data);
}
// Round trip for every input length from 1 to 40 bytes, in both padding modes.
#[test]
fn round_trip_various_lengths() {
for len in 1..=40 {
let data: Vec<u8> = (0..len).map(|i| i as u8).collect();
let enc = encode(&data);
assert_eq!(decode(&enc).unwrap(), data, "PairEnd round-trip failed for len={}", len);
let enc_g = encode_with(&data, &group_end());
assert_eq!(decode(&enc_g).unwrap(), data, "GroupEnd round-trip failed for len={}", len);
}
}
// Replaces the consonant of each pair in turn (with 'b', or 'c' if it already
// was 'b') and expects decoding to fail every time.
#[test]
fn single_bit_corruption_detected() {
let data = b"hello world";
let enc = encode(data);
let raw: String = enc.chars().filter(|&c| c != '-').collect();
assert!(raw.len().is_multiple_of(2));
for i in (0..raw.len()).step_by(2) {
let mut chars: Vec<u8> = raw.bytes().collect();
let orig = chars[i];
chars[i] = if orig == b'b' { b'c' } else { b'b' };
let corrupted = String::from_utf8(chars).unwrap();
if corrupted != raw {
assert!(decode(&corrupted).is_err(), "Corruption at pair {} not detected", i / 2);
}
}
}
// Property-based tests; compiled only with the `__proptest` feature.
#[cfg(feature = "__proptest")]
mod proptests {
use super::*;
use proptest::prelude::*;
proptest! {
// Arbitrary inputs of 1..=4096 bytes round-trip with the default configuration.
#[test]
fn round_trip_pair_end(data in proptest::collection::vec(any::<u8>(), 1..=4096)) {
let encoded = encode(&data);
let decoded = decode(&encoded).unwrap();
prop_assert_eq!(decoded, data);
}
// Arbitrary inputs round-trip with group padding for every allowed group size.
#[test]
fn round_trip_group_end(
data in proptest::collection::vec(any::<u8>(), 1..=4096),
group_size in 1..=MAX_GROUP_SIZE,
) {
let config = Config::builder()
.padding(Padding::GroupEnd)
.group_size(group_size)
.build()
.unwrap();
let encoded = encode_with(&data, &config);
let decoded = decode(&encoded).unwrap();
prop_assert_eq!(decoded, data);
}
// Decoding arbitrary text may fail but must not panic.
#[test]
fn decode_never_panics(s in "\\PC*") {
let _ = decode(&s);
}
// Changing the consonant of any single pair must make decoding fail.
#[test]
fn single_char_corruption_detected(data in proptest::collection::vec(any::<u8>(), 1..=64)) {
let encoded = encode(&data);
let raw: Vec<u8> = encoded.bytes().filter(|&b| b != b'-').collect();
for i in (0..raw.len()).step_by(2) {
let mut corrupted = raw.clone();
let orig = corrupted[i];
corrupted[i] = if orig == b'b' { b'c' } else { b'b' };
let s = String::from_utf8(corrupted).unwrap();
if s != String::from_utf8(raw.clone()).unwrap() {
prop_assert!(decode(&s).is_err(), "Corruption at pair {} not detected", i / 2);
}
}
}
// Changing the consonants of 2..=6 evenly spread pairs must make decoding fail.
#[test]
fn multi_char_corruption_detected(
data in proptest::collection::vec(any::<u8>(), 1..=256),
corrupt_count in 2..=6usize,
) {
let encoded = encode(&data);
let raw: Vec<u8> = encoded.bytes().filter(|&b| b != b'-').collect();
let pair_count = raw.len() / 2;
if pair_count < corrupt_count {
return Ok(());
}
let mut corrupted = raw.clone();
for k in 0..corrupt_count {
let pair_idx = k * pair_count / corrupt_count;
let i = pair_idx * 2;
let orig = corrupted[i];
corrupted[i] = if orig == b'b' { b'c' } else { b'b' };
}
let s = String::from_utf8(corrupted).unwrap();
if s != String::from_utf8(raw.clone()).unwrap() {
prop_assert!(decode(&s).is_err(), "Multi-corruption ({corrupt_count} chars) not detected");
}
}
}
}
}