/// Code table for the Latin preset (`TransposeConfig::LATIN`).
///
/// Entry `i` is the code assigned to code point `0x60 + i`; a value of `0`
/// means the character has no code, which makes `transpose_alphabet` leave the
/// whole word untransposed. The table covers `0x60..=0x17f` (288 entries) and
/// is laid out 16 entries per row, each row labelled with its first code point.
pub static TRANSPOSE_MAP_LATIN: &[u8] = &[
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0x60
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 0, 0, 0, 0, 0, // 0x70
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0
    27, 28, 29, 0, 0, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 0, // 0xe0
    0, 0, 0, 39, 0, 0, 40, 0, 41, 0, 42, 0, 43, 0, 0, 0, // 0xf0
    0, 0, 0, 44, 0, 45, 0, 46, 0, 0, 0, 0, 0, 47, 0, 0, // 0x100
    0, 48, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, // 0x110
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x120
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x130
    0, 0, 50, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x140
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 0, 0, 0, 0, // 0x150
    0, 53, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x160
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 56, 0, 57, 0, // 0x170
];
/// Code table for the Persian preset (`TransposeConfig::PERSIAN`).
///
/// Entry `i` is the code assigned to code point `0x620 + i`; a value of `0`
/// means the character has no code, which makes `transpose_alphabet` leave the
/// whole word untransposed. The table covers `0x620..=0x6cc` (173 entries) and
/// is laid out 16 entries per row, each row labelled with its first code point.
pub static TRANSPOSE_MAP_FA: &[u8] = &[
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0x620
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 0, 0, 0, 0, 0, // 0x630
    0, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, // 0x640
    42, 43, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x650
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x660
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, // 0x670
    0, 0, 0, 0, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x680
    0, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, // 0x690
    0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 49, // 0x6a0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6b0
    50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, // 0x6c0
];
/// Frequent two-character combinations for the Cyrillic preset
/// (`TransposeConfig::CYRILLIC`).
///
/// Each entry is a key of the form `first_code | (second_code << 8)`, matching
/// how `transpose_alphabet` combines two adjacent codes. Entries are in
/// ascending order (the scan in `transpose_alphabet` stops at the first entry
/// greater than the key) and the list ends with the `0x7fff` terminator.
pub static PAIRS_RU: &[u16] = &[
    0x010c, 0x010e, 0x0113, 0x0301, 0x030f, 0x060e, 0x0611, 0x0903,
    0x0b01, 0x0b0f, 0x0c01, 0x0c09, 0x0e01, 0x0e06, 0x0e09, 0x0e0e,
    0x0e0f, 0x0e1c, 0x0f03, 0x0f11, 0x0f12, 0x100f, 0x1011, 0x1101,
    0x1106, 0x1109, 0x110f, 0x1213, 0x1220,
    // Terminator.
    0x7fff,
];
/// Settings that tell `transpose_alphabet` which characters can be re-encoded
/// as small codes and how those codes are assigned.
#[derive(Debug, Clone)]
pub struct TransposeConfig {
    /// Lowest code point (inclusive) that may be transposed. A value of `0`
    /// marks the configuration as inactive (see `is_active`).
    pub transpose_min: u32,
    /// Highest code point (inclusive) that may be transposed.
    pub transpose_max: u32,
    /// Optional lookup table indexed by `code_point - transpose_min`; an entry
    /// of `0` means "no code". Without a table the code is simply
    /// `code_point - transpose_min + 1`.
    pub transpose_map: Option<&'static [u8]>,
    /// Optional ascending list of `first_code | (second_code << 8)` keys for
    /// character pairs that get a single code of their own; scanning stops at
    /// a `0x7fff` or `0` entry.
    pub frequent_pairs: Option<&'static [u16]>,
}
impl TransposeConfig {
    /// Preset for code points `0x60..=0x17f`, with codes looked up in
    /// `TRANSPOSE_MAP_LATIN`.
    pub const LATIN: Self = Self {
        transpose_min: 0x60,
        transpose_max: 0x17f,
        transpose_map: Some(TRANSPOSE_MAP_LATIN),
        frequent_pairs: None,
    };

    /// Preset for code points `0x430..=0x451`: codes are assigned directly
    /// from the code point, and the pairs in `PAIRS_RU` get their own codes.
    pub const CYRILLIC: Self = Self {
        transpose_min: 0x430,
        transpose_max: 0x451,
        transpose_map: None,
        frequent_pairs: Some(PAIRS_RU),
    };

    /// Preset for code points `0x600..=0x65f`, with codes assigned directly
    /// from the code point.
    // NOTE(review): without a map the codes here run from 1 to 0x60, while the
    // packer in `transpose_alphabet` keeps only the low six bits of each code.
    // Confirm this range is what the consumer of the packed bytes expects.
    pub const ARABIC: Self = Self {
        transpose_min: 0x600,
        transpose_max: 0x65f,
        transpose_map: None,
        frequent_pairs: None,
    };

    /// Preset for code points `0x620..=0x6cc`, with codes looked up in
    /// `TRANSPOSE_MAP_FA`.
    pub const PERSIAN: Self = Self {
        transpose_min: 0x620,
        transpose_max: 0x6cc,
        transpose_map: Some(TRANSPOSE_MAP_FA),
        frequent_pairs: None,
    };

    /// Inactive configuration: `transpose_alphabet` returns the word's bytes
    /// unchanged.
    pub const NONE: Self = Self {
        transpose_min: 0,
        transpose_max: 0,
        transpose_map: None,
        frequent_pairs: None,
    };

    /// Returns `true` when transposition is enabled, i.e. `transpose_min` is
    /// non-zero.
    pub fn is_active(&self) -> bool {
        self.transpose_min != 0
    }
}
/// Output of `transpose_alphabet`: the resulting bytes plus a length/flag byte.
#[derive(Debug, Clone)]
pub struct TransposeResult {
    /// Either the packed 6-bit codes or, when the word was not transposed,
    /// the word's original UTF-8 bytes.
    pub bytes: Vec<u8>,
    /// Length/flag byte: the low six bits carry a byte count and bit 6
    /// (`0x40`) signals that `bytes` holds packed codes (see `byte_len` and
    /// `is_compressed`).
    pub wlen: u8,
}
impl TransposeResult {
    /// Bit of `wlen` that marks the bytes as packed 6-bit codes.
    const COMPRESSED_FLAG: u8 = 0x40;
    /// Bits of `wlen` that carry the byte count.
    const LENGTH_MASK: u8 = 0x3f;

    /// Returns `true` when bit 6 of `wlen` is set, i.e. `bytes` holds packed
    /// 6-bit codes rather than the original word bytes.
    pub fn is_compressed(&self) -> bool {
        (self.wlen & Self::COMPRESSED_FLAG) == Self::COMPRESSED_FLAG
    }

    /// Returns the byte count stored in the low six bits of `wlen`.
    pub fn byte_len(&self) -> u8 {
        self.wlen & Self::LENGTH_MASK
    }
}
/// Re-encodes `word` as packed 6-bit character codes when every character has
/// a code under `cfg`.
///
/// The word is returned unchanged (its UTF-8 bytes, compression flag clear)
/// when `cfg` is inactive or when any character lies outside
/// `cfg.transpose_min..=cfg.transpose_max` or maps to code `0`. Otherwise each
/// character becomes a code, adjacent codes listed in `cfg.frequent_pairs`
/// collapse into one code, and the codes are packed six bits apiece, most
/// significant bits first, with the last byte zero-padded.
///
/// # Returns
///
/// A `TransposeResult` whose `wlen` has bit 6 (`0x40`) set only for packed
/// output. Its low six bits hold the byte count, saturated at `0x3f`, so a
/// long word can never spill into the flag bit; `bytes.len()` always gives the
/// exact length.
pub fn transpose_alphabet(word: &str, cfg: &TransposeConfig) -> TransposeResult {
    if !cfg.is_active() {
        return passthrough_result(word);
    }

    // Collecting into `Option` stops at the first character without a code.
    let codes: Option<Vec<u32>> = word.chars().map(|c| alphabet_code(c, cfg)).collect();
    let mut codes = match codes {
        Some(codes) => codes,
        None => return passthrough_result(word),
    };

    if let Some(pairs) = cfg.frequent_pairs {
        // Pair codes are numbered directly after the single-character codes.
        // `saturating_sub` keeps a malformed (inverted) range from underflowing.
        let pairs_start = cfg.transpose_max.saturating_sub(cfg.transpose_min) + 2;
        codes = merge_frequent_pairs(&codes, pairs, pairs_start);
    }

    let bytes = pack_six_bit_codes(&codes);
    let wlen = clamp_wlen(bytes.len()) | 0x40;
    TransposeResult { bytes, wlen }
}

/// Builds the "not transposed" result: the word's own bytes, flag bit clear.
fn passthrough_result(word: &str) -> TransposeResult {
    let bytes = word.as_bytes().to_vec();
    let wlen = clamp_wlen(bytes.len());
    TransposeResult { bytes, wlen }
}

/// Fits a byte count into the six length bits of `wlen`, saturating at `0x3f`
/// instead of wrapping or overwriting the compression flag.
fn clamp_wlen(len: usize) -> u8 {
    len.min(0x3f) as u8
}

/// Returns the non-zero code for `c`, or `None` when `c` is out of range or
/// has no code in the map.
fn alphabet_code(c: char, cfg: &TransposeConfig) -> Option<u32> {
    let cp = u32::from(c);
    if cp < cfg.transpose_min || cp > cfg.transpose_max {
        return None;
    }
    let offset = cp - cfg.transpose_min;
    let code = match cfg.transpose_map {
        // No table: codes follow code-point order, starting at 1.
        None => offset + 1,
        // A table shorter than the range behaves like trailing zeros.
        Some(map) => map.get(offset as usize).copied().map_or(0, u32::from),
    };
    if code > 0 {
        Some(code)
    } else {
        None
    }
}

/// Replaces each adjacent code pair found in `pairs` with `pairs_start` plus
/// the pair's index, scanning left to right without overlap.
fn merge_frequent_pairs(codes: &[u32], pairs: &[u16], pairs_start: u32) -> Vec<u32> {
    let mut merged = Vec::with_capacity(codes.len());
    let mut i = 0;
    while i < codes.len() {
        let first = codes[i];
        let pair_ix = codes
            .get(i + 1)
            .and_then(|&second| find_frequent_pair(pairs, first | (second << 8)));
        match pair_ix {
            Some(ix) => {
                merged.push(ix as u32 + pairs_start);
                i += 2;
            }
            None => {
                merged.push(first);
                i += 1;
            }
        }
    }
    merged
}

/// Looks `key` up in the ascending `pairs` list, stopping at the terminator
/// (`0x7fff` or `0`) or at the first entry greater than `key`.
fn find_frequent_pair(pairs: &[u16], key: u32) -> Option<usize> {
    pairs
        .iter()
        // Compare in `u32` so an oversized key cannot match after truncation.
        .map(|&pair| u32::from(pair))
        .take_while(|&pair| pair != 0x7fff && pair != 0 && pair <= key)
        .position(|pair| pair == key)
}

/// Packs the low six bits of every code into bytes, most significant bits
/// first; the final partial byte is padded with zero bits.
fn pack_six_bit_codes(codes: &[u32]) -> Vec<u8> {
    let mut out = Vec::with_capacity((codes.len() * 6 + 7) / 8);
    let mut acc: u32 = 0;
    let mut bits: u32 = 0;
    for &code in codes {
        acc = (acc << 6) | (code & 0x3f);
        bits += 6;
        if bits >= 8 {
            bits -= 8;
            out.push(((acc >> bits) & 0xff) as u8);
        }
    }
    if bits > 0 {
        out.push(((acc << (8 - bits)) & 0xff) as u8);
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One letter: code 1 occupies the top six bits of a single byte.
    #[test]
    fn transpose_a() {
        let packed = transpose_alphabet("a", &TransposeConfig::LATIN);
        assert!(packed.is_compressed(), "'a' should compress");
        assert_eq!(packed.bytes, &[0x04]);
        assert_eq!(packed.byte_len(), 1);
    }

    /// Three letters (codes 20, 8, 5) pack into 18 bits, i.e. three bytes.
    #[test]
    fn transpose_the() {
        let packed = transpose_alphabet("the", &TransposeConfig::LATIN);
        assert!(packed.is_compressed());
        assert_eq!(packed.bytes, &[0x50, 0x81, 0x40]);
        assert_eq!(packed.byte_len(), 3);
        assert_eq!(packed.wlen, 0x43);
    }

    /// Five letters need 30 bits, which rounds up to four bytes.
    #[test]
    fn transpose_hello() {
        let packed = transpose_alphabet("hello", &TransposeConfig::LATIN);
        assert!(packed.is_compressed());
        assert_eq!(packed.byte_len(), 4);
    }

    /// Digits have no code, so the word is returned byte-for-byte.
    #[test]
    fn transpose_no_compress() {
        let plain = transpose_alphabet("abc123", &TransposeConfig::LATIN);
        assert!(!plain.is_compressed());
        assert_eq!(plain.bytes, b"abc123");
    }

    /// An inactive configuration never transposes anything.
    #[test]
    fn transpose_none_config() {
        let plain = transpose_alphabet("hello", &TransposeConfig::NONE);
        assert!(!plain.is_compressed());
        assert_eq!(plain.bytes, b"hello");
    }

    /// The packed form of "the" must hash to the expected bucket.
    #[test]
    fn hash_compressed_the() {
        use crate::dictionary::lookup::hash_word;

        let packed = transpose_alphabet("the", &TransposeConfig::LATIN);
        let bucket = hash_word(&packed.bytes);
        assert_eq!(bucket, 75, "hash of compressed 'the' should be 75");
    }

    /// The packed form of "a" must hash to the expected bucket.
    #[test]
    fn hash_compressed_a() {
        use crate::dictionary::lookup::hash_word;

        let packed = transpose_alphabet("a", &TransposeConfig::LATIN);
        let bucket = hash_word(&packed.bytes);
        assert_eq!(bucket, 5, "hash of compressed 'a' should be 5");
    }

    /// Without a map, the first letter of the Cyrillic range gets code 1.
    #[test]
    fn cyrillic_basic_mapping() {
        let packed = transpose_alphabet("а", &TransposeConfig::CYRILLIC);
        assert!(packed.is_compressed(), "Cyrillic 'а' should compress");
        assert_eq!(packed.bytes, &[0x04]);
    }

    /// Six letters with no frequent pair among them: 36 bits, five bytes.
    #[test]
    fn cyrillic_privet() {
        let packed = transpose_alphabet("привет", &TransposeConfig::CYRILLIC);
        assert!(packed.is_compressed());
        assert_eq!(packed.byte_len(), 5);
    }
}