#![allow(dead_code)]
// Encoding-state identifiers. The numeric values double as row/column indices
// into the per-state tables in this file (`CHAR_SIZES`, `LATCH_LEN`,
// `SHIFT_LEN`, `LATCH_SEQ`), so they must stay dense and in this order.
/// Upper state: space and `A`-`Z` (see `upper_codeword`).
pub(crate) const STATE_UPPER: u8 = 0;
/// Lower state: space and `a`-`z` (see `lower_codeword`).
pub(crate) const STATE_LOWER: u8 = 1;
/// Mixed state: selected control characters and symbols (see `mixed_codeword`).
pub(crate) const STATE_MIXED: u8 = 2;
/// Punct state: punctuation and CR (see `punct_codeword`).
pub(crate) const STATE_PUNCT: u8 = 3;
/// Digit state: space, `0`-`9`, `,` and `.` (see `digit_codeword`).
pub(crate) const STATE_DIGIT: u8 = 4;
/// Byte state: raw 8-bit values; has no codeword lookup function.
pub(crate) const STATE_BYTE: u8 = 5;
// Sentinel values. They are stored in the same `i32` sequence as literal message
// bytes; bytes are always >= 0 and every sentinel is negative, so the sign alone
// tells them apart (`seq_to_bits` relies on this).
/// Latch into Upper: `seq_to_bits` switches its current state to `STATE_UPPER`.
pub(crate) const LATCH_UPPER: i32 = -2;
/// Latch into Lower.
pub(crate) const LATCH_LOWER: i32 = -3;
/// Latch into Mixed.
pub(crate) const LATCH_MIXED: i32 = -4;
/// Latch into Punct.
pub(crate) const LATCH_PUNCT: i32 = -5;
/// Latch into Digit.
pub(crate) const LATCH_DIGIT: i32 = -6;
/// Shift to Upper: only the next sequence item is encoded in Upper.
pub(crate) const SHIFT_UPPER: i32 = -7;
/// Shift to Punct: only the next sequence item is encoded in Punct.
pub(crate) const SHIFT_PUNCT: i32 = -8;
/// Shift to Byte: the following run of non-negative items is emitted as raw bytes.
pub(crate) const SHIFT_BYTE: i32 = -9;
/// Mapped to Punct codeword 0 by `sentinel_codeword`; `encode_dp` never emits it.
pub(crate) const FLG_NEXT: i32 = -10;
// Two-character Punct pairs (see `pair_sentinel`): CR LF, ". ", ", " and ": ".
pub(crate) const PAIR_2: i32 = -11; pub(crate) const PAIR_3: i32 = -12; pub(crate) const PAIR_4: i32 = -13; pub(crate) const PAIR_5: i32 = -14;
/// Width in bits of one codeword in each state, indexed by `STATE_*`
/// (Upper, Lower, Mixed, Punct, Digit, Byte).
pub(crate) const CHAR_SIZES: [u8; 6] = [5, 5, 5, 5, 4, 8];
/// `LATCH_LEN[from][to]`: bit cost that `encode_dp` charges for moving from state
/// `from` to state `to`. Rows and columns are indexed by `STATE_*`; each entry
/// pairs with the sentinel sequence at `LATCH_SEQ[from][to]`.
pub(crate) const LATCH_LEN: [[u8; 6]; 6] = [
// from Upper
[0, 5, 5, 10, 5, 10],
// from Lower
[9, 0, 5, 10, 5, 10],
// from Mixed
[5, 5, 0, 5, 10, 10],
// from Punct
[5, 10, 10, 0, 10, 15],
// from Digit
[4, 9, 9, 14, 0, 14],
// from Byte: leaving Byte is free (`seq_to_bits` emits no bits for the exit sentinel)
[0, 0, 0, 0, 0, 0],
];
/// `SHIFT_LEN[from][to]`: bit cost of a one-character shift from state `from`
/// into state `to` (Byte excluded). `u16::MAX` marks a shift that does not exist;
/// `encode_dp` skips those entries.
pub(crate) const SHIFT_LEN: [[u16; 5]; 5] = [
// from Upper: only a Punct shift
[u16::MAX, u16::MAX, u16::MAX, 5, u16::MAX],
// from Lower: Upper shift or Punct shift
[5, u16::MAX, u16::MAX, 5, u16::MAX],
// from Mixed: only a Punct shift
[u16::MAX, u16::MAX, u16::MAX, 5, u16::MAX],
// from Punct: no shifts
[u16::MAX, u16::MAX, u16::MAX, u16::MAX, u16::MAX],
// from Digit: Upper shift or Punct shift (4-bit codewords)
[4, u16::MAX, u16::MAX, 4, u16::MAX],
];
/// One row of the symbol-size table `METRICS`.
#[derive(Debug, Clone, Copy)]
pub(crate) struct AztecMetrics {
/// Symbol family: `"rune"`, `"compact"` or `"full"`.
pub format: &'static str,
/// Number of data layers around the finder pattern.
pub layers: u8,
/// Flag checked by `fit_metric`: when reader-init is requested, only rows with
/// `has_data == 1` are considered.
pub has_data: u8,
/// Total codewords in the symbol (data plus error correction).
pub ncws: u16,
/// Bits per codeword.
pub bps: u8,
}
/// Builds one `METRICS` row; keeps the table below to one line per symbol size.
const fn metric_row(
    format: &'static str,
    layers: u8,
    has_data: u8,
    ncws: u16,
    bps: u8,
) -> AztecMetrics {
    AztecMetrics {
        format,
        layers,
        has_data,
        ncws,
        bps,
    }
}

/// Symbol-size table, ordered from smallest to largest capacity.
///
/// Columns: format, layers, `has_data` flag, total codewords, bits per codeword.
/// `fit_metric` scans it front to back and returns the first row that fits.
pub(crate) const METRICS: [AztecMetrics; 37] = [
    metric_row("rune", 0, 0, 0, 6),
    metric_row("compact", 1, 1, 17, 6),
    metric_row("full", 1, 1, 21, 6),
    metric_row("compact", 2, 0, 40, 6),
    metric_row("full", 2, 1, 48, 6),
    metric_row("compact", 3, 0, 51, 8),
    metric_row("full", 3, 1, 60, 8),
    metric_row("compact", 4, 0, 76, 8),
    metric_row("full", 4, 1, 88, 8),
    metric_row("full", 5, 1, 120, 8),
    metric_row("full", 6, 1, 156, 8),
    metric_row("full", 7, 1, 196, 8),
    metric_row("full", 8, 1, 240, 8),
    metric_row("full", 9, 1, 230, 10),
    metric_row("full", 10, 1, 272, 10),
    metric_row("full", 11, 1, 316, 10),
    metric_row("full", 12, 1, 364, 10),
    metric_row("full", 13, 1, 416, 10),
    metric_row("full", 14, 1, 470, 10),
    metric_row("full", 15, 1, 528, 10),
    metric_row("full", 16, 1, 588, 10),
    metric_row("full", 17, 1, 652, 10),
    metric_row("full", 18, 1, 720, 10),
    metric_row("full", 19, 1, 790, 10),
    metric_row("full", 20, 1, 864, 10),
    metric_row("full", 21, 1, 940, 10),
    metric_row("full", 22, 1, 1020, 10),
    metric_row("full", 23, 0, 920, 12),
    metric_row("full", 24, 0, 992, 12),
    metric_row("full", 25, 0, 1066, 12),
    metric_row("full", 26, 0, 1144, 12),
    metric_row("full", 27, 0, 1224, 12),
    metric_row("full", 28, 0, 1306, 12),
    metric_row("full", 29, 0, 1392, 12),
    metric_row("full", 30, 0, 1480, 12),
    metric_row("full", 31, 0, 1570, 12),
    metric_row("full", 32, 0, 1664, 12),
];
// Named codeword values of the Upper state that are not plain characters.
/// Upper codeword 0 (the value `sentinel_codeword` returns for a Punct shift).
pub(crate) const UPPER_PS: u8 = 0;
/// Upper codeword for the space character.
pub(crate) const UPPER_SPACE: u8 = 1;
/// Upper codeword that latches to Lower.
pub(crate) const UPPER_LL: u8 = 28;
/// Upper codeword that latches to Mixed.
pub(crate) const UPPER_LM: u8 = 29;
/// Upper codeword that latches to Digit.
pub(crate) const UPPER_LD: u8 = 30;
/// Upper codeword 31 (the value `sentinel_codeword` returns for a Byte shift).
pub(crate) const UPPER_SB: u8 = 31;
/// Returns the Upper-state codeword for `byte`, or `None` when the byte is not
/// representable there. Space maps to `UPPER_SPACE`; `A`..=`Z` map to 2..=27.
pub(crate) fn upper_codeword(byte: u8) -> Option<u8> {
    if byte == b' ' {
        Some(UPPER_SPACE)
    } else if byte.is_ascii_uppercase() {
        Some(byte - b'A' + 2)
    } else {
        None
    }
}
/// Returns the Lower-state codeword for `byte`: space is 1, `a`..=`z` are 2..=27,
/// anything else yields `None`.
pub(crate) fn lower_codeword(byte: u8) -> Option<u8> {
    match byte {
        b' ' => Some(1),
        _ => byte.is_ascii_lowercase().then(|| byte - b'a' + 2),
    }
}
/// Returns the Digit-state codeword for `byte`: space is 1, `0`..=`9` are 2..=11,
/// `,` is 12 and `.` is 13. Any other byte yields `None`.
pub(crate) fn digit_codeword(byte: u8) -> Option<u8> {
    if byte.is_ascii_digit() {
        return Some(byte - b'0' + 2);
    }
    match byte {
        b' ' => Some(1),
        b',' => Some(12),
        b'.' => Some(13),
        _ => None,
    }
}
/// Returns the Mixed-state codeword for `byte`, or `None` when the byte has no
/// entry in that alphabet.
///
/// Layout: space is 1; control bytes 1..=13 are 2..=14; bytes 27..=31 are
/// 15..=19; the symbols ``@ \ ^ _ ` | ~`` are 20..=26; DEL (127) is 27.
pub(crate) fn mixed_codeword(byte: u8) -> Option<u8> {
    // Printable symbols in codeword order; the first one is codeword 20.
    const SYMBOLS: &[u8] = b"@\\^_`|~";
    match byte {
        b' ' => Some(1),
        0x01..=0x0D => Some(byte + 1),
        0x1B..=0x1F => Some(byte - 12),
        0x7F => Some(27),
        _ => SYMBOLS
            .iter()
            .position(|&symbol| symbol == byte)
            .map(|offset| 20 + offset as u8),
    }
}
/// Returns the Punct-state codeword for `byte`, or `None` when the byte has no
/// single-character entry there.
///
/// CR is 1; the ASCII run `!`..=`/` is 6..=20; the run `:`..=`?` is 21..=26;
/// `[`, `]`, `{`, `}` are 27..=30. Codewords 2..=5 belong to the two-character
/// pairs and are not produced here.
pub(crate) fn punct_codeword(byte: u8) -> Option<u8> {
    match byte {
        b'\r' => Some(1),
        b'!'..=b'/' => Some(byte - b'!' + 6),
        b':'..=b'?' => Some(byte - b':' + 21),
        b'[' => Some(27),
        b']' => Some(28),
        b'{' => Some(29),
        b'}' => Some(30),
        _ => None,
    }
}
/// Encodes every byte of `bytes` in one fixed character state.
///
/// Returns the codewords in input order, or `None` when `state` is not one of
/// the five character states or when any byte is not representable in it.
pub(crate) fn encode_single_state(state: u8, bytes: &[u8]) -> Option<Vec<u8>> {
    let table: fn(u8) -> Option<u8> = match state {
        STATE_UPPER => upper_codeword,
        STATE_LOWER => lower_codeword,
        STATE_MIXED => mixed_codeword,
        STATE_PUNCT => punct_codeword,
        STATE_DIGIT => digit_codeword,
        _ => return None,
    };
    // Collecting into `Option<Vec<_>>` stops at the first unencodable byte.
    bytes.iter().map(|&byte| table(byte)).collect()
}
/// Expands each codeword into `bits_per_codeword` bits, most significant first.
///
/// Codewords are 8-bit values, so when `bits_per_codeword` exceeds 8 the extra
/// leading positions are emitted as `false`. A width of 0 yields no bits.
pub(crate) fn pack_codewords_to_bits(codewords: &[u8], bits_per_codeword: u8) -> Vec<bool> {
    let width = u32::from(bits_per_codeword);
    let mut bits = Vec::with_capacity(codewords.len() * usize::from(bits_per_codeword));
    for &cw in codewords {
        // `checked_shr` returns `None` for shift amounts >= 8 instead of
        // overflowing; such positions lie above the codeword and are zero.
        bits.extend(
            (0..width)
                .rev()
                .map(|k| cw.checked_shr(k).is_some_and(|shifted| shifted & 1 == 1)),
        );
    }
    bits
}
/// Picks the state the greedy encoder latches into when `byte` cannot be encoded
/// in the current state.
///
/// Every non-Byte answer is a state whose codeword function accepts `byte`.
/// Bytes with no entry in any character alphabet (0, 14..=26, and everything
/// from 128 up) map to `STATE_BYTE`. `,` and `.` prefer Digit even though Punct
/// can also encode them; space prefers Upper.
fn preferred_state(byte: u8) -> u8 {
    match byte {
        b' ' | b'A'..=b'Z' => STATE_UPPER,
        b'a'..=b'z' => STATE_LOWER,
        b'0'..=b'9' | b',' | b'.' => STATE_DIGIT,
        b'\r'
        | b'!'..=b'+'
        | b'-'
        | b'/'
        | b':'..=b'?'
        | b'['
        | b']'
        | b'{'
        | b'}' => STATE_PUNCT,
        // Only the control bytes `mixed_codeword` actually maps: 1..=12 and
        // 27..=31 (13 is CR and was claimed by Punct above).
        1..=12 | 27..=31 | b'@' | b'\\' | b'^' | b'_' | b'`' | b'|' | b'~' | 127 => STATE_MIXED,
        _ => STATE_BYTE,
    }
}
fn latch_codeword(from: u8, to: u8) -> Option<u8> {
match (from, to) {
(STATE_UPPER, STATE_LOWER) => Some(UPPER_LL),
(STATE_UPPER, STATE_MIXED) => Some(UPPER_LM),
(STATE_UPPER, STATE_DIGIT) => Some(UPPER_LD),
(STATE_LOWER, STATE_MIXED) => Some(29), (STATE_LOWER, STATE_DIGIT) => Some(30), (STATE_MIXED, STATE_LOWER) => Some(28),
(STATE_MIXED, STATE_UPPER) => Some(29),
(STATE_MIXED, STATE_PUNCT) => Some(30),
(STATE_PUNCT, STATE_UPPER) => Some(31),
(STATE_DIGIT, STATE_UPPER) => Some(14),
_ => None,
}
}
/// Appends the low `width` bits of `value` to `bits`, most significant first.
/// Callers in this file pass widths of at most 8.
fn append_codeword(bits: &mut Vec<bool>, value: u8, width: u8) {
    bits.extend((0..width).rev().map(|shift| (value >> shift) & 1 == 1));
}
fn encode_byte_in_state(state: u8, byte: u8) -> Option<u8> {
match state {
STATE_UPPER => upper_codeword(byte),
STATE_LOWER => lower_codeword(byte),
STATE_MIXED => mixed_codeword(byte),
STATE_PUNCT => punct_codeword(byte),
STATE_DIGIT => digit_codeword(byte),
_ => None,
}
}
/// Encodes `bytes` with a simple greedy strategy, starting in the Upper state.
///
/// A byte that fits the current state is emitted directly. Otherwise the encoder
/// latches into `preferred_state(byte)` with a single latch codeword and emits
/// the byte there. No shifts, multi-step latches or Byte mode are attempted.
///
/// # Errors
///
/// Returns `Error::InvalidData` when a byte needs the Byte state, when there is
/// no direct latch from the current state to the preferred one, or when the
/// byte is still not encodable after the latch.
pub(crate) fn encode_greedy(bytes: &[u8]) -> Result<Vec<bool>, crate::error::Error> {
    let mut out = Vec::with_capacity(bytes.len() * 6);
    let mut mode = STATE_UPPER;
    for &byte in bytes {
        let codeword = match encode_byte_in_state(mode, byte) {
            Some(direct) => direct,
            None => {
                let wanted = preferred_state(byte);
                if wanted == STATE_BYTE {
                    return Err(crate::error::Error::InvalidData(format!(
                        "Aztec encode_greedy: byte 0x{byte:02x} requires the Byte-state shift (BS); this Rust port covers Upper/Lower/Mixed/Punct/Digit but not Byte-mode shifts (use BWIPP for binary-heavy payloads)",
                    )));
                }
                let Some(latch) = latch_codeword(mode, wanted) else {
                    return Err(crate::error::Error::InvalidData(format!(
                        "Aztec encode_greedy: no direct latch from state {mode} to {wanted} — multi-step latches are outside this port's scope",
                    )));
                };
                // The latch itself is written in the width of the state being left.
                append_codeword(&mut out, latch, CHAR_SIZES[mode as usize]);
                mode = wanted;
                match encode_byte_in_state(mode, byte) {
                    Some(after_latch) => after_latch,
                    None => {
                        return Err(crate::error::Error::InvalidData(format!(
                            "Aztec encode_greedy: byte 0x{byte:02x} not encodable in state {mode} after latch",
                        )));
                    }
                }
            }
        };
        append_codeword(&mut out, codeword, CHAR_SIZES[mode as usize]);
    }
    Ok(out)
}
/// `LATCH_SEQ[from][to]`: the sentinels `encode_dp` appends to a sequence when it
/// moves from state `from` to state `to`. Rows and columns are indexed by
/// `STATE_*`; the bit cost of each entry is `LATCH_LEN[from][to]`.
pub(crate) const LATCH_SEQ: [[&[i32]; 6]; 6] = [
// from Upper
[
&[],
&[LATCH_LOWER],
&[LATCH_MIXED],
&[LATCH_MIXED, LATCH_PUNCT],
&[LATCH_DIGIT],
&[SHIFT_BYTE],
],
// from Lower (Upper is reached via Digit)
[
&[LATCH_DIGIT, LATCH_UPPER],
&[],
&[LATCH_MIXED],
&[LATCH_MIXED, LATCH_PUNCT],
&[LATCH_DIGIT],
&[SHIFT_BYTE],
],
// from Mixed
[
&[LATCH_UPPER],
&[LATCH_LOWER],
&[],
&[LATCH_PUNCT],
&[LATCH_UPPER, LATCH_DIGIT],
&[SHIFT_BYTE],
],
// from Punct (everything goes through Upper)
[
&[LATCH_UPPER],
&[LATCH_UPPER, LATCH_LOWER],
&[LATCH_UPPER, LATCH_MIXED],
&[],
&[LATCH_UPPER, LATCH_DIGIT],
&[LATCH_UPPER, SHIFT_BYTE],
],
// from Digit (everything goes through Upper)
[
&[LATCH_UPPER],
&[LATCH_UPPER, LATCH_LOWER],
&[LATCH_UPPER, LATCH_MIXED],
&[LATCH_UPPER, LATCH_MIXED, LATCH_PUNCT],
&[],
&[LATCH_UPPER, SHIFT_BYTE],
],
// from Byte: these latches act as Byte-run exit markers; `seq_to_bits` consumes
// them to restore the state without emitting any bits.
[
&[LATCH_UPPER],
&[LATCH_LOWER],
&[LATCH_MIXED],
&[],
&[],
&[],
],
];
/// Returns the codeword that represents `sentinel` (a latch, shift, pair or
/// `FLG_NEXT` marker) in the alphabet of `state`, or `None` when that state has
/// no codeword for it.
fn sentinel_codeword(state: u8, sentinel: i32) -> Option<u8> {
    match state {
        STATE_UPPER => match sentinel {
            LATCH_LOWER => Some(28),
            LATCH_MIXED => Some(29),
            LATCH_DIGIT => Some(30),
            SHIFT_BYTE => Some(31),
            SHIFT_PUNCT => Some(0),
            _ => None,
        },
        STATE_LOWER => match sentinel {
            SHIFT_UPPER => Some(28),
            LATCH_MIXED => Some(29),
            LATCH_DIGIT => Some(30),
            SHIFT_BYTE => Some(31),
            SHIFT_PUNCT => Some(0),
            _ => None,
        },
        STATE_MIXED => match sentinel {
            LATCH_LOWER => Some(28),
            LATCH_UPPER => Some(29),
            LATCH_PUNCT => Some(30),
            SHIFT_BYTE => Some(31),
            SHIFT_PUNCT => Some(0),
            _ => None,
        },
        STATE_PUNCT => match sentinel {
            LATCH_UPPER => Some(31),
            FLG_NEXT => Some(0),
            PAIR_2 => Some(2),
            PAIR_3 => Some(3),
            PAIR_4 => Some(4),
            PAIR_5 => Some(5),
            _ => None,
        },
        STATE_DIGIT => match sentinel {
            LATCH_UPPER => Some(14),
            SHIFT_UPPER => Some(15),
            SHIFT_PUNCT => Some(0),
            _ => None,
        },
        _ => None,
    }
}
fn latch_target(sentinel: i32) -> Option<u8> {
match sentinel {
LATCH_UPPER => Some(STATE_UPPER),
LATCH_LOWER => Some(STATE_LOWER),
LATCH_MIXED => Some(STATE_MIXED),
LATCH_PUNCT => Some(STATE_PUNCT),
LATCH_DIGIT => Some(STATE_DIGIT),
_ => None,
}
}
/// Bit width of item `ch` when emitted in `state`: the state's codeword width
/// for a literal byte (`ch >= 0`), or `u16::MAX` for any negative sentinel.
fn charsize(state: u8, ch: i32) -> u16 {
    if ch < 0 {
        return u16::MAX;
    }
    u16::from(CHAR_SIZES[usize::from(state)])
}
/// Cost that marks a state as unreachable in `encode_dp`. Costs are combined with
/// `saturating_add`, and a final best cost equal to `INF` is reported as an error.
const INF: u32 = 1_000_000;
/// Returns the pair sentinel for the two consecutive bytes `last`, `cur` when
/// they form one of the four compressible pairs: CR LF, ". ", ", " or ": ".
fn pair_sentinel(last: u8, cur: u8) -> Option<i32> {
    if last == b'\r' && cur == b'\n' {
        return Some(PAIR_2);
    }
    // Every remaining pair ends in a space.
    if cur != b' ' {
        return None;
    }
    match last {
        b'.' => Some(PAIR_3),
        b',' => Some(PAIR_4),
        b':' => Some(PAIR_5),
        _ => None,
    }
}
/// Reports whether `item` is one of the four two-character pair sentinels.
fn is_pair_sentinel(item: i32) -> bool {
    match item {
        PAIR_2 | PAIR_3 | PAIR_4 | PAIR_5 => true,
        _ => false,
    }
}
/// Finds a low-cost encoding sequence for `msg` by dynamic programming over the
/// six encoding states.
///
/// For every state the function tracks the cheapest known item sequence that ends
/// in that state (`curseq`) and its cost in bits (`curlen`). Each input byte is
/// processed in three steps: relax latches between states, extend every state
/// that can take the byte (directly or through a one-character shift), then try
/// to fold the previous byte and this one into a Punct pair.
///
/// Returns the item sequence of the cheapest final state: non-negative items are
/// literal bytes, negative items are the latch/shift/pair sentinels. Feed it to
/// `seq_to_bits` to obtain the bit stream.
///
/// # Errors
///
/// Returns `Error::InvalidData` when no state is reachable at the end (best cost
/// is still `INF`).
pub(crate) fn encode_dp(msg: &[u8]) -> Result<Vec<i32>, crate::error::Error> {
// Encoding starts in Upper, so only that state has cost 0.
let mut curseq: [Vec<i32>; 6] = Default::default();
let mut curlen: [u32; 6] = [0, INF, INF, INF, INF, INF];
// The only state a Byte run is allowed to latch back into.
let mut backto: u8 = STATE_UPPER;
let mut lastchar: Option<u8> = None;
for &b in msg {
let ch = b as i32;
// Step 1: relax latches x -> y until no state gets cheaper.
loop {
let mut improved = false;
for x in 0..6u8 {
for y in 0..6u8 {
// Leaving Byte is only permitted towards `backto`.
if x == STATE_BYTE && y != backto {
continue;
}
let lat = LATCH_LEN[x as usize][y as usize] as u32;
let cost = curlen[x as usize].saturating_add(lat);
if cost < curlen[y as usize] {
curlen[y as usize] = cost;
let mut new_seq = curseq[x as usize].clone();
new_seq.extend_from_slice(LATCH_SEQ[x as usize][y as usize]);
curseq[y as usize] = new_seq;
if y == STATE_BYTE {
// Entering Byte from Punct or Digit goes through Upper (see the
// LATCH_SEQ rows), so Upper is the state to return to.
backto = if x == STATE_PUNCT || x == STATE_DIGIT {
STATE_UPPER
} else {
x
};
}
improved = true;
}
}
}
if !improved {
break;
}
}
// Step 2: build the candidate sequences that include the current byte.
let mut nxtseq: [Vec<i32>; 6] = Default::default();
let mut nxtlen: [u32; 6] = [INF; 6];
for x in 0..6u8 {
// Byte can hold any value; other states need a codeword for `b`.
let encodable_in_x = if x == STATE_BYTE {
true
} else {
encode_byte_in_state(x, b).is_some()
};
if !encodable_in_x {
continue;
}
// Direct extension: stay in x and append the byte.
let cost = curlen[x as usize].saturating_add(charsize(x, ch) as u32);
if cost < nxtlen[x as usize] {
nxtlen[x as usize] = cost;
let mut s = curseq[x as usize].clone();
s.push(ch);
nxtseq[x as usize] = s;
}
// Byte is never the target of a one-character shift.
if x == STATE_BYTE {
continue;
}
// Shift extension: stay in y but encode this one byte in x.
for y in 0..5u8 {
if y == x {
continue;
}
let shft = SHIFT_LEN[y as usize][x as usize] as u32;
if shft == u16::MAX as u32 {
continue;
}
let cost = curlen[y as usize]
.saturating_add(shft)
.saturating_add(charsize(x, ch) as u32);
if cost < nxtlen[y as usize] {
nxtlen[y as usize] = cost;
// SHIFT_LEN only has finite entries for shifts into Upper or Punct, so the
// trailing `continue` is not expected to run.
let shift_token = if x == STATE_PUNCT {
SHIFT_PUNCT
} else if x == STATE_UPPER {
SHIFT_UPPER
} else {
continue; };
let mut s = curseq[y as usize].clone();
s.push(shift_token);
s.push(ch);
nxtseq[y as usize] = s;
}
}
}
// Step 3: if (previous byte, current byte) is a Punct pair, try to replace the
// previous byte in each state's sequence with the single pair sentinel.
if let Some(last) = lastchar {
if let Some(pair_sent) = pair_sentinel(last, b) {
for &i_state in &[
STATE_UPPER,
STATE_LOWER,
STATE_MIXED,
STATE_PUNCT,
STATE_DIGIT,
] {
// Skip states whose own alphabet encodes `last` (Mixed has CR, Digit has
// `,` and `.`); NOTE(review): presumably because `last` would then not be
// sitting in Punct in that sequence - confirm against the reference.
let mut in_p = true;
if (i_state == STATE_MIXED && last == 0x0D)
|| (i_state == STATE_DIGIT && (last == b',' || last == b'.'))
{
in_p = false;
}
if !in_p {
continue;
}
let curseq_i_len = curseq[i_state as usize].len();
// Only worthwhile when the pre-byte cost beats the current candidate.
if curlen[i_state as usize] >= nxtlen[i_state as usize] {
continue;
}
// lastld: a LATCH_DIGIT was found before `last` (Punct state only).
// lastsp: `last` is directly preceded by SHIFT_PUNCT.
// lastidx: position of `last` in the sequence, once found.
let mut lastld = false;
let mut lastsp = false;
let mut lastidx: Option<usize> = None;
let seq_i = &curseq[i_state as usize];
// Walk backwards: first locate `last`, then inspect what precedes it.
for idx in (0..curseq_i_len).rev() {
let ch = seq_i[idx];
if lastidx.is_none() {
if ch >= 0 && ch as u8 == last {
lastidx = Some(idx);
if idx > 0 && seq_i[idx - 1] == SHIFT_PUNCT {
lastsp = true;
}
}
} else if ch == SHIFT_BYTE {
// `last` belongs to a Byte run: no pair replacement possible.
lastidx = None;
break;
} else if (LATCH_DIGIT..0).contains(&ch) {
// First latch found before `last` decides the outcome.
if i_state == STATE_PUNCT {
if ch == LATCH_DIGIT {
lastld = true;
}
} else if ch != LATCH_PUNCT {
in_p = lastsp;
}
break;
}
}
if !in_p || lastidx.is_none() {
continue;
}
let lastidx = lastidx.unwrap();
let mut new_cost = curlen[i_state as usize];
let new_seq: Vec<i32>;
if lastidx < curseq_i_len - 1 {
// Items follow `last` in the sequence.
if i_state == STATE_PUNCT {
// NOTE(review): the extra bit after a LATCH_DIGIT presumably accounts
// for the 4-bit vs 5-bit codeword difference - confirm.
if lastld {
new_cost = new_cost.saturating_add(1);
}
// Remove `last` and put the pair sentinel at the end instead.
let mut s = Vec::with_capacity(curseq_i_len);
s.extend_from_slice(&seq_i[..lastidx]);
s.extend_from_slice(&seq_i[lastidx + 1..]);
s.push(pair_sent);
new_seq = s;
} else {
// Replace `last` in place.
let mut s = seq_i.clone();
s[lastidx] = pair_sent;
new_seq = s;
}
} else {
// `last` is the final item: swap it for the pair sentinel.
let mut s: Vec<i32> = seq_i[..curseq_i_len - 1].to_vec();
s.push(pair_sent);
new_seq = s;
}
if new_cost < nxtlen[i_state as usize] {
nxtlen[i_state as usize] = new_cost;
nxtseq[i_state as usize] = new_seq;
}
}
}
}
// A Byte run that has just reached 32 items needs the 11-bit extended length
// field that `seq_to_bits` emits for runs longer than 31; charge it once.
if !nxtseq[STATE_BYTE as usize].is_empty() {
let mut numbytes = 0u32;
for &ch in &nxtseq[STATE_BYTE as usize] {
if ch == SHIFT_BYTE {
numbytes = 0;
} else {
numbytes += 1;
}
}
if numbytes == 32 {
nxtlen[STATE_BYTE as usize] = nxtlen[STATE_BYTE as usize].saturating_add(11);
}
}
curseq = nxtseq;
curlen = nxtlen;
lastchar = Some(b);
}
// Pick the cheapest final state; ties go to the lowest state index.
let mut best_state = 0u8;
let mut best_len = curlen[0];
for x in 1..6u8 {
if curlen[x as usize] < best_len {
best_len = curlen[x as usize];
best_state = x;
}
}
if best_len == INF {
return Err(crate::error::Error::InvalidData(
"Aztec encode_dp: no reachable encoding found".into(),
));
}
Ok(curseq[best_state as usize].clone())
}
/// Serialises an item sequence (as produced by `encode_dp`) into message bits.
///
/// Starts in the Upper state. Non-negative items are literal bytes encoded in the
/// current state; negative items are sentinels:
/// - `SHIFT_PUNCT` / `SHIFT_UPPER` emit the shift codeword and encode only the
///   following item in the shifted state;
/// - `SHIFT_BYTE` emits its codeword and starts a raw byte run, written as a
///   5-bit length (or 5 zero bits plus an 11-bit `length - 31`) followed by
///   8 bits per byte; the optional sentinel after the run selects the state to
///   resume in and emits no bits;
/// - anything else is looked up with `sentinel_codeword` and, if it is a latch,
///   changes the current state.
///
/// # Errors
///
/// Returns `Error::InvalidData` when an item cannot be represented in the state
/// it appears in, when a shift is the last item or is followed by an invalid
/// sentinel, or when a byte run is empty or ends in an unexpected sentinel.
pub(crate) fn seq_to_bits(seq: &[i32]) -> Result<Vec<bool>, crate::error::Error> {
    let mut out: Vec<bool> = Vec::new();
    let mut mode = STATE_UPPER;
    let mut cursor = 0usize;
    while cursor < seq.len() {
        if mode == STATE_BYTE {
            // Length of the literal run, capped at 2078 (31 + 2047).
            let run = seq[cursor..]
                .iter()
                .take(2078)
                .take_while(|&&value| value >= 0)
                .count();
            if run == 0 {
                return Err(crate::error::Error::InvalidData(
                    "Aztec seq_to_bits: BYTE state with no bytes to emit".into(),
                ));
            }
            if run > 31 {
                // Extended form: a zero length field, then 11 bits of (run - 31).
                append_codeword(&mut out, 0, 5);
                let extra = (run - 31) as u32;
                out.extend((0..11u32).rev().map(|k| (extra >> k) & 1 == 1));
            } else {
                append_codeword(&mut out, run as u8, 5);
            }
            for &raw in &seq[cursor..cursor + run] {
                append_codeword(&mut out, raw as u8, 8);
            }
            cursor += run;
            // The item after the run (if any) only tells us where to resume.
            if let Some(&exit) = seq.get(cursor) {
                mode = match exit {
                    LATCH_UPPER => STATE_UPPER,
                    LATCH_LOWER => STATE_LOWER,
                    LATCH_MIXED => STATE_MIXED,
                    _ => {
                        return Err(crate::error::Error::InvalidData(format!(
                            "Aztec seq_to_bits: invalid BYTE-exit sentinel {exit}",
                        )));
                    }
                };
                cursor += 1;
            }
            continue;
        }

        let item = seq[cursor];
        // `mode` is one of the five character states here, so this index is valid.
        let width = CHAR_SIZES[mode as usize];

        // Literal byte in the current state.
        if item >= 0 {
            let Some(cw) = encode_byte_in_state(mode, item as u8) else {
                return Err(crate::error::Error::InvalidData(format!(
                    "Aztec seq_to_bits: byte 0x{item:02x} not encodable in state {mode}",
                )));
            };
            append_codeword(&mut out, cw, width);
            cursor += 1;
            continue;
        }

        // One-character shift: shift codeword plus the shifted item.
        if item == SHIFT_PUNCT || item == SHIFT_UPPER {
            let Some(shift_cw) = sentinel_codeword(mode, item) else {
                return Err(crate::error::Error::InvalidData(format!(
                    "Aztec seq_to_bits: shift {item} not available from state {mode}",
                )));
            };
            append_codeword(&mut out, shift_cw, width);
            let Some(&follower) = seq.get(cursor + 1) else {
                return Err(crate::error::Error::InvalidData(
                    "Aztec seq_to_bits: shift not followed by a char".into(),
                ));
            };
            let shifted = if item == SHIFT_PUNCT {
                STATE_PUNCT
            } else {
                STATE_UPPER
            };
            let follower_cw = if follower >= 0 {
                match encode_byte_in_state(shifted, follower as u8) {
                    Some(cw) => cw,
                    None => {
                        return Err(crate::error::Error::InvalidData(format!(
                            "Aztec seq_to_bits: byte 0x{follower:02x} not encodable in shifted state {shifted}",
                        )));
                    }
                }
            } else if shifted == STATE_PUNCT && is_pair_sentinel(follower) {
                match sentinel_codeword(STATE_PUNCT, follower) {
                    Some(cw) => cw,
                    None => {
                        return Err(crate::error::Error::InvalidData(format!(
                            "Aztec seq_to_bits: pair {follower} not encodable after SP",
                        )));
                    }
                }
            } else {
                return Err(crate::error::Error::InvalidData(format!(
                    "Aztec seq_to_bits: shift {item} followed by sentinel {follower}",
                )));
            };
            append_codeword(&mut out, follower_cw, CHAR_SIZES[shifted as usize]);
            cursor += 2;
            continue;
        }

        // Byte shift: emit its codeword, then handle the run on the next pass.
        if item == SHIFT_BYTE {
            let Some(cw) = sentinel_codeword(mode, item) else {
                return Err(crate::error::Error::InvalidData(format!(
                    "Aztec seq_to_bits: SHIFT_BYTE not available from state {mode}",
                )));
            };
            append_codeword(&mut out, cw, width);
            mode = STATE_BYTE;
            cursor += 1;
            continue;
        }

        // Remaining sentinels: latches change state, pairs and FLG_NEXT do not.
        let Some(cw) = sentinel_codeword(mode, item) else {
            return Err(crate::error::Error::InvalidData(format!(
                "Aztec seq_to_bits: latch {item} not available from state {mode}",
            )));
        };
        append_codeword(&mut out, cw, width);
        mode = latch_target(item).unwrap_or(mode);
        cursor += 1;
    }
    Ok(out)
}
/// Encodes `bytes` into message bits: runs the dynamic-programming optimiser and
/// serialises the resulting item sequence.
///
/// # Errors
///
/// Propagates any error from `encode_dp` or `seq_to_bits`.
pub(crate) fn encode_msg(bytes: &[u8]) -> Result<Vec<bool>, crate::error::Error> {
    encode_dp(bytes).and_then(|items| seq_to_bits(&items))
}
/// Splits `msgbits` into `bps`-bit codewords, never producing an all-zero or
/// all-one codeword.
///
/// For every full codeword the first `bps - 1` bits are taken from the message.
/// If they are all 0 the last bit is forced to 1, if they are all 1 it is forced
/// to 0, and in both cases only `bps - 1` message bits are consumed. Otherwise
/// the last bit is the next message bit. A short final group is padded with 1s;
/// if that makes it all ones, its last bit is cleared.
///
/// `bps` is expected to be at least 2.
pub(crate) fn bit_stuff(msgbits: &[bool], bps: u8) -> Vec<u32> {
    /// Folds bits (most significant first) into an integer.
    fn to_word(bits: impl Iterator<Item = bool>) -> u32 {
        bits.fold(0u32, |acc, bit| (acc << 1) | (bit as u32))
    }

    let width = bps as usize;
    let head_len = width - 1;
    let total = msgbits.len();
    let mut words = Vec::new();
    let mut cursor: usize = 0;
    while cursor < total {
        if total - cursor < width {
            // Final partial codeword: pad with ones, then avoid all-ones.
            let mut tail: Vec<bool> = msgbits[cursor..].to_vec();
            tail.resize(width, true);
            if tail.iter().all(|&bit| bit) {
                *tail.last_mut().unwrap() = false;
            }
            words.push(to_word(tail.into_iter()));
            break;
        }
        let head = &msgbits[cursor..cursor + head_len];
        let (last_bit, consumed) = if head.iter().all(|&bit| !bit) {
            (true, head_len)
        } else if head.iter().all(|&bit| bit) {
            (false, head_len)
        } else {
            (msgbits[cursor + head_len], width)
        };
        words.push(to_word(
            head.iter().copied().chain(std::iter::once(last_bit)),
        ));
        cursor += consumed;
    }
    words
}
/// Finds the first (smallest) row of `METRICS` that can hold the message.
///
/// # Parameters
///
/// - `msgbits_len`: number of message bits before bit stuffing.
/// - `format`: symbol family to search, compared against each row's `format`.
/// - `requested_layers`: when positive, only rows with exactly this layer count
///   are considered; zero or negative means "any".
/// - `eclevel`: minimum error-correction share in percent of all codewords.
/// - `ecaddchars`: additional error-correction codewords on top of that share.
/// - `readerinit`: when true, only rows with `has_data == 1` are considered.
///
/// # Returns
///
/// The index into `METRICS`, or `None` when no row fits. The capacity check uses
/// `ceil(msgbits_len / bps)` and so does not account for extra codewords that
/// bit stuffing may add.
pub(crate) fn fit_metric(
    msgbits_len: usize,
    format: &str,
    requested_layers: i32,
    eclevel: u32,
    ecaddchars: u32,
    readerinit: bool,
) -> Option<usize> {
    // All arithmetic is done in u64 so large lengths or EC levels cannot wrap.
    let msgbits_len = u64::try_from(msgbits_len).unwrap_or(u64::MAX);
    METRICS.iter().position(|m| {
        if m.format != format {
            return false;
        }
        if readerinit && m.has_data != 1 {
            return false;
        }
        // Compare losslessly: a truncating cast would let 257 match layer 1.
        if requested_layers > 0 && requested_layers != i32::from(m.layers) {
            return false;
        }
        let ncws = u64::from(m.ncws);
        if ncws == 0 {
            return false;
        }
        let numecw = (ncws * u64::from(eclevel)).div_ceil(100) + u64::from(ecaddchars);
        if numecw >= ncws {
            return false;
        }
        let numdcw = ncws - numecw;
        msgbits_len.div_ceil(u64::from(m.bps)) <= numdcw
    })
}
/// Produces the full codeword list for the symbol size `METRICS[metrics_idx]`:
/// the bit-stuffed data codewords followed by Reed-Solomon check codewords that
/// fill the symbol up to its total codeword count.
///
/// The check codewords returned by `encode_k` are appended in reverse order.
///
/// # Errors
///
/// Returns `Error::InvalidData` when the data alone exceeds the symbol capacity
/// or when no Galois-field parameters exist for the codeword width.
///
/// # Panics
///
/// Panics if `metrics_idx` is out of range for `METRICS`.
pub(crate) fn build_codewords(
    msgbits: &[bool],
    metrics_idx: usize,
) -> Result<Vec<u32>, crate::error::Error> {
    let metric = METRICS[metrics_idx];
    let width = metric.bps;
    let capacity = metric.ncws as usize;
    let mut codewords = bit_stuff(msgbits, width);
    let used = codewords.len();
    if used > capacity {
        return Err(crate::error::Error::InvalidData(format!(
            "Aztec: data codewords ({used}) exceed symbol capacity ({capacity})",
        )));
    }
    let check_count = capacity - used;
    if check_count == 0 {
        // Data fills the symbol exactly; nothing left for error correction.
        return Ok(codewords);
    }
    let Some(gf) = crate::util::rs_gf2k::gf_for_bps(width) else {
        return Err(crate::error::Error::InvalidData(format!(
            "Aztec: no GF parameters for bps={width}"
        )));
    };
    let ecc = crate::util::rs_gf2k::encode_k(&codewords, check_count, gf);
    codewords.extend(ecc.iter().rev().copied());
    Ok(codewords)
}
/// Builds the mode message placed around the finder pattern.
///
/// The mode value is split into 4-bit nibbles, protected with GF(16)
/// Reed-Solomon check nibbles (appended in reverse of `encode_k`'s order) and
/// returned as bits, most significant bit of each nibble first:
/// - `"full"`: `(layers - 1) << 11 | (cw_count - 1)`, plus bit 10 when
///   `readerinit` is set; 4 data nibbles + 6 check nibbles (40 bits).
/// - `"compact"`: `(layers - 1) << 6 | (cw_count - 1)`; 2 + 5 nibbles (28 bits).
/// - `"rune"`: `rune_value` (0 if absent); 2 + 5 nibbles, each XORed with 10.
///
/// # Panics
///
/// Panics on any other `format`. For `"full"` and `"compact"`, `layers` and
/// `cw_count` must be at least 1 (the subtraction overflows otherwise).
pub(crate) fn build_mode_bits(
    format: &str,
    layers: u8,
    cw_count: usize,
    readerinit: bool,
    rune_value: Option<u8>,
) -> Vec<bool> {
    let gf16 = crate::util::rs_gf2k::GF16;
    // (mode value, number of data nibbles, number of check nibbles)
    let (mode, nibble_count, ecc_count): (u32, u32, usize) = match format {
        "full" => {
            let base = (layers as u32 - 1) * 2048 + (cw_count as u32 - 1);
            let flagged = if readerinit { base | 1024 } else { base };
            (flagged, 4, 6)
        }
        "compact" => ((layers as u32 - 1) * 64 + (cw_count as u32 - 1), 2, 5),
        "rune" => (rune_value.unwrap_or(0) as u32, 2, 5),
        _ => panic!("unknown format {format}"),
    };
    let mut nibbles: Vec<u32> = (0..nibble_count)
        .rev()
        .map(|i| (mode >> (4 * i)) & 0xF)
        .collect();
    let ecc = crate::util::rs_gf2k::encode_k(&nibbles, ecc_count, gf16);
    nibbles.extend(ecc.iter().rev().copied());
    // Runes are distinguished by XORing every nibble with 0b1010.
    let mask: u32 = if format == "rune" { 10 } else { 0 };
    nibbles
        .into_iter()
        .flat_map(|nibble| {
            let value = nibble ^ mask;
            (0..4u32).rev().map(move |k| (value >> k) & 1 == 1)
        })
        .collect()
}
/// Finished symbol as a square grid of modules, produced by `build_matrix`.
#[derive(Debug, Clone)]
pub(crate) struct AztecSymbolMatrix {
/// Side length of the symbol in modules.
pub size: usize,
/// `size` rows of `size` cells, indexed `pixels[y][x]`; 1 is a set module, 0 is clear.
pub pixels: Vec<Vec<u8>>,
}
fn lmv(layer: i32, pos: i32, fw: i32) -> (i32, i32) {
let lwid = fw + layer * 4;
let dir = (pos / 2) / lwid;
let col = (pos / 2) % lwid;
let row = pos % 2;
match dir {
0 => {
let x = -((lwid - 1) / 2) + 1 + col;
let y = (fw - 1) / 2 + layer * 2 + row;
(x, y)
}
1 => {
let x = fw / 2 + layer * 2 + row;
let y = (lwid - 1) / 2 - 1 - col;
(x, y)
}
2 => {
let x = -(-(lwid / 2) + 1 + col);
let y = -(fw / 2 + layer * 2 + row);
(x, y)
}
3 => {
let x = -((fw - 1) / 2 + layer * 2 + row);
let y = -(lwid / 2 - 1 - col);
(x, y)
}
_ => unreachable!("lmv: invalid dir {dir}"),
}
}
/// Converts centre-relative coordinates into an index of the flat, row-major
/// pixel buffer: `mid_idx` is the index of the centre cell, `size` the row
/// length. Positive `y` moves towards lower indices (earlier rows).
fn cmv(x: i32, y: i32, mid_idx: i32, size: i32) -> i32 {
    let row_offset = y * size;
    x - row_offset + mid_idx
}
/// Lays out a complete symbol: data layers, reference grid (full format only),
/// finder pattern, orientation marks and mode message.
///
/// # Parameters
///
/// - `format`: `"full"` selects the full layout; any other value (including
///   `"rune"`) uses the compact layout.
/// - `layers`: number of data layers (0 for a rune).
/// - `cws`: all codewords, data followed by error correction.
/// - `bpcw`: bits per codeword.
/// - `modebits`: mode message bits as produced by `build_mode_bits`.
///
/// # Panics
///
/// Panics if `cws` holds more bits than the symbol has data cells.
pub(crate) fn build_matrix(
format: &str,
layers: u8,
cws: &[u32],
bpcw: u8,
modebits: &[bool],
) -> AztecSymbolMatrix {
// Width of the region inside the first data layer, before any reference grid.
let fw_initial: i32 = if format == "full" { 12 } else { 9 };
let initial_size = fw_initial + (layers as i32) * 4 + 2;
let mid = (initial_size - 1) / 2;
let mid_idx = mid * initial_size + mid;
// Flat row-major buffer; -1 marks a cell that has not been assigned.
let mut pixs: Vec<i8> = vec![-1; (initial_size * initial_size) as usize];
let total_cw_bits = cws.len() * bpcw as usize;
// Total number of data cells in all layers for this format.
let symbol_bits = if format == "full" {
(layers as usize) * (layers as usize) * 16 + (layers as usize) * 112
} else {
(layers as usize) * (layers as usize) * 16 + (layers as usize) * 88
};
// Codeword bits are right-aligned; any leftover cells at the front stay 0.
let mut databits = vec![false; symbol_bits];
let offset = symbol_bits - total_cw_bits;
for (i, &cw) in cws.iter().enumerate() {
for k in 0..(bpcw as usize) {
let bit = (cw >> ((bpcw as usize) - 1 - k)) & 1 == 1;
databits[offset + i * (bpcw as usize) + k] = bit;
}
}
// Fill the layers from the innermost outwards, consuming databits from the end.
let mut bit_idx = 0;
for layer in 1..=(layers as i32) {
let layer_bits = (fw_initial + layer * 4) * 8;
for pos in 0..layer_bits {
let (x, y) = lmv(layer, pos, fw_initial);
let idx = cmv(x, y, mid_idx, initial_size);
let b = databits[databits.len() - 1 - bit_idx] as i8;
pixs[idx as usize] = b;
bit_idx += 1;
}
}
// Full symbols are re-laid-out into a larger buffer that contains the reference
// grid; compact symbols keep the buffer as is.
let (mut pixs, fw, size, _mid, mid_idx) = if format == "full" {
let fw2 = 13;
// Extra rows/columns taken by grid lines: integer form of
// trunc((layers + 10.5) / 7.5 - 1) * 2.
let growth = ((((layers as i32) + 10) * 2 + 1) / 15 - 1).max(0) * 2;
let new_size = fw2 + (layers as i32) * 4 + 2 + growth;
let new_mid = (new_size - 1) / 2;
let new_mid_idx = new_mid * new_size + new_mid;
let total = (new_size * new_size) as usize;
// -2 marks a cell that is not part of the reference grid.
let mut npixs: Vec<i8> = vec![-2; total];
let half = new_size / 2;
// Draw alternating grid lines through the centre and every 16 modules from it,
// mirrored into both directions and both axes.
let mut i = 0i32;
while i <= half {
for j in 0..new_size {
let coord = -half + j;
let val = (((half + j) + i) + 1) % 2;
let val = val as i8;
let idx_pp = cmv(coord, i, new_mid_idx, new_size);
npixs[idx_pp as usize] = val;
let idx_pn = cmv(coord, -i, new_mid_idx, new_size);
npixs[idx_pn as usize] = val;
let idx_np = cmv(i, coord, new_mid_idx, new_size);
npixs[idx_np as usize] = val;
let idx_nn = cmv(-i, coord, new_mid_idx, new_size);
npixs[idx_nn as usize] = val;
}
i += 16;
}
// Pour the old buffer, in order, into the cells the grid left free.
let mut j = 0usize;
for slot in npixs.iter_mut().take(total) {
if *slot == -2 {
*slot = pixs[j];
j += 1;
}
}
(npixs, fw2, new_size, new_mid, new_mid_idx)
} else {
(pixs, fw_initial, initial_size, mid, mid_idx)
};
// Finder pattern: concentric square rings around the centre, centre cell set.
let fw_half = fw / 2;
for di in -fw_half..=fw_half {
for dj in -fw_half..=fw_half {
let a = di.abs().max(dj.abs());
let idx = cmv(di, dj, mid_idx, size);
pixs[idx as usize] = ((a + 1) % 2) as i8;
}
}
// Orientation marks at the corners of the ring just outside the finder:
// (x, y, value).
let orient: &[(i32, i32, u8)] = &[
(-(fw_half + 1), fw_half, 1),
(-(fw_half + 1), fw_half + 1, 1),
(-fw_half, fw_half + 1, 1),
(fw_half + 1, fw_half + 1, 1),
(fw_half + 1, fw_half, 1),
(fw_half + 1, -fw_half, 1),
(fw_half, fw_half + 1, 0),
(fw_half + 1, -(fw_half + 1), 0),
(fw_half, -(fw_half + 1), 0),
(-fw_half, -(fw_half + 1), 0),
(-(fw_half + 1), -(fw_half + 1), 0),
(-(fw_half + 1), -fw_half, 0),
];
for &(dx, dy, v) in orient {
let idx = cmv(dx, dy, mid_idx, size);
pixs[idx as usize] = v as i8;
}
// Mode message: one bit per modemap position; extra positions are left alone
// if `modebits` is shorter than the map.
let modemap: &[(i32, i32)] = if format == "full" {
MODEMAP_FULL
} else {
MODEMAP_COMPACT
};
for (i, &(dx, dy)) in modemap.iter().enumerate() {
if i >= modebits.len() {
break;
}
let idx = cmv(dx, dy, mid_idx, size);
pixs[idx as usize] = modebits[i] as i8;
}
// Convert to rows; anything other than 1 (including unassigned cells) is clear.
let mut pixels = vec![vec![0u8; size as usize]; size as usize];
for y in 0..size {
for x in 0..size {
let v = pixs[(y * size + x) as usize];
pixels[y as usize][x as usize] = if v == 1 { 1 } else { 0 };
}
}
AztecSymbolMatrix {
size: size as usize,
pixels,
}
}
/// Centre-relative `(x, y)` positions of the 40 mode-message bits of a full
/// symbol, in bit order. They lie on the ring at distance 7 from the centre,
/// ten per side, skipping the corner cells and the cells on the central axes.
pub(crate) const MODEMAP_FULL: &[(i32, i32)] = &[
// side y = 7, x increasing
(-5, 7),
(-4, 7),
(-3, 7),
(-2, 7),
(-1, 7),
(1, 7),
(2, 7),
(3, 7),
(4, 7),
(5, 7),
// side x = 7, y decreasing
(7, 5),
(7, 4),
(7, 3),
(7, 2),
(7, 1),
(7, -1),
(7, -2),
(7, -3),
(7, -4),
(7, -5),
// side y = -7, x decreasing
(5, -7),
(4, -7),
(3, -7),
(2, -7),
(1, -7),
(-1, -7),
(-2, -7),
(-3, -7),
(-4, -7),
(-5, -7),
// side x = -7, y increasing
(-7, -5),
(-7, -4),
(-7, -3),
(-7, -2),
(-7, -1),
(-7, 1),
(-7, 2),
(-7, 3),
(-7, 4),
(-7, 5),
];
/// Encodes `data` as an Aztec symbol, using a compact symbol when the data fits
/// one and a full-range symbol otherwise.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `data` is empty, cannot be encoded, or
/// exceeds the largest symbol.
pub fn encode(data: &[u8]) -> Result<crate::encoding::BitMatrix, crate::error::Error> {
    let force_compact = false;
    encode_inner(data, force_compact)
}
/// Encodes `data` as a compact Aztec symbol only.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `data` is empty, cannot be encoded, or does
/// not fit any compact symbol size.
pub fn encode_compact(data: &[u8]) -> Result<crate::encoding::BitMatrix, crate::error::Error> {
    let force_compact = true;
    encode_inner(data, force_compact)
}
/// Encodes an Aztec Rune: a fixed-size symbol carrying one value in 0..=255.
///
/// `data` must consist of one to three ASCII digits.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `data` is empty, longer than three bytes,
/// contains a non-digit byte, or denotes a value above 255.
pub fn encode_rune(data: &str) -> Result<crate::encoding::BitMatrix, crate::error::Error> {
    let raw = data.as_bytes();
    if raw.is_empty() {
        return Err(crate::error::Error::InvalidData(
            "Aztec Rune: input must be a 1-3 digit integer (0..=255)".into(),
        ));
    }
    if raw.len() > 3 {
        return Err(crate::error::Error::InvalidData(format!(
            "Aztec Rune: input must be 1-3 digits, got {} chars",
            raw.len()
        )));
    }
    if let Some(bad) = raw.iter().copied().find(|byte| !byte.is_ascii_digit()) {
        return Err(crate::error::Error::InvalidData(format!(
            "Aztec Rune: non-digit byte 0x{bad:02x} in input"
        )));
    }
    let value = match data.parse::<u32>() {
        Ok(parsed) => parsed,
        Err(_) => {
            return Err(crate::error::Error::InvalidData(format!(
                "Aztec Rune: cannot parse {data:?} as integer"
            )));
        }
    };
    if value > 255 {
        return Err(crate::error::Error::InvalidData(format!(
            "Aztec Rune: value must be 0..=255, got {value}"
        )));
    }
    // A rune has no data layers and no codewords; the value lives in the mode message.
    let modebits = build_mode_bits("rune", 0, 0, false, Some(value as u8));
    let sym = build_matrix("rune", 0, &[], 6, &modebits);
    let side = sym.size;
    let mut bm = crate::encoding::BitMatrix::new(side, side);
    for y in 0..side {
        let row = &sym.pixels[y];
        for x in 0..side {
            bm.set(x, y, row[x] == 1);
        }
    }
    Ok(bm)
}
/// Shared implementation of `encode` and `encode_compact`.
///
/// Encodes the message bits, picks the smallest fitting symbol (compact sizes
/// first, then full sizes unless `force_compact` is set) with 23% + 3 codewords
/// of error correction, builds the codewords and mode message, and renders the
/// matrix.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `data` is empty, cannot be encoded, or does
/// not fit the allowed symbol range.
fn encode_inner(
    data: &[u8],
    force_compact: bool,
) -> Result<crate::encoding::BitMatrix, crate::error::Error> {
    if data.is_empty() {
        return Err(crate::error::Error::InvalidData(
            "Aztec: input data must not be empty".into(),
        ));
    }
    let msgbits = encode_msg(data)?;
    let bit_count = msgbits.len();
    let compact_fit = fit_metric(bit_count, "compact", -1, 23, 3, false);
    let (format, metric_idx) = if let Some(idx) = compact_fit {
        ("compact", idx)
    } else if force_compact {
        return Err(crate::error::Error::InvalidData(
            "Aztec Compact: input data exceeds the L1-L4 compact size range".into(),
        ));
    } else {
        match fit_metric(bit_count, "full", -1, 23, 3, false) {
            Some(idx) => ("full", idx),
            None => {
                return Err(crate::error::Error::InvalidData(
                    "Aztec: input data exceeds maximum symbol size".into(),
                ));
            }
        }
    };
    let metric = METRICS[metric_idx];
    let cws = build_codewords(&msgbits, metric_idx)?;
    // The mode message carries the number of data codewords, not the total.
    let data_cw_count = bit_stuff(&msgbits, metric.bps).len();
    let modebits = build_mode_bits(format, metric.layers, data_cw_count, false, None);
    let sym = build_matrix(format, metric.layers, &cws, metric.bps, &modebits);
    let side = sym.size;
    let mut bm = crate::encoding::BitMatrix::new(side, side);
    for y in 0..side {
        let row = &sym.pixels[y];
        for x in 0..side {
            bm.set(x, y, row[x] == 1);
        }
    }
    Ok(bm)
}
/// Centre-relative `(x, y)` positions of the 28 mode-message bits of a compact
/// symbol (also used for runes), in bit order. They lie on the ring at distance
/// 5 from the centre, seven per side, skipping the corner cells.
pub(crate) const MODEMAP_COMPACT: &[(i32, i32)] = &[
// side y = 5, x increasing
(-3, 5),
(-2, 5),
(-1, 5),
(0, 5),
(1, 5),
(2, 5),
(3, 5),
// side x = 5, y decreasing
(5, 3),
(5, 2),
(5, 1),
(5, 0),
(5, -1),
(5, -2),
(5, -3),
// side y = -5, x decreasing
(3, -5),
(2, -5),
(1, -5),
(0, -5),
(-1, -5),
(-2, -5),
(-3, -5),
// side x = -5, y increasing
(-5, -3),
(-5, -2),
(-5, -1),
(-5, 0),
(-5, 1),
(-5, 2),
(-5, 3),
];
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn char_sizes_shape() {
assert_eq!(CHAR_SIZES, [5, 5, 5, 5, 4, 8]);
}
#[test]
fn latch_len_shape() {
for (s, row) in LATCH_LEN.iter().enumerate() {
assert_eq!(row[s], 0, "self-latch[{s}] should be 0");
}
assert_eq!(LATCH_LEN[0][1], 5); }
#[test]
fn shift_len_shape() {
for &v in &SHIFT_LEN[3] {
assert_eq!(v, u16::MAX);
}
assert_eq!(SHIFT_LEN[4][0], 4);
}
#[test]
fn metrics_table_shape() {
assert_eq!(METRICS.len(), 37);
assert_eq!(METRICS[0].format, "rune");
assert_eq!(METRICS[0].has_data, 0);
assert_eq!(METRICS[1].format, "compact");
assert_eq!(METRICS[1].layers, 1);
assert_eq!(METRICS[1].ncws, 17);
assert_eq!(METRICS[36].format, "full");
assert_eq!(METRICS[36].layers, 32);
assert_eq!(METRICS[36].ncws, 1664);
assert_eq!(METRICS[36].bps, 12);
for m in METRICS.iter() {
assert!(
matches!(m.bps, 6 | 8 | 10 | 12),
"unexpected bps {} for layers {}",
m.bps,
m.layers,
);
}
}
#[test]
fn upper_codeword_known_values() {
assert_eq!(upper_codeword(b' '), Some(1));
assert_eq!(upper_codeword(b'A'), Some(2));
assert_eq!(upper_codeword(b'Z'), Some(27));
assert_eq!(upper_codeword(b'a'), None);
assert_eq!(upper_codeword(b'0'), None);
assert_eq!(upper_codeword(b'!'), None);
}
#[test]
fn lower_codeword_known_values() {
assert_eq!(lower_codeword(b' '), Some(1));
assert_eq!(lower_codeword(b'a'), Some(2));
assert_eq!(lower_codeword(b'z'), Some(27));
assert_eq!(lower_codeword(b'A'), None);
}
#[test]
fn digit_codeword_known_values() {
assert_eq!(digit_codeword(b' '), Some(1));
assert_eq!(digit_codeword(b'0'), Some(2));
assert_eq!(digit_codeword(b'9'), Some(11));
assert_eq!(digit_codeword(b','), Some(12));
assert_eq!(digit_codeword(b'.'), Some(13));
assert_eq!(digit_codeword(b'A'), None);
}
#[test]
fn mixed_codeword_known_values() {
assert_eq!(mixed_codeword(b' '), Some(1));
assert_eq!(mixed_codeword(1), Some(2));
assert_eq!(mixed_codeword(13), Some(14));
assert_eq!(mixed_codeword(27), Some(15));
assert_eq!(mixed_codeword(b'@'), Some(20));
assert_eq!(mixed_codeword(127), Some(27));
assert_eq!(mixed_codeword(b'a'), None);
}
/// Known `punct_codeword` results for CR and a handful of punctuation bytes;
/// `A` is rejected.
#[test]
fn punct_codeword_known_values() {
    let cases = [
        (13, Some(1)),
        (b'!', Some(6)),
        (b'?', Some(26)),
        (b'[', Some(27)),
        (b'{', Some(29)),
        (b'A', None),
    ];
    for &(byte, want) in cases.iter() {
        assert_eq!(punct_codeword(byte), want);
    }
}
/// `"HELLO"` encoded entirely in the upper state gives one codeword per letter.
#[test]
fn encode_single_state_pure_uppercase() {
    let encoded = encode_single_state(STATE_UPPER, b"HELLO").unwrap();
    let expected = vec![9, 6, 13, 13, 16];
    assert_eq!(encoded, expected);
}
/// `"12345"` encoded entirely in the digit state gives one codeword per digit.
#[test]
fn encode_single_state_pure_digits() {
    let encoded = encode_single_state(STATE_DIGIT, b"12345").unwrap();
    let expected = vec![3, 4, 5, 6, 7];
    assert_eq!(encoded, expected);
}
/// Input outside the requested state's alphabet makes `encode_single_state`
/// return `None`.
#[test]
fn encode_single_state_rejects_wrong_alphabet() {
    let lowercase_in_upper = encode_single_state(STATE_UPPER, b"hello");
    assert!(lowercase_in_upper.is_none());

    let letters_in_digit = encode_single_state(STATE_DIGIT, b"ABC");
    assert!(letters_in_digit.is_none());
}
/// With a 5-bit width, codeword 9 packs as `01001`, and a second codeword (6)
/// occupies bits 5..10 as `00110`.
#[test]
fn pack_codewords_to_bits_5bit() {
    let single = pack_codewords_to_bits(&[9], 5);
    assert_eq!(single, vec![false, true, false, false, true]);

    let pair = pack_codewords_to_bits(&[9, 6], 5);
    assert_eq!(pair.len(), 10);
    let second = &pair[5..];
    assert_eq!(second, &[false, false, true, true, false]);
}
/// With a 4-bit width, codeword 3 packs as `0011`.
#[test]
fn pack_codewords_to_bits_4bit() {
    let packed = pack_codewords_to_bits(&[3], 4);
    let expected = vec![false, false, true, true];
    assert_eq!(packed, expected);
}
/// For all-uppercase input the greedy encoder output equals the upper-state
/// codewords packed at 5 bits each (25 bits for `"HELLO"`).
#[test]
fn encode_greedy_pure_upper_matches_single_state() {
    let got = encode_greedy(b"HELLO").unwrap();
    let want = {
        let upper_cws = encode_single_state(STATE_UPPER, b"HELLO").unwrap();
        pack_codewords_to_bits(&upper_cws, 5)
    };
    assert_eq!(got, want);
    assert_eq!(got.len(), 25);
}
/// All-lowercase input starts with the 5-bit pattern `11100`, followed by the
/// lower-state codewords; `"hello"` is 30 bits in total.
#[test]
fn encode_greedy_pure_lower_starts_with_ll() {
    let got = encode_greedy(b"hello").unwrap();
    let (latch, body) = got.split_at(5);
    assert_eq!(latch, &[true, true, true, false, false]);

    let lower_cws = encode_single_state(STATE_LOWER, b"hello").unwrap();
    let lower_bits = pack_codewords_to_bits(&lower_cws, 5);
    assert_eq!(body, &lower_bits[..]);
    assert_eq!(got.len(), 30);
}
/// `"Aa b"` is expected to encode as the five 5-bit codewords 2, 28, 2, 1, 3,
/// i.e. the space does not trigger another state change.
#[test]
fn encode_greedy_space_in_lower_stays_in_lower() {
    let got = encode_greedy(b"Aa b").unwrap();
    // Expand each codeword into its 5 bits, most significant first.
    let want: Vec<bool> = [2u8, 28, 2, 1, 3]
        .iter()
        .flat_map(|&cw| (0..5).rev().map(move |k| (cw >> k) & 1 == 1))
        .collect();
    assert_eq!(got, want);
    assert_eq!(got.len(), 25);
}
/// `"123"` is expected to encode as the 5-bit codeword 30 followed by the
/// 4-bit codewords 3, 4, 5 (17 bits).
#[test]
fn encode_greedy_pure_digits_latches_to_digit() {
    let got = encode_greedy(b"123").unwrap();

    let latch_bits = (0..5).rev().map(|k| (30u8 >> k) & 1 == 1);
    let digit_bits = [3u8, 4, 5]
        .iter()
        .flat_map(|&cw| (0..4).rev().map(move |k| (cw >> k) & 1 == 1));
    let want: Vec<bool> = latch_bits.chain(digit_bits).collect();

    assert_eq!(got, want);
    assert_eq!(got.len(), 17);
}
/// The greedy encoder fails on byte `0x80`, and the error text mentions
/// "Byte-state".
#[test]
fn encode_greedy_rejects_high_bit_byte() {
    let msg = encode_greedy(&[0x80]).unwrap_err().to_string();
    let mentions_byte_state = msg.contains("Byte-state");
    assert!(mentions_byte_state, "unexpected error message: {msg}");
}
/// `"Hello"` is expected to encode as the 5-bit codewords 9, 28, 6, 13, 13, 16.
#[test]
fn encode_greedy_handles_mixed_upper_lower() {
    let got = encode_greedy(b"Hello").unwrap();
    // Expand each codeword into its 5 bits, most significant first.
    let want: Vec<bool> = [9u8, 28, 6, 13, 13, 16]
        .iter()
        .flat_map(|&cw| (0..5).rev().map(move |k| (cw >> k) & 1 == 1))
        .collect();
    assert_eq!(got, want);
}
/// The greedy encoder fails on `"1a"` and reports that there is no direct
/// latch from state 4 to state 1.
#[test]
fn encode_greedy_rejects_unreachable_latch() {
    let msg = encode_greedy(b"1a").unwrap_err().to_string();
    let names_missing_latch = msg.contains("no direct latch from state 4 to 1");
    assert!(names_missing_latch, "unexpected error message: {msg}");
}
/// For all-uppercase input `encode_dp` returns just the byte values, with no
/// sentinel entries.
#[test]
fn encode_dp_pure_upper() {
    let seq = encode_dp(b"HELLO").unwrap();
    let want: Vec<i32> = b"HELLO".iter().map(|&b| i32::from(b)).collect();
    assert_eq!(seq, want);
}
/// For all-lowercase input `encode_dp` emits `LATCH_LOWER` first, then the
/// byte values.
#[test]
fn encode_dp_pure_lower_inserts_ll() {
    let seq = encode_dp(b"hello").unwrap();
    assert_eq!(seq[0], LATCH_LOWER);

    let letters: Vec<i32> = b"hello".iter().map(|&b| i32::from(b)).collect();
    assert_eq!(&seq[1..], &letters[..]);
}
/// For all-digit input `encode_dp` emits `LATCH_DIGIT` first, then the byte
/// values.
#[test]
fn encode_dp_pure_digit_inserts_ld() {
    let seq = encode_dp(b"12345").unwrap();
    assert_eq!(seq[0], LATCH_DIGIT);

    let digits: Vec<i32> = b"12345".iter().map(|&b| i32::from(b)).collect();
    assert_eq!(&seq[1..], &digits[..]);
}
/// `"ab1"` must start with `LATCH_LOWER, 'a', 'b', LATCH_DIGIT, '1'`.
///
/// Despite the name, the input used here moves from the lower state to the
/// digit state.
#[test]
fn encode_dp_lower_to_upper_path() {
    let seq = encode_dp(b"ab1").unwrap();
    let expected_prefix = [
        LATCH_LOWER,
        i32::from(b'a'),
        i32::from(b'b'),
        LATCH_DIGIT,
        i32::from(b'1'),
    ];
    for (i, &want) in expected_prefix.iter().enumerate() {
        assert_eq!(seq[i], want);
    }
}
/// A lone `0x80` byte is emitted as `SHIFT_BYTE` followed by the byte value.
#[test]
fn encode_dp_accepts_high_bit_byte_via_byte_mode() {
    let expected = vec![SHIFT_BYTE, 0x80];
    let seq = encode_dp(&[0x80]).unwrap();
    assert_eq!(seq, expected);
}
/// One shifted byte yields 18 bits: five 1-bits, then `00001`, then the eight
/// bits of `0x80`.
#[test]
fn seq_to_bits_byte_mode_single_byte() {
    let bits = seq_to_bits(&[SHIFT_BYTE, 0x80]).unwrap();
    assert_eq!(bits.len(), 18);

    let (shift, rest) = bits.split_at(5);
    let (count, payload) = rest.split_at(5);
    assert_eq!(shift, &[true; 5]);
    assert_eq!(count, &[false, false, false, false, true]);
    assert_eq!(
        payload,
        &[true, false, false, false, false, false, false, false]
    );
}
/// A shifted byte followed by `LATCH_UPPER` and `'X'` yields 23 bits.
#[test]
fn seq_to_bits_byte_mode_with_exit() {
    let seq = [SHIFT_BYTE, 0xAB, LATCH_UPPER, i32::from(b'X')];
    let bit_count = seq_to_bits(&seq).unwrap().len();
    assert_eq!(bit_count, 23);
}
/// Three bytes after `SHIFT_BYTE` yield 34 bits.
#[test]
fn seq_to_bits_byte_mode_multi_byte() {
    let seq = [SHIFT_BYTE, 0x80, 0x81, 0x82];
    let bit_count = seq_to_bits(&seq).unwrap().len();
    assert_eq!(bit_count, 34);
}
/// A 32-byte run yields 277 bits; bits 5..10 are all zero and bits 10..21
/// encode the value 1 in eleven bits.
#[test]
fn seq_to_bits_byte_mode_long_run() {
    let seq: Vec<i32> = std::iter::once(SHIFT_BYTE)
        .chain(std::iter::repeat_n(0x80, 32))
        .collect();
    let bits = seq_to_bits(&seq).unwrap();
    assert_eq!(bits.len(), 277);
    assert_eq!(&bits[5..10], &[false; 5]);

    // Eleven bits with only the least significant one set.
    let mut extended_count = [false; 11];
    extended_count[10] = true;
    assert_eq!(&bits[10..21], &extended_count);
}
/// `encode_msg` accepts a lone `0x80` byte and produces 18 bits.
#[test]
fn encode_msg_handles_high_bit_byte() {
    let bit_count = encode_msg(b"\x80").unwrap().len();
    assert_eq!(bit_count, 18);
}
/// A sequence of plain uppercase byte values serialises to the same bits as
/// the upper-state codewords packed at 5 bits each.
#[test]
fn seq_to_bits_pure_upper() {
    let seq: Vec<i32> = b"HELLO".iter().map(|&b| i32::from(b)).collect();
    let bits = seq_to_bits(&seq).unwrap();

    let upper_cws = encode_single_state(STATE_UPPER, b"HELLO").unwrap();
    let want = pack_codewords_to_bits(&upper_cws, 5);
    assert_eq!(bits, want);
}
/// `LATCH_LOWER` followed by `'h'` serialises to `11100` then `01001`.
#[test]
fn seq_to_bits_with_latch() {
    let bits = seq_to_bits(&[LATCH_LOWER, i32::from(b'h')]).unwrap();

    let latch = [true, true, true, false, false];
    let letter = [false, true, false, false, true];
    let want: Vec<bool> = latch.iter().chain(letter.iter()).copied().collect();
    assert_eq!(bits, want);
}
/// `LATCH_DIGIT` followed by `'1'` serialises to the 5 bits `11110` and then
/// the 4 bits `0011`.
#[test]
fn seq_to_bits_digit_latch_4bit_codewords() {
    let bits = seq_to_bits(&[LATCH_DIGIT, i32::from(b'1')]).unwrap();

    let mut want = vec![true, true, true, true, false];
    want.extend_from_slice(&[false, false, true, true]);
    assert_eq!(bits, want);
}
/// For all-uppercase input `encode_msg` and `encode_greedy` produce the same
/// bits.
#[test]
fn encode_msg_pure_upper_matches_greedy() {
    let payload = b"HELLO";
    let dp = encode_msg(payload).unwrap();
    let greedy = encode_greedy(payload).unwrap();
    assert_eq!(dp, greedy);
}
/// `"Hello"` encodes to 30 bits that begin with `01001` followed by `11100`.
#[test]
fn encode_msg_handles_capitalised_word() {
    let bits = encode_msg(b"Hello").unwrap();
    assert_eq!(bits.len(), 30);

    let (first, rest) = bits.split_at(5);
    assert_eq!(first, &[false, true, false, false, true]);
    assert_eq!(&rest[..5], &[true, true, true, false, false]);
}
/// `"A!"` is expected to use `SHIFT_PUNCT` before `'!'`, giving 15 bits.
#[test]
fn encode_msg_uses_shift_for_single_punct() {
    let seq = encode_dp(b"A!").unwrap();
    let expected = vec![i32::from(b'A'), SHIFT_PUNCT, i32::from(b'!')];
    assert_eq!(seq, expected);

    let bit_count = seq_to_bits(&seq).unwrap().len();
    assert_eq!(bit_count, 15);
}
/// For several sample inputs, `encode_msg` returns the same bits as running
/// `encode_dp` and then `seq_to_bits`.
#[test]
fn encode_msg_round_trips_to_bits() {
    const INPUTS: [&str; 7] = [
        "HELLO",
        "Hello",
        "Hello world",
        "12345",
        "A1",
        "ABC123",
        "hello world",
    ];
    for input in INPUTS.iter().copied() {
        let payload = input.as_bytes();
        let bits = encode_msg(payload).unwrap();
        let two_step = {
            let seq = encode_dp(payload).unwrap();
            seq_to_bits(&seq).unwrap()
        };
        assert_eq!(
            bits, two_step,
            "encode_msg != encode_dp+seq_to_bits for {input:?}"
        );
    }
}
/// Six alternating bits at `bps = 6` become the single codeword `010101`.
#[test]
fn bit_stuff_no_padding_bps6() {
    let bits: Vec<bool> = b"010101".iter().map(|&c| c == b'1').collect();
    let cws = bit_stuff(&bits, 6);
    assert_eq!(cws, vec![0b010101]);
}
/// Twelve input bits starting with five zeros are expected to become the
/// three codewords `000001`, `110101`, `111110`.
#[test]
fn bit_stuff_all_zero_inserts_one() {
    let bits: Vec<bool> = b"000001101011".iter().map(|&c| c == b'1').collect();
    let cws = bit_stuff(&bits, 6);
    assert_eq!(cws, vec![0b000001, 0b110101, 0b111110]);
}
/// Nine input bits starting with five ones are expected to become the two
/// codewords `111110` and `010011`.
#[test]
fn bit_stuff_all_ones_inserts_zero() {
    let bits: Vec<bool> = b"111110100".iter().map(|&c| c == b'1').collect();
    let cws = bit_stuff(&bits, 6);
    assert_eq!(cws, vec![0b111110, 0b010011]);
}
/// Six 1-bits at `bps = 6` are expected to become two `111110` codewords.
#[test]
fn bit_stuff_tail_all_ones_flips_last() {
    let all_ones = vec![true; 6];
    let cws = bit_stuff(&all_ones, 6);
    let expected = vec![0b111110, 0b111110];
    assert_eq!(cws, expected);
}
/// Empty input produces no codewords at all.
#[test]
fn bit_stuff_empty_input() {
    let produced = bit_stuff(&[], 6);
    let count = produced.len();
    assert!(
        produced.is_empty(),
        "bit_stuff(empty, bps=6) must yield empty codeword vec (no spurious padding/sentinel from mutation); got len={}",
        count
    );
}
/// Eight alternating bits at `bps = 8` become the single codeword `10101010`.
#[test]
fn bit_stuff_bps8() {
    let bits: Vec<bool> = b"10101010".iter().map(|&c| c == b'1').collect();
    let cws = bit_stuff(&bits, 8);
    assert_eq!(cws, vec![0b10101010]);
}
/// A 25-bit message in "compact" format selects the 1-layer, 17-codeword
/// metric.
#[test]
fn fit_metric_picks_compact_l1_for_short_input() {
    let idx = fit_metric(25, "compact", -1, 23, 3, false).unwrap();
    let chosen = &METRICS[idx];
    assert_eq!(chosen.format, "compact");
    assert_eq!(chosen.layers, 1);
    assert_eq!(chosen.ncws, 17);
}
/// A 500-bit message fits no "compact" metric, but does fit a "full" metric
/// with at least two layers.
#[test]
fn fit_metric_picks_larger_for_long_input() {
    let compact_fit = fit_metric(500, "compact", -1, 23, 3, false);
    assert!(
        compact_fit.is_none(),
        "500 bits should not fit any compact size"
    );

    let full_idx = fit_metric(500, "full", -1, 23, 3, false).unwrap();
    let chosen = &METRICS[full_idx];
    assert_eq!(chosen.format, "full");
    assert!(chosen.layers >= 2);
}
/// Requesting 5 layers in "full" format returns a 5-layer full metric.
#[test]
fn fit_metric_forced_layers() {
    let idx = fit_metric(50, "full", 5, 23, 3, false).unwrap();
    let chosen = &METRICS[idx];
    assert_eq!(chosen.layers, 5);
    assert_eq!(chosen.format, "full");
}
/// `fit_metric` returns `None` for unknown or empty formats, for layer counts
/// that do not exist in the requested format, and for an error-correction
/// level of 100.
#[test]
fn fit_metric_none_paths_invalid_format_and_impossible_layer() {
    // (format, layers, eclevel, reason shown on failure)
    let cases = [
        ("garbage", -1, 23, "unknown format 'garbage' → None"),
        ("", -1, 23, "empty format → None"),
        ("compact", 5, 23, "compact L5 doesn't exist → None"),
        ("full", 33, 23, "full L33 doesn't exist → None"),
        ("full", -1, 100, "eclevel=100% leaves no data slots → None"),
    ];
    for &(format, layers, eclevel, why) in cases.iter() {
        assert!(
            fit_metric(25, format, layers, eclevel, 3, false).is_none(),
            "{}",
            why
        );
    }
}
/// With metric index 1, 102 alternating bits produce exactly 17 codewords,
/// while 108 bits are rejected with an `InvalidData` error that mentions
/// "Aztec", 18 and 17.
#[test]
fn build_codewords_exact_fit_and_over_capacity_branches() {
    let alternating = |n: usize| -> Vec<bool> { (0..n).map(|i| i % 2 == 0).collect() };

    let exact_fit_bits = alternating(102);
    let cws_exact = build_codewords(&exact_fit_bits, 1)
        .expect("compact L1 with 102 alternating bits must produce 17 cws (exact fit)");
    assert_eq!(
        cws_exact.len(),
        17,
        "exact-fit branch returns cws as-is (no ECC appended); 17 == ncws"
    );

    let over_cap_bits = alternating(108);
    let failure = build_codewords(&over_cap_bits, 1)
        .expect_err("compact L1 with 108 bits → 18 cws > 17 ncws must error");
    let msg = match failure {
        crate::error::Error::InvalidData(msg) => msg,
        err => panic!("over-capacity error must be InvalidData; got {err:?}"),
    };
    assert!(msg.contains("Aztec"), "diagnostic prefix: {msg}");
    assert!(msg.contains("18"), "echo actual cw count 18: {msg}");
    assert!(msg.contains("17"), "echo capacity 17: {msg}");
}
/// `"HELLO"` (25 message bits) with the fitted compact metric produces as many
/// codewords as that metric's `ncws`, and the result is non-empty.
#[test]
fn build_codewords_short_input_compact_l1() {
    let msgbits = encode_msg(b"HELLO").unwrap();
    assert_eq!(msgbits.len(), 25);

    let metric_idx = fit_metric(msgbits.len(), "compact", -1, 23, 3, false).unwrap();
    let cws = build_codewords(&msgbits, metric_idx).unwrap();
    let expected_count = METRICS[metric_idx].ncws as usize;
    assert_eq!(cws.len(), expected_count);
    assert!(
        !cws.is_empty(),
        "build_codewords(\"HELLO\" → 25 msgbits, compact L1 metric) must produce non-empty codewords (compact L1 ncws=17, bpcw=6); got len={}",
        cws.len()
    );
}
/// The mode message built for the "compact" format is 28 bits long.
#[test]
fn build_mode_bits_compact_l1_zero_cws() {
    let bit_count = build_mode_bits("compact", 1, 1, false, None).len();
    assert_eq!(bit_count, 28);
}
/// The mode message built for the "full" format is 40 bits long.
#[test]
fn build_mode_bits_full_size() {
    let bit_count = build_mode_bits("full", 1, 5, false, None).len();
    assert_eq!(bit_count, 40);
}
/// The mode message for rune value 0 is 28 bits long and starts with `1010`.
#[test]
fn build_mode_bits_rune_xor() {
    let bits = build_mode_bits("rune", 0, 0, false, Some(0));
    assert_eq!(bits.len(), 28);

    let leading = &bits[..4];
    assert_eq!(leading, &[true, false, true, false]);
}
/// A 1-layer "compact" matrix is 15x15.
#[test]
fn build_matrix_compact_l1_size() {
    let zero_codewords = vec![0u32; 17];
    let zero_mode_bits = vec![false; 28];
    let m = build_matrix("compact", 1, &zero_codewords, 6, &zero_mode_bits);
    assert_eq!(m.size, 15);
    assert_eq!(m.pixels.len(), 15);
    assert_eq!(m.pixels[0].len(), 15);
}
/// `encode(b"HELLO")` must yield a 15x15 bitmap whose modules equal the
/// reference grid below (per the test name, taken from bwip-js output).
#[test]
fn encode_hello_matches_bwip_js_compact_l1() {
    let bm = encode(b"HELLO").unwrap();
    assert_eq!(bm.width(), 15);
    assert_eq!(bm.height(), 15);
    let want: [[u8; 15]; 15] = [
        [0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1],
        [0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0],
        [0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1],
        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
        [1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1],
        [0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0],
        [1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
        [1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0],
        [1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1],
        [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1],
        [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0],
        [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0],
        [0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1],
    ];
    // Walk the grid row by row and compare each module with the bitmap.
    let cells = want.iter().enumerate().flat_map(|(y, want_row)| {
        want_row
            .iter()
            .enumerate()
            .map(move |(x, &w)| (x, y, w))
    });
    for (x, y, w) in cells {
        let got = u8::from(bm.get(x, y));
        assert_eq!(got, w, "mismatch at ({x}, {y})");
    }
}
/// The sequence for `"X, "` must contain the `PAIR_4` sentinel.
#[test]
fn encode_dp_pair_compression_comma_space() {
    let seq = encode_dp(b"X, ").unwrap();
    let has_pair = seq.iter().any(|&entry| entry == PAIR_4);
    assert!(has_pair, "expected PAIR_4 in seq for 'X, '; got {seq:?}");
}
/// `"Hello, World"` encodes to a bitmap 19 modules wide whose module at (9, 9)
/// is set.
#[test]
fn encode_hello_world_matches_bwip_js() {
    let bm = encode(b"Hello, World").unwrap();
    assert_eq!(bm.width(), 19);

    let mid = 9;
    let centre_lit = bm.get(mid, mid);
    assert!(centre_lit, "bull's-eye centre should be lit");
}
/// For `"HELLO"`, `encode` and `encode_compact` return bitmaps with the same
/// dimensions and identical modules.
#[test]
fn encode_compact_matches_encode_for_short_input() {
    let bm_auto = encode(b"HELLO").unwrap();
    let bm_compact = encode_compact(b"HELLO").unwrap();

    let (width, height) = (bm_auto.width(), bm_auto.height());
    assert_eq!(width, bm_compact.width());
    assert_eq!(height, bm_compact.height());

    for y in 0..height {
        for x in 0..width {
            let auto_px = bm_auto.get(x, y);
            let compact_px = bm_compact.get(x, y);
            assert_eq!(auto_px, compact_px, "diverge at ({x},{y})");
        }
    }
}
/// A 300-byte payload is rejected by `encode_compact` with an `InvalidData`
/// message that carries the compact-specific wording and not the full-size
/// wording.
#[test]
fn encode_compact_rejects_payload_that_exceeds_l4() {
    let long = vec![b'A'; 300];
    let msg = match encode_compact(&long).expect_err("L4-overflow must error") {
        crate::error::Error::InvalidData(msg) => msg,
        other => panic!("unexpected error variant: {other:?}"),
    };

    // (required substring, failure text shown before the echoed message)
    let required = [
        (
            "Aztec Compact:",
            "missing `Aztec Compact:` symbology prefix",
        ),
        (
            "input data exceeds",
            "missing `input data exceeds` predicate",
        ),
        (
            "L1-L4 compact size range",
            "missing `L1-L4 compact size range` range-name anchor",
        ),
    ];
    for &(needle, why) in required.iter() {
        assert!(msg.contains(needle), "{why}: {msg:?}");
    }
    assert!(
        !msg.contains("maximum symbol size"),
        "cross-arm contamination: compact-overflow msg mentions full-size sibling: {msg:?}"
    );
}
/// Empty input is rejected by `encode_compact` with an `InvalidData` message
/// about emptiness, not the compact-overflow message.
#[test]
fn encode_compact_rejects_empty_input() {
    let msg = match encode_compact(b"") {
        Err(crate::error::Error::InvalidData(msg)) => msg,
        other => panic!("empty Aztec compact should reject as InvalidData, got {other:?}"),
    };
    assert!(msg.contains("Aztec:"), "missing `Aztec:` prefix: {msg}");
    assert!(
        msg.contains("must not be empty"),
        "missing `must not be empty` predicate: {msg}"
    );
    let leaked_overflow = msg.contains("exceeds the L1-L4 compact size range");
    assert!(
        !leaked_overflow,
        "wrong arm — compact-overflow diagnostic leaked: {msg}"
    );
}
/// `encode_rune` must yield an 11x11 bitmap matching the reference rows below
/// for the inputs "0", "42", "128" and "255" (per the test name, the rows come
/// from bwip-js output).
#[test]
fn encode_rune_matches_bwip_js_pixs() {
    let cases: &[(&str, [&str; 11])] = &[
        (
            "0",
            [
                "11101010101",
                "11111111111",
                "01000000010",
                "11011111011",
                "01010001010",
                "11010101011",
                "01010001010",
                "11011111011",
                "01000000010",
                "01111111111",
                "00101010100",
            ],
        ),
        (
            "42",
            [
                "11100000001",
                "11111111111",
                "11000000010",
                "01011111010",
                "11010001010",
                "01010101010",
                "11010001011",
                "01011111011",
                "11000000011",
                "01111111111",
                "00001011100",
            ],
        ),
        (
            "128",
            [
                "11001010101",
                "11111111111",
                "11000000010",
                "01011111011",
                "11010001010",
                "11010101010",
                "01010001010",
                "01011111010",
                "11000000010",
                "01111111111",
                "00100010000",
            ],
        ),
        (
            "255",
            [
                "11010101001",
                "11111111111",
                "01000000011",
                "11011111011",
                "11010001011",
                "01010101011",
                "01010001010",
                "11011111011",
                "11000000010",
                "01111111111",
                "00110011100",
            ],
        ),
    ];
    for &(input, rows) in cases {
        let bm = match encode_rune(input) {
            Ok(bm) => bm,
            Err(e) => panic!("encode_rune({input:?}) failed: {e}"),
        };
        assert_eq!(bm.width(), 11, "rune {input}: width != 11");
        assert_eq!(bm.height(), 11, "rune {input}: height != 11");
        for (y, row) in rows.iter().enumerate() {
            // Render the bitmap row in the same '0'/'1' form as the reference.
            let got: String = (0..11)
                .map(|x| if bm.get(x, y) { '1' } else { '0' })
                .collect();
            assert_eq!(
                got, *row,
                "rune {input}: row {y} mismatch\n want: {row}\n got: {got}"
            );
        }
    }
}
/// `encode_rune` rejects non-digit, over-long, out-of-range and empty input
/// with `InvalidData`, and each message carries that case's wording.
#[test]
fn encode_rune_rejects_invalid_input() {
    // Runs `encode_rune` and returns the `InvalidData` message; any other
    // outcome fails the test, naming `subject`.
    let rejection = |input: &str, subject: &str| match encode_rune(input) {
        Err(crate::error::Error::InvalidData(msg)) => msg,
        other => panic!("{subject} should reject as InvalidData, got {other:?}"),
    };

    let msg = rejection("4A", "`4A`");
    let required = [
        (
            "Aztec Rune:",
            "non-digit arm missing `Aztec Rune:` prefix",
        ),
        ("non-digit byte", "non-digit arm missing predicate"),
        (
            "0x41",
            "non-digit arm missing hex echo `0x41` for 'A'",
        ),
    ];
    for &(needle, why) in required.iter() {
        assert!(msg.contains(needle), "{why}: {msg}");
    }

    let msg = rejection("1000", "`1000` (4-char)");
    let required = [
        (
            "Aztec Rune:",
            "too-long arm missing `Aztec Rune:` prefix",
        ),
        (
            "must be 1-3 digits",
            "too-long arm missing `must be 1-3 digits` predicate",
        ),
        (
            "got 4 chars",
            "too-long arm missing `got 4 chars` length echo",
        ),
    ];
    for &(needle, why) in required.iter() {
        assert!(msg.contains(needle), "{why}: {msg}");
    }

    let msg = rejection("256", "`256`");
    let required = [
        ("Aztec Rune:", "out-of-range arm missing prefix"),
        (
            "must be 0..=255",
            "out-of-range arm missing `must be 0..=255` predicate",
        ),
        (
            "got 256",
            "out-of-range arm missing `got 256` value echo",
        ),
    ];
    for &(needle, why) in required.iter() {
        assert!(msg.contains(needle), "{why}: {msg}");
    }

    let msg = rejection("", "empty Aztec Rune");
    let required = [
        ("Aztec Rune:", "empty arm missing `Aztec Rune:` prefix"),
        (
            "1-3 digit integer",
            "empty arm missing `1-3 digit integer` predicate",
        ),
        (
            "(0..=255)",
            "empty arm missing `(0..=255)` range hint",
        ),
    ];
    for &(needle, why) in required.iter() {
        assert!(msg.contains(needle), "{why}: {msg}");
    }
    assert!(
        !msg.contains("non-digit byte"),
        "wrong arm — non-digit diagnostic leaked: {msg}"
    );
}
/// `encode("café")` must yield a 15x15 bitmap whose modules equal the
/// reference grid below (per the test name, taken from bwip-js output).
///
/// The UTF-8 form of "café" contains bytes above 0x7F, so this covers input
/// with high-bit bytes.
#[test]
fn encode_high_bit_byte_matches_bwip_js() {
    let bm = encode("café".as_bytes()).unwrap();
    assert_eq!(bm.width(), 15);
    // The grid comparison below only reads rows 0..15, so a bitmap with extra
    // rows would otherwise pass unnoticed; pin the height like the HELLO test.
    assert_eq!(bm.height(), 15);
    let want: [[u8; 15]; 15] = [
        [0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0],
        [1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0],
        [1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1],
        [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1],
        [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
        [0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0],
        [0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0],
        [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
        [0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0],
        [0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1],
        [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
        [1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1],
        [1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0],
    ];
    for (y, want_row) in want.iter().enumerate() {
        for (x, &w) in want_row.iter().enumerate() {
            let got = u8::from(bm.get(x, y));
            assert_eq!(got, w, "mismatch at ({x}, {y})");
        }
    }
}
/// `"12345"` encodes to a bitmap 15 modules wide whose module at (7, 7) is set.
#[test]
fn encode_digits_matches_bwip_js_compact() {
    let bm = encode(b"12345").unwrap();
    assert_eq!(bm.width(), 15);

    let centre = bm.get(7, 7);
    assert!(centre, "center module should be set");
}
/// A 1-layer "full" matrix has size 19.
#[test]
fn build_matrix_full_l1_size() {
    let zero_codewords = vec![0u32; 21];
    let zero_mode_bits = vec![false; 40];
    let m = build_matrix("full", 1, &zero_codewords, 6, &zero_mode_bits);
    assert_eq!(m.size, 19);
}
/// `lmv(1, 0, 9)` returns `(-5, 6)`.
#[test]
fn lmv_layer1_position0_compact() {
    let first_module = lmv(1, 0, 9);
    assert_eq!(first_module, (-5, 6));
}
/// Checks `lmv(1, pos, 9)` for sample positions; the labels name the
/// direction, column and row each position is expected to fall in.
#[test]
fn lmv_all_four_sides_compact_layer1() {
    // (position, expected coordinates, label shown on failure)
    let cases = [
        (1, (-5, 7), "dir 0, pos 1 (col=0, row=1)"),
        (2, (-4, 6), "dir 0, pos 2 (col=1, row=0)"),
        (24, (7, 6), "dir 0, pos 24 (col=12, row=0)"),
        (25, (7, 7), "dir 0, pos 25 (col=12, row=1)"),
        (26, (6, 5), "dir 1, pos 26 (col=0, row=0)"),
        (27, (7, 5), "dir 1, pos 27 (col=0, row=1)"),
        (28, (6, 4), "dir 1, pos 28 (col=1, row=0)"),
        (50, (6, -7), "dir 1, pos 50 (col=12, row=0)"),
        (51, (7, -7), "dir 1, pos 51 (col=12, row=1)"),
        (52, (5, -6), "dir 2, pos 52 (col=0, row=0)"),
        (53, (5, -7), "dir 2, pos 53 (col=0, row=1)"),
        (54, (4, -6), "dir 2, pos 54 (col=1, row=0)"),
        (78, (-6, -5), "dir 3, pos 78 (col=0, row=0)"),
        (79, (-7, -5), "dir 3, pos 79 (col=0, row=1)"),
        (80, (-6, -4), "dir 3, pos 80 (col=1, row=0)"),
    ];
    for &(pos, want, label) in cases.iter() {
        assert_eq!(lmv(1, pos, 9), want, "{}", label);
    }
}
/// Checks `lmv` for layer 2 with a third argument of 9, and for layer 1 with
/// a third argument of 12.
#[test]
fn lmv_layer2_and_full_format() {
    // (layer, position, third argument, expected coordinates, label)
    let cases = [
        (2, 0, 9, (-7, 8), "compact L2 pos=0"),
        (1, 0, 12, (-6, 7), "full L1 pos=0"),
        (1, 1, 12, (-6, 8), "full L1 pos=1 row=1"),
        (1, 2, 12, (-5, 7), "full L1 pos=2 col=1"),
    ];
    for &(layer, pos, mid, want, label) in cases.iter() {
        assert_eq!(lmv(layer, pos, mid), want, "{}", label);
    }
}
/// Checks `cmv` at the origin, one interior point and two opposite corners,
/// all with 112 and 15 as the last two arguments.
#[test]
fn cmv_basic() {
    let cases = [
        ((0, 0), 112),
        ((-5, 6), 17),
        ((7, -7), 224),
        ((-7, 7), 0),
    ];
    for &((a, b), want) in cases.iter() {
        assert_eq!(cmv(a, b, 112, 15), want);
    }
}
/// Pins the encoder state numbering: `STATE_UPPER` is 0, `STATE_BYTE` is 5,
/// and the six states are pairwise distinct and together cover exactly 0..=5.
#[test]
fn state_constants_are_in_range() {
    assert_eq!(STATE_UPPER, 0);
    assert_eq!(STATE_BYTE, 5);

    let mut sorted = [
        STATE_UPPER,
        STATE_LOWER,
        STATE_MIXED,
        STATE_PUNCT,
        STATE_DIGIT,
        STATE_BYTE,
    ];
    sorted.sort_unstable();

    // After sorting, any duplicate shows up as an equal adjacent pair.
    for pair in sorted.windows(2) {
        assert!(pair[1] > pair[0], "duplicate state {}", pair[1]);
    }

    // Distinctness alone would still accept a stray value such as 200. The
    // per-state tables (`CHAR_SIZES`, `LATCH_LEN`) have six entries, so the
    // six states must be exactly 0..=5.
    assert_eq!(
        sorted,
        [0, 1, 2, 3, 4, 5],
        "states must cover exactly 0..=5"
    );
}
/// Sample lookups for the upper, lower and digit codeword functions,
/// including bytes that each one rejects.
#[test]
fn per_state_codeword_lookups() {
    let upper = [
        (b' ', Some(UPPER_SPACE)),
        (b'A', Some(2)),
        (b'Z', Some(27)),
        (b'a', None),
        (b'0', None),
    ];
    for &(byte, want) in upper.iter() {
        assert_eq!(upper_codeword(byte), want);
    }

    let lower = [
        (b' ', Some(1)),
        (b'a', Some(2)),
        (b'z', Some(27)),
        (b'A', None),
        (b'0', None),
    ];
    for &(byte, want) in lower.iter() {
        assert_eq!(lower_codeword(byte), want);
    }

    let digit = [
        (b' ', Some(1)),
        (b'0', Some(2)),
        (b'9', Some(11)),
        (b',', Some(12)),
        (b'.', Some(13)),
        (b'a', None),
        (b'A', None),
    ];
    for &(byte, want) in digit.iter() {
        assert_eq!(digit_codeword(byte), want);
    }
}
/// `preferred_state` maps sample bytes of each class to the expected state.
#[test]
fn preferred_state_per_byte_class() {
    assert_eq!(preferred_state(13), STATE_PUNCT, "CR → punct");

    // Each state paired with sample bytes that must map to it.
    let groups: [(u8, &[u8]); 6] = [
        (STATE_UPPER, &[b' ', b'A', b'M', b'Z']),
        (STATE_LOWER, &[b'a', b'm', b'z']),
        (STATE_DIGIT, &[b'0', b'9', b',', b'.']),
        (STATE_PUNCT, &[b'!', b'?', b'/', b'{', b'}']),
        (STATE_MIXED, &[1, 7, b'@', b'\\', b'^', b'_', 127]),
        (STATE_BYTE, &[128, 200, 255]),
    ];
    for &(state, bytes) in groups.iter() {
        for &byte in bytes {
            assert_eq!(preferred_state(byte), state);
        }
    }
}
/// `preferred_state` at the edges of the control-byte ranges, for further
/// mixed and punctuation bytes, and for NUL (which maps to `STATE_BYTE`).
#[test]
fn preferred_state_range_boundaries_and_unlisted_bytes() {
    // (byte, expected state, label shown on failure)
    let labelled = [
        (12, STATE_MIXED, "byte 12 (end of 1..=12)"),
        (14, STATE_MIXED, "byte 14 (start of 14..=26)"),
        (26, STATE_MIXED, "byte 26 (end of 14..=26)"),
        (27, STATE_MIXED, "byte 27 (start of 27..=31)"),
        (31, STATE_MIXED, "byte 31 (end of 27..=31)"),
        (b'`', STATE_MIXED, "backtick → MIXED"),
        (b'|', STATE_MIXED, "pipe → MIXED"),
        (b'~', STATE_MIXED, "tilde → MIXED"),
        (b'"', STATE_PUNCT, "dquote → PUNCT"),
        (b'\'', STATE_PUNCT, "squote → PUNCT"),
        (0, STATE_BYTE, "NUL (0) falls through to default BYTE"),
    ];
    for &(byte, state, label) in labelled.iter() {
        assert_eq!(preferred_state(byte), state, "{}", label);
    }

    // Remaining punctuation bytes, checked without a custom label.
    for &byte in b"#$%&()*+-:;<=>[]".iter() {
        assert_eq!(preferred_state(byte), STATE_PUNCT);
    }
}
/// `latch_codeword` for the state pairs that have a codeword, plus several
/// pairs that must return `None`.
#[test]
fn latch_codeword_per_state_pair() {
    // (first argument, second argument, expected codeword)
    let cases = [
        (STATE_UPPER, STATE_LOWER, Some(UPPER_LL)),
        (STATE_UPPER, STATE_MIXED, Some(UPPER_LM)),
        (STATE_UPPER, STATE_DIGIT, Some(UPPER_LD)),
        (STATE_LOWER, STATE_MIXED, Some(29)),
        (STATE_LOWER, STATE_DIGIT, Some(30)),
        (STATE_MIXED, STATE_LOWER, Some(28)),
        (STATE_MIXED, STATE_UPPER, Some(29)),
        (STATE_MIXED, STATE_PUNCT, Some(30)),
        (STATE_PUNCT, STATE_UPPER, Some(31)),
        (STATE_DIGIT, STATE_UPPER, Some(14)),
        (STATE_UPPER, STATE_PUNCT, None),
        (STATE_UPPER, STATE_UPPER, None),
        (STATE_DIGIT, STATE_PUNCT, None),
        (STATE_PUNCT, STATE_LOWER, None),
        (STATE_BYTE, STATE_UPPER, None),
    ];
    for &(from, to, want) in cases.iter() {
        assert_eq!(latch_codeword(from, to), want);
    }
}
/// `sentinel_codeword` for each supported (state, sentinel) pair, plus
/// combinations and plain values that must return `None`.
#[test]
fn sentinel_codeword_all_arms() {
    // (state, sentinel or plain value, expected codeword)
    let cases = [
        (STATE_UPPER, LATCH_LOWER, Some(28)),
        (STATE_UPPER, LATCH_MIXED, Some(29)),
        (STATE_UPPER, LATCH_DIGIT, Some(30)),
        (STATE_UPPER, SHIFT_BYTE, Some(31)),
        (STATE_UPPER, SHIFT_PUNCT, Some(0)),
        (STATE_LOWER, SHIFT_UPPER, Some(28)),
        (STATE_LOWER, LATCH_MIXED, Some(29)),
        (STATE_LOWER, LATCH_DIGIT, Some(30)),
        (STATE_LOWER, SHIFT_BYTE, Some(31)),
        (STATE_LOWER, SHIFT_PUNCT, Some(0)),
        (STATE_MIXED, LATCH_LOWER, Some(28)),
        (STATE_MIXED, LATCH_UPPER, Some(29)),
        (STATE_MIXED, LATCH_PUNCT, Some(30)),
        (STATE_MIXED, SHIFT_BYTE, Some(31)),
        (STATE_MIXED, SHIFT_PUNCT, Some(0)),
        (STATE_PUNCT, LATCH_UPPER, Some(31)),
        (STATE_PUNCT, FLG_NEXT, Some(0)),
        (STATE_PUNCT, PAIR_2, Some(2)),
        (STATE_PUNCT, PAIR_3, Some(3)),
        (STATE_PUNCT, PAIR_4, Some(4)),
        (STATE_PUNCT, PAIR_5, Some(5)),
        (STATE_DIGIT, LATCH_UPPER, Some(14)),
        (STATE_DIGIT, SHIFT_UPPER, Some(15)),
        (STATE_DIGIT, SHIFT_PUNCT, Some(0)),
    ];
    for &(state, sentinel, want) in cases.iter() {
        assert_eq!(sentinel_codeword(state, sentinel), want);
    }

    // Unsupported combinations, each with the reason shown on failure.
    let unsupported = [
        (
            STATE_UPPER,
            LATCH_UPPER,
            "UPPER + LATCH_UPPER is a self-transition (no codeword)",
        ),
        (
            STATE_DIGIT,
            LATCH_LOWER,
            "DIGIT has no direct LATCH_LOWER",
        ),
        (
            STATE_PUNCT,
            LATCH_LOWER,
            "PUNCT can only return to UPPER (via 31)",
        ),
    ];
    for &(state, sentinel, why) in unsupported.iter() {
        assert_eq!(sentinel_codeword(state, sentinel), None, "{}", why);
    }

    let rejected = [(STATE_BYTE, LATCH_UPPER), (STATE_UPPER, 65), (STATE_UPPER, 0)];
    for &(state, value) in rejected.iter() {
        assert_eq!(sentinel_codeword(state, value), None);
    }
}
/// `charsize` returns the state's width for non-negative values and
/// `u16::MAX` for negative ones.
#[test]
fn charsize_state_widths_and_sentinel_inf() {
    let five_bit = [
        (STATE_UPPER, i32::from(b'A')),
        (STATE_LOWER, i32::from(b'a')),
        (STATE_MIXED, i32::from(b'@')),
        (STATE_PUNCT, 13),
    ];
    for &(state, ch) in five_bit.iter() {
        assert_eq!(charsize(state, ch), 5);
    }

    // (state, value, expected width, label shown on failure)
    let labelled = [
        (STATE_DIGIT, i32::from(b'0'), 4, "digit width is 4"),
        (STATE_BYTE, 0xFF, 8, "byte mode width is 8"),
        (
            STATE_UPPER,
            0,
            5,
            "ch=0 hits the >= 0 branch (CHAR_SIZES[state])",
        ),
        (
            STATE_DIGIT,
            0,
            4,
            "ch=0 in DIGIT must use the 4-bit width",
        ),
    ];
    for &(state, ch, width, label) in labelled.iter() {
        assert_eq!(charsize(state, ch), width, "{}", label);
    }

    let negatives = [
        (STATE_UPPER, LATCH_LOWER),
        (STATE_DIGIT, SHIFT_UPPER),
        (STATE_BYTE, -1),
        (STATE_LOWER, FLG_NEXT),
    ];
    for &(state, value) in negatives.iter() {
        assert_eq!(charsize(state, value), u16::MAX);
    }
}
/// `latch_target` maps each `LATCH_*` sentinel to its state and returns `None`
/// for shifts, `FLG_NEXT`, pair sentinels and arbitrary integers.
#[test]
fn latch_target_per_sentinel() {
    let latches = [
        (LATCH_UPPER, STATE_UPPER),
        (LATCH_LOWER, STATE_LOWER),
        (LATCH_MIXED, STATE_MIXED),
        (LATCH_PUNCT, STATE_PUNCT),
        (LATCH_DIGIT, STATE_DIGIT),
    ];
    for &(sentinel, state) in latches.iter() {
        assert_eq!(latch_target(sentinel), Some(state));
    }

    let non_latches = [
        SHIFT_UPPER,
        SHIFT_PUNCT,
        SHIFT_BYTE,
        FLG_NEXT,
        PAIR_2,
        PAIR_3,
        PAIR_4,
        PAIR_5,
        0,
        65,
        -100,
    ];
    for &value in non_latches.iter() {
        assert_eq!(latch_target(value), None);
    }
}
/// `pair_sentinel` recognises the four two-byte pairs and nothing else;
/// `is_pair_sentinel` accepts exactly `PAIR_2`..`PAIR_5`.
#[test]
fn pair_sentinel_pre_compression_arms() {
    // (first byte, second byte, expected sentinel, optional failure label)
    let pairs = [
        (0x0D, 0x0A, Some(PAIR_2), Some("CR-LF → PAIR_2")),
        (b'.', b' ', Some(PAIR_3), None),
        (b',', b' ', Some(PAIR_4), None),
        (b':', b' ', Some(PAIR_5), None),
        (b' ', 0x0A, None, Some("space-LF not a pair")),
        (b'.', b'.', None, Some("double-dot not a pair")),
        (b'A', b'B', None, None),
        (0, 0, None, None),
        (0x0A, 0x0D, None, Some("reversed CR-LF not a pair")),
    ];
    for &(first, second, want, label) in pairs.iter() {
        match label {
            Some(why) => assert_eq!(pair_sentinel(first, second), want, "{}", why),
            None => assert_eq!(pair_sentinel(first, second), want),
        }
    }

    for &sentinel in [PAIR_2, PAIR_3, PAIR_4, PAIR_5].iter() {
        assert!(is_pair_sentinel(sentinel));
    }
    assert!(!is_pair_sentinel(-15), "-15 (below PAIR_5) not a sentinel");
    assert!(!is_pair_sentinel(-10), "-10 (above PAIR_2) not a sentinel");
    for &value in [0, 1, 100].iter() {
        assert!(!is_pair_sentinel(value));
    }
}
/// `encode_byte_in_state` agrees with each per-state codeword function,
/// returns `None` for bytes outside the state's alphabet, and returns `None`
/// for `STATE_BYTE` and for state numbers 99 and 255.
#[test]
fn encode_byte_in_state_routes_to_state_encoders() {
    // (state, byte, result of the per-state function, label shown on failure)
    let dispatch = [
        (
            STATE_UPPER,
            b'A',
            upper_codeword(b'A'),
            "STATE_UPPER must dispatch to upper_codeword",
        ),
        (
            STATE_LOWER,
            b'a',
            lower_codeword(b'a'),
            "STATE_LOWER must dispatch to lower_codeword",
        ),
        (
            STATE_DIGIT,
            b'0',
            digit_codeword(b'0'),
            "STATE_DIGIT must dispatch to digit_codeword",
        ),
        (
            STATE_PUNCT,
            13,
            punct_codeword(13),
            "STATE_PUNCT must dispatch to punct_codeword",
        ),
        (
            STATE_MIXED,
            b'@',
            mixed_codeword(b'@'),
            "STATE_MIXED must dispatch to mixed_codeword",
        ),
        (
            STATE_UPPER,
            b'a',
            None,
            "'a' (lowercase) not in UPPER alphabet",
        ),
        (
            STATE_DIGIT,
            b'A',
            None,
            "'A' (uppercase) not in DIGIT alphabet",
        ),
    ];
    for &(state, byte, want, label) in dispatch.iter() {
        assert_eq!(encode_byte_in_state(state, byte), want, "{}", label);
    }

    let unsupported = [
        (STATE_BYTE, b'A'),
        (STATE_BYTE, 0xFF),
        (99, b'A'),
        (255, b'A'),
    ];
    for &(state, byte) in unsupported.iter() {
        assert_eq!(encode_byte_in_state(state, byte), None);
    }
}
/// `append_codeword` pushes the requested number of bits, most significant
/// first, and successive calls extend the same vector.
#[test]
fn append_codeword_msb_first_bit_layout() {
    // Turns a b"0101"-style pattern into the equivalent bit vector.
    let pattern = |text: &[u8]| -> Vec<bool> { text.iter().map(|&c| c == b'1').collect() };

    let mut bits: Vec<bool> = Vec::new();

    append_codeword(&mut bits, 0b1010, 4);
    assert_eq!(bits, pattern(b"1010"));

    bits.clear();
    append_codeword(&mut bits, 0b11111, 5);
    assert_eq!(bits, pattern(b"11111"));

    bits.clear();
    append_codeword(&mut bits, 0, 5);
    assert_eq!(bits, pattern(b"00000"));

    bits.clear();
    append_codeword(&mut bits, 0b10000, 5);
    assert_eq!(bits, pattern(b"10000"));

    bits.clear();
    append_codeword(&mut bits, 0b101, 3);
    append_codeword(&mut bits, 0b010, 3);
    assert_eq!(
        bits,
        pattern(b"101010"),
        "two appends concatenate bit-stream order"
    );
}
/// Exhaustive check of `upper_codeword`: fixed values, the bytes just outside
/// `A..=Z`, rejected classes, uniqueness and range of the 27 accepted
/// codewords, and consecutive codewords for consecutive letters.
#[test]
fn upper_codeword_per_arm_with_boundary_anchors() {
    use std::collections::HashSet;

    // (byte, expected result, label shown on failure)
    let anchors = [
        (
            b' ',
            Some(super::UPPER_SPACE),
            "space → UPPER_SPACE (= 1)",
        ),
        (b' ', Some(1), "space → 1 (numeric)"),
        (b'A', Some(2), "'A' → 2 ('A'-'A'+2)"),
        (b'Z', Some(27), "'Z' → 27 ('Z'-'A'+2 = 25+2)"),
        (b'M', Some(14), "'M' → 14 ('M'-'A'+2)"),
        (b'@', None, "'@' (64) is one below 'A'; must be None"),
        (b'[', None, "'[' (91) is one above 'Z'; must be None"),
        (b'a', None, "'a' lowercase: None"),
        (b'z', None, "'z' lowercase: None"),
        (b'm', None, "'m' mid lowercase: None"),
        (b'0', None, "'0': digit → None"),
        (b'9', None, "'9': digit → None"),
        (b'!', None, "'!': punct → None"),
        (0, None, "NUL: None"),
        (0x7F, None, "DEL: None"),
        (0xFF, None, "0xFF: None"),
    ];
    for &(byte, want, label) in anchors.iter() {
        assert_eq!(upper_codeword(byte), want, "{}", label);
    }

    // Collect the codeword of every accepted byte; `insert` returning false
    // would mean two bytes share a codeword.
    let mut codewords: HashSet<u8> = HashSet::new();
    codewords.insert(upper_codeword(b' ').unwrap());
    for c in b'A'..=b'Z' {
        let cw = match upper_codeword(c) {
            Some(cw) => cw,
            None => panic!("'{}' must be accepted", c as char),
        };
        let fresh = codewords.insert(cw);
        assert!(
            fresh,
            "char '{}' produced duplicate codeword {cw}",
            c as char
        );
    }
    assert_eq!(
        codewords.len(),
        27,
        "exactly 27 accepted bytes (space + A-Z)"
    );
    for cw in codewords.iter().copied() {
        assert!((1..=27).contains(&cw), "codeword {cw} must be in 1..=27");
    }

    // Each letter's codeword is one more than its predecessor's.
    for (c, next) in (b'A'..b'Z').zip(b'B'..=b'Z') {
        let cw = upper_codeword(c).unwrap();
        let cw_next = upper_codeword(next).unwrap();
        assert_eq!(
            cw_next,
            cw + 1,
            "'{}' → {cw}, '{}' → must be {cw}+1 = {}",
            c as char,
            next as char,
            cw + 1
        );
    }
}
#[test]
fn digit_codeword_per_arm_with_punct_discriminators() {
    use std::collections::HashSet;

    // Point checks: accepted bytes, the neighbours of '0'..='9' and of the
    // two punctuation arms, and representatives of other character classes.
    let anchors = [
        (b' ', Some(1), "space → 1"),
        (b'0', Some(2), "'0' → 2 ('0'-'0'+2)"),
        (b'5', Some(7), "'5' → 7 ('5'-'0'+2)"),
        (b'9', Some(11), "'9' → 11 ('9'-'0'+2)"),
        (b',', Some(12), "',' → 12 (NOT 13 — own arm)"),
        (b'.', Some(13), "'.' → 13 (NOT 12 — own arm)"),
        (b'/', None, "'/' (47) just below '0' (48); must be None"),
        (b':', None, "':' (58) just above '9' (57); must be None"),
        (b'-', None, "'-' (45) is between ',' and '.'; must be None"),
        (b'+', None, "'+' (43) is one below ','; must be None"),
        (b'A', None, "'A': None"),
        (b'Z', None, "'Z': None"),
        (b'a', None, "'a' lowercase: None"),
        (b'!', None, "'!': None"),
        (0, None, "NUL: None"),
        (0x7F, None, "DEL: None"),
        (0xFF, None, "0xFF: None"),
    ];
    for (byte, expected, why) in anchors {
        assert_eq!(digit_codeword(byte), expected, "{why}");
    }

    // Every accepted byte must map to its own codeword.
    let mut seen: HashSet<u8> = HashSet::new();
    for b in b" ,.".iter().copied() {
        let cw = digit_codeword(b).unwrap();
        assert!(seen.insert(cw), "duplicate codeword {cw} for {b:?}");
    }
    let digits: Vec<u8> = (b'0'..=b'9').collect();
    for &c in &digits {
        let cw = digit_codeword(c).unwrap();
        assert!(
            seen.insert(cw),
            "duplicate codeword {cw} for digit {}",
            c as char
        );
    }
    assert_eq!(seen.len(), 13, "exactly 13 accepted bytes");
    for cw in seen.iter().copied() {
        assert!((1..=13).contains(&cw), "codeword {cw} must be in 1..=13");
    }

    // Adjacent digits must receive adjacent codewords.
    for pair in digits.windows(2) {
        let (c, successor) = (pair[0], pair[1]);
        let cw = digit_codeword(c).unwrap();
        let cw_next = digit_codeword(successor).unwrap();
        assert_eq!(
            cw_next,
            cw + 1,
            "'{}' → {cw}, '{}' → must be {}",
            c as char,
            successor as char,
            cw + 1
        );
    }
}
#[test]
fn lower_codeword_per_arm_with_boundary_anchors() {
    use std::collections::HashSet;

    // Point checks: accepted anchors, the byte just below 'a', and
    // representatives of every other character class.
    let anchors = [
        (b' ', Some(1), "space → 1"),
        (b'a', Some(2), "'a' → 2 ('a'-'a'+2)"),
        (b'm', Some(14), "'m' → 14 (mid)"),
        (b'z', Some(27), "'z' → 27 ('z'-'a'+2 = 25+2)"),
        (b'`', None, "'`' (96) is one below 'a' (97); must be None"),
        (b'A', None, "'A' uppercase: None"),
        (b'M', None, "'M' uppercase: None"),
        (b'Z', None, "'Z' uppercase: None"),
        (b'0', None, "'0' digit: None"),
        (b'9', None, "'9' digit: None"),
        (b'!', None, "'!' punct: None"),
        (0, None, "NUL: None"),
        (0x7F, None, "DEL: None"),
        (0xFF, None, "0xFF: None"),
    ];
    for (byte, expected, why) in anchors {
        assert_eq!(lower_codeword(byte), expected, "{why}");
    }
    // Kept as a direct assertion so its brace-escaped message literal stays intact.
    assert_eq!(
        lower_codeword(b'{'),
        None,
        "'{{' (123) is one above 'z' (122); must be None"
    );

    // Every accepted byte must map to its own codeword.
    let mut codewords: HashSet<u8> = HashSet::from([lower_codeword(b' ').unwrap()]);
    let letters: Vec<u8> = (b'a'..=b'z').collect();
    for &c in &letters {
        let cw = match lower_codeword(c) {
            Some(cw) => cw,
            None => panic!("'{}' must be accepted", c as char),
        };
        assert!(
            codewords.insert(cw),
            "char '{}' produced duplicate codeword {cw}",
            c as char
        );
    }
    assert_eq!(codewords.len(), 27, "exactly 27 accepted bytes");
    for cw in codewords.iter().copied() {
        assert!((1..=27).contains(&cw), "codeword {cw} must be in 1..=27");
    }

    // The lower and upper alphabets use the same numbering at both ends.
    let shared_ends = [
        (
            b'a',
            b'A',
            "'a' (lower) and 'A' (upper) both map to codeword 2",
        ),
        (b'z', b'Z', "'z' and 'Z' both map to codeword 27"),
    ];
    for (lower, upper, why) in shared_ends {
        assert_eq!(lower_codeword(lower), upper_codeword(upper), "{why}");
    }

    // Adjacent letters must receive adjacent codewords.
    for pair in letters.windows(2) {
        let (c, successor) = (pair[0], pair[1]);
        let cw = lower_codeword(c).unwrap();
        let cw_next = lower_codeword(successor).unwrap();
        assert_eq!(
            cw_next,
            cw + 1,
            "'{}' → {cw}, '{}' → must be {}",
            c as char,
            successor as char,
            cw + 1
        );
    }
}
#[test]
fn mixed_codeword_per_arm_with_gap_discriminators() {
    use std::collections::HashSet;

    // Point checks for every accepted run and for the gaps between them.
    let anchors = [
        (b' ', Some(1), "space → 1"),
        (1, Some(2), "^A (1) → 2 (byte+1)"),
        (7, Some(8), "^G (7) → 8"),
        (13, Some(14), "^M / CR (13) → 14"),
        (0, None, "NUL (0) is below 1..=13; None"),
        (14, None, "14 (^N) is in the gap; None"),
        (20, None, "20 (^T) is in the gap; None"),
        (26, None, "26 (^Z) is in the gap; None"),
        (27, Some(15), "ESC (27) → 15"),
        (28, Some(16), "28 → 16 (byte-12)"),
        (30, Some(18), "30 → 18"),
        (31, Some(19), "31 → 19"),
        (b'@', Some(20), "'@' → 20"),
        (b'\\', Some(21), "'\\\\' → 21"),
        (b'^', Some(22), "'^' → 22"),
        (b'_', Some(23), "'_' → 23"),
        (b'`', Some(24), "'`' → 24"),
        (b'|', Some(25), "'|' → 25"),
        (b'~', Some(26), "'~' → 26"),
        (127, Some(27), "DEL (127) → 27"),
        (b'[', None, "'[' is in special-punct gap; None"),
        (b']', None, "']' is in special-punct gap; None"),
        (b'A', None, "'A' (65) regular: None"),
        (b'Z', None, "'Z' (90) regular: None"),
        (b'a', None, "'a' regular: None"),
        (b'0', None, "'0' regular: None"),
        (b'!', None, "'!' regular: None"),
        (0xFF, None, "0xFF beyond range: None"),
    ];
    for (byte, expected, why) in anchors {
        assert_eq!(mixed_codeword(byte), expected, "{why}");
    }
    // Kept as a direct assertion so its brace-escaped message literal stays intact.
    assert_eq!(
        mixed_codeword(b'}'),
        None,
        "'}}' is in special-punct gap; None"
    );

    // Every accepted byte must map to its own codeword. Asserting on
    // `insert` (as the upper/lower/digit tests do) names the colliding byte
    // instead of only reporting a wrong set size.
    let accepted = std::iter::once(b' ')
        .chain(1u8..=13)
        .chain(27u8..=31)
        .chain(b"@\\^_`|~".iter().copied())
        .chain(std::iter::once(127u8));
    let mut codewords: HashSet<u8> = HashSet::new();
    for b in accepted {
        let cw = mixed_codeword(b).unwrap_or_else(|| panic!("byte {b} must be accepted"));
        assert!(
            codewords.insert(cw),
            "byte {b} produced duplicate codeword {cw}"
        );
    }
    assert_eq!(
        codewords.len(),
        27,
        "exactly 27 accepted bytes → 27 unique codewords"
    );
    for &cw in &codewords {
        assert!((1..=27).contains(&cw), "codeword {cw} must be in 1..=27");
    }
}
#[test]
fn punct_codeword_full_arm_set_with_gap_discriminators() {
    use std::collections::HashSet;

    // Point checks for every accepted run/arm and for bytes that belong to
    // other alphabets.
    let anchors = [
        (13, Some(1), "CR (13) → 1"),
        (b'!', Some(6), "'!' → 6"),
        (b'#', Some(8), "'#' → 8"),
        (b'/', Some(20), "'/' → 20 (end of first run)"),
        (b':', Some(21), "':' → 21 (after gap)"),
        (b'=', Some(24), "'=' → 24"),
        (b'?', Some(26), "'?' → 26 (end of second run)"),
        (b'[', Some(27), "'[' → 27"),
        (b']', Some(28), "']' → 28"),
        (
            b'\\',
            None,
            "'\\\\' (92) between '[' and ']'; must be None (it's in mixed_codeword)",
        ),
        (0, None, "NUL: None"),
        (1, None, "^A: None"),
        (12, None, "^L (12): None"),
        (14, None, "14 (^N): None"),
        (b' ', None, "space (32): None"),
        (b'0', None, "'0' (48): None"),
        (b'9', None, "'9' (57): None"),
        (b'A', None, "'A' (65): None"),
        (b'Z', None, "'Z' (90): None"),
        (b'@', None, "'@' (64): None (mixed only)"),
        (b'^', None, "'^' (94): None"),
        (b'_', None, "'_' (95): None"),
        (b'`', None, "'`' (96): None"),
        (b'a', None, "'a': None"),
        (b'z', None, "'z': None"),
        (b'~', None, "'~' (126): None"),
        (127, None, "DEL: None"),
        (0xFF, None, "0xFF: None"),
    ];
    for (byte, expected, why) in anchors {
        assert_eq!(punct_codeword(byte), expected, "{why}");
    }
    // Kept as direct assertions so their brace-escaped message literals stay intact.
    assert_eq!(punct_codeword(b'{'), Some(29), "'{{' → 29");
    assert_eq!(punct_codeword(b'}'), Some(30), "'}}' → 30");
    assert_eq!(
        punct_codeword(b'|'),
        None,
        "'|' (124) between '{{' and '}}'; must be None"
    );

    // Every accepted byte must map to its own codeword. Asserting on
    // `insert` (as the upper/lower/digit tests do) names the colliding byte
    // instead of only reporting a wrong set size.
    let accepted = std::iter::once(13u8)
        .chain(b'!'..=b'/')
        .chain(b':'..=b'?')
        .chain(b"[]{}".iter().copied());
    let mut codewords: HashSet<u8> = HashSet::new();
    for b in accepted {
        let cw = punct_codeword(b).unwrap_or_else(|| panic!("byte {b} must be accepted"));
        assert!(
            codewords.insert(cw),
            "byte {b} produced duplicate codeword {cw}"
        );
    }
    assert_eq!(codewords.len(), 26, "exactly 26 accepted bytes");
    for &cw in &codewords {
        assert!(
            cw == 1 || (6..=30).contains(&cw),
            "codeword {cw} must be 1 or in 6..=30"
        );
    }

    // Within each contiguous run, adjacent bytes receive adjacent codewords.
    for (first, last, label) in [(b'!', b'/', "first"), (b':', b'?', "second")] {
        for c in first..last {
            let cw = punct_codeword(c).unwrap();
            let cw_next = punct_codeword(c + 1).unwrap();
            assert_eq!(
                cw_next,
                cw + 1,
                "{label} run monotonic: '{}' → {cw}, '{}' → {}",
                c as char,
                (c + 1) as char,
                cw + 1
            );
        }
    }
}
#[test]
fn encode_byte_in_state_dispatches_per_state_arm() {
    // For each probe byte: the one state that accepts it, then states that
    // must reject it. STATE_BYTE and unknown ids never encode.
    let expectations = [
        (
            STATE_UPPER,
            b'A',
            Some(2),
            "STATE_UPPER + 'A' → 2 (upper_codeword route)",
        ),
        (STATE_LOWER, b'A', None, "Lower rejects 'A'"),
        (STATE_MIXED, b'A', None, "Mixed rejects 'A'"),
        (STATE_PUNCT, b'A', None, "Punct rejects 'A'"),
        (STATE_DIGIT, b'A', None, "Digit rejects 'A'"),
        (
            STATE_LOWER,
            b'a',
            Some(2),
            "STATE_LOWER + 'a' → 2 (lower_codeword route)",
        ),
        (STATE_UPPER, b'a', None, "Upper rejects 'a'"),
        (
            STATE_DIGIT,
            b'0',
            Some(2),
            "STATE_DIGIT + '0' → 2 (digit_codeword route)",
        ),
        (STATE_UPPER, b'0', None, "Upper rejects '0'"),
        (STATE_LOWER, b'0', None, "Lower rejects '0'"),
        (
            STATE_MIXED,
            27,
            Some(15),
            "STATE_MIXED + ESC(27) → 15 (mixed_codeword route)",
        ),
        (STATE_UPPER, 27, None, "Upper rejects ESC"),
        (STATE_PUNCT, 27, None, "Punct rejects ESC"),
        (
            STATE_PUNCT,
            b'?',
            Some(26),
            "STATE_PUNCT + '?' → 26 (punct_codeword route)",
        ),
        (STATE_UPPER, b'?', None, "Upper rejects '?'"),
        (STATE_MIXED, b'?', None, "Mixed rejects '?'"),
        (
            STATE_BYTE,
            b'A',
            None,
            "STATE_BYTE has no per-byte lookup; always None",
        ),
        (STATE_BYTE, b' ', None, "STATE_BYTE + space: None"),
        (99, b'A', None, "unknown state → None (default arm)"),
        (u8::MAX, b'A', None, "u8::MAX state → None"),
    ];
    for (state, byte, expected, why) in expectations {
        assert_eq!(encode_byte_in_state(state, byte), expected, "{why}");
    }

    // Space is codeword 1 in every alphabet except PUNCT.
    for state in [STATE_UPPER, STATE_LOWER, STATE_MIXED, STATE_DIGIT] {
        assert_eq!(
            encode_byte_in_state(state, b' '),
            Some(1),
            "state {state} accepts space with codeword 1"
        );
    }
    assert_eq!(
        encode_byte_in_state(STATE_PUNCT, b' '),
        None,
        "STATE_PUNCT does NOT accept space (CR is its '1')"
    );

    // '@' is encodable only in MIXED.
    for (state, expected) in [
        (STATE_MIXED, Some(20)),
        (STATE_UPPER, None),
        (STATE_LOWER, None),
        (STATE_PUNCT, None),
        (STATE_DIGIT, None),
    ] {
        assert_eq!(encode_byte_in_state(state, b'@'), expected);
    }
}
#[test]
fn pair_sentinel_4_arms_and_is_pair_sentinel_round_trip() {
    use std::collections::HashSet;

    // The four recognised two-byte pairs, in (previous byte, current byte) order.
    let recognised = [
        (0x0D, 0x0A, PAIR_2, "CR LF → PAIR_2"),
        (b'.', b' ', PAIR_3, "'. ' → PAIR_3"),
        (b',', b' ', PAIR_4, "', ' → PAIR_4"),
        (b':', b' ', PAIR_5, "': ' → PAIR_5"),
    ];
    for (last, cur, sentinel, why) in recognised {
        assert_eq!(pair_sentinel(last, cur), Some(sentinel), "{why}");
    }

    // Byte order matters: the swapped pairs are not sentinels.
    let swapped = [
        (0x0A, 0x0D, "LF CR (swapped) must be None"),
        (b' ', b'.', "' .' (swapped) must be None"),
        (b' ', b',', "' ,' (swapped) must be None"),
        (b' ', b':', "' :' (swapped) must be None"),
    ];
    for (last, cur, why) in swapped {
        assert_eq!(pair_sentinel(last, cur), None, "{why}");
    }

    // The four sentinels are pairwise distinct and pinned to their values.
    let pinned = [
        (pair_sentinel(0x0D, 0x0A).unwrap(), -11, "PAIR_2 = -11"),
        (pair_sentinel(b'.', b' ').unwrap(), -12, "PAIR_3 = -12"),
        (pair_sentinel(b',', b' ').unwrap(), -13, "PAIR_4 = -13"),
        (pair_sentinel(b':', b' ').unwrap(), -14, "PAIR_5 = -14"),
    ];
    for (i, (a, _, _)) in pinned.iter().enumerate() {
        for (b, _, _) in &pinned[i + 1..] {
            assert_ne!(a, b);
        }
    }
    for (got, want, why) in pinned {
        assert_eq!(got, want, "{why}");
    }

    // Near misses and unrelated byte pairs.
    let not_pairs = [
        (b'A', b' ', "letter + space: None"),
        (b'.', b'.', "'.' '.': None"),
        (b'.', b'a', "'. ' but 'a' not ' ': None"),
        (0, 0, "NUL NUL: None"),
        (0x0D, b' ', "CR ' ': None"),
        (b' ', b' ', "space space: None"),
    ];
    for (last, cur, why) in not_pairs {
        assert_eq!(pair_sentinel(last, cur), None, "{why}");
    }

    // is_pair_sentinel accepts exactly the four PAIR_* values.
    for &p in &[PAIR_2, PAIR_3, PAIR_4, PAIR_5] {
        assert!(is_pair_sentinel(p), "is_pair_sentinel({p}) must be true");
    }
    assert!(!is_pair_sentinel(-10), "-10 just above PAIR_2: false");
    assert!(!is_pair_sentinel(-15), "-15 just below PAIR_5: false");
    for other in [0, 1, 100, -100, i32::MAX, i32::MIN] {
        assert!(!is_pair_sentinel(other));
    }

    // Destructure by value: the original wrote `pair_sentinel((*last), *cur)`,
    // which trips the `unused_parens` lint, and formatted the two reference
    // bindings inconsistently.
    let mut seen: HashSet<i32> = HashSet::new();
    for &(last, cur) in &[(0x0D, 0x0A), (b'.', b' '), (b',', b' '), (b':', b' ')] {
        let s = pair_sentinel(last, cur).unwrap();
        assert!(seen.insert(s), "duplicate sentinel {s} for ({last}, {cur})");
    }
    assert_eq!(seen.len(), 4, "exactly 4 distinct sentinels");
}
#[test]
fn latch_mixed_and_punct_constants_pinned() {
    // Pin the numeric values of the two latch markers.
    for (actual, pinned) in [(LATCH_MIXED, -4), (LATCH_PUNCT, -5)] {
        assert_eq!(actual, pinned);
    }
}
#[test]
fn modemap_full_signs_and_positions_pinned() {
    // Weight applied to the entry at index `i`: (i + 1) * 2_654_435_761, wrapping.
    let weight = |i: usize| (i as u64).wrapping_add(1).wrapping_mul(2_654_435_761);
    // Packs the low 16 bits of x and y into one word (x low, y high).
    let pack = |x: i64, y: i64| ((x as u64) & 0xFFFF) | (((y as u64) & 0xFFFF) << 16);

    let mut dot: i64 = 0;
    let (mut x_negative, mut x_positive) = (0usize, 0usize);
    let (mut y_negative, mut y_positive) = (0usize, 0usize);
    let mut full_checksum: u64 = 0;
    for (i, &(raw_x, raw_y)) in MODEMAP_FULL.iter().enumerate() {
        let (x, y) = (raw_x as i64, raw_y as i64);
        dot += x * y;
        x_negative += usize::from(x < 0);
        x_positive += usize::from(x > 0);
        y_negative += usize::from(y < 0);
        y_positive += usize::from(y > 0);
        full_checksum = full_checksum.wrapping_add(pack(x, y).wrapping_mul(weight(i)));
    }
    assert_eq!(MODEMAP_FULL.len(), 40);
    assert_eq!(
        (dot, x_negative, x_positive, y_negative, y_positive, full_checksum),
        MODEMAP_FULL_FP,
        "MODEMAP_FULL fingerprint changed — a sign was flipped"
    );

    // The compact map is pinned by the position-weighted checksum only.
    let compact_checksum = MODEMAP_COMPACT
        .iter()
        .enumerate()
        .fold(0u64, |acc, (i, &(x, y))| {
            acc.wrapping_add(pack(x as i64, y as i64).wrapping_mul(weight(i)))
        });
    assert_eq!(
        compact_checksum, MODEMAP_COMPACT_FP,
        "MODEMAP_COMPACT fingerprint changed"
    );
}
// Pinned fingerprint of MODEMAP_FULL, in the order compared by
// modemap_full_signs_and_positions_pinned: (sum of x*y, count of x < 0,
// count of x > 0, count of y < 0, count of y > 0, position-weighted checksum).
const MODEMAP_FULL_FP: (i64, usize, usize, usize, usize, u64) =
(0, 20, 20, 20, 20, 3485326926686748680);
// Pinned position-weighted checksum of MODEMAP_COMPACT (same test).
const MODEMAP_COMPACT_FP: u64 = 11392794952466961206;
#[test]
fn encode_dp_state_machine_fingerprint_pinned() {
    /// Sequence length plus a position-weighted wrapping checksum of its
    /// (sign-extended) elements.
    fn fp(seq: &[i32]) -> (usize, u64) {
        let checksum = seq.iter().zip(1u64..).fold(0u64, |acc, (&v, pos)| {
            acc.wrapping_add((v as i64 as u64).wrapping_mul(pos.wrapping_mul(2_654_435_761)))
        });
        (seq.len(), checksum)
    }

    // 'A', three bytes from 0xC0, 'B'.
    let byte_short: Vec<u8> = std::iter::once(b'A')
        .chain(0xC0..=0xC2)
        .chain(std::iter::once(b'B'))
        .collect();
    // 'X', ten bytes from 0xC0, 'Y'.
    let byte_long: Vec<u8> = std::iter::once(b'X')
        .chain(0xC0..=0xC9)
        .chain(std::iter::once(b'Y'))
        .collect();

    let cases: &[(&str, &[u8], (usize, u64))] = &[
        ("upper", b"AZTEC", FP_DP_UPPER),
        ("lower", b"hello", FP_DP_LOWER),
        ("digit", b"0123456789", FP_DP_DIGIT),
        ("mix_ad", b"ABC123def", FP_DP_MIXED_AD),
        ("mix_ctl", b"\x1B 0\x1Bz", FP_DP_MIXED_CTL),
        ("punct_pair", b"End. Begin.", FP_DP_PUNCT_PAIR),
        ("byte_short", byte_short.as_slice(), FP_DP_BYTE_SHORT),
        ("byte_long", byte_long.as_slice(), FP_DP_BYTE_LONG),
        ("crlf", b"line1\r\nline2", FP_DP_CRLF),
        ("single", b"Q", FP_DP_SINGLE),
    ];
    for &(tag, payload, want) in cases {
        let seq = match encode_dp(payload) {
            Ok(seq) => seq,
            Err(e) => panic!("encode_dp({tag}) ok: {e:?}"),
        };
        assert_eq!(fp(&seq), want, "fingerprint changed for {tag}");
    }
}
// Pinned (sequence length, checksum) fingerprints of encode_dp output, one per
// payload in encode_dp_state_machine_fingerprint_pinned.
const FP_DP_UPPER: (usize, u64) = (5, 2941114823188);
const FP_DP_LOWER: (usize, u64) = (6, 5696419143106);
const FP_DP_DIGIT: (usize, u64) = (11, 9261326370129);
const FP_DP_MIXED_AD: (usize, u64) = (12, 12125462556248);
const FP_DP_MIXED_CTL: (usize, u64) = (9, 4217898424229);
const FP_DP_PUNCT_PAIR: (usize, u64) = (14, 16298235572540);
const FP_DP_BYTE_SHORT: (usize, u64) = (7, 7472236667215);
const FP_DP_BYTE_LONG: (usize, u64) = (14, 42762960109710);
const FP_DP_CRLF: (usize, u64) = (17, 21325736903874);
const FP_DP_SINGLE: (usize, u64) = (1, 215009296641);
#[test]
fn build_matrix_size_and_grid_fingerprint_pinned() {
    /// Width, height and a row-major position-weighted wrapping checksum of
    /// the matrix cells.
    fn fp_bm(bm: &crate::encoding::BitMatrix) -> (usize, usize, u64) {
        let (w, h) = (bm.width(), bm.height());
        let mut checksum: u64 = 0;
        for (y, x) in (0..h).flat_map(|y| (0..w).map(move |x| (y, x))) {
            let cell = u64::from(bm.get(x, y));
            let idx = (y as u64) * (w as u64) + (x as u64);
            checksum = checksum.wrapping_add(
                cell.wrapping_mul(idx.wrapping_add(1).wrapping_mul(2_654_435_761)),
            );
        }
        (w, h, checksum)
    }

    // Compact symbols.
    let compact_a = fp_bm(&encode_compact(b"A").expect("compact L1"));
    let compact_hello = fp_bm(&encode_compact(b"HELLO WORLD").expect("compact L2-ish"));
    assert_eq!(compact_a, FP_BM_COMPACT_A, "compact L1 fingerprint changed");
    assert_eq!(
        compact_hello, FP_BM_COMPACT_HELLO,
        "compact L2 fingerprint changed"
    );

    // Symbols produced through `encode` for progressively larger payloads.
    let full_mid = fp_bm(
        &encode(b"This is a longer Aztec payload that triggers full mode L1+.")
            .expect("full mid"),
    );
    let full_200a = fp_bm(&encode(&[b'A'; 200]).expect("full mid layers"));
    let full_600x = fp_bm(&encode(&[b'X'; 600]).expect("full higher layers"));
    assert_eq!(full_mid, FP_BM_FULL_MID, "full mid fingerprint changed");
    assert_eq!(full_200a, FP_BM_FULL_200A, "full 200A fingerprint changed");
    assert_eq!(full_600x, FP_BM_FULL_600X, "full 600X fingerprint changed");
}
// Pinned (width, height, checksum) fingerprints of the matrices produced in
// build_matrix_size_and_grid_fingerprint_pinned.
const FP_BM_COMPACT_A: (usize, usize, u64) = (15, 15, 29366022823943);
const FP_BM_COMPACT_HELLO: (usize, usize, u64) = (15, 15, 34751872983012);
const FP_BM_FULL_MID: (usize, usize, u64) = (27, 27, 327368907968369);
const FP_BM_FULL_200A: (usize, usize, u64) = (45, 45, 1913601321155227);
const FP_BM_FULL_600X: (usize, usize, u64) = (71, 71, 18679129073933189);
#[test]
fn build_mode_bits_full_compact_rune_fingerprint_pinned() {
    /// Bit count plus the wrapping sum of `(index + 1) * 2_654_435_761` over
    /// the positions whose bit is set.
    fn fp(bits: &[bool]) -> (usize, u64) {
        let checksum = bits
            .iter()
            .enumerate()
            .filter(|&(_, &bit)| bit)
            .fold(0u64, |acc, (i, _)| {
                acc.wrapping_add((i as u64).wrapping_add(1).wrapping_mul(2_654_435_761))
            });
        (bits.len(), checksum)
    }

    // "full" format: (layers, codeword count, reader-init flag, expected).
    for (layers, cw, ri, want) in [
        (1u8, 20usize, false, FP_MB_FULL_L1_CW20),
        (1, 20, true, FP_MB_FULL_L1_CW20_RI),
        (10, 500, false, FP_MB_FULL_L10_CW500),
        (32, 1664, false, FP_MB_FULL_L32_CW_MAX),
    ] {
        let bits = build_mode_bits("full", layers, cw, ri, None);
        assert_eq!(
            fp(&bits),
            want,
            "fingerprint changed for full L{layers} CW{cw} ri{ri}"
        );
    }

    // "compact" format: (layers, codeword count, expected).
    for (layers, cw, want) in [
        (1u8, 17usize, FP_MB_COMPACT_L1_CW17),
        (4, 64, FP_MB_COMPACT_L4_CW64),
    ] {
        let bits = build_mode_bits("compact", layers, cw, false, None);
        assert_eq!(
            fp(&bits),
            want,
            "fingerprint changed for compact L{layers} CW{cw}"
        );
    }

    // "rune" format: the rune value is passed as the last argument.
    for (rune, want) in [
        (0u8, FP_MB_RUNE_0),
        (128u8, FP_MB_RUNE_128),
        (255u8, FP_MB_RUNE_255),
    ] {
        let bits = build_mode_bits("rune", 1, 1, false, Some(rune));
        assert_eq!(fp(&bits), want, "fingerprint changed for rune={rune}");
    }
}
// Pinned (bit count, checksum) fingerprints of build_mode_bits output, used by
// build_mode_bits_full_compact_rune_fingerprint_pinned.
const FP_MB_FULL_L1_CW20: (usize, u64) = (40, 899853722979);
const FP_MB_FULL_L1_CW20_RI: (usize, u64) = (40, 1236967064626);
const FP_MB_FULL_L10_CW500: (usize, u64) = (40, 934361387872);
const FP_MB_FULL_L32_CW_MAX: (usize, u64) = (40, 1505065076487);
const FP_MB_COMPACT_L1_CW17: (usize, u64) = (28, 581321431659);
const FP_MB_COMPACT_L4_CW64: (usize, u64) = (28, 461871822414);
const FP_MB_RUNE_0: (usize, u64) = (28, 520269409156);
const FP_MB_RUNE_128: (usize, u64) = (28, 432673029043);
const FP_MB_RUNE_255: (usize, u64) = (28, 663608940250);
#[test]
fn seq_to_bits_state_paths_fingerprint_pinned() {
    /// Bit count plus the wrapping sum of `(index + 1) * 2_654_435_761` over
    /// the positions whose bit is set.
    fn fp(bits: &[bool]) -> (usize, u64) {
        let checksum = bits
            .iter()
            .enumerate()
            .filter(|&(_, &bit)| bit)
            .fold(0u64, |acc, (i, _)| {
                acc.wrapping_add((i as u64).wrapping_add(1).wrapping_mul(2_654_435_761))
            });
        (bits.len(), checksum)
    }

    // 'X' followed by every byte from 0xC0 through 0xE5.
    let long_byte_run: Vec<u8> = std::iter::once(b'X').chain(0xC0..=0xE5).collect();

    let cases: &[(&str, &[u8], (usize, u64))] = &[
        ("HELLO", b"HELLO", FP_S2B_UPPER),
        ("lower", b"hello world", FP_S2B_LOWER),
        ("mix", b"Aztec 2D 1995", FP_S2B_MIX),
        ("byte_small", &[b'A', 0xC0, 0xC1, 0xC2], FP_S2B_BYTE_SMALL),
        ("byte_big", long_byte_run.as_slice(), FP_S2B_BYTE_BIG),
    ];
    for &(tag, payload, want) in cases {
        let seq = match encode_dp(payload) {
            Ok(seq) => seq,
            Err(e) => panic!("encode_dp({tag}) ok: {e:?}"),
        };
        let bits = match seq_to_bits(&seq) {
            Ok(bits) => bits,
            Err(e) => panic!("seq_to_bits({tag}) ok: {e:?}"),
        };
        assert_eq!(fp(&bits), want, "fingerprint changed for {tag}");
    }
}
// Pinned (bit count, checksum) fingerprints of seq_to_bits(encode_dp(payload)),
// one per payload in seq_to_bits_state_paths_fingerprint_pinned.
const FP_S2B_UPPER: (usize, u64) = (25, 371621006540);
const FP_S2B_LOWER: (usize, u64) = (60, 2051878843253);
const FP_S2B_MIX: (usize, u64) = (72, 3917947183236);
const FP_S2B_BYTE_SMALL: (usize, u64) = (39, 767131934929);
const FP_S2B_BYTE_BIG: (usize, u64) = (330, 84843730228843);
#[test]
fn encode_dp_v2_state_machine_fingerprint_pinned() {
    /// Sequence length plus a position-weighted wrapping checksum of its
    /// (sign-extended) elements.
    fn fp(seq: &[i32]) -> (usize, u64) {
        let checksum = seq.iter().zip(1u64..).fold(0u64, |acc, (&v, pos)| {
            acc.wrapping_add((v as i64 as u64).wrapping_mul(pos.wrapping_mul(2_654_435_761)))
        });
        (seq.len(), checksum)
    }

    /// `b'X'` followed by every byte from 0xC0 through `last` inclusive.
    fn x_then_high_bytes(last: u8) -> Vec<u8> {
        std::iter::once(b'X').chain(0xC0..=last).collect()
    }

    // 'X' followed by 31, 32 and 33 bytes starting at 0xC0.
    let byte_31 = x_then_high_bytes(0xDE);
    let byte_32 = x_then_high_bytes(0xDF);
    let byte_33 = x_then_high_bytes(0xE0);

    let cases: &[(&str, &[u8], (usize, u64))] = &[
        ("comma_pair", b"red, blue", FP_DP2_COMMA_PAIR),
        ("colon_pair", b"Time: 12 now", FP_DP2_COLON_PAIR),
        ("digit_dot_digits", b"12.34", FP_DP2_DIGIT_DOT),
        ("digit_comma_digits", b"12,34", FP_DP2_DIGIT_COMMA),
        ("mixed_crlf", b"A\x1B\x0D\x0AB", FP_DP2_MIXED_CRLF),
        ("digit_to_byte", b"12345\xC0\xC1", FP_DP2_DIGIT_TO_BYTE),
        ("punct_to_byte", b"!@#$%\xC0\xC1", FP_DP2_PUNCT_TO_BYTE),
        ("byte_31", byte_31.as_slice(), FP_DP2_BYTE_31),
        ("byte_32", byte_32.as_slice(), FP_DP2_BYTE_32),
        ("byte_33", byte_33.as_slice(), FP_DP2_BYTE_33),
        (
            "punct_after_digit_pair",
            b"12. End.",
            FP_DP2_PUNCT_AFTER_DIGIT_PAIR,
        ),
        ("pair_mid_seq", b"end. and more.", FP_DP2_PAIR_MID_SEQ),
    ];
    for &(tag, payload, want) in cases {
        let seq = match encode_dp(payload) {
            Ok(seq) => seq,
            Err(e) => panic!("encode_dp({tag}) ok: {e:?}"),
        };
        assert_eq!(fp(&seq), want, "fingerprint changed for {tag}");
    }
}
// Pinned (sequence length, checksum) fingerprints of encode_dp output, one per
// payload in encode_dp_v2_state_machine_fingerprint_pinned.
const FP_DP2_COMMA_PAIR: (usize, u64) = (10, 11740569370903);
const FP_DP2_COLON_PAIR: (usize, u64) = (16, 19825980698909);
const FP_DP2_DIGIT_DOT: (usize, u64) = (6, 2635854710673);
const FP_DP2_DIGIT_COMMA: (usize, u64) = (6, 2614619224585);
const FP_DP2_MIXED_CRLF: (usize, u64) = (7, 1831560675090);
const FP_DP2_DIGIT_TO_BYTE: (usize, u64) = (10, 12199786757556);
const FP_DP2_PUNCT_TO_BYTE: (usize, u64) = (8, 9765669164719);
const FP_DP2_BYTE_31: (usize, u64) = (33, 313372068200616);
const FP_DP2_BYTE_32: (usize, u64) = (34, 333498000140518);
const FP_DP2_BYTE_33: (usize, u64) = (35, 354308776506758);
const FP_DP2_PUNCT_AFTER_DIGIT_PAIR: (usize, u64) = (12, 9314415085349);
const FP_DP2_PAIR_MID_SEQ: (usize, u64) = (16, 25665739373109);
#[test]
fn encode_single_state_mixed_and_punct_arms() {
    // MIXED alphabet: '@', ^A and DEL.
    assert_eq!(
        encode_single_state(STATE_MIXED, &[b'@', 0x01, 127])
            .expect("MIXED arm must encode mixed-alphabet bytes"),
        vec![20, 2, 27],
        "STATE_MIXED arm output"
    );

    // PUNCT alphabet: '!', '?' and '['.
    assert_eq!(
        encode_single_state(STATE_PUNCT, b"!?[")
            .expect("PUNCT arm must encode punct-alphabet bytes"),
        vec![6, 26, 27],
        "STATE_PUNCT arm output"
    );
}
#[test]
fn bit_stuff_value_pins_msb_first() {
    // Seven input bits packed into 6-bit codewords: one full codeword plus a tail.
    let seven_bits = [1u8, 0, 1, 1, 0, 0, 1].map(|bit| bit != 0);
    let packed = bit_stuff(&seven_bits, 6);
    assert_eq!(packed[0], 0b101100, "normal codeword MSB-first");
    assert_eq!(packed[1], 0b111110, "tail all-ones flip");

    // Five leading zero bits followed by a one.
    let mostly_zero = [0u8, 0, 0, 0, 0, 1].map(|bit| bit != 0);
    let stuffed = bit_stuff(&mostly_zero, 6);
    assert_eq!(stuffed[0], 0b000001, "all-zero stuffer sets low bit 1");
}
#[test]
fn fit_metric_readerinit_and_layer_filter() {
    // Layer argument -1, with the reader-init flag off and on.
    let auto_plain = fit_metric(40, "compact", -1, 23, 3, false);
    let auto_reader_init = fit_metric(40, "compact", -1, 23, 3, true);
    assert_eq!(auto_plain, FIT_RI_OFF, "readerinit=false compact pick");
    assert_eq!(auto_reader_init, FIT_RI_ON, "readerinit=true compact pick");

    // Explicit layer arguments 1, 2 and 3.
    let forced: Vec<_> = (1..=3)
        .map(|layers| fit_metric(40, "compact", layers, 23, 3, false))
        .collect();
    let expected = [
        (FIT_L1, "forced compact L1"),
        (FIT_L2, "forced compact L2"),
        (FIT_L3, "forced compact L3"),
    ];
    for (got, (want, why)) in forced.iter().zip(expected) {
        assert_eq!(*got, want, "{why}");
    }
    for neighbours in forced.windows(2) {
        assert_ne!(
            neighbours[0], neighbours[1],
            "distinct layers give distinct metrics"
        );
    }
}
// Expected fit_metric results used by fit_metric_readerinit_and_layer_filter.
// The unsuffixed names are aliases of the `_V` constants below.
const FIT_RI_OFF: Option<usize> = FIT_RI_OFF_V;
const FIT_RI_ON: Option<usize> = FIT_RI_ON_V;
const FIT_L1: Option<usize> = FIT_L1_V;
const FIT_L2: Option<usize> = FIT_L2_V;
const FIT_L3: Option<usize> = FIT_L3_V;
// Pinned values behind the aliases above.
const FIT_RI_OFF_V: Option<usize> = Some(1);
const FIT_RI_ON_V: Option<usize> = Some(1);
const FIT_L1_V: Option<usize> = Some(1);
const FIT_L2_V: Option<usize> = Some(3);
const FIT_L3_V: Option<usize> = Some(5);
#[test]
fn seq_to_bits_residual_paths() {
    // Bit count plus a position-weighted wrapping checksum of the bits.
    let fp = |bits: &[bool]| {
        let checksum = bits.iter().zip(1u64..).fold(0u64, |acc, (&bit, pos)| {
            acc.wrapping_add(u64::from(bit).wrapping_mul(pos.wrapping_mul(2_654_435_761)))
        });
        (bits.len(), checksum)
    };

    // A byte-shift run followed by a latch to LOWER, and the same with MIXED.
    let bits_lower = seq_to_bits(&vec![SHIFT_BYTE, 0xC0, 0xC1, LATCH_LOWER, b'a' as i32])
        .expect("byte->lower");
    let bits_mixed = seq_to_bits(&vec![SHIFT_BYTE, 0xC0, 0xC1, LATCH_MIXED, b'@' as i32])
        .expect("byte->mixed");
    // A punct shift followed by a pair sentinel, and by a plain character.
    let bits_pair = seq_to_bits(&vec![SHIFT_PUNCT, PAIR_3]).expect("shift-punct pair");
    let bits_shift = seq_to_bits(&vec![SHIFT_PUNCT, b'!' as i32]).expect("shift-punct char");

    assert_eq!(fp(&bits_lower), S2B_BYTE_LOWER, "byte-exit LOWER");
    assert_eq!(fp(&bits_mixed), S2B_BYTE_MIXED, "byte-exit MIXED");
    assert_eq!(fp(&bits_pair), S2B_SHIFT_PAIR, "shift-punct pair");
    assert_eq!(fp(&bits_shift), S2B_SHIFT_CHAR, "shift-punct char");
    assert_ne!(bits_lower, bits_mixed, "LOWER vs MIXED exit differ");
}
// Expected (bit count, checksum) fingerprints used by
// seq_to_bits_residual_paths. The unsuffixed names are aliases of the `_V`
// constants below.
const S2B_BYTE_LOWER: (usize, u64) = S2B_BYTE_LOWER_V;
const S2B_BYTE_MIXED: (usize, u64) = S2B_BYTE_MIXED_V;
const S2B_SHIFT_PAIR: (usize, u64) = S2B_SHIFT_PAIR_V;
const S2B_SHIFT_CHAR: (usize, u64) = S2B_SHIFT_CHAR_V;
// Pinned values behind the aliases above.
const S2B_BYTE_LOWER_V: (usize, u64) = (31, 376929878062);
const S2B_BYTE_MIXED_V: (usize, u64) = (31, 445945207848);
const S2B_SHIFT_PAIR_V: (usize, u64) = (10, 50434279459);
const S2B_SHIFT_CHAR_V: (usize, u64) = (10, 45125407937);
#[test]
fn build_matrix_direct_multisize_fingerprint() {
    /// Symbol size plus a position-weighted wrapping checksum of its pixels,
    /// indexed as `y * size + x`.
    fn fp(sym: &AztecSymbolMatrix) -> (usize, u64) {
        let side = sym.size;
        let mut checksum: u64 = 0;
        for (y, row) in sym.pixels.iter().enumerate() {
            for (x, &pixel) in row.iter().enumerate() {
                let weight = ((y as u64) * (side as u64) + (x as u64))
                    .wrapping_add(1)
                    .wrapping_mul(2_654_435_761);
                checksum = checksum.wrapping_add(u64::from(pixel).wrapping_mul(weight));
            }
        }
        (side, checksum)
    }

    /// `n` deterministic pseudo-random codewords, each masked to `bpcw` bits.
    fn cws_of(n: usize, bpcw: u8) -> Vec<u32> {
        let mask = (1u32 << bpcw) - 1;
        let mut out = Vec::with_capacity(n);
        for i in 0..n as u32 {
            out.push((i.wrapping_mul(0x9E37) ^ (i >> 1) ^ 0x5A5A) & mask);
        }
        out
    }

    let modebits: Vec<bool> = (0..40).map(|i| i % 3 == 0).collect();

    // Compact layers come first, then full layers; results are collected in
    // that order for comparison against BM_DIRECT_FPS.
    let plan: [(&str, [u8; 4], &str); 2] = [
        ("compact", [1, 2, 3, 4], "compact metric exists"),
        ("full", [1, 2, 5, 6], "full metric exists"),
    ];
    let mut got: Vec<(usize, u64)> = Vec::new();
    for (format, layer_set, missing) in plan {
        for target_layers in layer_set {
            let m = METRICS
                .iter()
                .copied()
                .find(|m| m.format == format && m.layers == target_layers)
                .expect(missing);
            let cws = cws_of(m.ncws as usize, m.bps);
            let sym = build_matrix(format, target_layers, &cws, m.bps, &modebits);
            got.push(fp(&sym));
        }
    }
    assert_eq!(got.as_slice(), BM_DIRECT_FPS, "build_matrix fingerprints");
}
// Expected (symbol size, checksum) fingerprints for
// build_matrix_direct_multisize_fingerprint; alias of BM_DIRECT_FPS_V.
const BM_DIRECT_FPS: &[(usize, u64)] = BM_DIRECT_FPS_V;
// Entry order follows the test: compact layers 1, 2, 3, 4, then full layers
// 1, 2, 5, 6.
const BM_DIRECT_FPS_V: &[(usize, u64)] = &[
(15, 33650282142197),
(19, 86072733986186),
(23, 185213255223775),
(27, 363928451704622),
(19, 85448941582351),
(23, 186991727183645),
(37, 1237601474772879),
(41, 1865364914506335),
];
/// Folds a sequence into a single `u64`: the wrapping sum of each
/// sign-extended element times `(index + 1) * 2_654_435_761`, plus the
/// sequence length times `1099511628211` (all arithmetic wrapping).
fn _t2a_fpv(seq: &[i32]) -> u64 {
    let weighted = seq.iter().zip(1u64..).fold(0u64, |acc, (&v, pos)| {
        acc.wrapping_add((v as i64 as u64).wrapping_mul(pos.wrapping_mul(2_654_435_761)))
    });
    let length_term = (seq.len() as u64).wrapping_mul(1099511628211);
    length_term.wrapping_add(weighted)
}
// Deterministic pseudo-random fuzz of encode_dp: inputs are generated from
// fixed seeds, every successfully encoded sequence is hashed with _t2a_fpv,
// and the hashes are folded into accumulators compared against pinned values.
#[test]
fn encode_dp_deterministic_fuzz_pins_reachable_mutants() {
// Pass 1: a single byte menu, 60000 trials.
let menu: &[u8] = b"!@#$%^&*().,:; \r\n0123456789Aa.,: ";
let mut state: u64 = 0x1234_5678_9ABC_DEF0;
let mut fuzz_acc: u64 = 0;
for trial in 0..60000u64 {
// Multiply-add step of the outer generator (wrapping).
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
// Input length is in 1..=40.
let len = 1 + ((state >> 33) % 40) as usize;
let mut input = Vec::with_capacity(len);
// Inner generator, seeded from the outer state mixed with the trial index.
let mut s2 = state ^ (trial.wrapping_mul(0x9E3779B97F4A7C15));
for _ in 0..len {
s2 = s2
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
// Pick a menu byte from the upper bits of the inner state.
input.push(menu[((s2 >> 40) as usize) % menu.len()]);
}
// Inputs for which encode_dp returns Err do not contribute.
if let Ok(seq) = encode_dp(&input) {
let h = _t2a_fpv(&seq);
// Weight each hash by (trial + 1) * 2_654_435_761 so the trial order matters.
fuzz_acc = fuzz_acc.wrapping_add(
h.wrapping_mul(trial.wrapping_add(1).wrapping_mul(2_654_435_761)),
);
}
}
assert_eq!(fuzz_acc, 3826213173968392585, "encode_dp fuzz accumulator");
// Pass 2: five menus, 50000 trials each, sharing one accumulator.
let menus: &[&[u8]] = &[
b"Aa0!. ,:;\r\n@#",
b"\x00\x01\x1B AaZz09.,: \r\n!?[]{}\xC0\xC1\x7F",
b"0123456789.,: \r\n",
b"ABCabc!@#. , : ;\r\n12",
b"\x1B\x0D\x0A. , : Aa09",
];
let mut fuzz2: u64 = 0;
for (mi, menu) in menus.iter().enumerate() {
// Each menu gets its own seed derived from its index.
let mut state: u64 =
0xCAFE_BABE_0000_0001 ^ (mi as u64).wrapping_mul(0x9E3779B97F4A7C15);
for trial in 0..50000u64 {
state = state
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
// Input length is in 1..=30.
let len = 1 + ((state >> 31) % 30) as usize;
let mut input = Vec::with_capacity(len);
let mut s2 = state ^ trial.wrapping_mul(0xD1B54A32D192ED03);
for _ in 0..len {
s2 = s2
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
input.push(menu[((s2 >> 40) as usize) % menu.len()]);
}
// Inputs for which encode_dp returns Err do not contribute.
if let Ok(seq) = encode_dp(&input) {
let h = _t2a_fpv(&seq);
// The weight mixes the trial index with the menu index.
fuzz2 = fuzz2.wrapping_add(
h.wrapping_mul((trial ^ (mi as u64 * 0x100000001B3)).wrapping_add(1)),
);
}
}
}
assert_eq!(fuzz2, 539728216792903729, "encode_dp fuzz2 accumulator");
}
/// Runs `encode_dp` on every string of length 1 through 5 over an 11-byte
/// alphabet and compares a weighted sum of the result fingerprints with a
/// pinned value. Inputs that fail to encode still advance the running
/// ordinal but add nothing to the accumulator.
#[test]
fn encode_dp_exhaustive_len5_accumulator() {
    let alphabet: &[u8] = b"A0!. ,:\r\n\x1B\xC0";
    let radix = alphabet.len();
    let mut ex_acc: u64 = 0;
    let mut ordinal: u64 = 0;
    for width in 1..=5u32 {
        for code in 0..radix.pow(width) {
            // Read `code` as a little-endian base-`radix` number, one
            // alphabet byte per digit.
            let input: Vec<u8> = (0..width)
                .scan(code, |rest, _| {
                    let digit = *rest % radix;
                    *rest /= radix;
                    Some(alphabet[digit])
                })
                .collect();
            ordinal += 1;
            let Ok(seq) = encode_dp(&input) else {
                continue;
            };
            let weight = ordinal.wrapping_mul(2_654_435_761).wrapping_add(1);
            ex_acc = ex_acc.wrapping_add(_t2a_fpv(&seq).wrapping_mul(weight));
        }
    }
    assert_eq!(ex_acc, 15437856385332364494, "encode_dp exhaustive len<=5");
}
/// Boundary checks for `seq_to_bits`: the longest accepted byte run, the
/// first rejected one, and two malformed shift sequences.
#[test]
fn seq_to_bits_error_edges_kill_mutants() {
    /// `SHIFT_BYTE`, then `count` copies of `0xC0`, then `LATCH_UPPER`.
    fn byte_run(count: usize) -> Vec<i32> {
        std::iter::once(SHIFT_BYTE)
            .chain(std::iter::repeat_n(0xC0, count))
            .chain(std::iter::once(LATCH_UPPER))
            .collect()
    }

    let longest_ok = byte_run(2078);
    assert!(
        seq_to_bits(&longest_ok).is_ok(),
        "2078-byte run must encode"
    );

    let too_long = byte_run(2079);
    assert!(
        seq_to_bits(&too_long).is_err(),
        "2079-byte run must error (BYTE block caps at 2078)"
    );

    let dangling_shift = [SHIFT_PUNCT];
    assert!(
        seq_to_bits(&dangling_shift).is_err(),
        "shift with no following char must error"
    );

    let shift_then_pair = [LATCH_LOWER, SHIFT_UPPER, PAIR_3];
    assert!(
        seq_to_bits(&shift_then_pair).is_err(),
        "shift-to-UPPER followed by pair sentinel must error"
    );
}
/// Checks that passing `0` as the third argument (layers) to `fit_metric`
/// still yields a match for both the compact and the full format.
#[test]
fn fit_metric_layers_zero_kills_ge_mutant() {
    // Every argument except the format is shared by both checks.
    let fit_with_zero_layers = |format: &'static str| fit_metric(40, format, 0, 23, 3, false);

    let compact_fit = fit_with_zero_layers("compact");
    assert_eq!(
        compact_fit,
        Some(1),
        "layers==0 means 'no constraint' (guard is strictly > 0)"
    );

    let full_fit = fit_with_zero_layers("full");
    assert_eq!(
        full_fit,
        Some(2),
        "layers==0 full likewise unconstrained"
    );
}
/// Pins the `build_matrix` output for full-format symbols with 12 and 27
/// layers by comparing a checksum of each resulting matrix with a stored
/// value.
#[test]
fn build_matrix_full_l12_l27_growth_fingerprint() {
    /// Position-weighted checksum of `sym.pixels`, mixed with `sym.size`.
    fn fp(sym: &AztecSymbolMatrix) -> u64 {
        let side = sym.size as u64;
        let mut acc: u64 = 0;
        for (row_no, row) in sym.pixels.iter().enumerate() {
            let row_base = (row_no as u64) * side;
            for (col_no, &px) in row.iter().enumerate() {
                // The weight depends on the 1-based linear index of the pixel.
                let weight = (row_base + (col_no as u64))
                    .wrapping_add(1)
                    .wrapping_mul(2_654_435_761);
                acc = acc.wrapping_add(u64::from(px).wrapping_mul(weight));
            }
        }
        side.wrapping_mul(1099511628211).wrapping_add(acc)
    }

    /// Deterministic filler codewords: `n` values, each masked to the low
    /// `bpcw` bits.
    fn cws_of(n: usize, bpcw: u8) -> Vec<u32> {
        let mask = (1u32 << bpcw) - 1;
        let mut words = Vec::with_capacity(n);
        for i in 0..n as u32 {
            let scrambled = i.wrapping_mul(0x9E37) ^ (i >> 1) ^ 0x5A5A;
            words.push(scrambled & mask);
        }
        words
    }

    // Every third mode bit is set.
    let modebits = (0..40).map(|bit| bit % 3 == 0).collect::<Vec<bool>>();
    let expected: [(u8, u64); 2] = [(12, 13333290602940607), (27, 194802344388397256)];
    for (tl, want) in expected {
        let metric = METRICS
            .iter()
            .find(|entry| entry.layers == tl && entry.format == "full")
            .copied()
            .expect("full metric exists");
        let cws = cws_of(metric.ncws as usize, metric.bps);
        let sym = build_matrix("full", tl, &cws, metric.bps, &modebits);
        let got = fp(&sym);
        assert_eq!(got, want, "build_matrix full L{tl} growth fingerprint");
    }
}
}