#![allow(dead_code)]
use crate::encoding::BitMatrix;
use crate::error::Error;
// Negative sentinel values used inside CHARMAPS (and inside `i16` messages)
// for entries that are not plain byte values. Being negative, they can never
// collide with a byte widened to `i16`.
//
// Naming, as far as the visible code shows:
// SWA / SWB / SWC - entries whose codeword makes the mixed encoder change
// its active code set (see the SW*_FROM_* constants).
// SA1 / SB1 / SC1 - entries whose codeword is followed by one codeword of
// the named set without changing the active set;
// SA2 / SB2 / SC2 and SB3 / SC3 are the two- and three-unit variants.
// NOTE(review): SC1 does not appear in CHARMAPS or any encoder in this chunk.
// PAD - the pad entry (CHARMAPS row 103).
// FN1..FN4 - function characters; only FN4 is emitted by the mixed encoder.
pub(crate) const SWA: i16 = -1;
pub(crate) const SWB: i16 = -2;
pub(crate) const SWC: i16 = -3;
pub(crate) const SA1: i16 = -4;
pub(crate) const SB1: i16 = -5;
pub(crate) const SC1: i16 = -6;
pub(crate) const SA2: i16 = -7;
pub(crate) const SB2: i16 = -8;
pub(crate) const SC2: i16 = -9;
pub(crate) const PAD: i16 = -10;
pub(crate) const SB3: i16 = -11;
pub(crate) const SC3: i16 = -12;
pub(crate) const FN1: i16 = -13;
pub(crate) const FN2: i16 = -14;
pub(crate) const FN3: i16 = -15;
pub(crate) const FN4: i16 = -16;
/// Character map indexed by codeword value (row index 0..=106).
///
/// Each row is `[set A entry, set B entry, set C entry]`:
/// * column 0 is searched by `lookup_a`, column 1 by `lookup_b`; the row
///   index of the match is the codeword that gets emitted;
/// * non-negative entries in columns 0 and 1 are byte values, negative
///   entries are the sentinel constants (SWA, SB1, FN4, PAD, ...);
/// * column 2 holds the row index itself for rows 0..=99 and sentinels after
///   that (presumably the digit-pair value in set C; no visible code searches
///   this column).
#[rustfmt::skip]
pub(crate) const CHARMAPS: [[i16; 3]; 107] = [
[ 32, 32, 0], [ 33, 33, 1], [ 34, 34, 2], [ 35, 35, 3],
[ 36, 36, 4], [ 37, 37, 5], [ 38, 38, 6], [ 39, 39, 7],
[ 40, 40, 8], [ 41, 41, 9], [ 42, 42, 10], [ 43, 43, 11],
[ 44, 44, 12], [ 45, 45, 13], [ 46, 46, 14], [ 47, 47, 15],
[ 48, 48, 16], [ 49, 49, 17], [ 50, 50, 18], [ 51, 51, 19],
[ 52, 52, 20], [ 53, 53, 21], [ 54, 54, 22], [ 55, 55, 23],
[ 56, 56, 24], [ 57, 57, 25], [ 58, 58, 26], [ 59, 59, 27],
[ 60, 60, 28], [ 61, 61, 29], [ 62, 62, 30], [ 63, 63, 31],
[ 64, 64, 32], [ 65, 65, 33], [ 66, 66, 34], [ 67, 67, 35],
[ 68, 68, 36], [ 69, 69, 37], [ 70, 70, 38], [ 71, 71, 39],
[ 72, 72, 40], [ 73, 73, 41], [ 74, 74, 42], [ 75, 75, 43],
[ 76, 76, 44], [ 77, 77, 45], [ 78, 78, 46], [ 79, 79, 47],
[ 80, 80, 48], [ 81, 81, 49], [ 82, 82, 50], [ 83, 83, 51],
[ 84, 84, 52], [ 85, 85, 53], [ 86, 86, 54], [ 87, 87, 55],
[ 88, 88, 56], [ 89, 89, 57], [ 90, 90, 58], [ 91, 91, 59],
[ 92, 92, 60], [ 93, 93, 61], [ 94, 94, 62], [ 95, 95, 63],
// Rows 64..=95: set A carries control characters 0..=31, set B carries 96..=127.
[ 0, 96, 64], [ 1, 97, 65], [ 2, 98, 66], [ 3, 99, 67],
[ 4, 100, 68], [ 5, 101, 69], [ 6, 102, 70], [ 7, 103, 71],
[ 8, 104, 72], [ 9, 105, 73], [ 10, 106, 74], [ 11, 107, 75],
[ 12, 108, 76], [ 13, 109, 77], [ 14, 110, 78], [ 15, 111, 79],
[ 16, 112, 80], [ 17, 113, 81], [ 18, 114, 82], [ 19, 115, 83],
[ 20, 116, 84], [ 21, 117, 85], [ 22, 118, 86], [ 23, 119, 87],
[ 24, 120, 88], [ 25, 121, 89], [ 26, 122, 90], [ 27, 123, 91],
[ 28, 124, 92], [ 29, 125, 93], [ 30, 126, 94], [ 31, 127, 95],
// Rows 96..=106: function, shift, switch and pad entries (sentinels).
[FN3, FN3, 96], [FN2, FN2, 97], [SB1, SA1, 98], [SWC, SWC, 99],
[SWB, FN4, SWB], [FN4, SWA, SWA], [FN1, FN1, FN1], [PAD, PAD, PAD],
[SB2, SA2, SB1], [SC2, SC2, SB2], [SC3, SC3, SB3],
];
/// Symbol sizes, smallest first: each entry is `[rows, capacity]`.
///
/// `capacity` is the number of codeword slots available between the leading
/// start/mode codeword and the two check codewords; every entry satisfies
/// `capacity == 5 * rows - 3` (5 codewords per row, minus those three).
/// `pick_symbol_size` returns the first entry whose capacity is large enough.
pub(crate) const METRICS: [[u16; 2]; 15] = [
[2, 7],
[3, 12],
[4, 17],
[5, 22],
[6, 27],
[7, 32],
[8, 37],
[9, 42],
[10, 47],
[11, 52],
[12, 57],
[13, 62],
[14, 67],
[15, 72],
[16, 77],
];
/// Width patterns for the 107 codewords, indexed by codeword value.
///
/// Each string is six ASCII digits; `build_row_bits` reads every digit as the
/// width (in modules) of one run in an alternating run sequence. The six
/// widths of every entry sum to 11 modules (checked by the unit tests).
pub(crate) const ENCS: [&str; 107] = [
"212222", "222122", "222221", "121223", "121322", "131222", "122213", "122312", "132212",
"221213", "221312", "231212", "112232", "122132", "122231", "113222", "123122", "123221",
"223211", "221132", "221231", "213212", "223112", "312131", "311222", "321122", "321221",
"312212", "322112", "322211", "212123", "212321", "232121", "111323", "131123", "131321",
"112313", "132113", "132311", "211313", "231113", "231311", "112133", "112331", "132131",
"113123", "113321", "133121", "313121", "211331", "231131", "213113", "213311", "213131",
"311123", "311321", "331121", "312113", "312311", "332111", "314111", "221411", "431111",
"111224", "111422", "121124", "121421", "141122", "141221", "112214", "112412", "122114",
"122411", "142112", "142211", "241211", "221114", "413111", "241112", "134111", "111242",
"121142", "121241", "114212", "124112", "124211", "411212", "421112", "421211", "212141",
"214121", "412121", "111143", "111341", "131141", "114113", "114311", "411113", "411311",
"113141", "114131", "311141", "411131", "211412", "211214", "211232", "211133",
];
/// Per-row start patterns, indexed by 0-based row number.
/// Each string is four digit characters (run widths) summing to 7 modules.
pub(crate) const STARTENCS: [&str; 16] = [
"3211", "2221", "2122", "1411", "1132", "1231", "1114", "3112", "3211", "2221", "2122", "1411",
"1132", "1231", "1114", "3112",
];
/// Per-row stop patterns, indexed by 0-based row number; this is the table
/// `encode` and `encode_pixs` pass to `build_row_bits`.
pub(crate) const STOPENCS_ODD: [&str; 16] = [
"3211", "2221", "2122", "1411", "1132", "1231", "1114", "3112", "1132", "1231", "1114", "3112",
"3211", "2221", "2122", "1411",
];
/// Alternative per-row stop patterns.
/// NOTE(review): not referenced by any encoder visible in this chunk; the
/// odd/even naming presumably refers to a symbol-position option - confirm
/// before wiring it up.
pub(crate) const STOPENCS_EVEN: [&str; 16] = [
"2122", "1411", "1132", "1231", "1114", "3112", "1132", "1231", "1114", "3112", "3211", "2221",
"2122", "1411", "3211", "2221",
];
/// Codeword used to fill unused slots up to the symbol's capacity; it is the
/// index of the `[PAD, PAD, PAD]` row in CHARMAPS.
pub(crate) const PAD_CW: u16 = 103;
/// Returns the value of the first codeword of a symbol, which packs the row
/// count and the start mode into one number: `7 * (rows - 2) + mode`.
///
/// `rows` is expected to be at least 2 (the smallest entry in the size
/// table); smaller values underflow the subtraction.
#[inline]
pub(crate) fn leading_row_indicator(rows: u16, mode: u16) -> u16 {
    let rows_above_minimum = rows - 2;
    rows_above_minimum * 7 + mode
}
/// Computes the two modulo-107 check codewords for `cws`.
///
/// `cws` is the leading start/mode codeword followed by the data and pad
/// codewords. With 0-based position `i` and `len = cws.len()`:
///
/// * `c1 = (sum of (i + 2) * cws[i]) mod 107`
/// * `c2 = (sum of (i + 1) * cws[i] + (len + 1) * c1) mod 107`
///
/// so the second sum treats `c1` as if it had been appended to `cws`.
///
/// Returns `(c1, c2)`. An empty slice yields `(0, 0)` instead of underflowing,
/// and every partial sum is reduced modulo 107, so the result is exact for
/// any input length.
pub(crate) fn compute_checksums(cws: &[u16]) -> (u16, u16) {
    const MODULUS: u64 = 107;

    let mut s1: u64 = 0;
    let mut s2: u64 = 0;
    // `weight` is the 1-based position; the first sum uses weight + 1.
    for (weight, &cw) in (1u64..).zip(cws) {
        let cw = u64::from(cw) % MODULUS;
        s1 = (s1 + (weight + 1) % MODULUS * cw) % MODULUS;
        s2 = (s2 + weight % MODULUS * cw) % MODULUS;
    }

    let c1 = s1;
    // c1 sits one position past the last codeword, hence weight len + 1.
    let c1_weight = (cws.len() as u64 + 1) % MODULUS;
    let c2 = (s2 + c1_weight * c1) % MODULUS;

    // Both values are < 107, so the narrowing casts cannot truncate.
    (c1 as u16, c2 as u16)
}
// Start-mode values. The chosen mode is added to `7 * (rows - 2)` by
// `leading_row_indicator` to form the first codeword of the symbol.
//
// NOTE(review): published Code 16K mode tables list 3 = set B with implied
// FNC1, 4 = set C with implied FNC1, 5 = set C with implied Shift B and
// 6 = set C with implied double Shift B. The names given to 3, 4 and 6 below
// do not obviously match that table - confirm against the specification
// before relying on them.
/// Start in code set A.
pub(crate) const MODE_A: u16 = 0;
/// Start in code set B.
pub(crate) const MODE_B: u16 = 1;
/// Start in code set C (digit pairs from the first character).
pub(crate) const MODE_C_FROM_START: u16 = 2;
pub(crate) const MODE_B_THEN_A: u16 = 3;
pub(crate) const MODE_B_THEN_C: u16 = 4;
/// One leading set-B codeword, then code set C (used by the odd-length digit path).
pub(crate) const MODE_C_THEN_B: u16 = 5;
pub(crate) const MODE_GS1: u16 = 6;
/// Picks the smallest symbol that can hold `pair_count` codewords.
///
/// Walks `METRICS` in order and returns `(rows, capacity)` of the first entry
/// whose capacity is at least `pair_count`, or `None` when even the largest
/// entry is too small.
pub(crate) fn pick_symbol_size(pair_count: usize) -> Option<(u16, u16)> {
    for &[rows, capacity] in &METRICS {
        if pair_count <= usize::from(capacity) {
            return Some((rows, capacity));
        }
    }
    None
}
/// Returns the codeword that encodes byte `b` in code set B, i.e. the index
/// of the first `CHARMAPS` row whose set-B column equals `b`, or `None` when
/// set B has no such entry.
#[inline]
pub(crate) fn lookup_b(b: u8) -> Option<u16> {
    let wanted = i16::from(b);
    for (codeword, row) in CHARMAPS.iter().enumerate() {
        if row[1] == wanted {
            return Some(codeword as u16);
        }
    }
    None
}
/// Returns the codeword that encodes byte `b` in code set A, i.e. the index
/// of the first `CHARMAPS` row whose set-A column equals `b`, or `None` when
/// set A has no such entry.
#[inline]
pub(crate) fn lookup_a(b: u8) -> Option<u16> {
    let wanted = i16::from(b);
    for (codeword, row) in CHARMAPS.iter().enumerate() {
        if row[0] == wanted {
            return Some(codeword as u16);
        }
    }
    None
}
// Codeword values (CHARMAPS row indices) of the shift / switch / FN4 entries,
// named `<entry>_FROM_<active set>`: the same row index means different
// things depending on which code set is active when it is emitted.
// Row 98 is SB1 in set A and SA1 in set B.
pub(crate) const SA1_FROM_B: u16 = 98;
pub(crate) const SB1_FROM_A: u16 = 98;
// Row 104 is SB2 in set A and SA2 in set B.
pub(crate) const SA2_FROM_B: u16 = 104;
pub(crate) const SB2_FROM_A: u16 = 104;
// Rows 100 / 101: set A reads them as SWB / FN4, set B as FN4 / SWA.
pub(crate) const SWA_FROM_B: u16 = 101;
pub(crate) const SWB_FROM_A: u16 = 100;
pub(crate) const FN4_FROM_A: u16 = 101;
pub(crate) const FN4_FROM_B: u16 = 100;
// Rows 104..=106 in the set-C column are SB1 / SB2 / SB3.
pub(crate) const SB1_FROM_C: u16 = 104;
pub(crate) const SB2_FROM_C: u16 = 105;
pub(crate) const SB3_FROM_C: u16 = 106;
#[inline]
pub(crate) fn anotb(b: i16) -> bool {
let in_a = b >= 0 && CHARMAPS.iter().any(|row| row[0] == b);
let in_b = b >= 0 && CHARMAPS.iter().any(|row| row[1] == b);
in_a && !in_b
}
#[inline]
pub(crate) fn bnota(b: i16) -> bool {
let in_a = b >= 0 && CHARMAPS.iter().any(|row| row[0] == b);
let in_b = b >= 0 && CHARMAPS.iter().any(|row| row[1] == b);
in_b && !in_a
}
/// True when `b` is a non-negative value present in the set-A column of
/// `CHARMAPS`. Negative sentinels are never considered members.
#[inline]
pub(crate) fn in_a(b: i16) -> bool {
    if b < 0 {
        return false;
    }
    CHARMAPS.iter().map(|row| row[0]).any(|entry| entry == b)
}
/// True when `b` is a non-negative value present in the set-B column of
/// `CHARMAPS`. Negative sentinels are never considered members.
#[inline]
pub(crate) fn in_b(b: i16) -> bool {
    if b < 0 {
        return false;
    }
    CHARMAPS.iter().map(|row| row[1]).any(|entry| entry == b)
}
/// Rewrites `msg` so that every value fits in 7 bits, inserting `FN4`
/// markers wherever the input crosses between standard (0..=127) and
/// extended (>= 128) values.
///
/// The encoder tracks whether it is currently in "extended" state. When the
/// next value is of the other kind it looks at how long the run of that kind
/// is:
/// * shorter than the threshold: emit one `FN4` (applies to this value only);
/// * otherwise: emit two `FN4`s and flip the state.
///
/// The threshold is 3 when the run reaches the end of the message and 5
/// otherwise. Negative values (sentinels) are copied through untouched and
/// break runs; non-negative values are emitted with the high bit stripped.
pub(crate) fn insert_fn4_markers(msg: &[i16]) -> Vec<i16> {
    let total = msg.len();

    // run_std[i] / run_ext[i]: length of the unbroken run of standard /
    // extended values starting at i (0 when msg[i] is of another kind).
    let mut run_std = vec![0usize; total + 1];
    let mut run_ext = vec![0usize; total + 1];
    for (pos, &value) in msg.iter().enumerate().rev() {
        match value {
            v if v < 0 => {}
            v if v < 128 => run_std[pos] = run_std[pos + 1] + 1,
            _ => run_ext[pos] = run_ext[pos + 1] + 1,
        }
    }

    let mut marked: Vec<i16> = Vec::with_capacity(total * 2);
    let mut extended = false;
    for (pos, &value) in msg.iter().enumerate() {
        if value < 0 {
            marked.push(value);
            continue;
        }
        let value_is_extended = value >= 128;
        if value_is_extended != extended {
            let run = if extended { run_std[pos] } else { run_ext[pos] };
            let latch_threshold = if pos + run == total { 3 } else { 5 };
            marked.push(FN4);
            if run >= latch_threshold {
                // Long enough to be worth latching: second FN4, flip state.
                marked.push(FN4);
                extended = !extended;
            }
        }
        marked.push(value & 127);
    }
    marked
}
/// Builds two look-ahead tables of length `msg.len() + 1`.
///
/// Entry `i` of the first table is the distance from `i` to the next value
/// for which `anotb` holds (0 when `msg[i]` itself qualifies); the second
/// table does the same for `bnota`. The extra final entry is the end marker
/// 9999, and distances with no qualifying value ahead count up from it.
pub(crate) fn compute_lookahead(msg: &[i16]) -> (Vec<u32>, Vec<u32>) {
    const END_MARKER: u32 = 9999;

    let len = msg.len();
    let mut to_a_only = vec![END_MARKER; len + 1];
    let mut to_b_only = vec![END_MARKER; len + 1];
    for pos in (0..len).rev() {
        let value = msg[pos];
        to_a_only[pos] = match anotb(value) {
            true => 0,
            false => to_a_only[pos + 1] + 1,
        };
        to_b_only[pos] = match bnota(value) {
            true => 0,
            false => to_b_only[pos + 1] + 1,
        };
    }
    (to_a_only, to_b_only)
}
/// True when, looking ahead from position `i`, an A-only value comes strictly
/// sooner than a B-only value. Equal distances yield `false`.
#[inline]
pub(crate) fn abeforeb(i: usize, next_anotb: &[u32], next_bnota: &[u32]) -> bool {
    let (to_a_only, to_b_only) = (next_anotb[i], next_bnota[i]);
    to_a_only < to_b_only
}
/// True when, looking ahead from position `i`, a B-only value comes strictly
/// sooner than an A-only value. Equal distances yield `false`.
#[inline]
pub(crate) fn bbeforea(i: usize, next_anotb: &[u32], next_bnota: &[u32]) -> bool {
    let (to_a_only, to_b_only) = (next_anotb[i], next_bnota[i]);
    to_b_only < to_a_only
}
/// Measures the run of set-C-encodable values starting at `msg[p]`.
///
/// Returns `(n, s)` where `n` is the number of message values in the run and
/// `s` is the number of digit slots they occupy:
/// * an ASCII digit adds 1 to both;
/// * `FN1` is accepted only when `s` is even (on a pair boundary) and then
///   adds 1 to `n` and 2 to `s`; at an odd `s` it ends the run;
/// * anything else ends the run.
///
/// A start position at or past the end of `msg` yields `(0, 0)`.
pub(crate) fn numsscr(msg: &[i16], p: usize) -> (usize, usize) {
    let digits = i16::from(b'0')..=i16::from(b'9');
    let mut count = 0usize;
    let mut slots = 0usize;
    for &value in msg.iter().skip(p) {
        if value == FN1 {
            if slots % 2 != 0 {
                break;
            }
            slots += 2;
        } else if digits.contains(&value) {
            slots += 1;
        } else {
            break;
        }
        count += 1;
    }
    (count, slots)
}
/// Combines two ASCII digits into the set-C codeword `10 * hi + lo`
/// (so `b'1', b'2'` gives 12). Both arguments must be ASCII digits; other
/// bytes below `b'0'` underflow the subtraction.
#[inline]
fn pair_codeword(hi: u8, lo: u8) -> u16 {
    let tens = u16::from(hi - b'0');
    let units = u16::from(lo - b'0');
    tens * 10 + units
}
// Codeword values (CHARMAPS row indices) for moving to or from code set C.
// Row 99 is SWC in both the set-A and set-B columns.
pub(crate) const SWC_FROM_A_OR_B: u16 = 99;
// Rows 105 / 106 are SC2 / SC3 in both the set-A and set-B columns; the mixed
// encoder follows them with two / three digit-pair codewords.
pub(crate) const SC2_FROM_A: u16 = 105;
pub(crate) const SC3_FROM_A: u16 = 106;
pub(crate) const SC2_FROM_B: u16 = 105;
pub(crate) const SC3_FROM_B: u16 = 106;
// Rows 101 / 100 in the set-C column are SWA / SWB.
pub(crate) const SWA_FROM_C: u16 = 101;
pub(crate) const SWB_FROM_C: u16 = 100;
/// The code set that is active at a given point while the mixed encoder
/// walks the message; it decides which branch of the encoder handles the
/// next value.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Cset {
// Code set A (column 0 of CHARMAPS).
A,
// Code set B (column 1 of CHARMAPS).
B,
// Code set C (digit pairs).
C,
}
/// Chooses the start mode and initial code set for `msg`.
///
/// Returns `(cset, mode, consumed, prefix)`:
/// * `cset` - code set that is active once the prefix has been emitted;
/// * `mode` - start-mode value to fold into the leading row indicator;
/// * `consumed` - how many message values the prefix already covers;
/// * `prefix` - set-B codewords for those values (empty for modes 0, 1, 2).
///
/// Decision order (`run(k)` is the digit-slot count from `numsscr(msg, k)`):
/// 1. `run(0)` even and >= 2: start in set C.
/// 2. `run(0)` odd and >= 3: first value in set B, then set C (mode 5).
/// 3. first value B-encodable and `run(1)` even and >= 2: same as 2.
/// 4. first value B-encodable and `run(1)` odd and >= 3: first two values in
///    set B, then set C (mode 6).
/// 5. first two values B-encodable and `run(2)` even and >= 2: same as 4.
/// 6. otherwise set A when an A-only value comes before any B-only value,
///    else set B.
///
/// Cases 4 and 5 used to return `MODE_B_THEN_C` (4). In the Code 16K mode
/// table 4 means "set C with implied FNC1" while "set C with implied double
/// Shift B" is 6, so a reader would have decoded the two leading set-B
/// codewords as a digit run behind an FNC1.
fn pick_initial_mode(
    msg: &[i16],
    next_anotb: &[u32],
    next_bnota: &[u32],
) -> (Cset, u16, usize, Vec<u16>) {
    // Start mode "code set C with implied double Shift B".
    const MODE_C_DOUBLE_SHIFT_B: u16 = 6;

    // Set-B codeword for a message value; sentinels and values outside the
    // byte range are never B-encodable (no truncating cast).
    let b_codeword = |c: i16| -> Option<u16> {
        if (0..=255i16).contains(&c) {
            lookup_b(c as u8)
        } else {
            None
        }
    };
    let even_run = |slots: usize| slots >= 2 && slots % 2 == 0;
    let odd_run = |slots: usize| slots >= 3 && slots % 2 == 1;

    if msg.len() >= 2 {
        let (_, s0) = numsscr(msg, 0);
        if even_run(s0) {
            return (Cset::C, MODE_C_FROM_START, 0, vec![]);
        }
        if odd_run(s0) {
            if let Some(cw) = b_codeword(msg[0]) {
                return (Cset::C, MODE_C_THEN_B, 1, vec![cw]);
            }
        }
        if let Some(cw0) = b_codeword(msg[0]) {
            let (_, s1) = numsscr(msg, 1);
            if even_run(s1) {
                return (Cset::C, MODE_C_THEN_B, 1, vec![cw0]);
            }
            if odd_run(s1) {
                // Odd digit run after one B value: the first digit also goes
                // out in set B so that the rest pairs up evenly.
                if let Some(cw1) = b_codeword(msg[1]) {
                    return (Cset::C, MODE_C_DOUBLE_SHIFT_B, 2, vec![cw0, cw1]);
                }
            }
            if msg.len() >= 3 {
                if let Some(cw1) = b_codeword(msg[1]) {
                    let (_, s2) = numsscr(msg, 2);
                    if even_run(s2) {
                        return (Cset::C, MODE_C_DOUBLE_SHIFT_B, 2, vec![cw0, cw1]);
                    }
                }
            }
        }
    }

    if abeforeb(0, next_anotb, next_bnota) {
        (Cset::A, MODE_A, 0, vec![])
    } else {
        (Cset::B, MODE_B, 0, vec![])
    }
}
/// Encodes `input` into data codewords, moving between code sets A, B and C
/// as needed.
///
/// Returns `(mode, cws)`: the start mode chosen by `pick_initial_mode` and
/// the data codewords only - no leading row indicator, padding or check
/// codewords (those are added by `encode_cws_mixed`).
///
/// # Errors
/// `Error::InvalidData` when `input` is empty, or when a value cannot be
/// encoded in the code set a branch has committed to.
pub(crate) fn encode_data_cws_mixed(input: &[u8]) -> Result<(u16, Vec<u16>), Error> {
if input.is_empty() {
return Err(Error::InvalidData("code16k: empty input".to_string()));
}
// Widen to i16 so negative sentinels (FN4) can sit in the same sequence.
let initial_msg: Vec<i16> = input.iter().map(|&b| i16::from(b)).collect();
// After this step every non-negative value is in 0..=127.
let msg = insert_fn4_markers(&initial_msg);
let (next_anotb, next_bnota) = compute_lookahead(&msg);
// `i` starts past whatever the start mode already encoded into `cws`.
let (mut cset, mode, mut i, mut cws) = pick_initial_mode(&msg, &next_anotb, &next_bnota);
cws.reserve(msg.len() * 2);
// Each iteration either consumes at least one value or changes `cset`.
while i < msg.len() {
let c = msg[i];
match cset {
Cset::A => {
// One B-only value followed by A-leaning data: single shift to B.
if i + 1 < msg.len() && bnota(c) && abeforeb(i + 1, &next_anotb, &next_bnota) {
cws.push(SB1_FROM_A);
let cw = lookup_b_for_sentinel_or_byte(c)?;
cws.push(cw);
i += 1;
continue;
}
// Two B-only values followed by A-leaning data: double shift to B.
if i + 2 < msg.len()
&& bnota(c)
&& bnota(msg[i + 1])
&& abeforeb(i + 2, &next_anotb, &next_bnota)
{
cws.push(SB2_FROM_A);
cws.push(lookup_b_for_sentinel_or_byte(c)?);
cws.push(lookup_b_for_sentinel_or_byte(msg[i + 1])?);
i += 2;
continue;
}
// Any other B-only value: switch to B and retry this value there.
if bnota(c) {
cws.push(SWB_FROM_A);
cset = Cset::B;
continue;
}
let (nums, _) = numsscr(&msg, i);
// Exactly four digits followed by a set-A value: shift to C for two pairs.
if i + 4 < msg.len() && nums == 4 && in_a(msg[i + 4]) {
cws.push(SC2_FROM_A);
for _ in 0..2 {
let hi = (msg[i] & 0xff) as u8;
let lo = (msg[i + 1] & 0xff) as u8;
cws.push(pair_codeword(hi, lo));
i += 2;
}
continue;
}
// Exactly six digits followed by a set-A value: shift to C for three pairs.
if i + 6 < msg.len() && nums == 6 && in_a(msg[i + 6]) {
cws.push(SC3_FROM_A);
for _ in 0..3 {
let hi = (msg[i] & 0xff) as u8;
let lo = (msg[i + 1] & 0xff) as u8;
cws.push(pair_codeword(hi, lo));
i += 2;
}
continue;
}
// Any other even run of at least four: switch to C (consumes nothing here).
if nums >= 4 && nums % 2 == 0 {
cws.push(SWC_FROM_A_OR_B);
cset = Cset::C;
continue;
}
// Plain set-A value (or FN4).
let cw = lookup_a_for_sentinel_or_byte(c)?;
cws.push(cw);
i += 1;
}
Cset::B => {
// Mirror image of the set-A branch with the roles of A and B exchanged.
if i + 1 < msg.len() && anotb(c) && bbeforea(i + 1, &next_anotb, &next_bnota) {
cws.push(SA1_FROM_B);
cws.push(lookup_a_for_sentinel_or_byte(c)?);
i += 1;
continue;
}
if i + 2 < msg.len()
&& anotb(c)
&& anotb(msg[i + 1])
&& bbeforea(i + 2, &next_anotb, &next_bnota)
{
cws.push(SA2_FROM_B);
cws.push(lookup_a_for_sentinel_or_byte(c)?);
cws.push(lookup_a_for_sentinel_or_byte(msg[i + 1])?);
i += 2;
continue;
}
// Any other A-only value: switch to A and retry this value there.
if anotb(c) {
cws.push(SWA_FROM_B);
cset = Cset::A;
continue;
}
let (nums, _) = numsscr(&msg, i);
if i + 4 < msg.len() && nums == 4 && in_b(msg[i + 4]) {
cws.push(SC2_FROM_B);
for _ in 0..2 {
let hi = (msg[i] & 0xff) as u8;
let lo = (msg[i + 1] & 0xff) as u8;
cws.push(pair_codeword(hi, lo));
i += 2;
}
continue;
}
if i + 6 < msg.len() && nums == 6 && in_b(msg[i + 6]) {
cws.push(SC3_FROM_B);
for _ in 0..3 {
let hi = (msg[i] & 0xff) as u8;
let lo = (msg[i + 1] & 0xff) as u8;
cws.push(pair_codeword(hi, lo));
i += 2;
}
continue;
}
if nums >= 4 && nums % 2 == 0 {
cws.push(SWC_FROM_A_OR_B);
cset = Cset::C;
continue;
}
// Plain set-B value (or FN4).
cws.push(lookup_b_for_sentinel_or_byte(c)?);
i += 1;
}
Cset::C => {
let (nums, _) = numsscr(&msg, i);
// At least two values in the run: emit one digit pair.
if nums >= 2 {
let hi = (msg[i] & 0xff) as u8;
let lo = (msg[i + 1] & 0xff) as u8;
cws.push(pair_codeword(hi, lo));
i += 2;
continue;
}
// One set-B value followed by an even digit run: single shift to B.
if i + 1 < msg.len() && in_b(c) {
let (_, s_next) = numsscr(&msg, i + 1);
if s_next >= 2 && s_next % 2 == 0 {
cws.push(SB1_FROM_C);
cws.push(lookup_b_for_sentinel_or_byte(c)?);
i += 1;
continue;
}
}
// One set-B value followed by an odd digit run: double shift to B so the
// first digit of the run goes out in B and the remainder is even.
if i + 1 < msg.len() && in_b(c) {
let (_, s_next) = numsscr(&msg, i + 1);
if s_next >= 3 && s_next % 2 == 1 && i + 2 < msg.len() && in_b(msg[i + 1]) {
cws.push(SB2_FROM_C);
cws.push(lookup_b_for_sentinel_or_byte(c)?);
cws.push(lookup_b_for_sentinel_or_byte(msg[i + 1])?);
i += 2;
continue;
}
}
// Two set-B values followed by an odd digit run: triple shift to B.
if i + 2 < msg.len() && in_b(c) && in_b(msg[i + 1]) {
let (_, s_next) = numsscr(&msg, i + 2);
if s_next >= 3 && s_next % 2 == 1 && i + 3 < msg.len() && in_b(msg[i + 2]) {
cws.push(SB3_FROM_C);
cws.push(lookup_b_for_sentinel_or_byte(c)?);
cws.push(lookup_b_for_sentinel_or_byte(msg[i + 1])?);
cws.push(lookup_b_for_sentinel_or_byte(msg[i + 2])?);
i += 3;
continue;
}
}
// Three set-B values followed by an even digit run: triple shift to B.
if i + 3 < msg.len() && in_b(c) && in_b(msg[i + 1]) && in_b(msg[i + 2]) {
let (_, s_next) = numsscr(&msg, i + 3);
if s_next >= 2 && s_next % 2 == 0 {
cws.push(SB3_FROM_C);
cws.push(lookup_b_for_sentinel_or_byte(c)?);
cws.push(lookup_b_for_sentinel_or_byte(msg[i + 1])?);
cws.push(lookup_b_for_sentinel_or_byte(msg[i + 2])?);
i += 3;
continue;
}
}
// Nothing fits in C: leave for A or B, whichever the look-ahead favours.
// No value is consumed; the next iteration handles `c` in the new set.
if abeforeb(i, &next_anotb, &next_bnota) {
cws.push(SWA_FROM_C);
cset = Cset::A;
} else {
cws.push(SWB_FROM_C);
cset = Cset::B;
}
}
}
}
Ok((mode, cws))
}
/// Maps one message value to its code-set-A codeword.
///
/// * `FN4` maps to `FN4_FROM_A`.
/// * Any other negative value (sentinel) is rejected.
/// * Non-negative values are looked up in the set-A column of `CHARMAPS`.
///   Values above 255 are rejected rather than truncated to an unrelated
///   byte.
///
/// # Errors
/// `Error::InvalidData` for unsupported sentinels and for values that have
/// no set-A codeword.
#[inline]
fn lookup_a_for_sentinel_or_byte(c: i16) -> Result<u16, Error> {
    if c == FN4 {
        return Ok(FN4_FROM_A);
    }
    if c < 0 {
        return Err(Error::InvalidData(format!(
            "code16k mixed encoder: unsupported sentinel {c} (only FN4 is wired today)"
        )));
    }
    // Only cast once the value is known to fit in a byte.
    let codeword = if c <= i16::from(u8::MAX) {
        lookup_a(c as u8)
    } else {
        None
    };
    // `c` is non-negative here, so the hex rendering matches that of the byte.
    codeword.ok_or_else(|| {
        Error::InvalidData(format!(
            "code16k mixed encoder: byte 0x{c:02x} not A-encodable"
        ))
    })
}
/// Maps one message value to its code-set-B codeword.
///
/// * `FN4` maps to `FN4_FROM_B`.
/// * Any other negative value (sentinel) is rejected.
/// * Non-negative values are looked up in the set-B column of `CHARMAPS`.
///   Values above 255 are rejected rather than truncated to an unrelated
///   byte.
///
/// # Errors
/// `Error::InvalidData` for unsupported sentinels and for values that have
/// no set-B codeword.
#[inline]
fn lookup_b_for_sentinel_or_byte(c: i16) -> Result<u16, Error> {
    if c == FN4 {
        return Ok(FN4_FROM_B);
    }
    if c < 0 {
        return Err(Error::InvalidData(format!(
            "code16k mixed encoder: unsupported sentinel {c} (only FN4 is wired today)"
        )));
    }
    // Only cast once the value is known to fit in a byte.
    let codeword = if c <= i16::from(u8::MAX) {
        lookup_b(c as u8)
    } else {
        None
    };
    // `c` is non-negative here, so the hex rendering matches that of the byte.
    codeword.ok_or_else(|| {
        Error::InvalidData(format!(
            "code16k mixed encoder: byte 0x{c:02x} not B-encodable"
        ))
    })
}
pub(crate) fn encode_cws_text_only(text: &[u8]) -> Result<Vec<u16>, Error> {
if text.is_empty() {
return Err(Error::InvalidData("code16k: empty input".to_string()));
}
let mut cws: Vec<u16> = Vec::with_capacity(text.len() + 3);
let (rows, dcws_inner) = pick_symbol_size(text.len()).ok_or_else(|| {
Error::InvalidData(format!(
"code16k: text payload of {} bytes exceeds the r=16 ceiling (77 codewords)",
text.len()
))
})?;
let dcws_inner = usize::from(dcws_inner);
cws.push(leading_row_indicator(rows, MODE_B));
for (idx, &b) in text.iter().enumerate() {
let cw = lookup_b(b).ok_or_else(|| {
Error::InvalidData(format!(
"code16k mode-B path: byte 0x{b:02x} at position {idx} \
isn't B-encodable — mode-A and mixed-mode paths are on the burndown"
))
})?;
cws.push(cw);
}
while cws.len() < 1 + dcws_inner {
cws.push(PAD_CW);
}
let (c1, c2) = compute_checksums(&cws);
cws.push(c1);
cws.push(c2);
Ok(cws)
}
pub(crate) fn encode_cws_mode_a(text: &[u8]) -> Result<Vec<u16>, Error> {
if text.is_empty() {
return Err(Error::InvalidData("code16k: empty input".to_string()));
}
let (rows, dcws_inner) = pick_symbol_size(text.len()).ok_or_else(|| {
Error::InvalidData(format!(
"code16k mode-A path: payload of {} bytes exceeds the r=16 ceiling (77 codewords)",
text.len()
))
})?;
let dcws_inner = usize::from(dcws_inner);
let mut cws: Vec<u16> = Vec::with_capacity(1 + dcws_inner + 2);
cws.push(leading_row_indicator(rows, MODE_A));
for (idx, &b) in text.iter().enumerate() {
let cw = lookup_a(b).ok_or_else(|| {
Error::InvalidData(format!(
"code16k mode-A path: byte 0x{b:02x} at position {idx} isn't A-encodable \
(lowercase + high bytes need mode-B or FNC4 / mixed-mode shifts \
— extension paths, see PORT_STATUS)"
))
})?;
cws.push(cw);
}
while cws.len() < 1 + dcws_inner {
cws.push(PAD_CW);
}
let (c1, c2) = compute_checksums(&cws);
cws.push(c1);
cws.push(c2);
Ok(cws)
}
/// Encodes an odd-length, all-digit payload: the first digit as a set-B
/// codeword, the remaining digits as set-C pairs.
///
/// The result is the full codeword sequence: leading row indicator
/// (`MODE_C_THEN_B`), the set-B codeword, the pair codewords, padding up to
/// the capacity of the chosen symbol size, then the two check codewords.
///
/// # Errors
/// `Error::InvalidData` when `digits` is empty, has even length, contains a
/// non-digit byte, or needs more slots than the largest symbol offers.
pub(crate) fn encode_cws_digit_with_shift_b(digits: &[u8]) -> Result<Vec<u16>, Error> {
    if digits.is_empty() {
        return Err(Error::InvalidData("code16k: empty input".to_string()));
    }
    if digits.len() % 2 == 0 {
        return Err(Error::InvalidData(format!(
            "code16k mode-5 path needs odd length, got {} digits — \
             use encode_cws_digit_only for even-length pure-digit input",
            digits.len()
        )));
    }
    if let Some(idx) = digits.iter().position(|b| !b.is_ascii_digit()) {
        let b = digits[idx];
        return Err(Error::InvalidData(format!(
            "code16k mode-5 path: non-digit byte 0x{b:02x} at position {idx}"
        )));
    }

    let pair_count = (digits.len() - 1) / 2;
    // One slot for the leading set-B digit plus one per pair.
    let inner_slots = 1 + pair_count;
    let Some((rows, capacity)) = pick_symbol_size(inner_slots) else {
        return Err(Error::InvalidData(format!(
            "code16k mode-5: payload of {} bytes (1 + {} pairs) exceeds r=16 ceiling",
            digits.len(),
            pair_count
        )));
    };
    let padded_len = 1 + usize::from(capacity);
    let mut cws: Vec<u16> = Vec::with_capacity(padded_len + 2);

    cws.push(leading_row_indicator(rows, MODE_C_THEN_B));
    let first = lookup_b(digits[0])
        .expect("digit was already validated to be ASCII '0'..='9' → B-encodable");
    cws.push(first);
    cws.extend(
        digits[1..]
            .chunks_exact(2)
            .map(|pair| u16::from(pair[0] - b'0') * 10 + u16::from(pair[1] - b'0')),
    );
    if cws.len() < padded_len {
        cws.resize(padded_len, PAD_CW);
    }
    let (c1, c2) = compute_checksums(&cws);
    cws.extend_from_slice(&[c1, c2]);
    Ok(cws)
}
pub(crate) fn encode_cws(input: &[u8]) -> Result<Vec<u16>, Error> {
if input.is_empty() {
return Err(Error::InvalidData("code16k: empty input".to_string()));
}
encode_cws_mixed(input)
}
pub(crate) fn encode_cws_mixed(input: &[u8]) -> Result<Vec<u16>, Error> {
let (mode, data_cws) = encode_data_cws_mixed(input)?;
let (rows, dcws_inner) = pick_symbol_size(data_cws.len()).ok_or_else(|| {
Error::InvalidData(format!(
"code16k mixed-mode: data payload of {} codewords exceeds r=16 ceiling \
(77 codewords)",
data_cws.len()
))
})?;
let dcws_inner = usize::from(dcws_inner);
let mut cws: Vec<u16> = Vec::with_capacity(1 + dcws_inner + 2);
cws.push(leading_row_indicator(rows, mode));
cws.extend_from_slice(&data_cws);
while cws.len() < 1 + dcws_inner {
cws.push(PAD_CW);
}
let (c1, c2) = compute_checksums(&cws);
cws.push(c1);
cws.push(c2);
Ok(cws)
}
/// Encodes an even-length, all-digit payload as set-C digit pairs.
///
/// The result is the full codeword sequence: leading row indicator
/// (`MODE_C_FROM_START`), one codeword per digit pair, padding up to the
/// capacity of the chosen symbol size, then the two check codewords.
///
/// # Errors
/// `Error::InvalidData` when `digits` is empty, has odd length, contains a
/// non-digit byte, or has more pairs than the largest symbol can hold.
pub(crate) fn encode_cws_digit_only(digits: &[u8]) -> Result<Vec<u16>, Error> {
    if digits.is_empty() {
        return Err(Error::InvalidData("code16k: empty input".to_string()));
    }
    if digits.len() % 2 != 0 {
        return Err(Error::InvalidData(format!(
            "code16k digit-only path needs even length, got {} digits — \
             odd-length payloads use mode 5 (shift to B for trailing byte), \
             on the encoder burndown",
            digits.len()
        )));
    }
    if let Some(idx) = digits.iter().position(|b| !b.is_ascii_digit()) {
        let b = digits[idx];
        return Err(Error::InvalidData(format!(
            "code16k: non-digit byte 0x{b:02x} at position {idx} — \
             digit-only path is the only mode currently wired"
        )));
    }

    let pair_count = digits.len() / 2;
    let Some((rows, capacity)) = pick_symbol_size(pair_count) else {
        return Err(Error::InvalidData(format!(
            "code16k: payload of {} pairs exceeds the r=16 ceiling (77 pairs)",
            pair_count
        )));
    };
    let padded_len = 1 + usize::from(capacity);
    let mut cws: Vec<u16> = Vec::with_capacity(padded_len + 2);

    cws.push(leading_row_indicator(rows, MODE_C_FROM_START));
    cws.extend(
        digits
            .chunks_exact(2)
            .map(|pair| u16::from(pair[0] - b'0') * 10 + u16::from(pair[1] - b'0')),
    );
    if cws.len() < padded_len {
        cws.resize(padded_len, PAD_CW);
    }
    let (c1, c2) = compute_checksums(&cws);
    cws.extend_from_slice(&[c1, c2]);
    Ok(cws)
}
/// Builds the 81-module separator row drawn between two data rows:
/// modules 0..10 are clear, modules 10..80 are set, and the last module is
/// clear.
fn build_seprow() -> [u8; 81] {
    let mut row = [0u8; 81];
    row[10..80].fill(1);
    row
}
/// Renders one data row as 81 modules (0 = clear, 1 = set).
///
/// The row is described as a list of run widths whose colours alternate,
/// starting with a clear run:
/// `10` (clear lead-in), the four start-pattern widths for `row_idx`, `1`,
/// six widths for each of the five codewords in `row_cws`, the four
/// stop-pattern widths for `row_idx`, and a final `1`.
/// That is 10 + 7 + 1 + 5 * 11 + 7 + 1 = 81 modules.
///
/// Panics when `row_idx` is outside the pattern tables or a codeword is
/// outside `ENCS`.
fn build_row_bits(row_idx: usize, row_cws: &[u16], stopencs: &[&str; 16]) -> [u8; 81] {
    debug_assert_eq!(row_cws.len(), 5);

    let mut widths: Vec<u8> = Vec::with_capacity(41);
    widths.push(10);
    widths.extend(
        STARTENCS[row_idx]
            .chars()
            .map(|c| c.to_digit(10).expect("STARTENCS contains only digits") as u8),
    );
    widths.push(1);
    for &cw in row_cws {
        widths.extend(
            ENCS[cw as usize]
                .chars()
                .map(|c| c.to_digit(10).expect("ENCS contains only digits") as u8),
        );
    }
    widths.extend(
        stopencs[row_idx]
            .chars()
            .map(|c| c.to_digit(10).expect("stopencs contains only digits") as u8),
    );
    widths.push(1);

    let mut row = [0u8; 81];
    let mut offset = 0usize;
    for (run_no, &width) in widths.iter().enumerate() {
        // Even-numbered runs are clear, odd-numbered runs are set.
        let colour = (run_no % 2) as u8;
        let end = offset + usize::from(width);
        row[offset..end].fill(colour);
        offset = end;
    }
    debug_assert_eq!(offset, 81, "sbs widths must sum to 81 modules");
    row
}
/// Encodes `input` as a Code 16K symbol and renders it into a `BitMatrix`.
///
/// The matrix is 81 modules wide. From top to bottom it contains a one-module
/// solid bar, then each data row at 8 modules high with a one-module
/// separator row between consecutive data rows, then a closing one-module
/// solid bar.
///
/// # Errors
/// Propagates errors from `encode_cws`. Also returns `Error::InvalidData`
/// when the codeword count is not a multiple of 5 or the derived row count
/// is outside 2..=16 (both indicate an internal inconsistency).
pub fn encode(input: &[u8]) -> Result<BitMatrix, Error> {
    const WIDTH: usize = 81;
    const ROW_HEIGHT: usize = 8;
    const SEP_HEIGHT: usize = 1;

    let cws = encode_cws(input)?;
    if cws.len() % 5 != 0 {
        return Err(Error::InvalidData(format!(
            "code16k internal: cws length {} not divisible by 5",
            cws.len()
        )));
    }
    let rows = cws.len() / 5;
    if !(2..=16).contains(&rows) {
        return Err(Error::InvalidData(format!(
            "code16k internal: derived row count {rows} not in 2..=16"
        )));
    }

    // Each band is a module pattern plus the number of scanlines it spans.
    let solid = [1u8; WIDTH];
    let separator = build_seprow();
    let band_count = 2 * rows + 1;
    let mut bands: Vec<([u8; WIDTH], usize)> = Vec::with_capacity(band_count);
    bands.push((solid, SEP_HEIGHT));
    for (i, row_cws) in cws.chunks_exact(5).enumerate() {
        if i > 0 {
            bands.push((separator, SEP_HEIGHT));
        }
        bands.push((build_row_bits(i, row_cws, &STOPENCS_ODD), ROW_HEIGHT));
    }
    bands.push((solid, SEP_HEIGHT));
    debug_assert_eq!(bands.len(), band_count);

    let height: usize = bands.iter().map(|&(_, h)| h).sum();
    let mut bm = BitMatrix::new(WIDTH, height);
    let mut y = 0;
    for (pattern, band_height) in &bands {
        for _ in 0..*band_height {
            for x in (0..WIDTH).filter(|&x| pattern[x] != 0) {
                bm.set(x, y, true);
            }
            y += 1;
        }
    }
    Ok(bm)
}
/// Encodes `input` and returns the symbol as a flat list of module values
/// (0 or 1), one 81-module row after another, without vertical expansion.
///
/// Row order: solid bar, then the data rows with a separator row between
/// consecutive data rows, then a closing solid bar - `2 * rows + 1` rows in
/// total.
///
/// # Errors
/// Propagates errors from `encode_cws`, and returns `Error::InvalidData` when
/// the codeword count is not a multiple of 5.
pub(crate) fn encode_pixs(input: &[u8]) -> Result<Vec<u8>, Error> {
    let cws = encode_cws(input)?;
    if cws.len() % 5 != 0 {
        return Err(Error::InvalidData(format!(
            "code16k internal: cws length {} not divisible by 5",
            cws.len()
        )));
    }
    let rows = cws.len() / 5;
    let expected_len = (2 * rows + 1) * 81;

    let solid = [1u8; 81];
    let separator = build_seprow();
    let mut pixs: Vec<u8> = Vec::with_capacity(expected_len);
    pixs.extend_from_slice(&solid);
    for (i, row_cws) in cws.chunks_exact(5).enumerate() {
        if i > 0 {
            pixs.extend_from_slice(&separator);
        }
        pixs.extend_from_slice(&build_row_bits(i, row_cws, &STOPENCS_ODD));
    }
    pixs.extend_from_slice(&solid);
    debug_assert_eq!(pixs.len(), expected_len);
    Ok(pixs)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn numsscr_digit_run_with_fn1_alignment() {
let msg: Vec<i16> = "123".bytes().map(i16::from).collect();
assert_eq!(numsscr(&msg, 0), (3, 3));
let msg: Vec<i16> = "12A3".bytes().map(i16::from).collect();
assert_eq!(numsscr(&msg, 0), (2, 2));
let msg: Vec<i16> = vec![FN1, i16::from(b'1'), i16::from(b'2')];
assert_eq!(
numsscr(&msg, 0),
(3, 4),
"FN1 at s=0 (even) bumps s by 1 then the standard +1/+1"
);
let msg: Vec<i16> = vec![i16::from(b'1'), FN1, i16::from(b'2')];
assert_eq!(numsscr(&msg, 0), (1, 1), "FN1 at s=1 (odd) breaks the run");
let msg: Vec<i16> = vec![i16::from(b'1'), i16::from(b'2'), FN1, i16::from(b'3')];
assert_eq!(
numsscr(&msg, 0),
(4, 5),
"FN1 at s=2 (even) re-aligns and the run continues through '3'"
);
assert_eq!(numsscr(&[], 0), (0, 0));
let msg: Vec<i16> = "12".bytes().map(i16::from).collect();
assert_eq!(numsscr(&msg, 2), (0, 0));
let msg: Vec<i16> = vec![i16::from(b'A')];
assert_eq!(numsscr(&msg, 0), (0, 0));
let msg: Vec<i16> = vec![i16::from(b'/')];
assert_eq!(numsscr(&msg, 0), (0, 0), "'/' just below '0' breaks");
let msg: Vec<i16> = vec![i16::from(b':')];
assert_eq!(numsscr(&msg, 0), (0, 0), "':' just above '9' breaks");
}
#[test]
fn compute_lookahead_right_to_left_walk() {
let msg: Vec<i16> = vec![5, i16::from(b'A'), i16::from(b'a')];
let (next_anotb, next_bnota) = compute_lookahead(&msg);
assert_eq!(
next_anotb,
vec![0, 10001, 10000, 9999],
"next_anotb: [0]=0 (control 5 IS A-only), then +1 each step \
back to the 9999 sentinel"
);
assert_eq!(
next_bnota,
vec![2, 1, 0, 9999],
"next_bnota: [2]=0 ('a' IS B-only), then +1 back; [0]=2 \
distance to 'a'"
);
let msg: Vec<i16> = vec![i16::from(b'A'), i16::from(b'B')];
let (next_anotb, next_bnota) = compute_lookahead(&msg);
assert_eq!(next_anotb, vec![10001, 10000, 9999]);
assert_eq!(next_bnota, vec![10001, 10000, 9999]);
let msg: Vec<i16> = vec![];
let (next_anotb, next_bnota) = compute_lookahead(&msg);
assert_eq!(next_anotb, vec![9999]);
assert_eq!(next_bnota, vec![9999]);
}
#[test]
fn abeforeb_and_bbeforea_strict_less() {
let anotb = [3u32];
let bnota = [7u32];
assert!(
abeforeb(0, &anotb, &bnota),
"anotb(3) < bnota(7) → abeforeb=true"
);
assert!(
!bbeforea(0, &anotb, &bnota),
"anotb(3) < bnota(7) → bbeforea=false"
);
let anotb = [7u32];
let bnota = [3u32];
assert!(!abeforeb(0, &anotb, &bnota));
assert!(bbeforea(0, &anotb, &bnota));
let anotb = [5u32];
let bnota = [5u32];
assert!(
!abeforeb(0, &anotb, &bnota),
"equal distances must NOT count as a-before-b (rejects `<=`)"
);
assert!(
!bbeforea(0, &anotb, &bnota),
"equal distances must NOT count as b-before-a (rejects `<=`)"
);
let anotb = [99u32, 2, 100];
let bnota = [99u32, 5, 1];
assert!(abeforeb(1, &anotb, &bnota), "i=1: 2 < 5 → true");
assert!(
!abeforeb(2, &anotb, &bnota),
"i=2: 100 < 1 is false (rejects index-pinned-at-0 mutation)"
);
assert!(bbeforea(2, &anotb, &bnota), "i=2: 1 < 100 → true");
}
#[test]
fn charmaps_shape() {
assert_eq!(CHARMAPS.len(), 107);
for row in &CHARMAPS {
assert_eq!(row.len(), 3);
}
}
#[test]
fn charmaps_anchors() {
assert_eq!(CHARMAPS[0], [32, 32, 0]);
assert_eq!(CHARMAPS[33], [65, 65, 33]);
assert_eq!(CHARMAPS[64], [0, 96, 64]);
assert_eq!(CHARMAPS[103], [PAD, PAD, PAD]);
assert_eq!(CHARMAPS[106], [SC3, SC3, SB3]);
}
#[test]
fn metrics_shape_and_progression() {
assert_eq!(METRICS.len(), 15);
assert_eq!(METRICS[0], [2, 7]);
assert_eq!(METRICS[14], [16, 77]);
for i in 1..15 {
assert_eq!(
METRICS[i][0] - METRICS[i - 1][0],
1,
"rows should step by 1"
);
assert_eq!(
METRICS[i][1] - METRICS[i - 1][1],
5,
"dcws_inner should step by 5",
);
}
}
#[test]
fn encs_shape() {
assert_eq!(ENCS.len(), 107);
for (i, enc) in ENCS.iter().enumerate() {
assert_eq!(enc.len(), 6, "ENCS[{i}] = {enc:?} should be 6 chars");
let total: u32 = enc.chars().map(|c| c.to_digit(10).unwrap()).sum();
assert_eq!(total, 11, "ENCS[{i}] = {enc:?} should sum to 11 modules");
}
}
#[test]
fn start_stop_enc_shapes() {
for (table_name, table) in [
("startencs", &STARTENCS[..]),
("stopencs_odd", &STOPENCS_ODD[..]),
("stopencs_even", &STOPENCS_EVEN[..]),
] {
assert_eq!(table.len(), 16, "{table_name} should have 16 entries");
for (i, enc) in table.iter().enumerate() {
assert_eq!(
enc.len(),
4,
"{table_name}[{i}] = {enc:?} should be 4 chars"
);
let total: u32 = enc.chars().map(|c| c.to_digit(10).unwrap()).sum();
assert_eq!(
total, 7,
"{table_name}[{i}] = {enc:?} should sum to 7 modules"
);
}
}
}
#[test]
fn leading_row_indicator_matches_bwipp() {
let cases: &[(u16, u16, u16)] = &[
(2, 1, 1),
(2, 2, 2),
(2, 1, 1),
(2, 1, 1),
(2, 5, 5),
(2, 1, 1),
(2, 1, 1),
(2, 2, 2),
];
for &(r, mode, expected) in cases {
assert_eq!(
leading_row_indicator(r, mode),
expected,
"(r={r}, mode={mode})",
);
}
}
#[test]
fn compute_checksums_matches_bwipp_goldens() {
let cases: &[(&[u16], u16, u16)] = &[
(&[1, 17, 103, 103, 103, 103, 103, 103], 4, 46),
(&[2, 12, 103, 103, 103, 103, 103, 103], 98, 27),
(&[1, 33, 103, 103, 103, 103, 103, 103], 52, 82),
(&[1, 33, 34, 103, 103, 103, 103, 103], 97, 66),
(&[5, 17, 23, 45, 103, 103, 103, 103], 44, 45),
(&[2, 12, 34, 56, 78, 90, 103, 103], 95, 44),
];
for &(cws, want_c1, want_c2) in cases {
let (c1, c2) = compute_checksums(cws);
assert_eq!(
(c1, c2),
(want_c1, want_c2),
"cws={cws:?} → want (c1, c2) = ({want_c1}, {want_c2}), got ({c1}, {c2})",
);
}
}
#[test]
fn encode_produces_valid_bitmatrix_for_supported_inputs() {
for input in [&b"12"[..], b"1234", b"A", b"ABC", b"Hello"] {
let bm = encode(input).unwrap_or_else(|e| panic!("encode({input:?}) failed: {e:?}"));
assert_eq!(bm.width(), 81, "encode({input:?}) width should be 81");
assert_eq!(
bm.height(),
19,
"encode({input:?}) r=2 height should be 19 (sep+data+sep+data+sep)",
);
}
let bm = encode(b"Hello123").expect(
"encode(b\"Hello123\") (Code 16K r=3 → symhgt = 1 sep + 3×(8 data + 1 sep) - 0 = 28 modules) must succeed",
);
assert_eq!(bm.width(), 81);
assert_eq!(bm.height(), 28);
let err = encode(b"").unwrap_err();
let Error::InvalidData(msg) = err else {
panic!("encode(b\"\") must yield InvalidData; got {err:?}");
};
assert!(
msg.contains("code16k:"),
"empty-input diagnostic must carry the symbology tag; got {msg:?}"
);
assert!(
msg.contains("empty input"),
"empty-input diagnostic must call out 'empty input'; got {msg:?}"
);
assert!(
!msg.contains("exceeds")
&& !msg.contains("divisible by 5")
&& !msg.contains("row count"),
"empty-input diagnostic must not leak the downstream arms; got {msg:?}"
);
let bm = encode(b"A\tB").expect(
"encode(b\"A\\tB\") (post-Stage-21: Mode A handles TAB control char → 81×19 r=2 matrix) must succeed",
);
assert_eq!(bm.width(), 81);
assert_eq!(bm.height(), 19);
}
#[test]
fn encode_pixs_matches_bwip_js_golden_for_12() {
let pixs = encode_pixs(b"12").expect(
"encode_pixs(b\"12\") (Code 16K NS-digits, r=2 → 5 compressed rows × 81 cols, rowmult=[1,8,1,8,1]) must succeed",
);
let want: &[u8] = &[
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,
1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1,
0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
];
assert_eq!(pixs.len(), 5 * 81, "5 compressed rows × 81 cells");
assert_eq!(pixs.len(), want.len());
for (i, (&got, &exp)) in pixs.iter().zip(want.iter()).enumerate() {
assert_eq!(got, exp, "pixs[{i}] (row {}, col {})", i / 81, i % 81);
}
}
/// Expands the compressed rows from `encode_pixs(b"12")` by the row
/// multipliers `[1, 8, 1, 8, 1]` and checks every cell of `encode(b"12")`
/// against the expanded picture. Also requires at least one set bit below
/// row 0 so a collapsed matrix cannot pass.
#[test]
fn encode_bitmatrix_content_matches_expanded_pixs_for_12() {
    let pixs = encode_pixs(b"12").expect("encode_pixs(12) ok");
    let bm = encode(b"12").expect("encode(12) ok");
    let mults = [1usize, 8, 1, 8, 1];
    assert_eq!(bm.width(), 81);
    assert_eq!(bm.height(), mults.iter().sum::<usize>());

    // One entry per expanded row, naming the compressed row it repeats.
    let source_rows = mults
        .iter()
        .enumerate()
        .flat_map(|(crow, &mult)| std::iter::repeat(crow).take(mult));

    let mut any_set_below_row0 = false;
    for (y, crow) in source_rows.enumerate() {
        for x in 0..81 {
            let want = pixs[crow * 81 + x] != 0;
            assert_eq!(
                bm.get(x, y),
                want,
                "encode(12) bit at (x={x}, y={y}) (compressed row {crow}) \
                 must equal expanded-pixs bit {want}"
            );
            any_set_below_row0 |= y > 0 && bm.get(x, y);
        }
    }
    assert!(
        any_set_below_row0,
        "expanded matrix must have set bits below row 0 (kills y *= 1 collapse)"
    );
}
/// For `b"Hello123"` the codeword count divided by 5 must be 3, and the
/// compressed pixel buffer must hold `(2 * 3 + 1)` rows of 81 cells.
#[test]
fn encode_pixs_numcomprows_for_r3() {
    let input = b"Hello123";
    let pixs = encode_pixs(input).expect("encode_pixs(Hello123) ok");
    let cws = encode_cws(input).expect("cws ok");

    let symbol_rows = cws.len() / 5;
    assert_eq!(symbol_rows, 3, "Hello123 must be a 3-row symbol");

    let expected_cells = (2 * symbol_rows + 1) * 81;
    assert_eq!(
        pixs.len(),
        expected_cells,
        "r=3 compressed pixs must be (2*3+1)=7 rows × 81 (kills 2*rows+1 → 2+rows+1)"
    );
}
/// Walks three bands of pair counts and checks the tuple that
/// `pick_symbol_size` returns for each, then pins the 77 / 78 boundary
/// where the result flips from `Some((16, 77))` to `None`.
#[test]
fn pick_symbol_size_picks_smallest_metrics_row() {
    let bands = [
        (0..=7, (2, 7)),
        (8..=12, (3, 12)),
        (13..=17, (4, 17)),
    ];
    for (pair_counts, expected) in bands {
        for pairs in pair_counts {
            assert_eq!(pick_symbol_size(pairs), Some(expected));
        }
    }
    assert_eq!(pick_symbol_size(77), Some((16, 77)));
    assert_eq!(pick_symbol_size(78), None);
}
/// Compares `encode_cws_digit_only` against fixed codeword vectors for
/// even-length digit strings (golden values; per the test name they come
/// from bwip-js).
#[test]
fn encode_cws_digit_only_matches_bwip_js_goldens() {
    let goldens: &[(&[u8], &[u16])] = &[
        (b"12", &[2, 12, 103, 103, 103, 103, 103, 103, 98, 27]),
        (b"1234", &[2, 12, 34, 103, 103, 103, 103, 103, 36, 11]),
        (b"123456", &[2, 12, 34, 56, 103, 103, 103, 103, 15, 62]),
        (b"1234567890", &[2, 12, 34, 56, 78, 90, 103, 103, 95, 44]),
        (b"123456789012", &[2, 12, 34, 56, 78, 90, 12, 103, 9, 24]),
        (b"12345678901234", &[2, 12, 34, 56, 78, 90, 12, 34, 30, 89]),
        (
            b"1234567890123456",
            &[
                9, 12, 34, 56, 78, 90, 12, 34, 56, 103, 103, 103, 103, 83, 24,
            ],
        ),
        (
            b"123456789012345678",
            &[9, 12, 34, 56, 78, 90, 12, 34, 56, 78, 103, 103, 103, 22, 97],
        ),
        (
            b"12345678901234567890123456",
            &[
                16, 12, 34, 56, 78, 90, 12, 34, 56, 78, 90, 12, 34, 56, 103, 103, 103, 103, 3,
                60,
            ],
        ),
    ];
    for (input, expected) in goldens.iter().copied() {
        // Human-readable form of the input for failure messages.
        let shown = std::str::from_utf8(input).unwrap_or("<non-utf8>");
        let cws = match encode_cws_digit_only(input) {
            Ok(cws) => cws,
            Err(e) => panic!("encode_cws_digit_only({:?}) failed: {e:?}", shown),
        };
        assert_eq!(cws, expected, "encode_cws_digit_only({:?})", shown);
    }
}
/// Pins the `Error::InvalidData` diagnostics of `encode_cws_digit_only` for
/// five rejected inputs: empty, odd length, a non-digit letter, a non-digit
/// space, and a 156-digit input.
#[test]
fn encode_cws_digit_only_rejects_invalid_inputs() {
    // Empty input.
    let other = encode_cws_digit_only(b"").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("empty digit-only input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "empty arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("empty input"),
        "empty arm missing `empty input` predicate: {msg}"
    );
    assert!(
        !msg.contains("non-digit"),
        "empty arm leaked non-digit diagnostic: {msg}"
    );
    assert!(
        !msg.contains("even length"),
        "empty arm leaked even-length diagnostic: {msg}"
    );

    // Odd number of digits.
    let other = encode_cws_digit_only(b"12345").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("5-digit (odd) input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k digit-only path"),
        "odd-length arm missing prefix: {msg}"
    );
    assert!(
        msg.contains("needs even length"),
        "odd-length arm missing `needs even length` predicate: {msg}"
    );
    assert!(
        msg.contains("got 5 digits"),
        "odd-length arm missing `got 5 digits` length echo: {msg}"
    );

    // Letter in the middle of the digits.
    let other = encode_cws_digit_only(b"12A4").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("`12A4` should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "non-digit 'A' arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("non-digit byte"),
        "non-digit 'A' arm missing `non-digit byte` predicate: {msg}"
    );
    assert!(
        msg.contains("0x41"),
        "non-digit 'A' arm missing hex echo `0x41`: {msg}"
    );
    assert!(
        msg.contains("at position 2"),
        "non-digit 'A' arm missing `at position 2`: {msg}"
    );

    // Space in the middle of the digits.
    let other = encode_cws_digit_only(b"12 4").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("`12 4` should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("non-digit byte"),
        "non-digit ' ' arm missing predicate: {msg}"
    );
    assert!(
        msg.contains("0x20"),
        "non-digit ' ' arm missing hex echo `0x20`: {msg}"
    );
    assert!(
        msg.contains("at position 2"),
        "non-digit ' ' arm missing `at position 2`: {msg}"
    );

    // 156 digits ("0123456789" repeated), i.e. 78 pairs.
    let huge: Vec<u8> = b"0123456789".iter().copied().cycle().take(156).collect();
    let other = encode_cws_digit_only(&huge).unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("156-digit input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "overflow arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("78 pairs"),
        "overflow arm missing `78 pairs` count echo: {msg}"
    );
    assert!(
        msg.contains("r=16 ceiling"),
        "overflow arm missing `r=16 ceiling` predicate: {msg}"
    );
}
/// Table of `(rows, mode, expected indicator)` triples checked against
/// `leading_row_indicator`.
#[test]
fn mode_constants_compose_with_leading_row_indicator() {
    let expectations = [
        (2, MODE_A, 0),
        (2, MODE_B, 1),
        (2, MODE_C_FROM_START, 2),
        (3, MODE_C_FROM_START, 9),
        (4, MODE_C_FROM_START, 16),
        (2, MODE_C_THEN_B, 5),
        (2, MODE_GS1, 6),
        (16, MODE_GS1, 104),
    ];
    for (rows, mode, indicator) in expectations {
        assert_eq!(leading_row_indicator(rows, mode), indicator);
    }
}
/// Spot-checks `lookup_b`: printable bytes map to the listed values, while
/// the control bytes 0, 9 and 31 yield `None`.
#[test]
fn lookup_b_spot_checks() {
    let expectations = [
        (b' ', Some(0)),
        (b'A', Some(33)),
        (b'B', Some(34)),
        (b'H', Some(40)),
        (b'a', Some(65)),
        (b'e', Some(69)),
        (b'l', Some(76)),
        (b'o', Some(79)),
        (b'z', Some(90)),
        (b'1', Some(17)),
        (b'9', Some(25)),
        (0, None),
        (9, None),
        (31, None),
    ];
    for (byte, want) in expectations {
        assert_eq!(lookup_b(byte), want);
    }
}
/// Spot-checks `lookup_a`: space, uppercase, digits and control bytes map
/// to the listed values, while `'a'`, `'z'` and 127 yield `None`.
#[test]
fn lookup_a_spot_checks() {
    let expectations = [
        (b' ', Some(0)),
        (b'A', Some(33)),
        (b'1', Some(17)),
        (0, Some(64)),
        (9, Some(73)),
        (31, Some(95)),
        (b'a', None),
        (b'z', None),
        (127, None),
    ];
    for (byte, want) in expectations {
        assert_eq!(lookup_a(byte), want);
    }
}
/// Compares `encode_cws_text_only` against fixed codeword vectors for short
/// text inputs (golden values; per the test name they come from bwip-js).
#[test]
fn encode_cws_text_only_matches_bwip_js_goldens() {
    let goldens: &[(&[u8], &[u16])] = &[
        (b"A", &[1, 33, 103, 103, 103, 103, 103, 103, 52, 82]),
        (b"AB", &[1, 33, 34, 103, 103, 103, 103, 103, 97, 66]),
        (b"ABC", &[1, 33, 34, 35, 103, 103, 103, 103, 78, 51]),
        (b"abc", &[1, 65, 66, 67, 103, 103, 103, 103, 34, 50]),
        (b"Hello", &[1, 40, 69, 76, 76, 79, 103, 103, 7, 58]),
        (b"abcdef", &[1, 65, 66, 67, 68, 69, 70, 103, 71, 94]),
        (
            b"Hello123",
            &[
                8, 40, 69, 76, 76, 79, 17, 18, 19, 103, 103, 103, 103, 56, 26,
            ],
        ),
        (
            b"ABCDEFGHIJKLMN",
            &[
                15, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 103, 103, 103, 52,
                56,
            ],
        ),
    ];
    for (input, expected) in goldens.iter().copied() {
        // Human-readable form of the input for failure messages.
        let shown = std::str::from_utf8(input).unwrap_or("<non-utf8>");
        let cws = match encode_cws_text_only(input) {
            Ok(cws) => cws,
            Err(e) => panic!("encode_cws_text_only({:?}) failed: {e:?}", shown),
        };
        assert_eq!(cws, expected, "encode_cws_text_only({:?})", shown);
    }
}
/// Pins the `Error::InvalidData` diagnostics of `encode_cws_text_only` for
/// four rejected inputs: empty, a lone NUL, a TAB between letters, and a
/// run of 78 `'A'` bytes.
#[test]
fn encode_cws_text_only_rejects_invalid_inputs() {
    // Empty input.
    let other = encode_cws_text_only(b"").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("empty text-only input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "empty arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("empty input"),
        "empty arm missing `empty input` predicate: {msg}"
    );
    assert!(
        !msg.contains("mode-B path"),
        "empty arm leaked mode-B byte diagnostic: {msg}"
    );
    assert!(
        !msg.contains("ceiling"),
        "empty arm leaked ceiling diagnostic: {msg}"
    );

    // A single NUL byte.
    let other = encode_cws_text_only(b"\0").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("NUL text-only input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k mode-B path"),
        "NUL arm missing `code16k mode-B path` prefix: {msg}"
    );
    assert!(
        msg.contains("byte 0x00"),
        "NUL arm missing hex echo `byte 0x00`: {msg}"
    );
    assert!(
        msg.contains("at position 0"),
        "NUL arm missing position 0 echo: {msg}"
    );
    assert!(
        msg.contains("isn't B-encodable"),
        "NUL arm missing `isn't B-encodable` predicate: {msg}"
    );

    // TAB at index 1.
    let other = encode_cws_text_only(b"A\tB").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("`A\\tB` should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("byte 0x09"),
        "TAB arm missing hex echo `byte 0x09`: {msg}"
    );
    assert!(
        msg.contains("at position 1"),
        "TAB arm missing `at position 1` echo (after 'A' at 0): {msg}"
    );

    // 78 bytes of 'A'.
    let huge: Vec<u8> = vec![b'A'; 78];
    let other = encode_cws_text_only(&huge).unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("78-byte input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "ceiling arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("78 bytes"),
        "ceiling arm missing `78 bytes` count echo: {msg}"
    );
    assert!(
        msg.contains("r=16 ceiling"),
        "ceiling arm missing `r=16 ceiling` predicate: {msg}"
    );
    assert!(
        msg.contains("77 codewords"),
        "ceiling arm missing `77 codewords` capacity echo: {msg}"
    );
}
/// Compares `encode_cws_digit_with_shift_b` against fixed codeword vectors
/// for odd-length digit strings (golden values; per the test name they come
/// from bwip-js).
#[test]
fn encode_cws_digit_with_shift_b_matches_bwip_js_goldens() {
    let goldens: &[(&[u8], &[u16])] = &[
        (b"123", &[5, 17, 23, 103, 103, 103, 103, 103, 13, 105]),
        (b"12345", &[5, 17, 23, 45, 103, 103, 103, 103, 44, 45]),
        (b"1234567", &[5, 17, 23, 45, 67, 103, 103, 103, 42, 61]),
        (b"12345678901", &[5, 17, 23, 45, 67, 89, 1, 103, 91, 25]),
    ];
    for (input, expected) in goldens.iter().copied() {
        // Human-readable form of the input for failure messages.
        let shown = std::str::from_utf8(input).unwrap_or("<non-utf8>");
        let cws = match encode_cws_digit_with_shift_b(input) {
            Ok(cws) => cws,
            Err(e) => panic!("encode_cws_digit_with_shift_b({:?}) failed: {e:?}", shown),
        };
        assert_eq!(cws, expected, "encode_cws_digit_with_shift_b({:?})", shown);
    }
}
/// Pins the exact output of `encode_cws_digit_with_shift_b` for a 13-digit
/// input (10 codewords) and a 15-digit input (15 codewords), i.e. either
/// side of a symbol-size step.
#[test]
fn encode_cws_digit_with_shift_b_symbol_size_boundary_pinned() {
    let thirteen_digits = encode_cws_digit_with_shift_b(b"1234567890123").expect("len13 ok");
    assert_eq!(
        thirteen_digits,
        vec![5, 17, 23, 45, 67, 89, 1, 23, 13, 74],
        "len=13 stays r=2; `- → +` mutant would push to r=3"
    );

    let fifteen_digits = encode_cws_digit_with_shift_b(b"123456789012345").expect("len15 ok");
    assert_eq!(
        fifteen_digits,
        vec![12, 17, 23, 45, 67, 89, 1, 23, 45, 103, 103, 103, 103, 63, 104],
        "len=15 needs r=3; `+ → *` and `/ → %` mutants would collapse to r=2"
    );
}
/// Pins the `Error::InvalidData` diagnostics of
/// `encode_cws_digit_with_shift_b` for three rejected inputs: empty, an
/// even-length digit string, and a string with a trailing letter.
#[test]
fn encode_cws_digit_with_shift_b_rejects_invalid_inputs() {
    // Empty input.
    let other = encode_cws_digit_with_shift_b(b"").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("empty mode-5 input should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "empty arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("empty input"),
        "empty arm missing `empty input` predicate: {msg}"
    );
    assert!(
        !msg.contains("mode-5"),
        "empty arm leaked mode-5 diagnostic: {msg}"
    );

    // Even number of digits.
    let other = encode_cws_digit_with_shift_b(b"1234").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("4-digit even mode-5 should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k mode-5 path"),
        "even-length arm missing `code16k mode-5 path` prefix: {msg}"
    );
    assert!(
        msg.contains("needs odd length"),
        "even-length arm missing `needs odd length` predicate: {msg}"
    );
    assert!(
        msg.contains("got 4 digits"),
        "even-length arm missing `got 4 digits` count echo: {msg}"
    );
    assert!(
        msg.contains("encode_cws_digit_only"),
        "even-length arm missing remediation hint pointing at digit-only path: {msg}"
    );

    // Letter at index 2.
    let other = encode_cws_digit_with_shift_b(b"12A").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("`12A` should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k mode-5 path:"),
        "non-digit arm missing prefix: {msg}"
    );
    assert!(
        msg.contains("non-digit byte"),
        "non-digit arm missing `non-digit byte` predicate: {msg}"
    );
    assert!(
        msg.contains("0x41"),
        "non-digit arm missing hex echo `0x41` for 'A': {msg}"
    );
    assert!(
        msg.contains("at position 2"),
        "non-digit arm missing `at position 2`: {msg}"
    );
}
/// Feeds five representative inputs through `encode_cws` and checks each
/// result against a fixed codeword vector.
#[test]
fn encode_cws_top_level_routes_to_sub_encoders() {
    let even_digits = encode_cws(b"1234")
        .expect("encode_cws(b\"1234\") (4-digit even → mode 2 / digit-pair path) must succeed");
    assert_eq!(even_digits, vec![2, 12, 34, 103, 103, 103, 103, 103, 36, 11]);

    let odd_digits = encode_cws(b"12345")
        .expect("encode_cws(b\"12345\") (5-digit odd → mode 5 / digit-pair-plus-trailing path) must succeed");
    assert_eq!(odd_digits, vec![5, 17, 23, 45, 103, 103, 103, 103, 44, 45]);

    let uppercase = encode_cws(b"ABC")
        .expect("encode_cws(b\"ABC\") (pure-uppercase → mode 1 / text path) must succeed");
    assert_eq!(uppercase, vec![1, 33, 34, 35, 103, 103, 103, 103, 78, 51]);

    let mixed_case = encode_cws(b"Hello")
        .expect("encode_cws(b\"Hello\") (mixed-case → mode 1 / text path) must succeed");
    assert_eq!(mixed_case, vec![1, 40, 69, 76, 76, 79, 103, 103, 7, 58]);

    let text_then_digits = encode_cws(b"Hello123").unwrap();
    assert_eq!(
        text_then_digits,
        vec![8, 40, 69, 76, 76, 79, 17, 18, 19, 103, 103, 103, 103, 56, 26]
    );
}
/// `encode_cws(b"")` must fail with `Error::InvalidData` whose message
/// names the empty input and mentions none of the downstream-mode phrases.
#[test]
fn encode_cws_rejects_empty() {
    let other = encode_cws(b"").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("empty encode_cws should reject as InvalidData, got {other:?}")
    };
    assert!(msg.contains("code16k:"), "missing `code16k:` prefix: {msg}");
    assert!(
        msg.contains("empty input"),
        "missing `empty input` predicate: {msg}"
    );

    // None of these later-stage phrases may appear in the empty-input message.
    let downstream_phrases = ["ceiling", "mode-A path", "mode-B path", "mixed-mode"];
    let leaked = downstream_phrases
        .iter()
        .any(|&phrase| msg.contains(phrase));
    assert!(
        !leaked,
        "empty arm leaked downstream-mode diagnostic: {msg}"
    );
}
/// `encode_cws` must succeed, with non-empty output, for an input that
/// starts with NUL followed by mixed-case text and for `"abc"` followed by
/// the byte 0xC8.
#[test]
fn encode_cws_accepts_mixed_and_high_byte_inputs() {
    let mixed_cws = encode_cws(b"\0Hello").expect(
        "encode_cws(b\"\\0Hello\") (mid-message A↔B latch: NUL forces Mode A, 'Hello' lowercase forces Mode B) must succeed",
    );
    assert!(!mixed_cws.is_empty(), "expected mixed encoding to succeed");

    // "abc" with a single byte above 0x7F appended.
    let high: Vec<u8> = b"abc"
        .iter()
        .copied()
        .chain(std::iter::once(0xC8))
        .collect();
    let high_cws = encode_cws(&high).expect(
        "encode_cws(b\"abc\\xC8\") (FN4 high-byte escape: 0xC8 > 127 forces FN4 prefix) must succeed",
    );
    assert!(!high_cws.is_empty(), "expected FN4 encoding to succeed");
}
/// Checks the layout of `encode_cws_mode_a(b"ABC")`: a leading 0, the three
/// data codewords 33..=35, four 103 entries, and ten codewords in total.
#[test]
fn encode_cws_mode_a_matches_text_only_structurally() {
    let codewords = encode_cws_mode_a(b"ABC").expect(
        "encode_cws_mode_a(b\"ABC\") (Mode A path: r=2 → row indicator=(r-2)*7+mode=0; uppercase shares rows 33-35 with Mode B) must succeed",
    );
    assert_eq!(codewords[0], 0);
    assert_eq!(&codewords[1..4], &[33, 34, 35][..]);
    assert_eq!(&codewords[4..8], &[103, 103, 103, 103][..]);
    assert_eq!(codewords.len(), 1 + 7 + 2);
}
/// Checks `encode_cws(b"A\tB")` codeword by codeword: 0, 33, 73, 34, then
/// four 103 entries, with ten codewords in total.
#[test]
fn encode_cws_routes_control_bytes_through_mode_a() {
    let codewords = encode_cws(b"A\tB").unwrap();
    assert_eq!(codewords[0], 0);
    assert_eq!(codewords[1], 33);
    assert_eq!(codewords[2], 73);
    assert_eq!(codewords[3], 34);
    assert_eq!(&codewords[4..8], &[103, 103, 103, 103][..]);
    assert_eq!(codewords.len(), 1 + 7 + 2);
}
/// Pins the `Error::InvalidData` diagnostics of `encode_cws_mode_a` for a
/// lowercase input and for an empty input.
#[test]
fn encode_cws_mode_a_rejects_lowercase() {
    // Lowercase letters.
    let other = encode_cws_mode_a(b"abc").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("`abc` mode-A should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k mode-A path:"),
        "lowercase arm missing `code16k mode-A path:` prefix: {msg}"
    );
    assert!(
        msg.contains("byte 0x61"),
        "lowercase arm missing hex echo `byte 0x61` for 'a': {msg}"
    );
    assert!(
        msg.contains("at position 0"),
        "lowercase arm missing `at position 0`: {msg}"
    );
    assert!(
        msg.contains("isn't A-encodable"),
        "lowercase arm missing `isn't A-encodable` predicate: {msg}"
    );

    // Empty input.
    let other = encode_cws_mode_a(b"").unwrap_err();
    let Error::InvalidData(msg) = other else {
        panic!("empty mode-A should reject as InvalidData, got {other:?}")
    };
    assert!(
        msg.contains("code16k:"),
        "mode-A empty arm missing `code16k:` prefix: {msg}"
    );
    assert!(
        msg.contains("empty input"),
        "mode-A empty arm missing `empty input` predicate: {msg}"
    );
    assert!(
        !msg.contains("isn't A-encodable"),
        "mode-A empty arm leaked per-byte mode-A diagnostic: {msg}"
    );
}
/// `encode_data_cws_mixed(b"Hello\x01")` must report `MODE_B` and the
/// pinned seven data codewords.
#[test]
fn mixed_mode_b_with_trailing_control_byte_swa_latch() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"Hello\x01").expect(
        "encode_data_cws_mixed(b\"Hello\\x01\") (mode B start → SWA latch → trailing 0x01 in set A) must succeed",
    );
    let expected_cws = vec![40, 69, 76, 76, 79, 101, 65];
    assert_eq!(start_mode, MODE_B);
    assert_eq!(data_cws, expected_cws);
}
/// `encode_data_cws_mixed(b"Hi\x01\x02")` must report `MODE_B` and the
/// pinned five data codewords.
#[test]
fn mixed_mode_b_with_two_trailing_control_bytes_swa_latch() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"Hi\x01\x02").expect(
        "encode_data_cws_mixed(b\"Hi\\x01\\x02\") (mode B start → SWA latch → 0x01+0x02 in set A) must succeed",
    );
    let expected_cws = vec![40, 73, 101, 65, 66];
    assert_eq!(start_mode, MODE_B);
    assert_eq!(data_cws, expected_cws);
}
/// `encode_data_cws_mixed(b"ab\x01cd")` must report `MODE_B` and the
/// pinned six data codewords.
#[test]
fn mixed_mode_b_with_mid_message_sa1_shift() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"ab\x01cd").expect(
        "encode_data_cws_mixed(b\"ab\\x01cd\") (mode B start → SA1 single-byte shift for 0x01 → back to set B) must succeed",
    );
    let expected_cws = vec![65, 66, 98, 65, 67, 68];
    assert_eq!(start_mode, MODE_B);
    assert_eq!(data_cws, expected_cws);
}
/// `encode_data_cws_mixed(b"A\x01B")` must report `MODE_A` and the data
/// codewords `[33, 65, 34]`.
#[test]
fn mixed_mode_a_from_start_for_control_byte_in_middle() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"A\x01B").expect(
        "encode_data_cws_mixed(b\"A\\x01B\") (all bytes A-encodable → mode A from start, mid-message control byte) must succeed",
    );
    let expected_cws = vec![33, 65, 34];
    assert_eq!(start_mode, MODE_A);
    assert_eq!(data_cws, expected_cws);
}
/// `encode_data_cws_mixed(b"\x01ABC")` must report `MODE_A` and the data
/// codewords `[65, 33, 34, 35]`.
#[test]
fn mixed_mode_a_from_start_for_leading_control_byte() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"\x01ABC").expect(
        "encode_data_cws_mixed(b\"\\x01ABC\") (leading control byte → mode A from start, ABC follows in A) must succeed",
    );
    let expected_cws = vec![65, 33, 34, 35];
    assert_eq!(start_mode, MODE_A);
    assert_eq!(data_cws, expected_cws);
}
/// Encodes the UTF-8 bytes of `"A\u{0080}"` with `encode_data_cws_mixed`
/// and expects `MODE_A` with the pinned five data codewords.
#[test]
fn mixed_extended_ascii_one_byte_via_fn4_shift() {
    let utf8_input = "A\u{0080}".as_bytes();
    let (start_mode, data_cws) = encode_data_cws_mixed(utf8_input).expect(
        "encode_data_cws_mixed(\"A\\u{0080}\") (extended ASCII U+0080 → UTF-8 [0x41, 0xc2, 0x80] → FN4 marker insertion in mode A) must succeed",
    );
    let expected_cws = vec![33, 101, 34, 101, 64];
    assert_eq!(start_mode, MODE_A);
    assert_eq!(data_cws, expected_cws);
}
/// Encodes the UTF-8 bytes of `"A\u{00c1}B"` with `encode_data_cws_mixed`
/// and expects `MODE_A` with the pinned six data codewords.
#[test]
fn mixed_extended_ascii_with_following_byte_via_fn4() {
    let utf8_input = "A\u{00c1}B".as_bytes();
    let (start_mode, data_cws) = encode_data_cws_mixed(utf8_input).expect(
        "encode_data_cws_mixed(\"A\\u{00c1}B\") (U+00C1 'Á' between ASCII → UTF-8 [0x41, 0xc3, 0x81, 0x42] → FN4 markers + back to A) must succeed",
    );
    let expected_cws = vec![33, 101, 35, 101, 65, 34];
    assert_eq!(start_mode, MODE_A);
    assert_eq!(data_cws, expected_cws);
}
/// `insert_fn4_markers` must return the bytes of `"Hello"` (widened to
/// `i16`) unchanged.
#[test]
fn fn4_insertion_is_identity_for_pure_ascii() {
    let ascii_only: Vec<i16> = b"Hello".iter().copied().map(i16::from).collect();
    let marked = insert_fn4_markers(&ascii_only);
    assert_eq!(marked, ascii_only);
}
/// Covers each outcome of `lookup_a_for_sentinel_or_byte` and
/// `lookup_b_for_sentinel_or_byte`: the `FN4` sentinel, an unknown negative
/// value, a byte the set accepts, and a byte the set rejects.
#[test]
fn lookup_a_b_for_sentinel_or_byte_branch_dispatch() {
    // FN4 maps to a set-specific codeword.
    assert_eq!(
        lookup_a_for_sentinel_or_byte(FN4),
        Ok(FN4_FROM_A),
        "FN4 in A → FN4_FROM_A (101)"
    );
    assert_eq!(
        lookup_b_for_sentinel_or_byte(FN4),
        Ok(FN4_FROM_B),
        "FN4 in B → FN4_FROM_B (100)"
    );

    // Any other negative value is rejected by both sets.
    let err = lookup_a_for_sentinel_or_byte(-99).unwrap_err();
    assert!(
        matches!(&err, Error::InvalidData(m) if m.contains("unsupported sentinel")),
        "negative non-FN4 should error: {err:?}"
    );
    let err = lookup_b_for_sentinel_or_byte(-99).unwrap_err();
    assert!(
        matches!(&err, Error::InvalidData(m) if m.contains("unsupported sentinel")),
        "negative non-FN4 (B) should error: {err:?}"
    );

    // Accepted bytes agree with the plain lookups.
    assert_eq!(
        lookup_a_for_sentinel_or_byte(0),
        Ok(lookup_a(0).unwrap()),
        "NUL is in set A via lookup_a"
    );
    assert_eq!(
        lookup_b_for_sentinel_or_byte(i16::from(b'a')),
        Ok(lookup_b(b'a').unwrap()),
        "'a' is in set B via lookup_b"
    );

    // Bytes outside a set are rejected with a set-specific message.
    let err = lookup_a_for_sentinel_or_byte(i16::from(b'a')).unwrap_err();
    assert!(
        matches!(&err, Error::InvalidData(m) if m.contains("not A-encodable")),
        "'a' must not be A-encodable: {err:?}"
    );
    let err = lookup_b_for_sentinel_or_byte(0).unwrap_err();
    assert!(
        matches!(&err, Error::InvalidData(m) if m.contains("not B-encodable")),
        "NUL must not be B-encodable: {err:?}"
    );
}
/// Pins `insert_fn4_markers` output for five high bytes followed by 5, 4
/// and 6 ASCII bytes and one more high byte.
#[test]
fn insert_fn4_markers_num_sa_run_counter_pinned() {
    let five_ascii = [195i16, 195, 195, 195, 195, 65, 65, 65, 65, 65, 195];
    assert_eq!(
        insert_fn4_markers(&five_ascii),
        vec![-16, -16, 67, 67, 67, 67, 67, -16, -16, 65, 65, 65, 65, 65, -16, 67],
        "5 high + 5 ASCII + high: num_sa[5]=5 hits threshold → double FN4 toggle back"
    );

    let four_ascii = [195i16, 195, 195, 195, 195, 65, 65, 65, 65, 195];
    assert_eq!(
        insert_fn4_markers(&four_ascii),
        vec![-16, -16, 67, 67, 67, 67, 67, -16, 65, -16, 65, -16, 65, -16, 65, 67],
        "5 high + 4 ASCII + high: num_sa run=4 < 5 → single FN4 per byte"
    );

    let six_ascii = [195i16, 195, 195, 195, 195, 65, 65, 65, 65, 65, 65, 195];
    assert_eq!(
        insert_fn4_markers(&six_ascii),
        vec![-16, -16, 67, 67, 67, 67, 67, -16, -16, 65, 65, 65, 65, 65, 65, -16, 67],
        "5 high + 6 ASCII + high: num_sa run=6 ≥ 5 → double FN4 toggle back"
    );
}
/// Pins `insert_fn4_markers` output for one, two and three trailing high
/// bytes, and for a high byte followed by an ASCII byte.
#[test]
fn insert_fn4_markers_handles_high_bit_runs_and_threshold() {
    let one_high = [195i16];
    assert_eq!(
        insert_fn4_markers(&one_high),
        vec![FN4, 67],
        "single high byte at end: shift + stripped byte"
    );

    let two_high = [195i16, 195];
    assert_eq!(
        insert_fn4_markers(&two_high),
        vec![FN4, 67, FN4, 67],
        "two high bytes at end: per-byte shifts (no toggle)"
    );

    let three_high = [195i16, 195, 195];
    assert_eq!(
        insert_fn4_markers(&three_high),
        vec![FN4, FN4, 67, 67, 67],
        "three high bytes at end: double FN4 + ea toggle"
    );

    let high_then_ascii = [195i16, 88];
    assert_eq!(
        insert_fn4_markers(&high_then_ascii),
        vec![FN4, 67, 88],
        "high+ascii: shift on first, ASCII unchanged"
    );
}
/// Pins `insert_fn4_markers` output for runs of five and of four high
/// bytes that are followed by one ASCII byte.
#[test]
fn insert_fn4_markers_mid_stream_threshold_5_boundary() {
    let run_of_five = [195i16, 195, 195, 195, 195, 88];
    assert_eq!(
        insert_fn4_markers(&run_of_five),
        vec![FN4, FN4, 67, 67, 67, 67, 67, FN4, 88],
        "5 high bytes mid-stream + ASCII: run=5 at i=0 hits \
         threshold=5 boundary → DOUBLE FN4 + toggle, then \
         stripped run, then single FN4 + ASCII at end"
    );

    let run_of_four = [195i16, 195, 195, 195, 88];
    assert_eq!(
        insert_fn4_markers(&run_of_four),
        vec![FN4, 67, FN4, 67, FN4, 67, FN4, 67, 88],
        "4 high bytes mid-stream + ASCII: run=4 < threshold=5 \
         → SINGLE FN4 per byte (no toggle); pins the 5-vs-4 \
         threshold boundary"
    );
}
/// Table of `(value, anotb(value), bnota(value))` expectations for a
/// control byte, a lowercase letter, an uppercase letter and `FN4`.
#[test]
fn anotb_bnota_match_charmap() {
    let expectations = [
        (1, true, false),
        (97, false, true),
        (65, false, false),
        (FN4, false, false),
    ];
    for (value, a_not_b, b_not_a) in expectations {
        assert_eq!(anotb(value), a_not_b);
        assert_eq!(bnota(value), b_not_a);
    }
}
/// `encode_cws_mixed(b"Hello\x01")` must yield a leading 1, the seven
/// pinned data codewords, and ten codewords overall.
#[test]
fn mixed_wrapper_adds_row_indicator_and_checks() {
    let codewords = encode_cws_mixed(b"Hello\x01").unwrap();
    let indicator = codewords[0];
    assert_eq!(indicator, 1, "row indicator for r=2 mode B is 1");
    assert_eq!(&codewords[1..8], &[40, 69, 76, 76, 79, 101, 65]);
    assert_eq!(codewords.len(), 1 + 7 + 2, "indicator + 7 cws + c1 + c2");
}
/// `encode_cws` and `encode_cws_mixed` must return the same codewords for
/// `b"Hello\x01"`.
#[test]
fn dispatcher_routes_mixed_through_encode_cws_mixed() {
    let input = b"Hello\x01";
    let direct = encode_cws_mixed(input).unwrap();
    let dispatched = encode_cws(input).unwrap();
    assert_eq!(direct, dispatched);
}
/// `b"1234"` must encode as `MODE_C_FROM_START` with data `[12, 34]`.
#[test]
fn initial_mode_pure_digits_even_picks_mode_c() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"1234").unwrap();
    let expected_cws = vec![12, 34];
    assert_eq!(start_mode, MODE_C_FROM_START);
    assert_eq!(data_cws, expected_cws);
}
/// `b"12345"` must encode as `MODE_C_THEN_B` with data `[17, 23, 45]`.
#[test]
fn initial_mode_pure_digits_odd_picks_mode_5() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"12345").unwrap();
    let expected_cws = vec![17, 23, 45];
    assert_eq!(start_mode, MODE_C_THEN_B);
    assert_eq!(data_cws, expected_cws);
}
/// `b"A12"` must encode as `MODE_C_THEN_B` with data `[33, 12]`.
#[test]
fn initial_mode_one_b_byte_then_2_even_digits_picks_mode_5() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"A12").unwrap();
    let expected_cws = vec![33, 12];
    assert_eq!(start_mode, MODE_C_THEN_B);
    assert_eq!(data_cws, expected_cws);
}
/// `b"A1234"` must encode as `MODE_C_THEN_B` with data `[33, 12, 34]`.
#[test]
fn initial_mode_one_b_byte_then_4_even_digits_picks_mode_5() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"A1234").unwrap();
    let expected_cws = vec![33, 12, 34];
    assert_eq!(start_mode, MODE_C_THEN_B);
    assert_eq!(data_cws, expected_cws);
}
/// `b"A12345"` must encode as `MODE_B_THEN_C` with data `[33, 17, 23, 45]`.
#[test]
fn initial_mode_one_b_byte_then_5_odd_digits_picks_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"A12345").unwrap();
    let expected_cws = vec![33, 17, 23, 45];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `b"AB12CD"` must encode as `MODE_B_THEN_C` with data
/// `[33, 34, 12, 100, 35, 36]`.
#[test]
fn initial_mode_two_b_bytes_then_2_even_digits_then_text_picks_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"AB12CD").unwrap();
    let expected_cws = vec![33, 34, 12, 100, 35, 36];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `b"AB1234"` must encode as `MODE_B_THEN_C` with data `[33, 34, 12, 34]`.
#[test]
fn initial_mode_two_b_bytes_then_4_even_digits_picks_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"AB1234").unwrap();
    let expected_cws = vec![33, 34, 12, 34];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `b"AB123456"` must encode as `MODE_B_THEN_C` with data
/// `[33, 34, 12, 34, 56]`.
#[test]
fn initial_mode_two_b_bytes_then_6_even_digits_picks_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"AB123456").unwrap();
    let expected_cws = vec![33, 34, 12, 34, 56];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `b"AB12345678CD"` must encode as `MODE_B_THEN_C` with data
/// `[33, 34, 12, 34, 56, 78, 100, 35, 36]`.
#[test]
fn initial_mode_two_b_bytes_then_8_digits_then_text_picks_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"AB12345678CD").unwrap();
    let expected_cws = vec![33, 34, 12, 34, 56, 78, 100, 35, 36];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `b"AB1234CD"` must encode as `MODE_B_THEN_C` with data
/// `[33, 34, 12, 34, 100, 35, 36]`.
#[test]
fn initial_mode_two_b_bytes_then_4_digits_then_text_picks_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"AB1234CD").unwrap();
    let expected_cws = vec![33, 34, 12, 34, 100, 35, 36];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `b"a1234b"` must encode as `MODE_C_THEN_B` with data
/// `[65, 12, 34, 100, 66]`.
#[test]
fn initial_mode_lowercase_then_digits_then_lowercase() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"a1234b").unwrap();
    let expected_cws = vec![65, 12, 34, 100, 66];
    assert_eq!(start_mode, MODE_C_THEN_B);
    assert_eq!(data_cws, expected_cws);
}
/// `b"a123456b"` must encode as `MODE_C_THEN_B` with data
/// `[65, 12, 34, 56, 100, 66]`.
#[test]
fn initial_mode_lowercase_then_6_digits_then_lowercase() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"a123456b").unwrap();
    let expected_cws = vec![65, 12, 34, 56, 100, 66];
    assert_eq!(start_mode, MODE_C_THEN_B);
    assert_eq!(data_cws, expected_cws);
}
/// `b"a123b"` must encode as `MODE_B_THEN_C` with data
/// `[65, 17, 23, 100, 66]`.
#[test]
fn initial_mode_lowercase_then_3_odd_digits_then_lowercase_mode_6() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"a123b").unwrap();
    let expected_cws = vec![65, 17, 23, 100, 66];
    assert_eq!(start_mode, MODE_B_THEN_C);
    assert_eq!(data_cws, expected_cws);
}
/// `numsscr` over the bytes of `"1234"` from index 0 must return `(4, 4)`.
#[test]
fn numsscr_pure_digit_run() {
    let widened: Vec<i16> = b"1234".iter().copied().map(i16::from).collect();
    assert_eq!(numsscr(&widened, 0), (4, 4));
}
/// `numsscr` over the bytes of `"12Ab"` from index 0 must return `(2, 2)`.
#[test]
fn numsscr_stops_at_non_digit() {
    let widened: Vec<i16> = b"12Ab".iter().copied().map(i16::from).collect();
    assert_eq!(numsscr(&widened, 0), (2, 2));
}
/// `numsscr` over the bytes of `"AB1234"` from index 2 must return `(4, 4)`.
#[test]
fn numsscr_from_offset() {
    let widened: Vec<i16> = b"AB1234".iter().copied().map(i16::from).collect();
    assert_eq!(numsscr(&widened, 2), (4, 4));
}
/// Checks the fixed cells of a `build_row_bits` row: 81 cells, ten leading
/// zeros, a 1 at index 10 and a 0 at index 80. The same anchors are checked
/// for the `STOPENCS_EVEN` table, and the two rows must differ whenever the
/// first entries of the two tables differ.
#[test]
fn build_row_bits_invariant_layout_code16k() {
    let all_zero_cws = [0u16; 5];

    let row = build_row_bits(0, &all_zero_cws, &STOPENCS_ODD);
    assert_eq!(row.len(), 81);
    for (i, cell) in row.iter().enumerate().take(10) {
        assert_eq!(*cell, 0, "quiet pos {i} must be 0");
    }
    assert_eq!(row[10], 1, "start bar at pos 10");
    assert_eq!(row[80], 0, "trailing separator at pos 80");

    let row_odd = build_row_bits(0, &all_zero_cws, &STOPENCS_ODD);
    let row_even = build_row_bits(0, &all_zero_cws, &STOPENCS_EVEN);
    let tables_differ = STOPENCS_ODD[0] != STOPENCS_EVEN[0];
    if tables_differ {
        assert_ne!(
            row_odd, row_even,
            "different stopencs tables must produce different rows"
        );
    }
    assert_eq!(row_even[0], 0);
    assert_eq!(row_even[10], 1);
    assert_eq!(row_even[80], 0);
}
/// Places a single codeword 1 first at position 0 and then at position 1,
/// and checks that the 11-cell chunks starting at columns 18 and 29 carry
/// the bit patterns listed for `ENCS[1]` and `ENCS[0]` accordingly.
#[test]
fn build_row_bits_pins_encs_indexing_with_non_zero_cws() {
    assert_eq!(ENCS[0], "212222", "ENCS[0] table anchor");
    assert_eq!(ENCS[1], "222122", "ENCS[1] table anchor");
    let encs0_bits: [u8; 11] = [0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1];
    let encs1_bits: [u8; 11] = [0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1];

    // Case 1: codeword 1 in the first slot.
    let row_a = build_row_bits(0, &[1u16, 0, 0, 0, 0], &STOPENCS_ODD);
    for (i, &want) in (18usize..).zip(encs1_bits.iter()) {
        assert_eq!(
            row_a[i], want,
            "case 1 chunk 0 (cw=1 → ENCS[1]): pos {i} should be {want}"
        );
    }
    for (i, &want) in (29usize..).zip(encs0_bits.iter()) {
        assert_eq!(
            row_a[i], want,
            "case 1 chunk 1 (cw=0 → ENCS[0]): pos {i} should be {want}"
        );
    }

    // Case 2: codeword 1 in the second slot.
    let row_b = build_row_bits(0, &[0u16, 1, 0, 0, 0], &STOPENCS_ODD);
    for (i, &want) in (18usize..).zip(encs0_bits.iter()) {
        assert_eq!(
            row_b[i], want,
            "case 2 chunk 0 (cw=0 → ENCS[0]): pos {i} should be {want}"
        );
    }
    for (i, &want) in (29usize..).zip(encs1_bits.iter()) {
        assert_eq!(
            row_b[i], want,
            "case 2 chunk 1 (cw=1 → ENCS[1]): pos {i} should be {want}"
        );
    }

    assert_ne!(
        &row_a[18..29],
        &row_b[18..29],
        "moving non-zero cw from position 0 to position 1 must shift the divergence"
    );
}
/// `build_seprow` must return 81 cells: zeros at 0..10, ones at 10..80 and
/// a zero at 80, which is 11 zeros and 70 ones in total.
#[test]
fn build_seprow_10_zeros_then_70_ones_then_zero() {
    let row = build_seprow();
    assert_eq!(row.len(), 81);
    for (i, &cell) in row.iter().enumerate().take(10) {
        assert_eq!(cell, 0, "leading pos {i} must be 0");
    }
    for (i, &cell) in row.iter().enumerate().take(80).skip(10) {
        assert_eq!(cell, 1, "middle pos {i} must be 1");
    }
    assert_eq!(row[80], 0, "trailing pos 80 must be 0");

    let zeros = row.iter().filter(|&&v| v == 0).count();
    assert_eq!(zeros, 11);
    let ones = row.iter().filter(|&&v| v == 1).count();
    assert_eq!(ones, 70);
}
/// `pair_codeword` on two ASCII digits must give their two-digit decimal
/// value for the pairs "12", "00" and "99".
#[test]
fn pair_codeword_basic_pairs() {
    let expectations = [
        (b'1', b'2', 12),
        (b'0', b'0', 0),
        (b'9', b'9', 99),
    ];
    for (tens, ones, value) in expectations {
        assert_eq!(pair_codeword(tens, ones), value);
    }
}
/// Exhaustive pin of `lookup_a_for_sentinel_or_byte` and
/// `lookup_b_for_sentinel_or_byte`: the `FN4` sentinel, other negative
/// values, bytes each set accepts, and the exact wording of every reject
/// diagnostic. Several checks are repeated with different assertion shapes.
#[test]
fn lookup_a_b_for_sentinel_or_byte_fn4_negative_and_byte_forwarding() {
// FN4 maps to a different codeword in each set (101 from A, 100 from B).
assert_eq!(
lookup_a_for_sentinel_or_byte(FN4).unwrap(),
FN4_FROM_A,
"FN4 in mode A → FN4_FROM_A (101)"
);
assert_eq!(lookup_a_for_sentinel_or_byte(FN4).unwrap(), 101);
assert_eq!(
lookup_b_for_sentinel_or_byte(FN4).unwrap(),
FN4_FROM_B,
"FN4 in mode B → FN4_FROM_B (100)"
);
assert_eq!(lookup_b_for_sentinel_or_byte(FN4).unwrap(), 100);
assert_ne!(
lookup_a_for_sentinel_or_byte(FN4).unwrap(),
lookup_b_for_sentinel_or_byte(FN4).unwrap(),
);
// Negative values other than FN4 must be rejected by both lookups, and the
// message must echo the offending value.
for &neg in &[-1_i16, -2, -100] {
match lookup_a_for_sentinel_or_byte(neg) {
Err(Error::InvalidData(msg)) => {
assert!(
msg.contains("code16k mixed encoder:"),
"lookup_a: must carry the symbology prefix; got {msg:?}"
);
assert!(
msg.contains("unsupported sentinel"),
"lookup_a: must carry the predicate; got {msg:?}"
);
assert!(
msg.contains(&format!("sentinel {neg}")),
"lookup_a: must echo `{{c}}` ({neg}); got {msg:?}"
);
assert!(
msg.contains("only FN4 is wired today"),
"lookup_a: must carry the FN4 remediation hint; got {msg:?}"
);
}
other => panic!("expected InvalidData for c={neg}, got {other:?}"),
}
match lookup_b_for_sentinel_or_byte(neg) {
Err(Error::InvalidData(msg)) => {
assert!(
msg.contains("code16k mixed encoder:"),
"lookup_b: must carry the symbology prefix; got {msg:?}"
);
assert!(
msg.contains("unsupported sentinel"),
"lookup_b: must carry the predicate; got {msg:?}"
);
assert!(
msg.contains(&format!("sentinel {neg}")),
"lookup_b: must echo `{{c}}` ({neg}); got {msg:?}"
);
assert!(
msg.contains("only FN4 is wired today"),
"lookup_b: must carry the FN4 remediation hint; got {msg:?}"
);
}
other => panic!("expected InvalidData for c={neg}, got {other:?}"),
}
}
// 'A' is accepted by both lookups and yields 33 from each.
assert_eq!(
lookup_a_for_sentinel_or_byte(b'A' as i16).unwrap(),
33,
"A-encodable byte 'A' forwards to lookup_a row 33"
);
assert_eq!(lookup_b_for_sentinel_or_byte(b'A' as i16).unwrap(), 33);
// 'a' is rejected by the A lookup but accepted by the B lookup (65).
let err =
lookup_a_for_sentinel_or_byte(b'a' as i16).expect_err("'a' (0x61) is not A-encodable");
let Error::InvalidData(msg) = err else {
panic!("lookup_a('a') must yield InvalidData; got {err:?}");
};
assert!(
msg.contains("code16k") && msg.contains("0x61") && msg.contains("not A-encodable"),
"lookup_a('a') diagnostic must carry symbology tag + byte echo + 'not A-encodable'; \
got {msg:?}"
);
assert!(
!msg.contains("B-encodable"),
"lookup_a diagnostic must NOT leak the B-arm text (cross-arm swap guard); got {msg:?}"
);
assert_eq!(lookup_b_for_sentinel_or_byte(b'a' as i16).unwrap(), 65);
// NUL is accepted by the A lookup (64) but rejected by the B lookup.
assert_eq!(
lookup_a_for_sentinel_or_byte(0).unwrap(),
64,
"NUL is A-encodable at row 64"
);
let err = lookup_b_for_sentinel_or_byte(0).expect_err("NUL (0x00) is not B-encodable");
let Error::InvalidData(msg) = err else {
panic!("lookup_b(0) must yield InvalidData; got {err:?}");
};
assert!(
msg.contains("code16k") && msg.contains("0x00") && msg.contains("not B-encodable"),
"lookup_b(NUL) diagnostic must carry symbology tag + byte echo + 'not B-encodable'; \
got {msg:?}"
);
assert!(
!msg.contains("not A-encodable"),
"lookup_b diagnostic must NOT leak the A-arm text (cross-arm swap guard); got {msg:?}"
);
// The same two rejects again, this time asserting each message fragment
// on its own so a failure names the missing piece.
match lookup_a_for_sentinel_or_byte(b'a' as i16) {
Err(Error::InvalidData(msg)) => {
assert!(
msg.contains("code16k mixed encoder:"),
"A-arm: missing `code16k mixed encoder:` prefix: {msg}"
);
assert!(
msg.contains("byte 0x61"),
"A-arm: missing `byte 0x61` hex echo (0x61 == 'a'): {msg}"
);
assert!(
msg.contains("not A-encodable"),
"A-arm: missing `not A-encodable` predicate: {msg}"
);
assert!(
!msg.contains("not B-encodable"),
"A-arm: B-arm diagnostic leaked into A-arm reject: {msg}"
);
}
other => panic!("expected InvalidData, got {other:?}"),
}
match lookup_b_for_sentinel_or_byte(0) {
Err(Error::InvalidData(msg)) => {
assert!(
msg.contains("code16k mixed encoder:"),
"B-arm: missing `code16k mixed encoder:` prefix: {msg}"
);
assert!(
msg.contains("byte 0x00"),
"B-arm: missing `byte 0x00` hex echo (NUL): {msg}"
);
assert!(
msg.contains("not B-encodable"),
"B-arm: missing `not B-encodable` predicate: {msg}"
);
assert!(
!msg.contains("not A-encodable"),
"B-arm: A-arm diagnostic leaked into B-arm reject: {msg}"
);
}
other => panic!("expected InvalidData, got {other:?}"),
}
}
/// `b"a\x01\x02b"` must encode as `MODE_B` with data
/// `[65, 104, 65, 66, 66]`.
#[test]
fn mixed_mode_b_with_sa2_two_byte_shift() {
    let (start_mode, data_cws) = encode_data_cws_mixed(b"a\x01\x02b").unwrap();
    let expected_cws = vec![65, 104, 65, 66, 66];
    assert_eq!(start_mode, MODE_B);
    assert_eq!(data_cws, expected_cws);
}
#[test]
fn mixed_mode_b_with_sa2_amid_lowercase() {
    // Same two-byte shift as the shorter variant, but with two lowercase
    // letters on each side of the control-byte pair.
    let encoded = encode_data_cws_mixed(b"ab\x01\x02cd").unwrap();
    assert_eq!(encoded.0, MODE_B);
    // 'a' 'b' | SA2 | 0x01 0x02 (set A) | 'c' 'd'
    assert_eq!(encoded.1, vec![65, 66, 104, 65, 66, 67, 68]);
}
#[test]
fn mid_message_swc_latch_after_long_text() {
    // Uppercase text followed by an odd-length digit run: the first digit is
    // expected as a plain text codeword (17 == '1'), then the latch to set C
    // (99) and the remaining digits packed in pairs.
    let outcome = encode_data_cws_mixed(b"ABCDE12345");
    let (start_mode, codewords) = outcome.unwrap();
    assert_eq!(start_mode, MODE_B);
    // 'A'..'E' | '1' | SWC | "23" | "45"
    assert_eq!(codewords, vec![33, 34, 35, 36, 37, 17, 99, 23, 45]);
}
#[test]
fn mid_message_swc_latch_lowercase() {
    // Lowercase counterpart of the uppercase latch test: only the five text
    // codewords differ ('a'..'e' map to 65..69 in set B).
    let outcome = encode_data_cws_mixed(b"abcde12345");
    let (start_mode, codewords) = outcome.unwrap();
    assert_eq!(start_mode, MODE_B);
    // 'a'..'e' | '1' | SWC | "23" | "45"
    assert_eq!(codewords, vec![65, 66, 67, 68, 69, 17, 99, 23, 45]);
}
#[test]
fn mid_message_swc_with_4_digits_even() {
    // An even-length digit run needs no leading single digit: the latch to
    // set C (99) comes straight after the text and both pairs follow it.
    let encoded = encode_data_cws_mixed(b"abcd1234").unwrap();
    assert_eq!(encoded.0, MODE_B);
    // 'a'..'d' | SWC | "12" | "34"
    assert_eq!(encoded.1, vec![65, 66, 67, 68, 99, 12, 34]);
}
#[test]
fn mode_c_sb1_shift_for_single_text_byte() {
    // A lone text byte between digit pairs: the stream is expected to stay in
    // set C and borrow set B for one symbol via SB1 (104).
    let outcome = encode_data_cws_mixed(b"12X12");
    let (start_mode, codewords) = outcome.unwrap();
    assert_eq!(start_mode, MODE_C_FROM_START);
    // "12" | SB1 | 'X' | "12"
    assert_eq!(codewords, vec![12, 104, 56, 12]);
}
#[test]
fn mode_c_sb1_shift_longer_payload() {
    // Same single-byte shift as the short variant, with two digit pairs on
    // each side of the text byte.
    let encoded = encode_data_cws_mixed(b"1234X1234").unwrap();
    assert_eq!(encoded.0, MODE_C_FROM_START);
    // "12" "34" | SB1 | 'X' | "12" "34"
    assert_eq!(encoded.1, vec![12, 34, 104, 56, 12, 34]);
}
#[test]
fn mid_message_sc2_from_a() {
    // A leading control byte selects mode A; the four digits that follow are
    // expected as two set-C pairs introduced by SC2 (105), after which the
    // trailing 'B' is a plain codeword again.
    let outcome = encode_data_cws_mixed(b"\x011234B");
    let (start_mode, codewords) = outcome.unwrap();
    assert_eq!(start_mode, MODE_A);
    // 0x01 | SC2 | "12" | "34" | 'B'
    assert_eq!(codewords, vec![65, 105, 12, 34, 34]);
}
#[test]
fn mid_message_sc3_from_a() {
    // Six digits after the control byte: three set-C pairs introduced by SC3
    // (106), then the trailing 'B' as a plain codeword.
    let outcome = encode_data_cws_mixed(b"\x01123456B");
    let (start_mode, codewords) = outcome.unwrap();
    assert_eq!(start_mode, MODE_A);
    // 0x01 | SC3 | "12" | "34" | "56" | 'B'
    assert_eq!(codewords, vec![65, 106, 12, 34, 56, 34]);
}
#[test]
fn codeword_constants_match_charmaps() {
    // Pins the numeric value of every shift/latch/function codeword constant,
    // grouped by the code set the symbol is emitted from. The same codeword
    // value carries a different meaning in each set, so all three groups
    // deliberately repeat values.

    // Emitted while in set A.
    assert_eq!(SB1_FROM_A, 98);
    assert_eq!(SWC_FROM_A_OR_B, 99);
    assert_eq!(SWB_FROM_A, 100);
    assert_eq!(FN4_FROM_A, 101);
    assert_eq!(SB2_FROM_A, 104);
    assert_eq!(SC2_FROM_A, 105);
    assert_eq!(SC3_FROM_A, 106);

    // Emitted while in set B.
    assert_eq!(SA1_FROM_B, 98);
    assert_eq!(FN4_FROM_B, 100);
    assert_eq!(SWA_FROM_B, 101);
    assert_eq!(SA2_FROM_B, 104);
    assert_eq!(SC2_FROM_B, 105);
    assert_eq!(SC3_FROM_B, 106);

    // Emitted while in set C.
    assert_eq!(SWB_FROM_C, 100);
    assert_eq!(SWA_FROM_C, 101);
    assert_eq!(SB1_FROM_C, 104);
    assert_eq!(SB2_FROM_C, 105);
    assert_eq!(SB3_FROM_C, 106);
}
#[test]
fn leading_row_indicator_known_values() {
    // Known-answer table of (first argument, second argument, expected value).
    // Presumably the arguments are (rows, mode) - confirm against the
    // function's signature. Every pinned value equals 7 * (first - 2) + second.
    for (rows, mode, want) in [(2, 0, 0), (2, 6, 6), (3, 0, 7), (3, 4, 11), (16, 6, 104)] {
        assert_eq!(leading_row_indicator(rows, mode), want);
    }
}
#[test]
fn compute_checksums_simple_three_codewords() {
    // Known-answer check: both check values for the three-codeword input
    // [10, 20, 30] are pinned together as one pair.
    assert_eq!(compute_checksums(&[10, 20, 30]), (93, 84));
}
#[test]
fn pick_symbol_size_per_pair_count() {
    // (input, expected pair) table. Inputs 7 -> 8 and 12 -> 17 straddle the
    // points where the returned pair changes, so each size is pinned at both
    // ends of the range visible here.
    let table = [
        (1, (2, 7)),
        (7, (2, 7)),
        (8, (3, 12)),
        (12, (3, 12)),
        (17, (4, 17)),
    ];
    for (count, want) in table {
        assert_eq!(pick_symbol_size(count), Some(want));
    }
}
#[test]
fn lookup_a_or_b_for_sentinel_or_byte_arms() {
    // Walks every arm of both sentinel-or-byte lookups: the FN4 sentinel, an
    // unsupported negative sentinel, a byte the set can encode, and a byte it
    // cannot. Error text is checked for the fragments that identify the arm.

    // ---- set A ----
    assert_eq!(
        lookup_a_for_sentinel_or_byte(FN4).unwrap(),
        FN4_FROM_A,
        "FN4 → FN4_FROM_A (101)"
    );
    match lookup_a_for_sentinel_or_byte(-1).expect_err("non-FN4 negative → Err") {
        Error::InvalidData(msg) => assert!(
            msg.contains("unsupported sentinel") && msg.contains("-1"),
            "lookup_a(-1) diagnostic must pin 'unsupported sentinel' + value echo; got {msg:?}"
        ),
        err => panic!("lookup_a(-1) must yield InvalidData; got {err:?}"),
    }
    let via_wrapper = lookup_a_for_sentinel_or_byte(b'A' as i16).unwrap();
    assert_eq!(
        via_wrapper,
        lookup_a(b'A').unwrap(),
        "'A' delegates to lookup_a"
    );
    assert!(
        lookup_a_for_sentinel_or_byte(0).is_ok(),
        "byte 0 must be accepted in set A (kills `< 0` → `<= 0`)"
    );
    match lookup_a_for_sentinel_or_byte(b'a' as i16).expect_err("non-A byte 'a' → Err (A)") {
        Error::InvalidData(msg) => {
            // Each required fragment is paired with the text reported when it is absent.
            for (needle, complaint) in [
                (
                    "code16k mixed encoder:",
                    "missing `code16k mixed encoder:` prefix",
                ),
                ("not A-encodable", "missing `not A-encodable` predicate"),
                ("0x61", "missing hex echo `0x61` for byte 'a' (97)"),
            ] {
                assert!(msg.contains(needle), "{complaint}: {msg}");
            }
            assert!(
                !msg.contains("unsupported sentinel"),
                "wrong arm — sentinel diagnostic leaked into byte path: {msg}"
            );
        }
        err => panic!("lookup_a('a') must yield InvalidData; got {err:?}"),
    }

    // ---- set B ----
    assert_eq!(
        lookup_b_for_sentinel_or_byte(FN4).unwrap(),
        FN4_FROM_B,
        "FN4 → FN4_FROM_B (100)"
    );
    assert_ne!(
        FN4_FROM_A, FN4_FROM_B,
        "kill FN4_FROM_A↔FN4_FROM_B arm swap"
    );
    match lookup_b_for_sentinel_or_byte(-1).expect_err("non-FN4 negative → Err (B)") {
        Error::InvalidData(msg) => assert!(
            msg.contains("unsupported sentinel") && msg.contains("-1"),
            "lookup_b(-1) diagnostic must pin 'unsupported sentinel' + value echo; got {msg:?}"
        ),
        err => panic!("lookup_b(-1) must yield InvalidData; got {err:?}"),
    }
    let via_wrapper = lookup_b_for_sentinel_or_byte(b'a' as i16).unwrap();
    assert_eq!(
        via_wrapper,
        lookup_b(b'a').unwrap(),
        "'a' delegates to lookup_b"
    );
    match lookup_b_for_sentinel_or_byte(1).expect_err("control byte 0x01 → Err (B)") {
        Error::InvalidData(msg) => {
            for (needle, complaint) in [
                (
                    "code16k mixed encoder:",
                    "missing `code16k mixed encoder:` prefix",
                ),
                ("not B-encodable", "missing `not B-encodable` predicate"),
                ("0x01", "missing hex echo `0x01` for control byte 1"),
            ] {
                assert!(msg.contains(needle), "{complaint}: {msg}");
            }
            assert!(
                !msg.contains("unsupported sentinel"),
                "wrong arm — sentinel diagnostic leaked into byte path: {msg}"
            );
        }
        err => panic!("lookup_b(1) must yield InvalidData; got {err:?}"),
    }
}
#[test]
fn numsscr_counts_digits_and_fn1_alignment() {
    // Table-driven known answers for `numsscr(message, start)`.
    // Judging by the pinned pairs, a digit advances both components by one,
    // while an FN1 is only accepted when the second component is even and then
    // advances it by two - confirm against the implementation.
    let d = |ch: u8| i16::from(ch);
    let mixed = vec![d(b'0'), FN1, d(b'1'), d(b'2')];
    let cases = [
        // Empty input and plain digit runs.
        (vec![], 0, (0, 0), "empty → (0, 0)"),
        (vec![d(b'0'), d(b'1'), d(b'2')], 0, (3, 3), "3 digits → (3, 3)"),
        (vec![d(b'9')], 0, (1, 1), "single '9' → (1, 1)"),
        // FN1 placement relative to the even/odd slot.
        (
            vec![FN1],
            0,
            (1, 2),
            "FN1 at even s=0: s++ + n+=1 + s+=1 → (1, 2)",
        ),
        (vec![FN1, d(b'0')], 0, (2, 3), "FN1 + digit → (2, 3)"),
        (
            vec![d(b'0'), FN1],
            0,
            (1, 1),
            "digit then FN1 at odd s=1: FN1 breaks → (1, 1)",
        ),
        (
            vec![d(b'0'), d(b'1'), FN1],
            0,
            (3, 4),
            "two digits then FN1 at even s=2 → (3, 4)",
        ),
        // Non-digit bytes stop the scan.
        (vec![d(b'A')], 0, (0, 0), "single 'A' breaks → (0, 0)"),
        (
            vec![d(b'0'), d(b'A')],
            0,
            (1, 1),
            "digit then 'A' breaks → (1, 1)",
        ),
        // Non-zero and out-of-range start offsets.
        (
            vec![d(b'A'), d(b'1'), d(b'2')],
            1,
            (2, 2),
            "start at p=1: 2 digits → (2, 2)",
        ),
        (vec![d(b'0')], 1, (0, 0), "p past end → (0, 0)"),
        (vec![d(b'0')], 99, (0, 0), "p way past end → (0, 0)"),
        // Both ends of the digit range and their immediate neighbours.
        (vec![d(b'0')], 0, (1, 1), "'0' accepted"),
        (vec![d(b'9')], 0, (1, 1), "'9' accepted"),
        (vec![d(b'/')], 0, (0, 0), "'/' (47) just below '0' → (0, 0)"),
        (vec![d(b':')], 0, (0, 0), "':' (58) just above '9' → (0, 0)"),
        // FN1 embedded in a digit run, scanned from two different offsets.
        (
            mixed.clone(),
            0,
            (1, 1),
            "['0', FN1, ...]: FN1 at odd s=1 breaks → (1, 1)",
        ),
        (
            mixed,
            1,
            (3, 4),
            "from p=1: FN1 at s=0 even, then 2 digits → (3, 4)",
        ),
        (
            vec![FN1, FN1],
            0,
            (2, 4),
            "two FN1s, both at even slots → (2, 4)",
        ),
    ];
    for (message, start, want, why) in cases {
        assert_eq!(numsscr(&message, start), want, "{why}");
    }
}
#[test]
fn code16k_in_a_in_b_anotb_bnota_per_byte_class() {
    // Checks the four membership predicates against one representative list
    // per byte class, then sweeps whole ranges for the invariants.

    // Bytes encodable in both sets.
    let shared = [b'0', b'9', b'A', b'Z', b' '].map(i16::from);
    for c in shared {
        assert!(in_a(c), "{c} ({}): in_a", char::from_u32(c as u32).unwrap());
        assert!(in_b(c), "{c} ({}): in_b", char::from_u32(c as u32).unwrap());
        assert!(!anotb(c), "{c}: anotb=false (in both)");
        assert!(!bnota(c), "{c}: bnota=false (in both)");
    }

    // Control bytes: set A only.
    let a_only = [0_i16, 1, 9, 10, 13, 31];
    for c in a_only {
        assert!(in_a(c), "control {c}: in_a");
        assert!(!in_b(c), "control {c}: NOT in_b");
        assert!(anotb(c), "control {c}: anotb (A only)");
        assert!(!bnota(c), "control {c}: NOT bnota");
    }

    // Backtick through DEL (127): set B only.
    let b_only = [b'`', b'a', b'm', b'z', b'{', b'~', 127].map(i16::from);
    for c in b_only {
        assert!(!in_a(c), "lowercase {c}: NOT in_a");
        assert!(in_b(c), "lowercase {c}: in_b");
        assert!(!anotb(c), "lowercase {c}: NOT anotb");
        assert!(bnota(c), "lowercase {c}: bnota (B only)");
    }

    // Negative values (sentinels included) belong to neither set.
    let negatives = [FN1, FN2, FN3, FN4, -1, -100];
    for c in negatives {
        assert!(!in_a(c), "negative {c}: NOT in_a (b >= 0 guard)");
        assert!(!in_b(c), "negative {c}: NOT in_b");
        assert!(!anotb(c));
        assert!(!bnota(c));
    }

    // NUL is the lower boundary of the non-negative range.
    assert!(in_a(0), "NUL (0) IS in A (catches `b > 0` mutant)");
    assert!(!in_b(0), "NUL (0) NOT in B");
    assert!(anotb(0));

    // No 7-bit value may be flagged as exclusive to both sets at once.
    (0_i16..=127).for_each(|b| {
        assert!(
            !anotb(b) || !bnota(b),
            "anotb({b}) && bnota({b}) — must be mutually exclusive"
        );
    });

    // Every value in -20..0, plus the FN sentinels by name, is exclusive to neither.
    (-20_i16..0).chain([FN1, FN2, FN3, FN4]).for_each(|b| {
        assert!(!anotb(b));
        assert!(!bnota(b));
    });

    // High bytes are in neither set.
    (128_i16..=255).for_each(|b| {
        assert!(!in_a(b), "high byte {b}: NOT in_a");
        assert!(!in_b(b), "high byte {b}: NOT in_b");
        assert!(!anotb(b), "high byte {b}: NOT anotb (col0 != b mutant)");
        assert!(!bnota(b), "high byte {b}: NOT bnota (col1 != b mutant)");
    });
}
#[test]
fn pick_initial_mode_arms() {
    // One row per expected outcome of `pick_initial_mode`:
    // (raw input, code set, start mode, offset, prefix codewords).
    // The rows with a non-zero offset expect the leading text byte(s) to come
    // back as set-B prefix codewords.
    let table: &[(&[u8], Cset, u16, usize, Vec<u16>)] = &[
        (b"1234", Cset::C, MODE_C_FROM_START, 0, vec![]),
        (
            b"123",
            Cset::C,
            MODE_C_THEN_B,
            1,
            vec![lookup_b(b'1').unwrap()],
        ),
        (
            b"A1234",
            Cset::C,
            MODE_C_THEN_B,
            1,
            vec![lookup_b(b'A').unwrap()],
        ),
        (
            b"AB1234",
            Cset::C,
            MODE_B_THEN_C,
            2,
            vec![lookup_b(b'A').unwrap(), lookup_b(b'B').unwrap()],
        ),
        (b"abc", Cset::B, MODE_B, 0, vec![]),
        (b"", Cset::B, MODE_B, 0, vec![]),
    ];
    for (input, want_cset, want_mode, want_offset, want_prefix) in table.iter() {
        // The function under test works on i16 so sentinels fit; widen each byte.
        let widened: Vec<i16> = input.iter().copied().map(i16::from).collect();
        let (ahead_a, ahead_b) = compute_lookahead(&widened);
        let picked = pick_initial_mode(&widened, &ahead_a, &ahead_b);
        let (cset, mode, offset, prefix) = picked;
        assert_eq!(cset, *want_cset, "Cset mismatch for input {input:?}");
        assert_eq!(mode, *want_mode, "mode mismatch for input {input:?}");
        assert_eq!(offset, *want_offset, "offset mismatch for input {input:?}");
        assert_eq!(&prefix, want_prefix, "prefix mismatch for input {input:?}");
    }
}
#[test]
fn pick_initial_mode_boundary_pinned() {
    // Boundary inputs that must all fall through to the plain set-B start
    // with no prefix. 49/50 are ASCII '1'/'2', 65/66 are 'A'/'B'; -191 is a
    // negative (non-byte) entry.
    let table: &[(&str, Vec<i16>, Cset, u16, usize, Vec<u16>)] = &[
        (
            "s0=1 single-digit prefix",
            vec![49, 65],
            Cset::B,
            MODE_B,
            0,
            vec![],
        ),
        (
            "s1=1 single-digit middle",
            vec![65, 49, 66],
            Cset::B,
            MODE_B,
            0,
            vec![],
        ),
        (
            "arm-4 outer msglen-or-msg[0]>=0 disjunct",
            vec![-191, -191, 49, 50],
            Cset::B,
            MODE_B,
            0,
            vec![],
        ),
        (
            "arm-4 outer msg[0]>=0-or-msg[1]>=0 disjunct",
            vec![65, -191, 49, 50],
            Cset::B,
            MODE_B,
            0,
            vec![],
        ),
    ];
    for (label, msg, want_cset, want_mode, want_offset, want_prefix) in table.iter() {
        let (ahead_a, ahead_b) = compute_lookahead(msg);
        let picked = pick_initial_mode(msg, &ahead_a, &ahead_b);
        let (cset, mode, offset, prefix) = picked;
        assert_eq!(cset, *want_cset, "Cset mismatch for case '{label}'");
        assert_eq!(mode, *want_mode, "mode mismatch for case '{label}'");
        assert_eq!(offset, *want_offset, "offset mismatch for case '{label}'");
        assert_eq!(&prefix, want_prefix, "prefix mismatch for case '{label}'");
    }
}
// Regression fingerprint over a large generated corpus: every payload is run
// through `encode_data_cws_mixed` and its outcome folded into one rolling u64,
// which is compared with a pinned value at the end. The hash is sensitive to
// the order in which payloads are generated, so the statement order below must
// not change unless the pinned value is regenerated.
#[test]
fn encode_data_cws_mixed_exhaustive_brute_fingerprint() {
let mut acc: u64 = 0u64;
// Folds one payload into `acc`. A successful encode contributes the mode, the
// codeword count and every codeword weighted by its 1-based position; a
// failed encode contributes the fixed marker 0xDEAD instead.
let mut mix = |p: &[u8]| {
acc = acc.wrapping_mul(1000003);
match encode_data_cws_mixed(p) {
Ok((mode, cws)) => {
acc = acc.wrapping_add(mode as u64).wrapping_mul(1000003);
acc = acc.wrapping_add(cws.len() as u64);
for (i, &cw) in cws.iter().enumerate() {
acc = acc.wrapping_add(
(cw as u64).wrapping_mul((i as u64 + 1).wrapping_mul(2654435761)),
);
acc = acc.wrapping_mul(31);
}
}
Err(_) => {
acc = acc.wrapping_add(0xDEAD);
}
}
};
// Part 1: every string of length 1..=7 over a six-symbol alphabet (two
// digits, a lowercase letter, an uppercase letter, a control byte and a high
// byte). `idx` is decoded as a base-6 number, least significant digit first.
let alpha6: [u8; 6] = [b'1', b'2', b'a', b'A', 0x01, 0xC1];
let mut buf = [0u8; 9];
for len in 1..=7usize {
for mut idx in 0..6usize.pow(len as u32) {
for slot in buf.iter_mut().take(len) {
*slot = alpha6[idx % 6];
idx /= 6;
}
mix(&buf[..len]);
}
}
// Part 2: every string of length 8..=9 over a reduced four-symbol alphabet,
// decoded from `idx` in base 4 the same way.
let alpha4: [u8; 4] = [b'1', b'a', b'A', 0x01];
for len in 8..=9usize {
for mut idx in 0..4usize.pow(len as u32) {
for slot in buf.iter_mut().take(len) {
*slot = alpha4[idx % 4];
idx /= 4;
}
mix(&buf[..len]);
}
}
// Part 3: a digit run of length 1..=14 (digits cycle 0..9) wrapped in every
// prefix/suffix combination from `cls`, once as-is (`v`) and once with a
// 'b' inserted right after the run (`w`).
let cls: [&[u8]; 13] = [
b"", b"1", b"a", b"A", b"\x01", b"ab", b"aA", b"Aa", b"a1", b"1a", b"abc", b"a\x01",
b"\x01a",
];
for pre in cls.iter() {
for suf in cls.iter() {
for run in 1..=14usize {
let mut v = pre.to_vec();
for k in 0..run {
v.push(b'0' + (k % 10) as u8);
}
v.extend_from_slice(suf);
mix(&v);
let mut w = pre.to_vec();
for k in 0..run {
w.push(b'0' + (k % 10) as u8);
}
w.push(b'b');
w.extend_from_slice(suf);
mix(&w);
}
}
}
// Part 4: a digit run of length 4..=12 with one position overwritten by a
// non-digit byte, tried at every position, once bare (`v`) and once with a
// leading "12" (`w`).
for plant in [b'a', b'A', 0x01u8, b'b'] {
for runlen in 4..=12usize {
for pos in 0..runlen {
let mut v: Vec<u8> = (0..runlen).map(|k| b'0' + (k % 10) as u8).collect();
v[pos] = plant;
mix(&v);
let mut w = vec![b'1', b'2'];
w.extend_from_slice(&v);
mix(&w);
}
}
}
// Part 5: every ordered triple of planted bytes after each lead, followed by
// a digit tail of length 0..=6 (`v`), and the same triple with a digit run of
// that length on both sides (`w`).
let plants: [u8; 5] = [b'a', b'A', 0x01, b'b', b'X'];
let leads: [&[u8]; 4] = [b"", b"\x01", b"a", b"12"];
for lead in leads.iter() {
for &p0 in plants.iter() {
for &p1 in plants.iter() {
for &p2 in plants.iter() {
for tail in 0..=6usize {
let mut v = lead.to_vec();
v.push(p0);
v.push(p1);
v.push(p2);
for k in 0..tail {
v.push(b'0' + (k % 10) as u8);
}
mix(&v);
let mut w = lead.to_vec();
for k in 0..tail {
w.push(b'0' + (k % 10) as u8);
}
w.push(p0);
w.push(p1);
w.push(p2);
for k in 0..tail {
w.push(b'0' + (k % 10) as u8);
}
mix(&w);
}
}
}
}
}
// Pinned result; any change in encoder output for any payload above alters it.
assert_eq!(acc, 10871198214724739770, "sm brute corpus changed");
}
// Regression fingerprint over a small digit-heavy corpus. For each payload it
// folds both the full encode result and the direct output of
// `pick_initial_mode` into one rolling u64 and compares it with a pinned value.
// The fold is order-sensitive, so corpus order and statement order must stay
// as they are unless the pinned value is regenerated.
#[test]
fn pick_initial_mode_and_encode_corpus_fingerprint() {
let mut acc: u64 = 0;
let mut corpus: Vec<Vec<u8>> = Vec::new();
// Pure digit runs of length 1..=20 (digits cycle 0..9).
for n in 1..=20usize {
corpus.push((0..n).map(|i| b'0' + (i % 10) as u8).collect());
}
// "A", "AB" and "a", each followed by a digit run of length 0..=12.
for n in 0..=12usize {
let mut v = vec![b'A'];
v.extend((0..n).map(|i| b'0' + (i % 10) as u8));
corpus.push(v);
let mut v2 = vec![b'A', b'B'];
v2.extend((0..n).map(|i| b'0' + (i % 10) as u8));
corpus.push(v2);
let mut v3 = vec![b'a'];
v3.extend((0..n).map(|i| b'0' + (i % 10) as u8));
corpus.push(v3);
}
// Digit runs of length 1..=10 followed by "AB" and by "ab".
for n in 1..=10usize {
let mut v: Vec<u8> = (0..n).map(|i| b'0' + (i % 10) as u8).collect();
v.extend_from_slice(b"AB");
corpus.push(v);
let mut v2: Vec<u8> = (0..n).map(|i| b'0' + (i % 10) as u8).collect();
v2.extend_from_slice(b"ab");
corpus.push(v2);
}
for p in &corpus {
// Encode result: folded only on success; an Err contributes nothing here.
if let Ok((mode, cws)) = encode_data_cws_mixed(p) {
acc = acc.wrapping_add(mode as u64).wrapping_mul(1099511628211);
for (i, &cw) in cws.iter().enumerate() {
acc = acc.wrapping_add((cw as u64).wrapping_mul(i as u64 + 7));
}
acc = acc.wrapping_add(cws.len() as u64).wrapping_mul(31);
}
// Initial-mode decision on the same payload after widening to i16 and
// passing it through `insert_fn4_markers`; folded unconditionally.
let msg = insert_fn4_markers(&p.iter().map(|&b| i16::from(b)).collect::<Vec<_>>());
let (na, nb) = compute_lookahead(&msg);
let (cset, mode, idx, pre) = pick_initial_mode(&msg, &na, &nb);
acc = acc
.wrapping_mul(31)
.wrapping_add(cset as u64)
.wrapping_mul(131)
.wrapping_add(mode as u64)
.wrapping_mul(137)
.wrapping_add(idx as u64);
for (i, &cw) in pre.iter().enumerate() {
acc = acc.wrapping_add((cw as u64).wrapping_mul(i as u64 + 3));
}
}
// Pinned result for the corpus above.
assert_eq!(acc, 7864529928009818353, "corpus fp changed");
}
/// Encodes `payload` and condenses the result into `(mode, codeword count, sum)`.
///
/// `sum` is the wrapping sum of every codeword multiplied by its 1-based
/// position times 2_654_435_761, so it reacts to both value and order changes.
///
/// Panics with "encode ok" when the payload cannot be encoded.
fn cws_fingerprint(payload: &[u8]) -> (u16, usize, u64) {
    let (mode, cws) = encode_data_cws_mixed(payload).expect("encode ok");
    let weighted_sum = cws.iter().enumerate().fold(0u64, |sum, (i, &cw)| {
        let weight = (i as u64).wrapping_add(1).wrapping_mul(2_654_435_761);
        sum.wrapping_add((cw as u64).wrapping_mul(weight))
    });
    (mode, cws.len(), weighted_sum)
}
#[test]
fn encode_data_cws_mixed_full_pinning() {
    // Each row pins the `cws_fingerprint` triple (mode, codeword count,
    // position-weighted codeword sum) for one representative payload.
    let pinned: &[(&str, &[u8], (u16, usize, u64))] = &[
        ("alpha-only-B", b"ABCDEF", (1, 6, 2025334485643)),
        ("digit-only-C", b"12345678", (2, 4, 1486484026160)),
        ("mixed-digit-mid", b"AB12CD34EF", (4, 10, 5133678761774)),
        ("single-char", b"A", (1, 1, 87596380113)),
        ("ctrl-A-mode", b"\x01\x02BC", (0, 4, 1165297299079)),
        ("longer-mixed", b"Hello12345World", (1, 14, 18246591421114)),
        ("alpha-then-dig", b"ABC123", (1, 6, 1268820293758)),
        ("dig-then-alpha", b"123ABC", (5, 6, 2322631290875)),
        ("odd-digit-run", b"AB123CD", (1, 7, 2073114329341)),
        ("lowercase-B", b"abcdef", (1, 6, 3809115317035)),
    ];
    for &(name, payload, expected) in pinned {
        let actual = cws_fingerprint(payload);
        assert_eq!(actual, expected, "fingerprint changed for {name}");
    }
}
#[test]
fn encode_data_cws_mixed_state_machine_fingerprint_pinned() {
    // Condenses an encode result into (mode, codeword count, wrapping sum of
    // codeword * 1-based position * 2_654_435_761).
    fn fp(mode: u16, cws: &[u16]) -> (u16, usize, u64) {
        let weighted = cws.iter().zip(1u64..).fold(0u64, |sum, (&cw, pos)| {
            sum.wrapping_add(u64::from(cw).wrapping_mul(pos.wrapping_mul(2_654_435_761)))
        });
        (mode, cws.len(), weighted)
    }

    // One payload per transition named in the tag: a* start with a control
    // byte, b* with lowercase text, c* with digits, x* are extra cases.
    let table: &[(&str, &[u8], (u16, usize, u64))] = &[
        ("a1_pure_A", b"\x01\x02ABC", FP_SM_A1_PURE_A),
        ("a2_SB1_from_A", b"\x01ABCaDEF", FP_SM_A2_SB1),
        ("a3_SB2_from_A", b"\x01ABCabDEF", FP_SM_A3_SB2),
        ("a4_SWB_from_A", b"\x01ABCabcde", FP_SM_A4_SWB),
        ("a5_SC2_from_A", b"\x01AB1234CD", FP_SM_A5_SC2),
        ("a6_SC3_from_A", b"\x01AB123456CD", FP_SM_A6_SC3),
        ("a7_SWC_from_A", b"\x01AB12345678", FP_SM_A7_SWC),
        ("b1_pure_B", b"abcdef", FP_SM_B1_PURE_B),
        ("b2_SA1_from_B", b"abc\x01def", FP_SM_B2_SA1),
        ("b3_SA2_from_B", b"abc\x01\x02def", FP_SM_B3_SA2),
        ("b4_SWA_from_B", b"abc\x01\x02\x03\x04\x05", FP_SM_B4_SWA),
        ("b5_SC2_from_B", b"ab1234cd", FP_SM_B5_SC2),
        ("b6_SC3_from_B", b"ab123456cd", FP_SM_B6_SC3),
        ("b7_SWC_from_B", b"ab12345678", FP_SM_B7_SWC),
        ("c1_pure_C", b"12345678", FP_SM_C1_PURE_C),
        ("c2_SB1_from_C", b"12a3456", FP_SM_C2_SB1),
        ("c3_SB2_from_C", b"12ab345", FP_SM_C3_SB2),
        ("c4_SB3_from_C_v1", b"12abc345", FP_SM_C4_SB3_V1),
        ("c5_SB3_from_C_v2", b"12abc34", FP_SM_C5_SB3_V2),
        ("c6_SWA_from_C", b"1234ABC", FP_SM_C6_SWA),
        ("c7_SWB_from_C", b"1234abc", FP_SM_C7_SWB),
        ("x1_single_ctrl_A", b"\x01", FP_SM_X1_SINGLE_A),
        // `as_bytes()` yields UTF-8, so U+0080 and U+00C1 each contribute two
        // high bytes (0xC2 0x80 and 0xC3 0x81), not one.
        ("x2_FN4_in_A", "A\u{0080}".as_bytes(), FP_SM_X2_FN4_A),
        ("x3_FN4_round_B", "A\u{00c1}B".as_bytes(), FP_SM_X3_FN4_RT),
    ];
    for &(tag, input, want) in table {
        let got = match encode_data_cws_mixed(input) {
            Ok((mode, cws)) => fp(mode, &cws),
            Err(e) => panic!("encode_data_cws_mixed({tag}) must succeed; got Err: {e:?}"),
        };
        assert_eq!(got, want, "fingerprint changed for {tag}");
    }
}
// Pinned fingerprints consumed by
// `encode_data_cws_mixed_state_machine_fingerprint_pinned`. Each triple is
// (mode, codeword count, position-weighted codeword sum) as produced by that
// test's `fp` helper. The A*/B*/C*/X* suffixes mirror the case tags in the
// test's table. Regenerate a value only when the encoder's output for that
// case is meant to change.
const FP_SM_A1_PURE_A: (u16, usize, u64) = (0, 5, 1611242506927);
const FP_SM_A2_SB1: (u16, usize, u64) = (0, 9, 5715000193433);
const FP_SM_A3_SB2: (u16, usize, u64) = (0, 10, 7235991884486);
const FP_SM_A4_SWB: (u16, usize, u64) = (0, 10, 9457754616443);
const FP_SM_A5_SC2: (u16, usize, u64) = (0, 8, 3848931853450);
const FP_SM_A6_SC3: (u16, usize, u64) = (0, 9, 5088553353837);
const FP_SM_A7_SWC: (u16, usize, u64) = (0, 8, 5067317867749);
// Cases whose payload starts with lowercase text.
const FP_SM_B1_PURE_B: (u16, usize, u64) = (1, 6, 3809115317035);
const FP_SM_B2_SA1: (u16, usize, u64) = (1, 8, 6811282162726);
const FP_SM_B3_SA2: (u16, usize, u64) = (1, 9, 8475613384873);
const FP_SM_B4_SWA: (u16, usize, u64) = (1, 9, 8380053697477);
const FP_SM_B5_SC2: (u16, usize, u64) = (4, 7, 4637299274467);
const FP_SM_B6_SC3: (u16, usize, u64) = (4, 8, 6004333691382);
const FP_SM_B7_SWC: (u16, usize, u64) = (4, 6, 2965004745037);
// Cases whose payload starts with digits.
const FP_SM_C1_PURE_C: (u16, usize, u64) = (2, 4, 1486484026160);
const FP_SM_C2_SB1: (u16, usize, u64) = (2, 5, 2205836117391);
const FP_SM_C3_SB2: (u16, usize, u64) = (2, 6, 2781848677528);
const FP_SM_C4_SB3_V1: (u16, usize, u64) = (2, 8, 3790534266708);
const FP_SM_C5_SB3_V2: (u16, usize, u64) = (2, 6, 3243720499942);
const FP_SM_C6_SWA: (u16, usize, u64) = (2, 6, 2367756698812);
const FP_SM_C7_SWB: (u16, usize, u64) = (2, 6, 3641885864092);
// Extra cases: a lone control byte and payloads containing high bytes.
const FP_SM_X1_SINGLE_A: (u16, usize, u64) = (0, 1, 172538324465);
const FP_SM_X2_FN4_A: (u16, usize, u64) = (0, 5, 2816356342421);
const FP_SM_X3_FN4_RT: (u16, usize, u64) = (0, 6, 3379096723753);
#[test]
fn code16k_sentinel_consts_pinned() {
    // The sentinels share the i16 message space with real bytes, so their
    // exact negative values are pinned here: (name, constant, required value).
    let pinned = [
        ("SWA", SWA, -1),
        ("SWB", SWB, -2),
        ("SWC", SWC, -3),
        ("SA1", SA1, -4),
        ("SB1", SB1, -5),
        ("SC1", SC1, -6),
        ("SA2", SA2, -7),
        ("SB2", SB2, -8),
        ("SC2", SC2, -9),
        ("PAD", PAD, -10),
        ("SB3", SB3, -11),
        ("SC3", SC3, -12),
        ("FN1", FN1, -13),
        ("FN2", FN2, -14),
        ("FN3", FN3, -15),
        ("FN4", FN4, -16),
    ];
    for (name, actual, required) in pinned {
        assert_eq!(actual, required, "{name} sentinel must remain {required}");
    }
}
#[test]
fn code16k_equivalence_notes() {
    // Collection of small invariants that justify treating certain code
    // variants in the encoder as equivalent.

    // Widens raw bytes to i16 and applies FN4 marker insertion.
    let prepare = |raw: &[u8]| {
        insert_fn4_markers(&raw.iter().copied().map(i16::from).collect::<Vec<_>>())
    };

    // The lookahead tables carry a 9999 sentinel one past the end, and both
    // ordering predicates are false there.
    for raw in [&b"\x01ABC"[..], b"abc", b"a\x01b", b"12ab", b"ABCDE"] {
        let msg = prepare(raw);
        let (na, nb) = compute_lookahead(&msg);
        let n = msg.len();
        assert_eq!(na[n], 9999, "next_anotb sentinel at len");
        assert_eq!(nb[n], 9999, "next_bnota sentinel at len");
        assert!(!abeforeb(n, &na, &nb), "abeforeb(len) must be false");
        assert!(!bbeforea(n, &na, &nb), "bbeforea(len) must be false");
    }

    // Scanning from the end of the message finds nothing.
    for raw in [&b"12"[..], b"12a", b"abc34"] {
        let msg = prepare(raw);
        let end = msg.len();
        assert_eq!(numsscr(&msg, end), (0, 0), "numsscr(len) is (0,0)");
    }

    // Integer division: for odd n, dropping one before halving changes nothing.
    for n in (0..17).map(|k| 2 * k + 1) {
        assert_eq!((n - 1) / 2, n / 2, "odd n: (n-1)/2 == n/2 for n={n}");
    }

    // Pre-reserving capacity has no effect on a vector's contents.
    {
        let mut grown: Vec<u16> = Vec::new();
        let mut preallocated: Vec<u16> = Vec::with_capacity(999);
        for v in [1u16, 2, 3, 103, 104] {
            grown.push(v);
            preallocated.push(v);
        }
        assert_eq!(grown, preallocated, "reserve must not affect contents");
    }

    // A three-byte set-B run inside set C goes through SB3.
    let encoded = encode_data_cws_mixed(b"12aaA34").expect("ok");
    assert_eq!(encoded.0, MODE_C_FROM_START);
    assert_eq!(
        encoded.1,
        vec![12, SB3_FROM_C, 65, 65, 33, 34],
        "'12aaA34' shifts the 3-B-byte run 'aaA' to C via SB3 (106) \
         then resumes the '34' pair — the layout both SB3-in-C variants \
         encode identically"
    );

    // The second component returned by numsscr never runs past the input.
    let probe: [i16; 4] = [b'1' as i16, b'2' as i16, b'3' as i16, b'4' as i16];
    (0..probe.len()).for_each(|p| {
        let (_, s) = numsscr(&probe, p);
        assert!(
            p + s <= probe.len(),
            "numsscr(p).s can never exceed the bytes remaining from p"
        );
    });
}
}