1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
//! `gs1dotcode` — DotCode that carries GS1 Application Identifier
//! data.
//!
//! Mirrors BWIPP's `bwipp_gs1dotcode` (bwip-js line 42719+): a thin
//! wrapper that
//!
//! 1. parses the input as a GS1 element string `(NN)data(MM)data…`
//! via [`crate::util::gs1::parse`], validating both AI syntax
//! and the GS1 spec's per-AI length / character / requisite
//! rules,
//! 2. flattens the elements into a byte stream with a leading FNC1
//! (`0x1D`) and FNC1 separators after every variable-length AI
//! except the last (via [`crate::util::gs1::encode_with_fnc1`]),
//! and
//! 3. lifts that byte stream into the marker-aware
//! `&[i16]` form DotCode's [`crate::symbology::dotcode::encode_with_markers`]
//! consumes — every `0x1D` separator becomes the
//! [`crate::symbology::dotcode::FN1`] marker (codeword 107 in the
//! output), every other byte becomes its positive `i16` value.
//!
//! Reference: BWIPP `bwipp_gs1dotcode`, ISO/IEC 15434, GS1 General
//! Specifications §7.
use crate::error::Error;
use crate::symbology::dotcode::{self, DotCodeSymbol, FN1};
use crate::util::gs1;
/// Encode a GS1 element string `(NN)data(MM)data…` as a DotCode
/// symbol. Unlike [`dotcode::encode`] (which takes raw bytes), this
/// entry point performs full GS1 AI parsing + validation before
/// driving the DotCode state machine, so callers always pass the
/// canonical parenthesised representation.
///
/// # Errors
///
/// * `Error::InvalidData` if [`gs1::parse`] rejects the input (bad
/// AI syntax, unknown AI, data violating the AI spec, etc.).
/// * Whatever [`dotcode::encode_with_markers`] surfaces (typically
/// `InvalidData` for payloads that exceed the substrate's nw>112
/// threshold, etc.).
///
/// # Examples
///
/// Drive through the public `Symbology::Gs1DotCode` dispatch
/// (this module is `pub(crate)`; the canonical entry point is
/// [`crate::Symbology::Gs1DotCode`]):
///
/// ```
/// use bwipp::{Symbology, Options};
/// let sym = Symbology::Gs1DotCode;
/// let _enc = sym.encode("(01)04012345123456", &Options::default()).unwrap();
/// ```
pub fn encode(input: &[u8]) -> Result<DotCodeSymbol, Error> {
// Parse GS1 AIs. `gs1::parse` takes &str — convert the byte
// slice via from_utf8; non-UTF-8 input is by definition not a
// valid GS1 element string (BWIPP rejects upstream too).
let text = std::str::from_utf8(input)
.map_err(|_| Error::InvalidData("gs1dotcode: input is not valid UTF-8".to_string()))?;
let elements = gs1::parse(text).map_err(|e| Error::InvalidData(format!("gs1dotcode: {e}")))?;
// Build the flat FNC1-separated byte stream (leading FNC1 +
// AIs + inter-element separators where the AI is variable-
// length and another AI follows). Then lift to the i16+FN1
// form DotCode consumes.
let bytes = gs1::encode_with_fnc1(&elements);
let mut stream: Vec<i16> = Vec::with_capacity(bytes.len());
for &b in &bytes {
if b == gs1::FNC1 {
stream.push(FN1);
} else {
stream.push(i16::from(b));
}
}
dotcode::encode_with_markers(&stream)
}
#[cfg(test)]
mod tests {
use super::*;
/// Single-AI payload `(01)04012345123456` (GTIN-14).
///
/// The expected logical codewords `[1, 4, 1, 23, 45, 12, 34, 56]`
/// are the eight digit pairs of `"01"` followed by the 14 GTIN
/// digits; per the recorded BWIPP `bwipp_gs1dotcode` reference the
/// leading FN1 is absorbed at segment start, so it contributes no
/// codeword of its own. The reference values were recorded with
/// `tools/oracle-dotcode-fnc.js` (bwip-js reported a 17-entry
/// message starting with the FN1 marker followed by the ASCII
/// digits).
#[test]
fn encode_gtin_14_matches_bwip_js_logical_cws() {
    let parsed = gs1::parse("(01)04012345123456").unwrap();
    let flat = gs1::encode_with_fnc1(&parsed);

    // Same FNC1 -> FN1 lift that `encode` applies, spelled out here
    // so the logical codewords can be inspected directly.
    let mut stream: Vec<i16> = Vec::with_capacity(flat.len());
    for &byte in &flat {
        stream.push(if byte == gs1::FNC1 { FN1 } else { i16::from(byte) });
    }

    // The stream must open with the FN1 marker.
    assert_eq!(stream[0], FN1);

    let cws = dotcode::encode_message_with_markers(&stream).unwrap();
    assert_eq!(cws, vec![1, 4, 1, 23, 45, 12, 34, 56]);
}
/// GTIN-14 followed by a variable-length batch/lot:
/// `(01)04012345123456(10)ABC123`.
///
/// Reference codewords recorded from BWIPP, kept here for
/// orientation (the assertions below pin only the symbol geometry,
/// not this list):
/// `[1, 4, 1, 23, 45, 12, 34, 56, 10, 106, 33, 34, 35, 17, 18, 19]`
/// * eight pairs for `"01"` + GTIN-14,
/// * `10` for the digits of AI "10",
/// * `106` latches to set B,
/// * `33, 34, 35` are the set-B values of `A`, `B`, `C`,
/// * `17, 18, 19` are the set-B values of `1`, `2`, `3` (the
///   trailing "123" is too short to justify returning to set C).
///
/// AI (01) is fixed-length, so no FN1 separator sits between the
/// (01) data and (10).
#[test]
fn encode_gtin_with_lot_matches_bwip_js_logical_cws() {
    // Stage 11.A8c (cont) — each failure message names the GTIN-14 +
    // LOT input and the geometry that was expected.
    let symbol = encode(b"(01)04012345123456(10)ABC123").unwrap();

    let pix_count = symbol.pixs.len();
    assert!(
        !symbol.pixs.is_empty(),
        "encode(\"(01)04012345123456(10)ABC123\") (GTIN-14 + LOT \"ABC123\") must produce non-empty DotCode pixs vec; got len={}",
        pix_count
    );

    let rows = symbol.rows;
    assert_eq!(
        rows, 19,
        "expected DotCode rows=19 for GTIN-14+LOT \"ABC123\"; got {}",
        rows
    );

    let columns = symbol.columns;
    assert_eq!(
        columns, 28,
        "expected DotCode columns=28 for GTIN-14+LOT \"ABC123\"; got {}",
        columns
    );
}
/// GTIN-14 plus expiry date: `(01)04012345123456(17)260520`.
///
/// Both AIs are fixed-length, so no FN1 goes between them. The
/// BWIPP reference is twelve plain digit pairs:
/// `[1, 4, 1, 23, 45, 12, 34, 56, 17, 26, 5, 20]`.
#[test]
fn encode_gtin_with_expiry_matches_bwip_js_logical_cws() {
    let parsed = gs1::parse("(01)04012345123456(17)260520").unwrap();
    let flat = gs1::encode_with_fnc1(&parsed);

    // Lift FNC1 bytes to the FN1 marker; everything else is widened
    // to `i16` unchanged.
    let mut stream: Vec<i16> = Vec::with_capacity(flat.len());
    for &byte in &flat {
        let lifted = if byte == gs1::FNC1 { FN1 } else { i16::from(byte) };
        stream.push(lifted);
    }

    let cws = dotcode::encode_message_with_markers(&stream).unwrap();
    assert_eq!(cws, vec![1, 4, 1, 23, 45, 12, 34, 56, 17, 26, 5, 20]);
}
/// Empty input is rejected (matches BWIPP / `gs1::parse`).
///
/// Stage 11.A8c — together with the invalid-AI and non-UTF-8 tests
/// this pins the rejection paths at the top of `encode` by their
/// diagnostic text, not just by the `Error::InvalidData` variant,
/// so that mutants which
/// * drop the `"gs1dotcode: "` prefix,
/// * swap the `from_utf8` and `gs1::parse` error wrappers,
/// * drop the `{e}` interpolation in `gs1dotcode: {e}`, or
/// * change ParseError::Empty's message string
/// are all caught.
#[test]
fn encode_empty_rejected() {
    // Stage 11.A8c (cont) — the fallback arm names this specific
    // input, so a mutation that routes the empty-input path to a
    // different `Error` variant fails with a self-explanatory
    // message.
    let rejection = encode(b"").unwrap_err();
    let msg = match rejection {
        Error::InvalidData(text) => text,
        err => panic!(
            "encode(b\"\") must reject as Err(InvalidData(empty)); got {err:?} (mutation re-routed gs1dotcode wrapper Empty path)"
        ),
    };

    let has_prefix = msg.starts_with("gs1dotcode: ");
    assert!(has_prefix, "wrapper prefix must be present; got {msg:?}");

    let reports_empty = msg.contains("input is empty");
    assert!(
        reports_empty,
        "must carry ParseError::Empty message; got {msg:?}"
    );

    let leaks_utf8 = msg.contains("UTF-8");
    assert!(
        !leaks_utf8,
        "empty path must not leak the UTF-8 message; got {msg:?}"
    );
}
/// A malformed AI is rejected, and the diagnostic names both the
/// failure kind and the offending AI text.
#[test]
fn encode_invalid_ai_rejected() {
    let rejection = encode(b"(99999)X").unwrap_err();
    let msg = match rejection {
        Error::InvalidData(text) => text,
        err => panic!(
            "encode(b\"(99999)X\") must reject as Err(InvalidData(invalid AI)); got {err:?} (mutation re-routed gs1dotcode wrapper InvalidAi path)"
        ),
    };

    let has_prefix = msg.starts_with("gs1dotcode: ");
    assert!(has_prefix, "wrapper prefix must be present; got {msg:?}");

    // "99999" has 5 digits; an AI must be 2-4 digits, hence
    // ParseError::InvalidAi.
    let has_tag = msg.contains("invalid AI");
    let has_ai_text = msg.contains("99999");
    assert!(
        has_tag && has_ai_text,
        "must carry the offending AI string and 'invalid AI' tag; got {msg:?}"
    );

    let leaks_utf8 = msg.contains("UTF-8");
    let leaks_empty = msg.contains("input is empty");
    assert!(
        !leaks_utf8 && !leaks_empty,
        "invalid-AI path must not leak other diagnostics; got {msg:?}"
    );
}
/// Input that is not UTF-8 must come back as a clean
/// `InvalidData` error instead of panicking.
#[test]
fn encode_non_utf8_rejected() {
    let raw: [u8; 6] = [0xFF, 0xFE, b'(', b'0', b'1', b')'];
    let rejection = encode(&raw).unwrap_err();
    let msg = match rejection {
        Error::InvalidData(text) => text,
        err => panic!(
            "encode(&[0xFF, 0xFE, ...]) must reject as Err(InvalidData(non-UTF-8)); got {err:?} (mutation re-routed gs1dotcode wrapper UTF-8 path)"
        ),
    };

    let has_prefix = msg.starts_with("gs1dotcode: ");
    assert!(has_prefix, "wrapper prefix must be present; got {msg:?}");

    // Stage 11.A8c (cont) — check the whole predicate
    // `input is not valid UTF-8` (the literal used by the
    // `from_utf8` error mapping in `encode`), not just
    // `not valid UTF-8`. The shorter substring would survive a
    // mutation that drops `input is` or swaps `input` for another
    // noun; the full predicate locks both.
    let has_full_predicate = msg.contains("input is not valid UTF-8");
    assert!(
        has_full_predicate,
        "must carry the full UTF-8 diagnostic predicate; got {msg:?}"
    );

    let mentions_parser = msg.contains("GS1 parse:");
    assert!(
        !mentions_parser,
        "non-UTF-8 path must short-circuit before gs1::parse; got {msg:?}"
    );
}
/// Stage 11.A8c — pin the FNC1→FN1 lift step in `encode`.
///
/// The happy-path tests only use payloads whose flat byte stream
/// contains the leading FNC1 and no inter-element separators. A
/// variable-length AI followed by another AI yields an INTERNAL
/// FNC1 as well, and the lift must turn both the leading and the
/// internal FNC1 byte into the FN1 marker, not just the first.
///
/// Payload: `(10)A(11)260520` — variable AI 10 (lot "A") followed
/// by fixed AI 11 (date "260520"). `encode_with_fnc1` produces
///   [FNC1, '1','0','A', FNC1, '1','1','2','6','0','5','2','0']
///
/// Mutations this is meant to catch:
///   - `b == gs1::FNC1` → `b != gs1::FNC1`: every non-FNC1 byte
///     becomes FN1 and FNC1 becomes the positive value 29 (0x1D);
///     the resulting stream would error out in
///     dotcode::encode_with_markers.
///   - `FN1` → another marker constant (`FN2`/`FN3`/etc.): dotcode
///     would emit a different control codeword.
///   - Dropping the `if`/`else` and always pushing `i16::from(b)`:
///     FNC1 (0x1D = 29) would become positive codeword 29,
///     scrambling the encoder mode.
#[test]
fn encode_payload_with_internal_fnc1_separator_lifts_to_fn1_markers() {
    // First confirm the flat byte stream really has FNC1 at
    // positions 0 (leading) and 4 (right after 'A').
    let parsed = gs1::parse("(10)A(11)260520").unwrap();
    let bytes = gs1::encode_with_fnc1(&parsed);

    let leading = bytes[0];
    assert_eq!(leading, gs1::FNC1, "leading FNC1 sentinel must be present");

    let separator = bytes[4];
    assert_eq!(
        separator,
        gs1::FNC1,
        "internal FNC1 separator must be inserted after variable AI 10's data"
    );

    // Everything around the two FNC1 bytes is AI digits + data.
    assert_eq!(&bytes[1..4], b"10A");
    assert_eq!(&bytes[5..], b"11260520");

    // Then run the real entry point: it must yield a symbol, which
    // requires BOTH FNC1 bytes to have been lifted to FN1 markers
    // that dotcode::encode_with_markers accepts.
    let symbol = encode(b"(10)A(11)260520")
        .expect("encode must succeed; payload with internal FNC1 is well-formed GS1 + DotCode");
    assert!(!symbol.pixs.is_empty(), "symbol must have non-empty pixs");
    assert!(symbol.rows > 0 && symbol.columns > 0);
}
}