datamatrix/lib.rs
1//! Data Matrix (ECC 200) decoding and encoding library with an optimizing encoder.
2//!
3//! # Usage example
4//!
5//! ```rust
6//! # use datamatrix::{DataMatrix, SymbolList};
7//! let code = DataMatrix::encode(
8//!     b"Hello, World!",
9//!     SymbolList::default(),
10//! )?;
11//! print!("{}", code.bitmap().unicode());
12//! # Ok::<(), datamatrix::data::DataEncodingError>(())
13//! ```
14//!
15//! This toy example will print a Data Matrix using Unicode block characters.
16//! For guidance on how to generate other output formats see the helper functions
17//! defined for the [Bitmap struct](Bitmap), or the `examples/` directory of
18//! this project.
19//!
20//! You can specify other symbol sizes, see [SymbolList] for details.
21//!
22//! # Character encoding notes for Data Matrix
23//!
24//! > **TL;DR** Data should be printable ASCII because many decoders lack proper charset
25//! > handling. Latin 1 is the next best choice, otherwise you rely on auto detection hacks of
26//! > decoders. This does not apply if you have control over decoding or if you are not overly paranoid.
27//!
28//! This full section also applies to QR codes.
29//!
30//! Be careful when encoding strings which contain characters outside the printable ASCII range.
31//! While indicating for example UTF-8 encoding is possible through [ECI](https://en.wikipedia.org/wiki/Extended_Channel_Interpretation),
32//! we doubt that many decoders around the world implement this.
33//! Also notice that some decoders are used as a keyboard source (e.g., handheld scanners)
34//! which _may_ be constrained by platform/locale specific keyboard layouts with
35//! limited Unicode input capabilities. We therefore recommend to stay within
36//! the _printable_ ASCII characters unless you have control over the full encoding
37//! and decoding process.
38//!
39//! The Data Matrix specification defines ISO 8859-1 (Latin-1) as the standard
40//! charset. Our tests indicate that some decoders (smartphone scanner apps) are
41//! reluctant to follow this and return binary output if there are characters in
42//! the upper range, which is a safe choice. Unfortunately, some decoders try to guess the charset
43//! or just always assume UTF-8.
44//!
45//! The full 8-bit range can be encoded and
46//! the decoder will also return this exact input. So the problems mentioned above
47//! are related to the _interpretation_ of the data and possible input limitations
48//! in the case of handheld scanners.
49//!
50//! # Decoding
51//!
52//! Assuming you have detected a Data Matrix you may decode the message like
53//! this:
54//!
55//! ```rust
56//! # use datamatrix::{SymbolSize, placement::MatrixMap, DataMatrix};
57//! # let codewords1 = [73, 239, 116, 130, 175, 52, 19, 40, 179, 242, 106, 105, 97, 98, 35, 165, 137, 102, 203, 106, 207, 48, 186, 66];
58//! # let map = MatrixMap::new_with_codewords(&codewords1, SymbolSize::Square16);
59//! # let pixels: Vec<bool> = map.bitmap().bits().into();
60//! // let pixels: Vec<bool> = …
61//! let width = 16;
62//! let data = DataMatrix::decode(&pixels, width)?;
63//! assert_eq!(&data, b"Hello, World!");
64//! # Ok::<(), datamatrix::DecodingError>(())
65//! ```
66//!
67//! # Current limitations
68//!
69//! No visual detection is currently implemented, but the decoding backend
70//! is done and exposed in the API. All that is missing is a detector to extract a matrix of true and false values
71//! from an image. A general purpose detector is planned for the future, though.
72//!
73//! Other limitations: Currently there is only [limited support for GS1](DataMatrix::encode_gs1)/FNC1 character encoding,
74//! [limited ECI encoding](DataMatrix::encode_str), no structured append, and no reader programming. The decoding output
75//! format specified in ISO/IEC 15424 is also not implemented (metadata, ECI, etc.), if you have a use case for this
76//! please open an issue.
77
78#![no_std]
79extern crate alloc;
80
81mod decodation;
82mod encodation;
83pub mod errorcode;
84pub mod placement;
85mod symbol_size;
86
87pub mod data;
88
89pub use encodation::EncodationType;
90pub use symbol_size::{SymbolList, SymbolSize};
91
92use alloc::vec::Vec;
93use flagset::FlagSet;
94
95use encodation::DataEncodingError;
96use placement::{Bitmap, MatrixMap};
97
98#[cfg(test)]
99use pretty_assertions::assert_eq;
100
101/// Encoded Data Matrix.
102#[derive(Clone, Debug, PartialEq, Eq)]
103pub struct DataMatrix {
104    /// Size of the encoded Data Matrix
105    pub size: SymbolSize,
106    codewords: Vec<u8>,
107    num_data_codewords: usize,
108}
109
110#[derive(Debug, Clone, PartialEq, Eq)]
111/// Errors when decoding a Data Matrix.
112pub enum DecodingError {
113    /// Signals that the pixels could not be mapped to a [symbol size](SymbolSize),
114    /// either because no symbol with matching dimensions was found or because the
115    /// alignment pattern was not correct.
116    PixelConversion(placement::BitmapConversionError),
117    /// Signals that the [error correction](errorcode) was either not done correctly when the
118    /// Data Matrix was encoded or there are too many detection errors, i.e.,
119    /// black or white squares that are wrong.
120    ErrorCorrection(errorcode::ErrorDecodingError),
121    DataDecoding(decodation::DataDecodingError),
122}
123
124impl DataMatrix {
125    /// Decode a Data Matrix from its pixels representation.
126    ///
127    /// The alignment pattern must be included. The argument `width` denotes the number of
128    /// pixels in one row.
129    ///
130    /// The pixels are expected to be given in row-major order, i.e., the top
131    /// row of pixels comes first, then the second row and so on.
132    ///
133    /// The Data Matrix may start with a `FNC1` codeword marking it as a GS1 Data Matrix. The ISO standard
134    /// demands from a scanner to prepend the symbology identifier `]d2` in this case. This is _not_ implemented
135    /// here, the decoder currently only ignores the `FNC1` codeword at the beginning. There are some ideas to
136    /// implement more detailed decoder output if there is demand.
137    pub fn decode(pixels: &[bool], width: usize) -> Result<Vec<u8>, DecodingError> {
138        let (matrix_map, size) =
139            MatrixMap::try_from_bits(pixels, width).map_err(DecodingError::PixelConversion)?;
140        let mut codewords = matrix_map.codewords();
141        errorcode::decode_error(&mut codewords, size).map_err(DecodingError::ErrorCorrection)?;
142        decodation::decode_data(&codewords[..size.num_data_codewords()])
143            .map_err(DecodingError::DataDecoding)
144    }
145
146    /// Get the data in encoded form.
147    ///
148    /// Error correction is included.
149    /// See [data_codewords()](Self::data_codewords) if you only need the data.
150    pub fn codewords(&self) -> &[u8] {
151        &self.codewords
152    }
153
154    /// Get the codewords that encode the data.
155    ///
156    /// This is a prefix of the codewords returned by [codewords()](Self::codewords).
157    pub fn data_codewords(&self) -> &[u8] {
158        &self.codewords[..self.num_data_codewords]
159    }
160
161    /// Create an abstract bitmap representing the Data Matrix.
162    pub fn bitmap(&self) -> Bitmap<bool> {
163        MatrixMap::new_with_codewords(&self.codewords, self.size).bitmap()
164    }
165
166    /// Encode data as a Data Matrix (ECC200).
167    ///
168    /// This is wrapper for [DataMatrixBuilder::encode].
169    pub fn encode<I: Into<SymbolList>>(
170        data: &[u8],
171        symbol_list: I,
172    ) -> Result<DataMatrix, DataEncodingError> {
173        DataMatrixBuilder::new()
174            .with_symbol_list(symbol_list)
175            .encode(data)
176    }
177
178    /// Encode a string as a Data Matrix (ECC200).
179    ///
180    /// This is wrapper for [DataMatrixBuilder::encode_str].
181    pub fn encode_str<I: Into<SymbolList>>(
182        text: &str,
183        symbol_list: I,
184    ) -> Result<DataMatrix, DataEncodingError> {
185        DataMatrixBuilder::new()
186            .with_symbol_list(symbol_list)
187            .encode_str(text)
188    }
189
190    /// Encode data as a GS1 Data Matrix.
191    ///
192    /// The only difference to [encode()](Self::encode) is that
193    /// the `FNC1` codeword is added in the first
194    /// position.
195    ///
196    /// Encoding `FNC1` in later positions is not implemented as of now.
197    ///
198    /// ```rust
199    /// # use datamatrix::{DataMatrix, SymbolList, data::DataEncodingError};
200    /// // use "\x1D" (ASCII GS control sequence) to concatenate element strings
201    /// let data = b"01034531200000111719112510ABCD1234\x1D2110";
202    /// let data_matrix = DataMatrix::encode_gs1(data, SymbolList::default())?;
203    /// let bitmap = data_matrix.bitmap();
204    /// # Ok::<(), DataEncodingError>(())
205    /// ```
206    pub fn encode_gs1<I: Into<SymbolList>>(
207        data: &[u8],
208        symbol_list: I,
209    ) -> Result<DataMatrix, DataEncodingError> {
210        DataMatrixBuilder::new()
211            .with_symbol_list(symbol_list)
212            .with_fnc1_start(true)
213            .encode(data)
214    }
215}
216
217#[derive(Clone, Debug, PartialEq, Eq)]
218/// Builder for encoding a Data Matrix with more control.
219pub struct DataMatrixBuilder {
220    encodation_types: FlagSet<EncodationType>,
221    symbol_list: SymbolList,
222    use_macros: bool,
223    fnc1_start: bool,
224}
225
226impl DataMatrixBuilder {
227    pub fn new() -> Self {
228        Self {
229            encodation_types: EncodationType::all(),
230            symbol_list: SymbolList::default(),
231            use_macros: true,
232            fnc1_start: false,
233        }
234    }
235
236    /// Specify which encodation can be used.
237    ///
238    /// By default all encodation types are enabled.
239    ///
240    /// # Example
241    ///
242    /// ```rust
243    /// # use datamatrix::{DataMatrixBuilder, data::EncodationType};
244    /// let datamatrix = DataMatrixBuilder::new()
245    ///     .with_encodation_types(EncodationType::Base256 | EncodationType::Edifact)
246    ///     .encode(b"\xFAaaa")?;
247    /// Ok::<(), datamatrix::data::DataEncodingError>(())
248    /// ```
249    pub fn with_encodation_types(self, types: impl Into<FlagSet<EncodationType>>) -> Self {
250        Self {
251            encodation_types: types.into(),
252            ..self
253        }
254    }
255
256    /// Specify whether the Data Matrix shall start with a `FNC1` codeword marking
257    /// it as a GS1 Data Matrix.
258    pub fn with_fnc1_start(self, fnc1_start: bool) -> Self {
259        Self { fnc1_start, ..self }
260    }
261
262    /// Whether to use macros or not.
263    ///
264    /// This is enabled by default.
265    pub fn with_macros(self, use_macros: bool) -> Self {
266        Self { use_macros, ..self }
267    }
268
269    /// Specify the list of allowed symbols sizes.
270    ///
271    /// Uses [SymbolList::default()] by default.
272    pub fn with_symbol_list<I: Into<SymbolList>>(self, symbol_list: I) -> Self {
273        Self {
274            symbol_list: symbol_list.into(),
275            ..self
276        }
277    }
278
279    /// Encode data as a Data Matrix (ECC200).
280    ///
281    /// Please read the [module documentation](crate) for some charset notes. If you
282    /// did that and your input can be represented with the Latin 1 charset you may
283    /// use the conversion function in the [data module](crate::data). If you only
284    /// use printable ASCII you can just pass the data as is.
285    ///
286    /// If the data does not fit into the given size encoding will fail. The encoder
287    /// can automatically pick the smallest size which fits the data (see [SymbolList])
288    /// but there is an upper limit.
289    pub fn encode(self, data: &[u8]) -> Result<DataMatrix, DataEncodingError> {
290        self.encode_eci(data, None)
291    }
292
293    /// Encodes a string as a Data Matrix (ECC200).
294    ///
295    /// If the string can be converted to Latin-1, no ECI is used, otherwise
296    /// an initial UTF8 ECI is inserted. Please check if your decoder has support
297    /// for that. See the notes on the [module documentation](crate) for more details.
298    pub fn encode_str(self, text: &str) -> Result<DataMatrix, DataEncodingError> {
299        if let Some(data) = data::utf8_to_latin1(text) {
300            // string is latin1
301            self.encode_eci(&data, None)
302        } else {
303            // encode with UTF8 ECI
304            self.encode_eci(text.as_bytes(), Some(decodation::ECI_UTF8))
305        }
306    }
307
308    #[doc(hidden)]
309    pub fn encode_eci(
310        self,
311        data: &[u8],
312        eci: Option<u32>,
313    ) -> Result<DataMatrix, DataEncodingError> {
314        let (mut codewords, size) = data::encode_data_internal(
315            data,
316            &self.symbol_list,
317            eci,
318            self.encodation_types,
319            self.use_macros,
320            self.fnc1_start,
321        )?;
322        let ecc = errorcode::encode_error(&codewords, size);
323        let num_data_codewords = codewords.len();
324        codewords.extend_from_slice(&ecc);
325        Ok(DataMatrix {
326            size,
327            codewords,
328            num_data_codewords,
329        })
330    }
331}
332
333impl Default for DataMatrixBuilder {
334    fn default() -> Self {
335        Self::new()
336    }
337}
338
/// A string that is not representable in Latin-1 must survive an
/// encode/decode round trip.
#[test]
fn utf8_eci_test() {
    let text = "🥸";
    let symbol = DataMatrix::encode_str(text, SymbolList::default()).unwrap();
    let roundtrip = data::decode_str(symbol.data_codewords()).unwrap();
    assert_eq!(roundtrip, text);
}
346
/// Placing codewords into a matrix and reading them back, directly and via
/// the bitmap, must reproduce the codewords for every symbol size.
#[test]
fn test_tile_placement_forth_and_back() {
    let mut make_data = test::random_data();
    for size in SymbolList::all() {
        let codewords = make_data(size.num_codewords());

        // Direct round trip through the matrix map.
        let placed = MatrixMap::new_with_codewords(&codewords, size);
        assert_eq!(placed.codewords(), codewords);

        // Round trip through the pixel representation.
        let bitmap = placed.bitmap();
        let (restored, _size) = MatrixMap::try_from_bits(bitmap.bits(), bitmap.width()).unwrap();
        assert_eq!(restored.codewords(), codewords);
    }
}
359
/// A macro 05 message with a non Latin-1 payload is encoded as macro
/// codeword, UTF-8 ECI and Base256 data, and decodes back to the input.
#[test]
fn test_macro_str() {
    let text = "[)>\x1E05\x1D🤘\x1E\x04";
    let symbol = DataMatrix::encode_str(text, SymbolList::default()).unwrap();
    let codewords = symbol.data_codewords();

    let expected = [
        encodation::MACRO05,
        encodation::ascii::ECI,
        decodation::ECI_UTF8 as u8 + 1,
        // Base256 encoding of the four byte utf8 character plus padding
        231,
        240,
        114,
        183,
        81,
        219,
        129,
    ];
    assert_eq!(codewords, &expected);

    let decoded = data::decode_str(codewords).unwrap();
    assert_eq!(text, decoded);
}
384
/// Encoding 5000 random bytes with the default symbol list must be
/// rejected with `TooMuchOrIllegalData`.
#[test]
fn test_too_much_data() {
    let mut generator = test::random_data();
    let oversized = generator(5000);
    let outcome = DataMatrix::encode(&oversized, SymbolList::default());
    assert_eq!(outcome, Err(DataEncodingError::TooMuchOrIllegalData));
}
392
#[cfg(test)]
mod test {
    use crate::placement::MatrixMap;
    use crate::symbol_size::SymbolSize;
    use alloc::vec::Vec;

    /// Returns a generator for matrix maps filled with pseudo random bits.
    ///
    /// Every bit visited by `traverse_mut` is set from the next byte of a
    /// [random_bytes] stream, afterwards the padding is written.
    pub fn random_maps() -> impl FnMut(SymbolSize) -> MatrixMap<bool> {
        let mut next_byte = random_bytes();
        move |size| {
            let mut filled = MatrixMap::new(size);
            filled.traverse_mut(|_, bits| {
                for bit in bits {
                    // bytes in the upper half of the range map to `true`
                    *bit = next_byte() >= 128;
                }
            });
            filled.write_padding();
            filled
        }
    }

    /// Simple LCG random generator for test data generation.
    ///
    /// The state starts at zero, so every generator yields the same
    /// sequence. Each call returns the low byte of the new state.
    pub fn random_bytes() -> impl FnMut() -> u8 {
        const MULTIPLIER: u64 = 1103515245;
        const INCREMENT: u64 = 12345;
        const MODULUS: u64 = 1 << 31;

        let mut state: u64 = 0;
        move || {
            state = (MULTIPLIER * state + INCREMENT) % MODULUS;
            (state % 256) as u8
        }
    }

    /// Returns a generator producing vectors of pseudo random bytes of the
    /// requested length, all drawn from one [random_bytes] stream.
    pub fn random_data() -> impl FnMut(usize) -> Vec<u8> {
        let mut next_byte = random_bytes();
        move |len| (0..len).map(|_| next_byte()).collect()
    }
}
436
/// A GS1 symbol starts with the `FNC1` codeword and its data codewords
/// decode back to the input.
#[test]
fn test_simple_gs1() {
    let input = b"01034531200000111719112510ABCD1234\x1D2110";
    let symbol = DataMatrix::encode_gs1(input, SymbolList::default()).unwrap();

    // The very first codeword must be the FNC1 marker.
    let first = symbol.codewords()[0];
    assert_eq!(first, crate::encodation::ascii::FNC1);

    let decoded = data::decode_data(symbol.data_codewords()).unwrap();
    assert_eq!(decoded, input);
}
446}