#include "PDFDecoder.h"
#include "CharacterSet.h"
#include "DecoderResult.h"
#include "PDFDecoderResultExtra.h"
#include "ZXAlgorithms.h"
#include "ZXBigInteger.h"
#include "ZXTestSupport.h"
#include <array>
#include <cassert>
#include <sstream>
#include <utility>
namespace ZXing::Pdf417 {
/// Text Compaction sub-modes tracked by DecodeTextCompaction while it walks
/// the expanded sub-mode values.
enum class Mode {
    ALPHA,       // upper-case letters; the sub-mode decoding starts in
    LOWER,       // lower-case letters
    MIXED,       // characters looked up in MIXED_CHARS
    PUNCT,       // characters looked up in PUNCT_CHARS
    ALPHA_SHIFT, // next value is read as an upper-case letter, then the prior sub-mode is restored
    PUNCT_SHIFT, // next value is read from PUNCT_CHARS, then the prior sub-mode is restored
};
// Function codewords. The compaction routines in this file treat every value
// below TEXT_COMPACTION_MODE_LATCH (900) as a data codeword.
constexpr int TEXT_COMPACTION_MODE_LATCH = 900;
constexpr int BYTE_COMPACTION_MODE_LATCH = 901;
constexpr int NUMERIC_COMPACTION_MODE_LATCH = 902;
constexpr int MODE_SHIFT_TO_BYTE_COMPACTION_MODE = 913;
constexpr int LINKAGE_OTHER = 918;
constexpr int LINKAGE_EANUCC = 920;
constexpr int READER_INIT = 921;
constexpr int MACRO_PDF417_TERMINATOR = 922;
constexpr int BEGIN_MACRO_PDF417_OPTIONAL_FIELD = 923;
constexpr int BYTE_COMPACTION_MODE_LATCH_6 = 924;

// ECI introducers; IsECI() relies on these three being contiguous.
constexpr int ECI_USER_DEFINED = 925;
constexpr int ECI_GENERAL_PURPOSE = 926;
constexpr int ECI_CHARSET = 927;

constexpr int BEGIN_MACRO_PDF417_CONTROL_BLOCK = 928;

// NumericCompaction converts a group to decimal once it has collected this many codewords.
constexpr int MAX_NUMERIC_CODEWORDS = 15;

// Designators that follow BEGIN_MACRO_PDF417_OPTIONAL_FIELD inside a macro block
// (dispatched on in DecodeMacroBlock).
constexpr int MACRO_PDF417_OPTIONAL_FIELD_FILE_NAME = 0;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_SEGMENT_COUNT = 1;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_TIME_STAMP = 2;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_SENDER = 3;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_ADDRESSEE = 4;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_FILE_SIZE = 5;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_CHECKSUM = 6;

// Lookup tables for the Punctuation sub-mode (values 0..28) and the Mixed sub-mode (values 0..24).
static const char* PUNCT_CHARS = ";<>@[\\]_`~!\r\t,:\n-.$/\"|*()?{}'";
static const char* MIXED_CHARS = "0123456789&\r\t,:#-.$/+%*=^";

// Number of codewords holding the segment index at the start of a macro control block.
constexpr int NUMBER_OF_SEQUENCE_CODEWORDS = 2;
/// Returns true when @p code lies in the closed range
/// [ECI_USER_DEFINED, ECI_CHARSET], i.e. it is one of the three ECI codewords.
inline bool IsECI(int code)
{
    const bool belowRange = code < ECI_USER_DEFINED;
    const bool aboveRange = code > ECI_CHARSET;
    return !belowRange && !aboveRange;
}
/// Returns true when @p code ends the compaction run currently being read:
/// any of the text/byte/numeric latches (including the 6-byte latch) or one of
/// the three Macro PDF417 markers.
static bool TerminatesCompaction(int code)
{
    const bool isLatch = code == TEXT_COMPACTION_MODE_LATCH || code == BYTE_COMPACTION_MODE_LATCH
                         || code == NUMERIC_COMPACTION_MODE_LATCH || code == BYTE_COMPACTION_MODE_LATCH_6;
    const bool isMacroMarker = code == BEGIN_MACRO_PDF417_CONTROL_BLOCK || code == BEGIN_MACRO_PDF417_OPTIONAL_FIELD
                               || code == MACRO_PDF417_TERMINATOR;
    return isLatch || isMacroMarker;
}
/// Consumes the parameter codeword(s) of an ECI whose introducer @p code has
/// already been read.
///
/// @param codewords  sequence the ECI parameters are read from
/// @param codeIndex  index of the first parameter codeword
/// @param length     exclusive upper bound for @p codeIndex
/// @param code       the ECI introducer that was just consumed
/// @param result     receives the encoding switch for ECI_CHARSET
/// @return index after the ECI; unchanged when @p codeIndex is already at or
///         past @p length or @p code is not an ECI. For the non-charset ECIs
///         the returned index is not clamped to @p length.
static int ProcessECI(const std::vector<int>& codewords, int codeIndex, const int length, const int code, Content& result)
{
    if (codeIndex >= length || !IsECI(code))
        return codeIndex;

    if (code != ECI_CHARSET) {
        // These ECIs are only skipped: two parameter codewords for general purpose, one for user defined.
        const int parameterCount = code == ECI_GENERAL_PURPOSE ? 2 : 1;
        return codeIndex + parameterCount;
    }

    result.switchEncoding(ECI(codewords[codeIndex]));
    return codeIndex + 1;
}
static void DecodeTextCompaction(const std::vector<int>& textCompactionData, int length, Content& result)
{
Mode subMode = Mode::ALPHA;
Mode priorToShiftMode = Mode::ALPHA;
int i = 0;
while (i < length) {
int subModeCh = textCompactionData[i];
if (IsECI(subModeCh)) {
i = ProcessECI(textCompactionData, i + 1, length, subModeCh, result);
continue;
}
if (subModeCh == MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
i++;
while (i < length && IsECI(textCompactionData[i]))
i = ProcessECI(textCompactionData, i + 1, length, textCompactionData[i], result);
if (i < length)
result.push_back((uint8_t)textCompactionData[i++]);
continue;
}
char ch = 0;
switch (subMode) {
case Mode::ALPHA:
case Mode::LOWER:
if (subModeCh < 26) {
ch = (char)((subMode == Mode::ALPHA ? 'A' : 'a') + subModeCh);
} else if (subModeCh == 26) { ch = ' ';
} else if (subModeCh == 27 && subMode == Mode::ALPHA) { subMode = Mode::LOWER;
} else if (subModeCh == 27 && subMode == Mode::LOWER) { priorToShiftMode = subMode;
subMode = Mode::ALPHA_SHIFT;
} else if (subModeCh == 28) { subMode = Mode::MIXED;
}
else if (i + 1 < length && textCompactionData[i + 1] != MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
priorToShiftMode = subMode;
subMode = Mode::PUNCT_SHIFT;
}
break;
case Mode::MIXED:
if (subModeCh < 25) {
ch = MIXED_CHARS[subModeCh];
} else if (subModeCh == 25) { subMode = Mode::PUNCT;
} else if (subModeCh == 26) { ch = ' ';
} else if (subModeCh == 27) { subMode = Mode::LOWER;
} else if (subModeCh == 28) { subMode = Mode::ALPHA;
}
else if (i + 1 < length && textCompactionData[i + 1] != MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
priorToShiftMode = subMode;
subMode = Mode::PUNCT_SHIFT;
}
break;
case Mode::PUNCT:
if (subModeCh < 29)
ch = PUNCT_CHARS[subModeCh];
else subMode = Mode::ALPHA;
break;
case Mode::ALPHA_SHIFT:
subMode = priorToShiftMode;
if (subModeCh < 26)
ch = (char)('A' + subModeCh);
else if (subModeCh == 26) ch = ' ';
break;
case Mode::PUNCT_SHIFT:
subMode = priorToShiftMode;
if (subModeCh < 29)
ch = PUNCT_CHARS[subModeCh];
else subMode = Mode::ALPHA;
break;
}
if (ch != 0)
result.push_back(ch); i++;
}
}
/// Copies an ECI introducer and its parameter codeword(s) into the text
/// compaction buffer so DecodeTextCompaction can apply them in sequence.
///
/// @param textCompactionData  destination buffer
/// @param index               write position in the buffer; advanced for every value stored
/// @param codewords           source codewords; codewords[0] bounds the read position
/// @param codeIndex           index of the first ECI parameter codeword
/// @param code                the ECI introducer already consumed by the caller
/// @return index of the first codeword after the copied parameters
static int ProcessTextECI(std::vector<int>& textCompactionData, int& index, const std::vector<int>& codewords, int codeIndex,
                          const int code)
{
    textCompactionData[index++] = code;

    // General purpose ECIs carry two parameter codewords, the others one.
    // Copying stops early when the codewords run out.
    const int parameterCount = code == ECI_GENERAL_PURPOSE ? 2 : 1;
    for (int copied = 0; copied < parameterCount && codeIndex < codewords[0]; ++copied)
        textCompactionData[index++] = codewords[codeIndex++];

    return codeIndex;
}
/// Reads a Text Compaction run starting at @p codeIndex, expands it into
/// sub-mode values and hands them to DecodeTextCompaction.
///
/// @param codewords  full codeword sequence; codewords[0] is the exclusive end index
/// @param codeIndex  index of the first codeword of the run
/// @param result     receives the decoded text
/// @return index of the codeword that ended the run (left unconsumed), or
///         codewords[0] when the data ran out
/// @throws FormatError on a function codeword that is neither an ECI, a byte
///         shift, nor a compaction terminator
static int TextCompaction(const std::vector<int>& codewords, int codeIndex, Content& result)
{
    // Every codeword contributes at most two buffer entries.
    std::vector<int> subModeValues((codewords[0] - codeIndex) * 2, 0);
    int used = 0;

    while (codeIndex < codewords[0]) {
        const int code = codewords[codeIndex++];

        if (code < TEXT_COMPACTION_MODE_LATCH) {
            // A data codeword packs two base-30 sub-mode values.
            subModeValues[used++] = code / 30;
            subModeValues[used++] = code % 30;
            continue;
        }

        if (code == MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
            subModeValues[used++] = MODE_SHIFT_TO_BYTE_COMPACTION_MODE;
            // Keep any ECIs that precede the shifted byte in order.
            while (codeIndex < codewords[0] && IsECI(codewords[codeIndex])) {
                const int eci = codewords[codeIndex];
                codeIndex = ProcessTextECI(subModeValues, used, codewords, codeIndex + 1, eci);
            }
            if (codeIndex < codewords[0])
                subModeValues[used++] = codewords[codeIndex++];
            continue;
        }

        if (code == ECI_CHARSET || code == ECI_GENERAL_PURPOSE || code == ECI_USER_DEFINED) {
            codeIndex = ProcessTextECI(subModeValues, used, codewords, codeIndex, code);
            continue;
        }

        if (!TerminatesCompaction(code))
            throw FormatError();

        // Leave the terminating codeword for the caller to dispatch on.
        --codeIndex;
        break;
    }

    DecodeTextCompaction(subModeValues, used, result);
    return codeIndex;
}
/// Scans a Byte Compaction run without decoding it and reports how it splits
/// into 5-codeword batches and single trailing codewords.
///
/// @param mode           BYTE_COMPACTION_MODE_LATCH or BYTE_COMPACTION_MODE_LATCH_6
/// @param codewords      full codeword sequence; codewords[0] is the exclusive end index
/// @param codeIndex      index of the first codeword after the latch
/// @param trailingCount  out: number of codewords to be emitted one byte each
///                       (always 0 for the 6-byte latch)
/// @return number of full 5-codeword batches
/// @throws FormatError if the 6-byte latch run is not a multiple of 5 data
///         codewords (checked at every function codeword and at the end), if
///         an ECI runs past the end, or on a function codeword that is neither
///         an ECI nor a terminator
static int CountByteBatches(int mode, const std::vector<int>& codewords, int codeIndex, int& trailingCount)
{
    trailingCount = 0;
    int dataCount = 0;

    while (codeIndex < codewords[0]) {
        const int code = codewords[codeIndex++];

        if (code < TEXT_COMPACTION_MODE_LATCH) {
            ++dataCount;
            continue;
        }

        // In 6-byte mode a function codeword may only appear on a batch boundary.
        if (mode == BYTE_COMPACTION_MODE_LATCH_6 && dataCount % 5 != 0)
            throw FormatError();

        if (!IsECI(code)) {
            if (!TerminatesCompaction(code))
                throw FormatError();
            break;
        }

        // Skip the ECI parameters using the same step sizes as the original scan.
        codeIndex += code == ECI_GENERAL_PURPOSE ? 2 : 1;
    }

    if (codeIndex > codewords[0])
        throw FormatError();
    if (dataCount == 0)
        return 0;

    const int remainder = dataCount % 5;

    if (mode != BYTE_COMPACTION_MODE_LATCH) {
        if (remainder != 0)
            throw FormatError();
        return dataCount / 5;
    }

    if (remainder == 0) {
        // With the plain byte latch the last five codewords are always emitted individually.
        trailingCount = 5;
        return (dataCount - 5) / 5;
    }

    trailingCount = remainder;
    return dataCount / 5;
}
/// Consumes consecutive function codewords at @p codeIndex inside a Byte
/// Compaction run, applying any ECIs to @p result.
///
/// Stops at the first data codeword (< 900), the first compaction terminator,
/// or the end of the data.
///
/// @return index of the codeword the scan stopped at
static int ProcessByteECIs(const std::vector<int>& codewords, int codeIndex, Content& result)
{
    const int length = codewords[0];

    while (codeIndex < length) {
        const int code = codewords[codeIndex];
        if (code < TEXT_COMPACTION_MODE_LATCH || TerminatesCompaction(code))
            break;

        ++codeIndex;
        if (IsECI(code))
            codeIndex = ProcessECI(codewords, codeIndex, length, code, result);
    }

    return codeIndex;
}
/// Decodes a Byte Compaction run into @p result.
///
/// Full batches of five codewords are combined as a base-900 number and
/// emitted as six bytes, most significant first; trailing codewords are
/// emitted one byte each. ECIs are processed before the first batch and after
/// every batch / trailing byte.
///
/// @param mode       the latch that opened the run (passed on to CountByteBatches)
/// @param codewords  full codeword sequence; codewords[0] is the exclusive end index
/// @param codeIndex  index of the first codeword after the latch
/// @return index of the first codeword after the run
static int ByteCompaction(int mode, const std::vector<int>& codewords, int codeIndex, Content& result)
{
    int trailingCount = 0;
    const int batches = CountByteBatches(mode, codewords, codeIndex, trailingCount);

    codeIndex = ProcessByteECIs(codewords, codeIndex, result);

    for (int remaining = batches; remaining > 0; --remaining) {
        int64_t packed = 0;
        for (int k = 0; k < 5; ++k)
            packed = 900 * packed + codewords[codeIndex++];

        for (int shift = 40; shift >= 0; shift -= 8)
            result.push_back(static_cast<uint8_t>(packed >> shift));

        codeIndex = ProcessByteECIs(codewords, codeIndex, result);
    }

    for (int remaining = trailingCount; remaining > 0; --remaining) {
        result.push_back(static_cast<uint8_t>(codewords[codeIndex++]));
        codeIndex = ProcessByteECIs(codewords, codeIndex, result);
    }

    return codeIndex;
}
static std::string DecodeBase900toBase10(const std::vector<int>& codewords, int endIndex, int count)
{
static const auto EXP900 = []() {
std::array<BigInteger, 16> table = {1, 900};
for (size_t i = 2; i < table.size(); ++i)
table[i] = table[i - 1] * 900;
return table;
}();
assert(count <= 16);
BigInteger result;
for (int i = 0; i < count; i++)
result += EXP900[count - i - 1] * codewords[endIndex - count + i];
std::string resultString = result.toString();
if (!resultString.empty() && resultString.front() == '1')
return resultString.substr(1);
throw FormatError();
}
/// Decodes a Numeric Compaction run into @p result.
///
/// Data codewords are collected into groups; a group is converted with
/// DecodeBase900toBase10 once it reaches MAX_NUMERIC_CODEWORDS, the data ends,
/// or a function codeword is met. ECIs are processed wherever they occur.
///
/// @param codewords  full codeword sequence; codewords[0] is the exclusive end index
/// @param codeIndex  index of the first codeword after the latch
/// @return index of the terminating codeword (left unconsumed) or codewords[0]
/// @throws FormatError on a function codeword that is neither an ECI nor a terminator
static int NumericCompaction(const std::vector<int>& codewords, int codeIndex, Content& result)
{
    int pending = 0; // data codewords collected for the current group

    while (codeIndex < codewords[0]) {
        const int code = codewords[codeIndex];
        const bool isFunctionCodeword = code >= TEXT_COMPACTION_MODE_LATCH;

        if (!isFunctionCodeword) {
            ++pending;
            ++codeIndex;
        }

        const bool groupEnds = pending == MAX_NUMERIC_CODEWORDS || codeIndex == codewords[0] || isFunctionCodeword;
        if (pending > 0 && groupEnds) {
            result += DecodeBase900toBase10(codewords, codeIndex, pending);
            pending = 0;
        }

        if (!isFunctionCodeword)
            continue;

        if (IsECI(code))
            codeIndex = ProcessECI(codewords, codeIndex + 1, codewords[0], code, result);
        else if (TerminatesCompaction(code))
            break;
        else
            throw FormatError();
    }

    return codeIndex;
}
static int DecodeMacroOptionalTextField(const std::vector<int>& codewords, int codeIndex, std::string& field)
{
Content result;
result.defaultCharset = CharacterSet::Cp437;
codeIndex = TextCompaction(codewords, codeIndex, result);
field = result.utf8();
return codeIndex;
}
static int DecodeMacroOptionalNumericField(const std::vector<int>& codewords, int codeIndex, uint64_t& field)
{
Content result;
result.defaultCharset = CharacterSet::Cp437;
codeIndex = NumericCompaction(codewords, codeIndex, result);
field = std::stoll(result.utf8());
return codeIndex;
}
/// Decodes a Macro PDF417 control block into @p resultMetadata.
///
/// Layout handled here: NUMBER_OF_SEQUENCE_CODEWORDS codewords holding the
/// segment index, then the file id codewords, then any number of optional
/// fields (each introduced by BEGIN_MACRO_PDF417_OPTIONAL_FIELD) and/or the
/// MACRO_PDF417_TERMINATOR.
///
/// @param codewords       full codeword sequence; codewords[0] is the exclusive end index
/// @param codeIndex       index of the first codeword after the control block marker
/// @param resultMetadata  receives segment index, file id, optional fields and last-segment flag
/// @return index after the last codeword consumed
/// @throws FormatError if the segment index codewords are missing, or on an
///         unknown optional field designator or unexpected codeword
ZXING_EXPORT_TEST_ONLY
int DecodeMacroBlock(const std::vector<int>& codewords, int codeIndex, DecoderResultExtra& resultMetadata)
{
    // All segment index codewords must be present.
    if (codeIndex + NUMBER_OF_SEQUENCE_CODEWORDS > codewords[0])
        throw FormatError();

    codeIndex += NUMBER_OF_SEQUENCE_CODEWORDS;
    std::string segmentIndexDigits = DecodeBase900toBase10(codewords, codeIndex, NUMBER_OF_SEQUENCE_CODEWORDS);
    resultMetadata.setSegmentIndex(std::stoi(segmentIndexDigits));

    // The file id is every codeword up to the first optional field or the
    // terminator, each rendered through ToString(..., 3).
    std::ostringstream fileId;
    while (codeIndex < codewords[0]) {
        if (codewords[codeIndex] == MACRO_PDF417_TERMINATOR || codewords[codeIndex] == BEGIN_MACRO_PDF417_OPTIONAL_FIELD)
            break;
        fileId << ToString(codewords[codeIndex], 3);
        ++codeIndex;
    }
    resultMetadata.setFileId(fileId.str());

    // Remember where the raw optional-field codewords begin (just after the first marker).
    int optionalFieldsStart = -1;
    if (codeIndex < codewords[0] && codewords[codeIndex] == BEGIN_MACRO_PDF417_OPTIONAL_FIELD)
        optionalFieldsStart = codeIndex + 1;

    while (codeIndex < codewords[0]) {
        const int marker = codewords[codeIndex];

        if (marker == MACRO_PDF417_TERMINATOR) {
            ++codeIndex;
            resultMetadata.setLastSegment(true);
            continue;
        }

        if (marker != BEGIN_MACRO_PDF417_OPTIONAL_FIELD)
            throw FormatError();

        ++codeIndex;
        if (codeIndex >= codewords[0])
            break; // marker was the last codeword: no designator to read

        const int payloadStart = codeIndex + 1;
        switch (codewords[codeIndex]) {
        case MACRO_PDF417_OPTIONAL_FIELD_FILE_NAME: {
            std::string text;
            codeIndex = DecodeMacroOptionalTextField(codewords, payloadStart, text);
            resultMetadata.setFileName(text);
            break;
        }
        case MACRO_PDF417_OPTIONAL_FIELD_SENDER: {
            std::string text;
            codeIndex = DecodeMacroOptionalTextField(codewords, payloadStart, text);
            resultMetadata.setSender(text);
            break;
        }
        case MACRO_PDF417_OPTIONAL_FIELD_ADDRESSEE: {
            std::string text;
            codeIndex = DecodeMacroOptionalTextField(codewords, payloadStart, text);
            resultMetadata.setAddressee(text);
            break;
        }
        case MACRO_PDF417_OPTIONAL_FIELD_SEGMENT_COUNT: {
            uint64_t number = 0;
            codeIndex = DecodeMacroOptionalNumericField(codewords, payloadStart, number);
            resultMetadata.setSegmentCount(narrow_cast<int>(number));
            break;
        }
        case MACRO_PDF417_OPTIONAL_FIELD_TIME_STAMP: {
            uint64_t number = 0;
            codeIndex = DecodeMacroOptionalNumericField(codewords, payloadStart, number);
            resultMetadata.setTimestamp(number);
            break;
        }
        case MACRO_PDF417_OPTIONAL_FIELD_CHECKSUM: {
            uint64_t number = 0;
            codeIndex = DecodeMacroOptionalNumericField(codewords, payloadStart, number);
            resultMetadata.setChecksum(narrow_cast<int>(number));
            break;
        }
        case MACRO_PDF417_OPTIONAL_FIELD_FILE_SIZE: {
            uint64_t number = 0;
            codeIndex = DecodeMacroOptionalNumericField(codewords, payloadStart, number);
            resultMetadata.setFileSize(number);
            break;
        }
        default: throw FormatError();
        }
    }

    // Hand the raw optional-field codewords to the metadata as well.
    if (optionalFieldsStart != -1) {
        int optionalFieldsLength = codeIndex - optionalFieldsStart;
        if (resultMetadata.isLastSegment())
            --optionalFieldsLength; // exclude the terminator codeword
        const auto first = codewords.begin() + optionalFieldsStart;
        resultMetadata.setOptionalData(std::vector<int>(first, first + optionalFieldsLength));
    }

    return codeIndex;
}
/// Decodes the data codewords of a PDF417 symbol.
///
/// @param codewords  codeword sequence; codewords[0] is used as the exclusive
///                   end index of the data and decoding starts at index 1
/// @return the decoded content together with structured-append information,
///         the reader-init flag and the macro metadata; or an error result
///         when the input is empty, a decoding step throws, or nothing was
///         decoded and no macro segment index was found
DecoderResult Decode(const std::vector<int>& codewords)
{
    // codewords[0] is read unconditionally below; indexing an empty vector is undefined behaviour.
    if (codewords.empty())
        return FormatError();

    Content result;
    result.symbology = {'L', '2', char(-1)};

    bool readerInit = false;
    auto resultMetadata = std::make_shared<DecoderResultExtra>();

    try {
        for (int codeIndex = 1; codeIndex < codewords[0];) {
            const int code = codewords[codeIndex++];
            switch (code) {
            case TEXT_COMPACTION_MODE_LATCH:
                codeIndex = TextCompaction(codewords, codeIndex, result);
                break;
            case MODE_SHIFT_TO_BYTE_COMPACTION_MODE:
                // The shift is handled inside TextCompaction, so step back onto it.
                codeIndex = TextCompaction(codewords, codeIndex - 1, result);
                break;
            case BYTE_COMPACTION_MODE_LATCH:
            case BYTE_COMPACTION_MODE_LATCH_6:
                codeIndex = ByteCompaction(code, codewords, codeIndex, result);
                break;
            case NUMERIC_COMPACTION_MODE_LATCH:
                codeIndex = NumericCompaction(codewords, codeIndex, result);
                break;
            case ECI_CHARSET:
            case ECI_GENERAL_PURPOSE:
            case ECI_USER_DEFINED:
                codeIndex = ProcessECI(codewords, codeIndex, codewords[0], code, result);
                break;
            case BEGIN_MACRO_PDF417_CONTROL_BLOCK:
                codeIndex = DecodeMacroBlock(codewords, codeIndex, *resultMetadata);
                break;
            case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
            case MACRO_PDF417_TERMINATOR:
                // Only valid inside a macro control block.
                throw FormatError();
            case READER_INIT:
                // Accepted only as the first codeword after the length (codeIndex was already advanced).
                if (codeIndex != 2)
                    throw FormatError();
                readerInit = true;
                break;
            case LINKAGE_EANUCC:
                // Accepted only as the first codeword after the length; otherwise ignored.
                if (codeIndex != 2)
                    throw FormatError();
                break;
            case LINKAGE_OTHER:
                throw UnsupportedError("LINKAGE_OTHER, see ISO/IEC 15438:2015 5.4.1.5");
            default:
                if (code >= TEXT_COMPACTION_MODE_LATCH)
                    throw UnsupportedError("Reserved codeword, see ISO/IEC 15438:2015 5.4.6.1");
                // A data codeword with no latch in front of it is decoded as Text Compaction.
                codeIndex = TextCompaction(codewords, codeIndex - 1, result);
                break;
            }
        }
    } catch (const std::exception& e) {
        // Standard-library failures (e.g. from std::stoi / std::stoll) are reported as format errors.
        return FormatError(e.what());
    } catch (const Error& e) {
        return e;
    }

    if (result.empty() && resultMetadata->segmentIndex() == -1)
        return FormatError();

    StructuredAppendInfo sai;
    if (resultMetadata->segmentIndex() > -1) {
        // Prefer the explicit segment count; otherwise it is known only for the last segment.
        if (resultMetadata->segmentCount() != -1)
            sai.count = resultMetadata->segmentCount();
        else
            sai.count = resultMetadata->isLastSegment() ? resultMetadata->segmentIndex() + 1 : 0;
        sai.index = resultMetadata->segmentIndex();
        sai.id = resultMetadata->fileId();
    }

    return DecoderResult(std::move(result))
        .setStructuredAppend(sai)
        .setReaderInit(readerInit)
        .setExtra(resultMetadata);
}
}