#pragma once
#ifndef encoding_rs_cpp_h_
#define encoding_rs_cpp_h_
#include "gsl/gsl"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <experimental/optional>
#include <memory>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "encoding_rs.h"
class Encoding;
/**
 * C++ view of the `Decoder` handle used by the C functions in encoding_rs.h.
 *
 * The class declares no data members and its default constructor is deleted;
 * every method simply forwards `this` to the corresponding `decoder_*` C
 * function. Instances are obtained elsewhere in this header as
 * `std::unique_ptr<Decoder>` (see `Encoding::new_decoder()` and friends).
 */
class Decoder final
{
public:
  ~Decoder() {}

  /**
   * Class-specific deallocation function: `delete p` on a `Decoder*` hands
   * the storage to `decoder_free` instead of the global `operator delete`.
   */
  static void operator delete(void* decoder)
  {
    decoder_free(static_cast<Decoder*>(decoder));
  }

  /// Returns the encoding reported by `decoder_encoding` for this decoder.
  gsl::not_null<const Encoding*> encoding() const
  {
    return decoder_encoding(this);
  }

  /// Forwards to `decoder_max_utf8_buffer_length` for `byte_length` input
  /// bytes. Callers in this header treat a `SIZE_MAX` result as overflow.
  size_t max_utf8_buffer_length(size_t byte_length) const
  {
    return decoder_max_utf8_buffer_length(this, byte_length);
  }

  /// Forwards to `decoder_max_utf8_buffer_length_without_replacement` for
  /// `byte_length` input bytes.
  size_t max_utf8_buffer_length_without_replacement(size_t byte_length) const
  {
    return decoder_max_utf8_buffer_length_without_replacement(this,
                                                              byte_length);
  }

  /**
   * Decodes `src` into `dst` as UTF-8 via `decoder_decode_to_utf8`.
   *
   * The span lengths are passed to the C function through in/out pointers;
   * the values it leaves there are returned to the caller.
   *
   * @param src  input bytes
   * @param dst  output buffer
   * @param last forwarded unchanged to the C function
   * @return (result code, value left in the source-length slot, value left in
   *         the destination-length slot, replacements flag)
   */
  std::tuple<uint32_t, size_t, size_t, bool> decode_to_utf8(
    gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    bool replaced; // written by the C function
    const uint32_t status = decoder_decode_to_utf8(
      this, src.data(), &consumed, dst.data(), &produced, last, &replaced);
    return std::tuple<uint32_t, size_t, size_t, bool>(status, consumed,
                                                      produced, replaced);
  }

  /**
   * Same calling convention as decode_to_utf8(), but forwards to
   * `decoder_decode_to_utf8_without_replacement` and therefore has no
   * replacements flag in the result.
   *
   * @return (result code, source-length slot, destination-length slot)
   */
  std::tuple<uint32_t, size_t, size_t> decode_to_utf8_without_replacement(
    gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    const uint32_t status = decoder_decode_to_utf8_without_replacement(
      this, src.data(), &consumed, dst.data(), &produced, last);
    return std::tuple<uint32_t, size_t, size_t>(status, consumed, produced);
  }

  /// Forwards `u16_length` to `decoder_max_utf16_buffer_length`. Callers in
  /// this header pass the number of input bytes and treat `SIZE_MAX` as
  /// overflow.
  size_t max_utf16_buffer_length(size_t u16_length) const
  {
    return decoder_max_utf16_buffer_length(this, u16_length);
  }

  /**
   * Decodes `src` into `dst` as UTF-16 code units via
   * `decoder_decode_to_utf16`.
   *
   * @return (result code, source-length slot, destination-length slot,
   *         replacements flag), each as left by the C function
   */
  std::tuple<uint32_t, size_t, size_t, bool> decode_to_utf16(
    gsl::span<const uint8_t> src, gsl::span<char16_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    bool replaced; // written by the C function
    const uint32_t status = decoder_decode_to_utf16(
      this, src.data(), &consumed, dst.data(), &produced, last, &replaced);
    return std::tuple<uint32_t, size_t, size_t, bool>(status, consumed,
                                                      produced, replaced);
  }

  /**
   * Same calling convention as decode_to_utf16(), but forwards to
   * `decoder_decode_to_utf16_without_replacement`.
   *
   * @return (result code, source-length slot, destination-length slot)
   */
  std::tuple<uint32_t, size_t, size_t> decode_to_utf16_without_replacement(
    gsl::span<const uint8_t> src, gsl::span<char16_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    const uint32_t status = decoder_decode_to_utf16_without_replacement(
      this, src.data(), &consumed, dst.data(), &produced, last);
    return std::tuple<uint32_t, size_t, size_t>(status, consumed, produced);
  }

private:
  // Not constructible from C++.
  Decoder() = delete;
};
/**
 * C++ view of the `Encoder` handle used by the C functions in encoding_rs.h.
 *
 * The class declares no data members and its default constructor is deleted;
 * every method forwards `this` to the corresponding `encoder_*` C function.
 * Instances are obtained elsewhere in this header as
 * `std::unique_ptr<Encoder>` (see `Encoding::new_encoder()`).
 */
class Encoder final
{
public:
  ~Encoder() {}

  /**
   * Class-specific deallocation function: `delete p` on an `Encoder*` hands
   * the storage to `encoder_free` instead of the global `operator delete`.
   */
  static void operator delete(void* encoder)
  {
    encoder_free(static_cast<Encoder*>(encoder));
  }

  /// Returns the encoding reported by `encoder_encoding` for this encoder.
  gsl::not_null<const Encoding*> encoding() const
  {
    return encoder_encoding(this);
  }

  /// Returns the value of `encoder_has_pending_state` for this encoder.
  bool has_pending_state() const
  {
    return encoder_has_pending_state(this);
  }

  /// Forwards to `encoder_max_buffer_length_from_utf8_if_no_unmappables` for
  /// `byte_length` bytes of UTF-8 input. Callers in this header treat a
  /// `SIZE_MAX` result as overflow.
  size_t max_buffer_length_from_utf8_if_no_unmappables(
    size_t byte_length) const
  {
    return encoder_max_buffer_length_from_utf8_if_no_unmappables(this,
                                                                 byte_length);
  }

  /// Forwards to `encoder_max_buffer_length_from_utf8_without_replacement`
  /// for `byte_length` bytes of UTF-8 input.
  size_t max_buffer_length_from_utf8_without_replacement(
    size_t byte_length) const
  {
    return encoder_max_buffer_length_from_utf8_without_replacement(
      this, byte_length);
  }

  /**
   * Encodes UTF-8 `src` into `dst` via `encoder_encode_from_utf8`.
   *
   * The span lengths are passed to the C function through in/out pointers;
   * the values it leaves there are returned to the caller.
   *
   * @param src  UTF-8 input bytes
   * @param dst  output buffer
   * @param last forwarded unchanged to the C function
   * @return (result code, source-length slot, destination-length slot,
   *         replacements flag)
   */
  std::tuple<uint32_t, size_t, size_t, bool> encode_from_utf8(
    gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    bool replaced; // written by the C function
    const uint32_t status = encoder_encode_from_utf8(
      this, src.data(), &consumed, dst.data(), &produced, last, &replaced);
    return std::tuple<uint32_t, size_t, size_t, bool>(status, consumed,
                                                      produced, replaced);
  }

  /**
   * Same calling convention as encode_from_utf8(), but forwards to
   * `encoder_encode_from_utf8_without_replacement`.
   *
   * @return (result code, source-length slot, destination-length slot)
   */
  std::tuple<uint32_t, size_t, size_t> encode_from_utf8_without_replacement(
    gsl::span<const uint8_t> src, gsl::span<uint8_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    const uint32_t status = encoder_encode_from_utf8_without_replacement(
      this, src.data(), &consumed, dst.data(), &produced, last);
    return std::tuple<uint32_t, size_t, size_t>(status, consumed, produced);
  }

  /// Forwards to `encoder_max_buffer_length_from_utf16_if_no_unmappables` for
  /// `u16_length` UTF-16 code units of input.
  size_t max_buffer_length_from_utf16_if_no_unmappables(
    size_t u16_length) const
  {
    return encoder_max_buffer_length_from_utf16_if_no_unmappables(this,
                                                                  u16_length);
  }

  /// Forwards to `encoder_max_buffer_length_from_utf16_without_replacement`
  /// for `u16_length` UTF-16 code units of input.
  size_t max_buffer_length_from_utf16_without_replacement(
    size_t u16_length) const
  {
    return encoder_max_buffer_length_from_utf16_without_replacement(
      this, u16_length);
  }

  /**
   * Encodes UTF-16 `src` into `dst` via `encoder_encode_from_utf16`.
   *
   * @return (result code, source-length slot, destination-length slot,
   *         replacements flag), each as left by the C function
   */
  std::tuple<uint32_t, size_t, size_t, bool> encode_from_utf16(
    gsl::span<const char16_t> src, gsl::span<uint8_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    bool replaced; // written by the C function
    const uint32_t status = encoder_encode_from_utf16(
      this, src.data(), &consumed, dst.data(), &produced, last, &replaced);
    return std::tuple<uint32_t, size_t, size_t, bool>(status, consumed,
                                                      produced, replaced);
  }

  /**
   * Same calling convention as encode_from_utf16(), but forwards to
   * `encoder_encode_from_utf16_without_replacement`.
   *
   * @return (result code, source-length slot, destination-length slot)
   */
  std::tuple<uint32_t, size_t, size_t> encode_from_utf16_without_replacement(
    gsl::span<const char16_t> src, gsl::span<uint8_t> dst, bool last)
  {
    size_t consumed = src.size();
    size_t produced = dst.size();
    const uint32_t status = encoder_encode_from_utf16_without_replacement(
      this, src.data(), &consumed, dst.data(), &produced, last);
    return std::tuple<uint32_t, size_t, size_t>(status, consumed, produced);
  }

private:
  // Not constructible from C++.
  Encoder() = delete;
};
/**
 * C++ view of the `Encoding` object used by the C functions in encoding_rs.h.
 *
 * The class declares no data members and can be neither constructed nor
 * destroyed from C++; instances are only ever seen through pointers returned
 * by the C API (or the `*_ENCODING` constants from the C header). Besides
 * thin forwarding wrappers, it offers convenience methods that decode or
 * encode a whole buffer in one call.
 */
class Encoding final
{
public:
  /**
   * Looks up an encoding by label via `encoding_for_label`.
   *
   * @return the pointer returned by the C function. Unlike for_name() this
   *         is not wrapped in `gsl::not_null`, so presumably it can be null
   *         when nothing matches — confirm against the C API docs.
   */
  static inline const Encoding* for_label(gsl::cstring_span<> label)
  {
    return encoding_for_label(reinterpret_cast<const uint8_t*>(label.data()),
                              label.length());
  }

  /// Like for_label(), but forwards to `encoding_for_label_no_replacement`.
  static inline const Encoding* for_label_no_replacement(
    gsl::cstring_span<> label)
  {
    return encoding_for_label_no_replacement(
      reinterpret_cast<const uint8_t*>(label.data()), label.length());
  }

  /**
   * Sniffs a byte order mark at the start of `buffer` via `encoding_for_bom`.
   *
   * @return (encoding, length): the pointer returned by the C function and
   *         the value it leaves in the in/out length slot (initialised to
   *         `buffer.size()`). decode() treats a null encoding as "no BOM" and
   *         otherwise skips `length` bytes.
   */
  static inline std::tuple<const Encoding*, size_t> for_bom(
    gsl::span<const uint8_t> buffer)
  {
    size_t len = buffer.size();
    const Encoding* encoding = encoding_for_bom(buffer.data(), &len);
    return std::make_tuple(encoding, len);
  }

  /// Looks up an encoding by name via `encoding_for_name`; the result is
  /// wrapped in `gsl::not_null`.
  static inline gsl::not_null<const Encoding*> for_name(
    gsl::cstring_span<> name)
  {
    return encoding_for_name(reinterpret_cast<const uint8_t*>(name.data()),
                             name.length());
  }

  /**
   * Returns the name of this encoding as a `std::string`.
   *
   * A buffer of `ENCODING_NAME_MAX_LENGTH` bytes is handed to
   * `encoding_name`, and then trimmed to the length that function returns.
   */
  inline std::string name() const
  {
    std::string name(ENCODING_NAME_MAX_LENGTH, '\0');
    size_t length = encoding_name(this, reinterpret_cast<uint8_t*>(&name[0]));
    name.resize(length);
    return name;
  }

  /// Returns the value of `encoding_can_encode_everything` for this encoding.
  inline bool can_encode_everything() const
  {
    return encoding_can_encode_everything(this);
  }

  /// Returns the value of `encoding_is_ascii_compatible` for this encoding.
  inline bool is_ascii_compatible() const
  {
    return encoding_is_ascii_compatible(this);
  }

  /// Returns the encoding reported by `encoding_output_encoding`; this is the
  /// encoding the encode() overloads actually encode into.
  inline gsl::not_null<const Encoding*> output_encoding() const
  {
    return encoding_output_encoding(this);
  }

  /**
   * Decodes a whole buffer to UTF-8 with BOM sniffing.
   *
   * If for_bom() recognises a BOM, the BOM bytes are skipped and the sniffed
   * encoding is used; otherwise this encoding is used on the whole buffer.
   *
   * @return (decoded text, encoding actually used, flag reported by the
   *         decoder as its fourth result)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::tuple<std::string, const Encoding*, bool> decode(
    gsl::span<const uint8_t> bytes) const
  {
    const Encoding* encoding;
    size_t bom_length;
    std::tie(encoding, bom_length) = Encoding::for_bom(bytes);
    if (encoding) {
      bytes = bytes.subspan(bom_length);
    } else {
      encoding = this;
    }
    bool had_errors;
    std::string str;
    std::tie(str, had_errors) = encoding->decode_without_bom_handling(bytes);
    return std::make_tuple(std::move(str), encoding, had_errors);
  }

  /**
   * Decodes a whole buffer to UTF-8 using this encoding, first dropping a
   * leading BOM if (and only if) it is the BOM of this very encoding
   * (UTF-8, UTF-16LE or UTF-16BE).
   *
   * @return (decoded text, flag reported by the decoder)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::tuple<std::string, bool> decode_with_bom_removal(
    gsl::span<const uint8_t> bytes) const
  {
    return decode_without_bom_handling(without_own_bom(bytes));
  }

  /**
   * Decodes a whole buffer to UTF-8 using this encoding; a BOM, if present,
   * is handed to the decoder like any other input.
   *
   * The output string is sized with `max_utf8_buffer_length`, so the decoder
   * is expected to finish in one call (asserted in debug builds).
   *
   * @return (decoded text, flag reported by the decoder)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::tuple<std::string, bool> decode_without_bom_handling(
    gsl::span<const uint8_t> bytes) const
  {
    auto decoder = new_decoder_without_bom_handling();
    size_t needed = decoder->max_utf8_buffer_length(bytes.size());
    if (needed == SIZE_MAX) {
      throw std::overflow_error("Overflow in buffer size computation.");
    }
    std::string string(needed, '\0');
    uint32_t result;
    size_t read;
    size_t written;
    bool had_errors;
    std::tie(result, read, written, had_errors) = decoder->decode_to_utf8(
      bytes,
      gsl::make_span(reinterpret_cast<uint8_t*>(&string[0]), string.size()),
      true);
    assert(read == static_cast<size_t>(bytes.size()));
    assert(written <= static_cast<size_t>(string.size()));
    assert(result == INPUT_EMPTY);
    string.resize(written);
    return std::make_tuple(std::move(string), had_errors);
  }

  /**
   * Decodes a whole buffer to UTF-8 using this encoding, without BOM
   * handling and without replacement.
   *
   * @return the decoded text when the decoder reports `INPUT_EMPTY`;
   *         otherwise an empty optional. (The output buffer is sized so that
   *         `OUTPUT_FULL` is not expected, which leaves a decoder-reported
   *         error as the remaining outcome.)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::experimental::optional<std::string>
  decode_without_bom_handling_and_without_replacement(
    gsl::span<const uint8_t> bytes) const
  {
    auto decoder = new_decoder_without_bom_handling();
    size_t needed =
      decoder->max_utf8_buffer_length_without_replacement(bytes.size());
    if (needed == SIZE_MAX) {
      throw std::overflow_error("Overflow in buffer size computation.");
    }
    std::string string(needed, '\0');
    uint32_t result;
    size_t read;
    size_t written;
    std::tie(result, read, written) =
      decoder->decode_to_utf8_without_replacement(
        bytes,
        gsl::make_span(reinterpret_cast<uint8_t*>(&string[0]), string.size()),
        true);
    assert(result != OUTPUT_FULL);
    if (result == INPUT_EMPTY) {
      assert(read == static_cast<size_t>(bytes.size()));
      assert(written <= static_cast<size_t>(string.size()));
      string.resize(written);
      return string;
    }
    // Previously control fell off the end here (undefined behaviour).
    return std::experimental::nullopt;
  }

  /**
   * UTF-16 counterpart of decode(): decodes a whole buffer to UTF-16 with
   * BOM sniffing.
   *
   * @return (decoded text, encoding actually used, flag reported by the
   *         decoder)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::tuple<std::u16string, const Encoding*, bool> decode16(
    gsl::span<const uint8_t> bytes) const
  {
    const Encoding* encoding;
    size_t bom_length;
    std::tie(encoding, bom_length) = Encoding::for_bom(bytes);
    if (encoding) {
      bytes = bytes.subspan(bom_length);
    } else {
      encoding = this;
    }
    bool had_errors;
    std::u16string str;
    std::tie(str, had_errors) = encoding->decode16_without_bom_handling(bytes);
    return std::make_tuple(std::move(str), encoding, had_errors);
  }

  /**
   * UTF-16 counterpart of decode_with_bom_removal(): drops a leading BOM
   * only if it is the BOM of this very encoding, then decodes to UTF-16.
   *
   * @return (decoded text, flag reported by the decoder)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::tuple<std::u16string, bool> decode16_with_bom_removal(
    gsl::span<const uint8_t> bytes) const
  {
    return decode16_without_bom_handling(without_own_bom(bytes));
  }

  /**
   * UTF-16 counterpart of decode_without_bom_handling().
   *
   * @return (decoded text, flag reported by the decoder)
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::tuple<std::u16string, bool> decode16_without_bom_handling(
    gsl::span<const uint8_t> bytes) const
  {
    auto decoder = new_decoder_without_bom_handling();
    size_t needed = decoder->max_utf16_buffer_length(bytes.size());
    if (needed == SIZE_MAX) {
      throw std::overflow_error("Overflow in buffer size computation.");
    }
    std::u16string string(needed, '\0');
    uint32_t result;
    size_t read;
    size_t written;
    bool had_errors;
    std::tie(result, read, written, had_errors) = decoder->decode_to_utf16(
      bytes, gsl::make_span(&string[0], string.size()), true);
    assert(read == static_cast<size_t>(bytes.size()));
    assert(written <= static_cast<size_t>(string.size()));
    assert(result == INPUT_EMPTY);
    string.resize(written);
    return std::make_tuple(std::move(string), had_errors);
  }

  /**
   * UTF-16 counterpart of
   * decode_without_bom_handling_and_without_replacement().
   *
   * @return the decoded text when the decoder reports `INPUT_EMPTY`;
   *         otherwise an empty optional
   * @throws std::overflow_error if the buffer size computation overflows
   */
  inline std::experimental::optional<std::u16string>
  decode16_without_bom_handling_and_without_replacement(
    gsl::span<const uint8_t> bytes) const
  {
    auto decoder = new_decoder_without_bom_handling();
    size_t needed = decoder->max_utf16_buffer_length(bytes.size());
    if (needed == SIZE_MAX) {
      throw std::overflow_error("Overflow in buffer size computation.");
    }
    std::u16string string(needed, '\0');
    uint32_t result;
    size_t read;
    size_t written;
    std::tie(result, read, written) =
      decoder->decode_to_utf16_without_replacement(
        bytes, gsl::make_span(&string[0], string.size()), true);
    assert(result != OUTPUT_FULL);
    if (result == INPUT_EMPTY) {
      assert(read == static_cast<size_t>(bytes.size()));
      assert(written <= static_cast<size_t>(string.size()));
      string.resize(written);
      return string;
    }
    // Previously control fell off the end here (undefined behaviour).
    return std::experimental::nullopt;
  }

  /**
   * Encodes a whole UTF-8 buffer into this encoding's output encoding.
   *
   * When the output encoding is UTF-8 the input bytes are copied verbatim.
   * Otherwise the buffer is first sized for the "no unmappables" case and
   * grown whenever the encoder stops before consuming all input.
   *
   * @param string UTF-8 input; handed to the encoder as-is — NOTE(review):
   *               presumably it must be valid UTF-8, confirm against the C
   *               API docs
   * @return (encoded bytes, output encoding, accumulated flag reported by
   *         the encoder; always false on the UTF-8 copy path)
   * @throws std::overflow_error if a buffer size computation overflows
   */
  inline std::tuple<std::vector<uint8_t>, const Encoding*, bool> encode(
    gsl::span<const uint8_t> string) const
  {
    auto output_enc = output_encoding();
    if (output_enc == UTF_8_ENCODING) {
      // Fast path: copy and return. The range constructor is also safe for
      // empty input, unlike taking &vec[0] of an empty vector.
      std::vector<uint8_t> copy(string.begin(), string.end());
      return std::make_tuple(std::move(copy), output_enc, false);
    }
    auto encoder = output_enc->new_encoder();
    size_t needed =
      encoder->max_buffer_length_from_utf8_if_no_unmappables(string.size());
    if (needed == SIZE_MAX) {
      throw std::overflow_error("Overflow in buffer size computation.");
    }
    std::vector<uint8_t> vec(needed);
    bool total_had_errors = false;
    size_t total_read = 0;
    size_t total_written = 0;
    uint32_t result;
    size_t read;
    size_t written;
    bool had_errors;
    for (;;) {
      std::tie(result, read, written, had_errors) = encoder->encode_from_utf8(
        string.subspan(total_read),
        gsl::make_span(vec).subspan(total_written),
        true);
      total_read += read;
      total_written += written;
      total_had_errors |= had_errors;
      if (result == INPUT_EMPTY) {
        assert(total_read == static_cast<size_t>(string.size()));
        assert(total_written <= static_cast<size_t>(vec.size()));
        vec.resize(total_written);
        return std::make_tuple(std::move(vec), output_enc, total_had_errors);
      }
      // Not finished: make room for the remaining input and go again.
      needed = encoder->max_buffer_length_from_utf8_if_no_unmappables(
        static_cast<size_t>(string.size()) - total_read);
      if (needed == SIZE_MAX) {
        throw std::overflow_error("Overflow in buffer size computation.");
      }
      vec.resize(total_written + needed);
    }
  }

  /**
   * Encodes a whole UTF-16 buffer into this encoding's output encoding.
   *
   * The buffer is first sized for the "no unmappables" case and grown
   * whenever the encoder stops before consuming all input.
   *
   * @return (encoded bytes, output encoding, accumulated flag reported by
   *         the encoder)
   * @throws std::overflow_error if a buffer size computation overflows
   */
  inline std::tuple<std::vector<uint8_t>, const Encoding*, bool> encode(
    gsl::span<const char16_t> string) const
  {
    auto output_enc = output_encoding();
    auto encoder = output_enc->new_encoder();
    size_t needed =
      encoder->max_buffer_length_from_utf16_if_no_unmappables(string.size());
    if (needed == SIZE_MAX) {
      throw std::overflow_error("Overflow in buffer size computation.");
    }
    std::vector<uint8_t> vec(needed);
    bool total_had_errors = false;
    size_t total_read = 0;
    size_t total_written = 0;
    uint32_t result;
    size_t read;
    size_t written;
    bool had_errors;
    for (;;) {
      std::tie(result, read, written, had_errors) = encoder->encode_from_utf16(
        string.subspan(total_read),
        gsl::make_span(vec).subspan(total_written),
        true);
      total_read += read;
      total_written += written;
      total_had_errors |= had_errors;
      if (result == INPUT_EMPTY) {
        assert(total_read == static_cast<size_t>(string.size()));
        assert(total_written <= static_cast<size_t>(vec.size()));
        vec.resize(total_written);
        return std::make_tuple(std::move(vec), output_enc, total_had_errors);
      }
      // Not finished: make room for the remaining input and go again.
      needed = encoder->max_buffer_length_from_utf16_if_no_unmappables(
        static_cast<size_t>(string.size()) - total_read);
      if (needed == SIZE_MAX) {
        throw std::overflow_error("Overflow in buffer size computation.");
      }
      vec.resize(total_written + needed);
    }
  }

  /// Creates a decoder for this encoding via `encoding_new_decoder`; the
  /// returned pointer owns it (released through `Decoder::operator delete`).
  inline std::unique_ptr<Decoder> new_decoder() const
  {
    std::unique_ptr<Decoder> decoder(encoding_new_decoder(this));
    return decoder;
  }

  /// Forwards to `encoding_new_decoder_into` with the caller-provided
  /// `decoder` as the target.
  inline void new_decoder_into(Decoder& decoder) const
  {
    encoding_new_decoder_into(this, &decoder);
  }

  /// Creates a decoder via `encoding_new_decoder_with_bom_removal`.
  inline std::unique_ptr<Decoder> new_decoder_with_bom_removal() const
  {
    std::unique_ptr<Decoder> decoder(
      encoding_new_decoder_with_bom_removal(this));
    return decoder;
  }

  /// Forwards to `encoding_new_decoder_with_bom_removal_into` with the
  /// caller-provided `decoder` as the target.
  inline void new_decoder_with_bom_removal_into(Decoder& decoder) const
  {
    encoding_new_decoder_with_bom_removal_into(this, &decoder);
  }

  /// Creates a decoder via `encoding_new_decoder_without_bom_handling`; used
  /// by all the whole-buffer decode helpers above.
  inline std::unique_ptr<Decoder> new_decoder_without_bom_handling() const
  {
    std::unique_ptr<Decoder> decoder(
      encoding_new_decoder_without_bom_handling(this));
    return decoder;
  }

  /// Forwards to `encoding_new_decoder_without_bom_handling_into` with the
  /// caller-provided `decoder` as the target.
  inline void new_decoder_without_bom_handling_into(Decoder& decoder) const
  {
    encoding_new_decoder_without_bom_handling_into(this, &decoder);
  }

  /// Creates an encoder for this encoding via `encoding_new_encoder`; the
  /// returned pointer owns it (released through `Encoder::operator delete`).
  inline std::unique_ptr<Encoder> new_encoder() const
  {
    std::unique_ptr<Encoder> encoder(encoding_new_encoder(this));
    return encoder;
  }

  /// Forwards to `encoding_new_encoder_into` with the caller-provided
  /// `encoder` as the target.
  inline void new_encoder_into(Encoder& encoder) const
  {
    encoding_new_encoder_into(this, &encoder);
  }

  /// Returns the result of `encoding_utf8_valid_up_to` on `buffer`.
  static inline size_t utf8_valid_up_to(gsl::span<const uint8_t> buffer)
  {
    return encoding_utf8_valid_up_to(buffer.data(), buffer.size());
  }

  /// Returns the result of `encoding_ascii_valid_up_to` on `buffer`.
  static inline size_t ascii_valid_up_to(gsl::span<const uint8_t> buffer)
  {
    return encoding_ascii_valid_up_to(buffer.data(), buffer.size());
  }

  /// Returns the result of `encoding_iso_2022_jp_ascii_valid_up_to` on
  /// `buffer`.
  static inline size_t iso_2022_jp_ascii_valid_up_to(
    gsl::span<const uint8_t> buffer)
  {
    return encoding_iso_2022_jp_ascii_valid_up_to(buffer.data(), buffer.size());
  }

private:
  /**
   * Returns `bytes` without its leading BOM when this encoding is UTF-8,
   * UTF-16LE or UTF-16BE and `bytes` starts with that encoding's own BOM;
   * otherwise returns `bytes` unchanged.
   *
   * The BOMs are kept as explicit byte arrays: a string literal wrapped in a
   * span would also carry its NUL terminator and never compare equal to an
   * N-byte prefix.
   */
  inline gsl::span<const uint8_t> without_own_bom(
    gsl::span<const uint8_t> bytes) const
  {
    static const uint8_t utf8_bom[] = { 0xEF, 0xBB, 0xBF };
    static const uint8_t utf16le_bom[] = { 0xFF, 0xFE };
    static const uint8_t utf16be_bom[] = { 0xFE, 0xFF };

    const uint8_t* bom = nullptr;
    size_t bom_length = 0;
    if (this == UTF_8_ENCODING) {
      bom = utf8_bom;
      bom_length = sizeof(utf8_bom);
    } else if (this == UTF_16LE_ENCODING) {
      bom = utf16le_bom;
      bom_length = sizeof(utf16le_bom);
    } else if (this == UTF_16BE_ENCODING) {
      bom = utf16be_bom;
      bom_length = sizeof(utf16be_bom);
    }

    // The size check comes first so memcmp never reads past a short buffer.
    if (bom && static_cast<size_t>(bytes.size()) >= bom_length &&
        std::memcmp(bytes.data(), bom, bom_length) == 0) {
      return bytes.subspan(bom_length);
    }
    return bytes;
  }

  // Neither constructible nor destructible from C++.
  Encoding() = delete;
  ~Encoding() = delete;
};
#endif