// transcoding_rs 0.1.1
//
// Converts text encoding the easy and efficient way.
// Documentation
use encoding_rs as enc;

/// Non-text characters kept in their own list, separate from `NON_TEXTS`.
///
/// The "frequent" in the name is only the author's guess: nothing shows that
/// these two actually occur more often than the characters in `NON_TEXTS`.
pub const NON_TEXTS_FREQUENT: [char; 2] = [
    '\0',       // U+0000 NUL - Null character
    '\u{FFFD}', // U+FFFD REPLACEMENT CHARACTER
];
/// Control characters classified as non-text.
///
/// The selection is based on the `file` command:
/// <https://github.com/file/file/blob/ac3fb1f582ea35c274ad776f26e57785c4cf976f/src/encoding.c#L236>
///
/// Deliberately left out of this list:
/// - U+0007 BEL, U+0008 BS, U+0009 HT, U+000A LF, U+000B VT, U+000C FF, U+000D CR
/// - U+001B ESC
/// - everything between U+001F and U+007F
///
/// U+0000 NUL is not listed here either; it lives in `NON_TEXTS_FREQUENT`.
pub const NON_TEXTS: [char; 24] = [
    // U+0001 ..= U+0006
    '\x01', // SOH - Start of Heading
    '\x02', // STX - Start of Text
    '\x03', // ETX - End-of-text character
    '\x04', // EOT - End-of-transmission character
    '\x05', // ENQ - Enquiry character
    '\x06', // ACK - Acknowledge character
    // U+000E ..= U+001A
    '\x0E', // SO  - Shift Out
    '\x0F', // SI  - Shift In
    '\x10', // DLE - Data Link Escape
    '\x11', // DC1 - Device Control 1
    '\x12', // DC2 - Device Control 2
    '\x13', // DC3 - Device Control 3
    '\x14', // DC4 - Device Control 4
    '\x15', // NAK - Negative-acknowledge character
    '\x16', // SYN - Synchronous Idle
    '\x17', // ETB - End of Transmission Block
    '\x18', // CAN - Cancel character
    '\x19', // EM  - End of Medium
    '\x1A', // SUB - Substitute character
    // U+001C ..= U+001F
    '\x1C', // FS  - File Separator
    '\x1D', // GS  - Group Separator
    '\x1E', // RS  - Record Separator
    '\x1F', // US  - Unit Separator
    // U+007F
    '\x7F', // DEL - Delete
];

/// Table of supported encodings.
///
/// Each entry pairs an `encoding_rs` [`enc::Encoding`] with one lowercase
/// label string. Entries are grouped by encoding, so an encoding that has
/// several labels appears on several consecutive rows.
///
/// NOTE(review): the labels look like the WHATWG Encoding Standard label
/// list — confirm before relying on that.
pub static ENCODINGS: [(&enc::Encoding, &str); 219] = [
    (&enc::BIG5_INIT, "big5"),
    (&enc::BIG5_INIT, "big5-hkscs"),
    (&enc::BIG5_INIT, "cn-big5"),
    (&enc::BIG5_INIT, "csbig5"),
    (&enc::BIG5_INIT, "x-x-big5"),
    (&enc::EUC_JP_INIT, "cseucpkdfmtjapanese"),
    (&enc::EUC_JP_INIT, "euc-jp"),
    (&enc::EUC_JP_INIT, "x-euc-jp"),
    (&enc::EUC_KR_INIT, "cseuckr"),
    (&enc::EUC_KR_INIT, "csksc56011987"),
    (&enc::EUC_KR_INIT, "euc-kr"),
    (&enc::EUC_KR_INIT, "iso-ir-149"),
    (&enc::EUC_KR_INIT, "korean"),
    (&enc::EUC_KR_INIT, "ks_c_5601-1987"),
    (&enc::EUC_KR_INIT, "ks_c_5601-1989"),
    (&enc::EUC_KR_INIT, "ksc5601"),
    (&enc::EUC_KR_INIT, "ksc_5601"),
    (&enc::EUC_KR_INIT, "windows-949"),
    (&enc::GB18030_INIT, "gb18030"),
    (&enc::GBK_INIT, "chinese"),
    (&enc::GBK_INIT, "csgb2312"),
    (&enc::GBK_INIT, "csiso58gb231280"),
    (&enc::GBK_INIT, "gb2312"),
    (&enc::GBK_INIT, "gb_2312"),
    (&enc::GBK_INIT, "gb_2312-80"),
    (&enc::GBK_INIT, "gbk"),
    (&enc::GBK_INIT, "iso-ir-58"),
    (&enc::GBK_INIT, "x-gbk"),
    (&enc::IBM866_INIT, "866"),
    (&enc::IBM866_INIT, "cp866"),
    (&enc::IBM866_INIT, "csibm866"),
    (&enc::IBM866_INIT, "ibm866"),
    (&enc::ISO_2022_JP_INIT, "csiso2022jp"),
    (&enc::ISO_2022_JP_INIT, "iso-2022-jp"),
    (&enc::ISO_8859_10_INIT, "csisolatin6"),
    (&enc::ISO_8859_10_INIT, "iso-8859-10"),
    (&enc::ISO_8859_10_INIT, "iso-ir-157"),
    (&enc::ISO_8859_10_INIT, "iso8859-10"),
    (&enc::ISO_8859_10_INIT, "iso885910"),
    (&enc::ISO_8859_10_INIT, "l6"),
    (&enc::ISO_8859_10_INIT, "latin6"),
    (&enc::ISO_8859_13_INIT, "iso-8859-13"),
    (&enc::ISO_8859_13_INIT, "iso8859-13"),
    (&enc::ISO_8859_13_INIT, "iso885913"),
    (&enc::ISO_8859_14_INIT, "iso-8859-14"),
    (&enc::ISO_8859_14_INIT, "iso8859-14"),
    (&enc::ISO_8859_14_INIT, "iso885914"),
    (&enc::ISO_8859_15_INIT, "csisolatin9"),
    (&enc::ISO_8859_15_INIT, "iso-8859-15"),
    (&enc::ISO_8859_15_INIT, "iso8859-15"),
    (&enc::ISO_8859_15_INIT, "iso885915"),
    (&enc::ISO_8859_15_INIT, "iso_8859-15"),
    (&enc::ISO_8859_15_INIT, "l9"),
    (&enc::ISO_8859_16_INIT, "iso-8859-16"),
    (&enc::ISO_8859_2_INIT, "csisolatin2"),
    (&enc::ISO_8859_2_INIT, "iso-8859-2"),
    (&enc::ISO_8859_2_INIT, "iso-ir-101"),
    (&enc::ISO_8859_2_INIT, "iso8859-2"),
    (&enc::ISO_8859_2_INIT, "iso88592"),
    (&enc::ISO_8859_2_INIT, "iso_8859-2"),
    (&enc::ISO_8859_2_INIT, "iso_8859-2:1987"),
    (&enc::ISO_8859_2_INIT, "l2"),
    (&enc::ISO_8859_2_INIT, "latin2"),
    (&enc::ISO_8859_3_INIT, "csisolatin3"),
    (&enc::ISO_8859_3_INIT, "iso-8859-3"),
    (&enc::ISO_8859_3_INIT, "iso-ir-109"),
    (&enc::ISO_8859_3_INIT, "iso8859-3"),
    (&enc::ISO_8859_3_INIT, "iso88593"),
    (&enc::ISO_8859_3_INIT, "iso_8859-3"),
    (&enc::ISO_8859_3_INIT, "iso_8859-3:1988"),
    (&enc::ISO_8859_3_INIT, "l3"),
    (&enc::ISO_8859_3_INIT, "latin3"),
    (&enc::ISO_8859_4_INIT, "csisolatin4"),
    (&enc::ISO_8859_4_INIT, "iso-8859-4"),
    (&enc::ISO_8859_4_INIT, "iso-ir-110"),
    (&enc::ISO_8859_4_INIT, "iso8859-4"),
    (&enc::ISO_8859_4_INIT, "iso88594"),
    (&enc::ISO_8859_4_INIT, "iso_8859-4"),
    (&enc::ISO_8859_4_INIT, "iso_8859-4:1988"),
    (&enc::ISO_8859_4_INIT, "l4"),
    (&enc::ISO_8859_4_INIT, "latin4"),
    (&enc::ISO_8859_5_INIT, "csisolatincyrillic"),
    (&enc::ISO_8859_5_INIT, "cyrillic"),
    (&enc::ISO_8859_5_INIT, "iso-8859-5"),
    (&enc::ISO_8859_5_INIT, "iso-ir-144"),
    (&enc::ISO_8859_5_INIT, "iso8859-5"),
    (&enc::ISO_8859_5_INIT, "iso88595"),
    (&enc::ISO_8859_5_INIT, "iso_8859-5"),
    (&enc::ISO_8859_5_INIT, "iso_8859-5:1988"),
    (&enc::ISO_8859_6_INIT, "arabic"),
    (&enc::ISO_8859_6_INIT, "asmo-708"),
    (&enc::ISO_8859_6_INIT, "csiso88596e"),
    (&enc::ISO_8859_6_INIT, "csiso88596i"),
    (&enc::ISO_8859_6_INIT, "csisolatinarabic"),
    (&enc::ISO_8859_6_INIT, "ecma-114"),
    (&enc::ISO_8859_6_INIT, "iso-8859-6"),
    (&enc::ISO_8859_6_INIT, "iso-8859-6-e"),
    (&enc::ISO_8859_6_INIT, "iso-8859-6-i"),
    (&enc::ISO_8859_6_INIT, "iso-ir-127"),
    (&enc::ISO_8859_6_INIT, "iso8859-6"),
    (&enc::ISO_8859_6_INIT, "iso88596"),
    (&enc::ISO_8859_6_INIT, "iso_8859-6"),
    (&enc::ISO_8859_6_INIT, "iso_8859-6:1987"),
    (&enc::ISO_8859_7_INIT, "csisolatingreek"),
    (&enc::ISO_8859_7_INIT, "ecma-118"),
    (&enc::ISO_8859_7_INIT, "elot_928"),
    (&enc::ISO_8859_7_INIT, "greek"),
    (&enc::ISO_8859_7_INIT, "greek8"),
    (&enc::ISO_8859_7_INIT, "iso-8859-7"),
    (&enc::ISO_8859_7_INIT, "iso-ir-126"),
    (&enc::ISO_8859_7_INIT, "iso8859-7"),
    (&enc::ISO_8859_7_INIT, "iso88597"),
    (&enc::ISO_8859_7_INIT, "iso_8859-7"),
    (&enc::ISO_8859_7_INIT, "iso_8859-7:1987"),
    (&enc::ISO_8859_7_INIT, "sun_eu_greek"),
    (&enc::ISO_8859_8_INIT, "csiso88598e"),
    (&enc::ISO_8859_8_INIT, "csisolatinhebrew"),
    (&enc::ISO_8859_8_INIT, "hebrew"),
    (&enc::ISO_8859_8_INIT, "iso-8859-8"),
    (&enc::ISO_8859_8_INIT, "iso-8859-8-e"),
    (&enc::ISO_8859_8_INIT, "iso-ir-138"),
    (&enc::ISO_8859_8_INIT, "iso8859-8"),
    (&enc::ISO_8859_8_INIT, "iso88598"),
    (&enc::ISO_8859_8_INIT, "iso_8859-8"),
    (&enc::ISO_8859_8_INIT, "iso_8859-8:1988"),
    (&enc::ISO_8859_8_INIT, "visual"),
    (&enc::ISO_8859_8_I_INIT, "csiso88598i"),
    (&enc::ISO_8859_8_I_INIT, "iso-8859-8-i"),
    (&enc::ISO_8859_8_I_INIT, "logical"),
    (&enc::KOI8_R_INIT, "cskoi8r"),
    (&enc::KOI8_R_INIT, "koi"),
    (&enc::KOI8_R_INIT, "koi8"),
    (&enc::KOI8_R_INIT, "koi8-r"),
    (&enc::KOI8_R_INIT, "koi8_r"),
    (&enc::KOI8_U_INIT, "koi8-ru"),
    (&enc::KOI8_U_INIT, "koi8-u"),
    (&enc::MACINTOSH_INIT, "csmacintosh"),
    (&enc::MACINTOSH_INIT, "mac"),
    (&enc::MACINTOSH_INIT, "macintosh"),
    (&enc::MACINTOSH_INIT, "x-mac-roman"),
    (&enc::REPLACEMENT_INIT, "csiso2022kr"),
    (&enc::REPLACEMENT_INIT, "hz-gb-2312"),
    (&enc::REPLACEMENT_INIT, "iso-2022-cn"),
    (&enc::REPLACEMENT_INIT, "iso-2022-cn-ext"),
    (&enc::REPLACEMENT_INIT, "iso-2022-kr"),
    (&enc::REPLACEMENT_INIT, "replacement"),
    (&enc::SHIFT_JIS_INIT, "csshiftjis"),
    (&enc::SHIFT_JIS_INIT, "ms932"),
    (&enc::SHIFT_JIS_INIT, "ms_kanji"),
    (&enc::SHIFT_JIS_INIT, "shift-jis"),
    (&enc::SHIFT_JIS_INIT, "shift_jis"),
    (&enc::SHIFT_JIS_INIT, "sjis"),
    (&enc::SHIFT_JIS_INIT, "windows-31j"),
    (&enc::SHIFT_JIS_INIT, "x-sjis"),
    (&enc::UTF_16BE_INIT, "utf-16be"),
    (&enc::UTF_16LE_INIT, "utf-16"),
    (&enc::UTF_16LE_INIT, "utf-16le"),
    (&enc::UTF_8_INIT, "unicode-1-1-utf-8"),
    (&enc::UTF_8_INIT, "utf-8"),
    (&enc::UTF_8_INIT, "utf8"),
    (&enc::WINDOWS_1250_INIT, "cp1250"),
    (&enc::WINDOWS_1250_INIT, "windows-1250"),
    (&enc::WINDOWS_1250_INIT, "x-cp1250"),
    (&enc::WINDOWS_1251_INIT, "cp1251"),
    (&enc::WINDOWS_1251_INIT, "windows-1251"),
    (&enc::WINDOWS_1251_INIT, "x-cp1251"),
    (&enc::WINDOWS_1252_INIT, "ansi_x3.4-1968"),
    (&enc::WINDOWS_1252_INIT, "ascii"),
    (&enc::WINDOWS_1252_INIT, "cp1252"),
    (&enc::WINDOWS_1252_INIT, "cp819"),
    (&enc::WINDOWS_1252_INIT, "csisolatin1"),
    (&enc::WINDOWS_1252_INIT, "ibm819"),
    (&enc::WINDOWS_1252_INIT, "iso-8859-1"),
    (&enc::WINDOWS_1252_INIT, "iso-ir-100"),
    (&enc::WINDOWS_1252_INIT, "iso8859-1"),
    (&enc::WINDOWS_1252_INIT, "iso88591"),
    (&enc::WINDOWS_1252_INIT, "iso_8859-1"),
    (&enc::WINDOWS_1252_INIT, "iso_8859-1:1987"),
    (&enc::WINDOWS_1252_INIT, "l1"),
    (&enc::WINDOWS_1252_INIT, "latin1"),
    (&enc::WINDOWS_1252_INIT, "us-ascii"),
    (&enc::WINDOWS_1252_INIT, "windows-1252"),
    (&enc::WINDOWS_1252_INIT, "x-cp1252"),
    (&enc::WINDOWS_1253_INIT, "cp1253"),
    (&enc::WINDOWS_1253_INIT, "windows-1253"),
    (&enc::WINDOWS_1253_INIT, "x-cp1253"),
    (&enc::WINDOWS_1254_INIT, "cp1254"),
    (&enc::WINDOWS_1254_INIT, "csisolatin5"),
    (&enc::WINDOWS_1254_INIT, "iso-8859-9"),
    (&enc::WINDOWS_1254_INIT, "iso-ir-148"),
    (&enc::WINDOWS_1254_INIT, "iso8859-9"),
    (&enc::WINDOWS_1254_INIT, "iso88599"),
    (&enc::WINDOWS_1254_INIT, "iso_8859-9"),
    (&enc::WINDOWS_1254_INIT, "iso_8859-9:1989"),
    (&enc::WINDOWS_1254_INIT, "l5"),
    (&enc::WINDOWS_1254_INIT, "latin5"),
    (&enc::WINDOWS_1254_INIT, "windows-1254"),
    (&enc::WINDOWS_1254_INIT, "x-cp1254"),
    (&enc::WINDOWS_1255_INIT, "cp1255"),
    (&enc::WINDOWS_1255_INIT, "windows-1255"),
    (&enc::WINDOWS_1255_INIT, "x-cp1255"),
    (&enc::WINDOWS_1256_INIT, "cp1256"),
    (&enc::WINDOWS_1256_INIT, "windows-1256"),
    (&enc::WINDOWS_1256_INIT, "x-cp1256"),
    (&enc::WINDOWS_1257_INIT, "cp1257"),
    (&enc::WINDOWS_1257_INIT, "windows-1257"),
    (&enc::WINDOWS_1257_INIT, "x-cp1257"),
    (&enc::WINDOWS_1258_INIT, "cp1258"),
    (&enc::WINDOWS_1258_INIT, "windows-1258"),
    (&enc::WINDOWS_1258_INIT, "x-cp1258"),
    (&enc::WINDOWS_874_INIT, "dos-874"),
    (&enc::WINDOWS_874_INIT, "iso-8859-11"),
    (&enc::WINDOWS_874_INIT, "iso8859-11"),
    (&enc::WINDOWS_874_INIT, "iso885911"),
    (&enc::WINDOWS_874_INIT, "tis-620"),
    (&enc::WINDOWS_874_INIT, "windows-874"),
    (&enc::X_MAC_CYRILLIC_INIT, "x-mac-cyrillic"),
    (&enc::X_MAC_CYRILLIC_INIT, "x-mac-ukrainian"),
    (&enc::X_USER_DEFINED_INIT, "x-user-defined"),
];