Skip to main content

anyxml_encoding/
lib.rs

//! Provides a unified interface for encoders and decoders,
//! and default implementations for some encodings.
//!
//! If a custom decoder is needed for the XML processor, a type implementing
//! the [`Decoder`] trait can be registered using the [`register_decoder`] function.
//!
//! By default, the encoding name provided to the [`register_encoder`] or [`register_decoder`]
//! function is used to search for encoders and decoders.  \
//! If it is necessary to assign multiple names to a single encoder or decoder,
//! it is possible to set aliases for encoding names using [`register_encoding_alias`].
//!
//! The default encoding names and aliases are based on
//! [IANA registrations](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
14
15mod ebcdic;
16mod euc;
17mod iso_8859;
18mod jisx;
19mod ksx;
20mod shift_jis;
21mod ucs4;
22mod us_ascii;
23mod utf16;
24mod utf8;
25
26use std::{
27    borrow::Cow,
28    collections::BTreeMap,
29    sync::{LazyLock, RwLock},
30};
31
32pub use ebcdic::*;
33pub use euc::*;
34pub use iso_8859::*;
35pub use shift_jis::*;
36pub use ucs4::*;
37pub use us_ascii::*;
38pub use utf8::*;
39pub use utf16::*;
40
/// Errors reported by [`Encoder::encode`].
#[derive(Debug, Clone)]
pub enum EncodeError {
    /// The input buffer is empty.
    InputIsEmpty,
    /// The output buffer is too short.
    ///
    /// When this error is returned, the encoder is guaranteed to be consuming the input buffer.
    OutputTooShort,
    /// The UTF-8 character `c` cannot be mapped to any code point of the target encoding.
    ///
    /// `read` and `write` are the number of bytes consumed from the input buffer and
    /// written to the output buffer respectively.
    ///
    /// `read` includes the length of `c`, so the length that was read correctly is
    /// `read - c.len_utf8()`.
    ///
    /// `write` does not include anything for `c`, because the encoder cannot write
    /// an unmapped character.
    Unmappable { read: usize, write: usize, c: char },
    /// Any other error, described by `msg`.
    Other { msg: Cow<'static, str> },
}

impl std::fmt::Display for EncodeError {
    /// Writes the plain `Debug` representation of the error.
    ///
    /// Formatter options given to `Display` (width, `#`, ...) are intentionally not
    /// forwarded to the `Debug` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}

impl std::error::Error for EncodeError {}
65
66pub trait Encoder {
67    fn name(&self) -> &'static str;
68    /// If no error occurs, return `Ok((read_bytes, write_bytes))`.
69    fn encode(
70        &mut self,
71        src: &str,
72        dst: &mut [u8],
73        finish: bool,
74    ) -> Result<(usize, usize), EncodeError>;
75}
76
/// Errors reported by [`Decoder::decode`].
#[derive(Debug, Clone)]
pub enum DecodeError {
    /// The input buffer is empty.
    InputIsEmpty,
    /// The output buffer is too short.
    ///
    /// When this error is returned, the decoder is guaranteed to be consuming the input buffer.
    OutputTooShort,
    /// A malformed byte sequence was found.
    ///
    /// `read` and `write` are the number of bytes consumed from the input buffer and
    /// written to the output buffer respectively.
    ///
    /// The malformed sequence is located at `input[read-length-offset..read-offset]`.
    Malformed {
        read: usize,
        write: usize,
        length: usize,
        offset: usize,
    },
    /// Any other error, described by `msg`.
    Other { msg: Cow<'static, str> },
}

impl std::fmt::Display for DecodeError {
    /// Writes the plain `Debug` representation of the error.
    ///
    /// Formatter options given to `Display` (width, `#`, ...) are intentionally not
    /// forwarded to the `Debug` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self))
    }
}

impl std::error::Error for DecodeError {}
105
106pub trait Decoder {
107    fn name(&self) -> &'static str;
108    /// If no error occurs, return `Ok((read_bytes, write_bytes))`.
109    fn decode(
110        &mut self,
111        src: &[u8],
112        dst: &mut String,
113        finish: bool,
114    ) -> Result<(usize, usize), DecodeError>;
115}
116
117/// Supported encodings.  
118///
119/// Encoding names are listed in lexical order.
120pub const DEFAULT_SUPPORTED_ENCODINGS: &[&str] = {
121    const NAMES: &[&str] = &[
122        EUCJP_NAME,
123        EUCKR_NAME,
124        IBM037,
125        IBM1026,
126        IBM273,
127        IBM274,
128        IBM275,
129        IBM277,
130        IBM278,
131        IBM280,
132        IBM284,
133        IBM285,
134        IBM290,
135        IBM297,
136        IBM420,
137        IBM423,
138        IBM424,
139        IBM437,
140        IBM500,
141        IBM850,
142        IBM851,
143        IBM852,
144        IBM855,
145        IBM857,
146        IBM860,
147        IBM861,
148        IBM862,
149        IBM863,
150        IBM864,
151        IBM865,
152        IBM868,
153        IBM869,
154        IBM870,
155        IBM871,
156        IBM880,
157        IBM891,
158        IBM903,
159        IBM904,
160        IBM905,
161        IBM918,
162        ISO_8859_10_NAME,
163        ISO_8859_13_NAME,
164        ISO_8859_14_NAME,
165        ISO_8859_15_NAME,
166        ISO_8859_16_NAME,
167        ISO_8859_1_NAME,
168        ISO_8859_2_NAME,
169        ISO_8859_3_NAME,
170        ISO_8859_4_NAME,
171        ISO_8859_5_NAME,
172        ISO_8859_6_NAME,
173        ISO_8859_7_NAME,
174        ISO_8859_8_NAME,
175        ISO_8859_9_NAME,
176        SHIFT_JIS_NAME,
177        ISO_8859_11_NAME,
178        US_ASCII_NAME,
179        UTF16_NAME,
180        UTF16BE_NAME,
181        UTF16LE_NAME,
182        UTF32_NAME,
183        UTF32BE_NAME,
184        UTF32LE_NAME,
185        UTF8_NAME,
186    ];
187    let len = NAMES.len();
188    let mut i = 0;
189    while i + 1 < len {
190        let x = NAMES[i].as_bytes();
191        let y = NAMES[i + 1].as_bytes();
192        let mut j = 0;
193        while j < x.len() {
194            assert!(x[j] <= y[j]);
195            if x[j] < y[j] {
196                break;
197            }
198            j += 1;
199            if j == x.len() {
200                break;
201            }
202            assert!(j < y.len());
203        }
204        i += 1;
205    }
206    NAMES
207};
208/// Manage aliases for encoding names.
209pub static ENCODING_ALIASES: LazyLock<RwLock<BTreeMap<Cow<'static, str>, &'static str>>> =
210    LazyLock::new(|| {
211        // To perform case-insensitive comparisons, capitalize all aliases.
212        RwLock::new(BTreeMap::from([
213            ("UTF8".into(), UTF8_NAME),
214            ("UTF16".into(), UTF16_NAME),
215            ("UTF16BE".into(), UTF16BE_NAME),
216            ("UTF16LE".into(), UTF16LE_NAME),
217            ("ISO-IR-100".into(), ISO_8859_1_NAME),
218            ("ISO_8859-1".into(), ISO_8859_1_NAME),
219            ("ISO-8859-1".into(), ISO_8859_1_NAME),
220            ("LATIN1".into(), ISO_8859_1_NAME),
221            ("L1".into(), ISO_8859_1_NAME),
222            ("IBM819".into(), ISO_8859_1_NAME),
223            ("CP819".into(), ISO_8859_1_NAME),
224            ("ISOLATIN1".into(), ISO_8859_1_NAME),
225            ("ISO-IR-101".into(), ISO_8859_2_NAME),
226            ("ISO_8859-2".into(), ISO_8859_2_NAME),
227            ("ISO-8859-2".into(), ISO_8859_2_NAME),
228            ("LATIN2".into(), ISO_8859_2_NAME),
229            ("L2".into(), ISO_8859_2_NAME),
230            ("ISOLATIN2".into(), ISO_8859_2_NAME),
231            ("ISO-IR-109".into(), ISO_8859_3_NAME),
232            ("ISO_8859-3".into(), ISO_8859_3_NAME),
233            ("ISO-8859-3".into(), ISO_8859_3_NAME),
234            ("LATIN3".into(), ISO_8859_3_NAME),
235            ("L3".into(), ISO_8859_3_NAME),
236            ("ISOLATIN3".into(), ISO_8859_3_NAME),
237            ("ISO-IR-110".into(), ISO_8859_4_NAME),
238            ("ISO_8859-4".into(), ISO_8859_4_NAME),
239            ("ISO-8859-4".into(), ISO_8859_4_NAME),
240            ("LATIN4".into(), ISO_8859_4_NAME),
241            ("L4".into(), ISO_8859_4_NAME),
242            ("ISOLATIN4".into(), ISO_8859_4_NAME),
243            ("ISO-IR-144".into(), ISO_8859_5_NAME),
244            ("ISO_8859-5".into(), ISO_8859_5_NAME),
245            ("ISO-8859-5".into(), ISO_8859_5_NAME),
246            ("CYRILLIC".into(), ISO_8859_5_NAME),
247            ("ISOLATINCYRILLIC".into(), ISO_8859_5_NAME),
248            ("ISO-IR-127".into(), ISO_8859_6_NAME),
249            ("ISO_8859-6".into(), ISO_8859_6_NAME),
250            ("ISO-8859-6".into(), ISO_8859_6_NAME),
251            ("ECMA-114".into(), ISO_8859_6_NAME),
252            ("ASMO-708".into(), ISO_8859_6_NAME),
253            ("ARABIC".into(), ISO_8859_6_NAME),
254            ("ISOLATINARABIC".into(), ISO_8859_6_NAME),
255            ("ISO-IR-126".into(), ISO_8859_7_NAME),
256            ("ISO_8859-7".into(), ISO_8859_7_NAME),
257            ("ISO-8859-7".into(), ISO_8859_7_NAME),
258            ("ELOT_928".into(), ISO_8859_7_NAME),
259            ("ECMA-118".into(), ISO_8859_7_NAME),
260            ("GREEK".into(), ISO_8859_7_NAME),
261            ("GREEK8".into(), ISO_8859_7_NAME),
262            ("ISOLATINGREEK".into(), ISO_8859_7_NAME),
263            ("ISO-IR-138".into(), ISO_8859_8_NAME),
264            ("ISO_8859-8".into(), ISO_8859_8_NAME),
265            ("ISO-8859-8".into(), ISO_8859_8_NAME),
266            ("HEBREW".into(), ISO_8859_8_NAME),
267            ("ISOLATINHEBREW".into(), ISO_8859_8_NAME),
268            ("ISO-IR-148".into(), ISO_8859_9_NAME),
269            ("ISO_8859-9".into(), ISO_8859_9_NAME),
270            ("ISO-8859-9".into(), ISO_8859_9_NAME),
271            ("LATIN5".into(), ISO_8859_9_NAME),
272            ("L5".into(), ISO_8859_9_NAME),
273            ("ISOLATIN5".into(), ISO_8859_9_NAME),
274            ("ISO-IR-157".into(), ISO_8859_10_NAME),
275            ("L6".into(), ISO_8859_10_NAME),
276            ("ISO_8859-10:1992".into(), ISO_8859_10_NAME),
277            ("ISOLATIN6".into(), ISO_8859_10_NAME),
278            ("LATIN6".into(), ISO_8859_10_NAME),
279            ("TIS620".into(), ISO_8859_11_NAME),
280            ("ISO-8859-11".into(), ISO_8859_11_NAME),
281            ("ISO885913".into(), ISO_8859_13_NAME),
282            ("ISO-IR-199".into(), ISO_8859_14_NAME),
283            ("ISO_8859-14:1998".into(), ISO_8859_14_NAME),
284            ("ISO_8859-14".into(), ISO_8859_14_NAME),
285            ("LATIN8".into(), ISO_8859_14_NAME),
286            ("ISO-CELTIC".into(), ISO_8859_14_NAME),
287            ("L8".into(), ISO_8859_14_NAME),
288            ("ISO885914".into(), ISO_8859_14_NAME),
289            ("ISO_8859-15".into(), ISO_8859_15_NAME),
290            ("LATIN-9".into(), ISO_8859_15_NAME),
291            ("ISO885915".into(), ISO_8859_15_NAME),
292            ("ISO-IR-226".into(), ISO_8859_16_NAME),
293            ("ISO_8859-16:2001".into(), ISO_8859_16_NAME),
294            ("ISO_8859-16".into(), ISO_8859_16_NAME),
295            ("LATIN10".into(), ISO_8859_16_NAME),
296            ("L10".into(), ISO_8859_16_NAME),
297            ("ISO885916".into(), ISO_8859_16_NAME),
298            ("UTF32".into(), UTF32_NAME),
299            ("UTF32BE".into(), UTF32BE_NAME),
300            ("UTF32LE".into(), UTF32LE_NAME),
301            ("MS_KANJI".into(), SHIFT_JIS_NAME),
302            ("SHIFTJIS".into(), SHIFT_JIS_NAME),
303            ("ISO-IR-6".into(), US_ASCII_NAME),
304            ("ANSI_X3.4-1968".into(), US_ASCII_NAME),
305            ("ANSI_X3.4-1986".into(), US_ASCII_NAME),
306            ("ISO_646.IRV:1991".into(), US_ASCII_NAME),
307            ("ISO646-US".into(), US_ASCII_NAME),
308            ("US-ASCII".into(), US_ASCII_NAME),
309            ("US".into(), US_ASCII_NAME),
310            ("IBM367".into(), US_ASCII_NAME),
311            ("CP367".into(), US_ASCII_NAME),
312            ("ASCII".into(), US_ASCII_NAME),
313            ("CP037".into(), IBM037),
314            ("EBCDIC-CP-US".into(), IBM037),
315            ("EBCDIC-CP-CA".into(), IBM037),
316            ("EBCDIC-CP-WT".into(), IBM037),
317            ("EBCDIC-CP-NL".into(), IBM037),
318            ("CP273".into(), IBM273),
319            ("EBCDIC-BE".into(), IBM274),
320            ("CP274".into(), IBM274),
321            ("EBCDIC-BR".into(), IBM275),
322            ("CP275".into(), IBM275),
323            ("EBCDIC-CP-DK".into(), IBM277),
324            ("EBCDIC-CP-NO".into(), IBM277),
325            ("CP278".into(), IBM278),
326            ("EBCDIC-CP-FI".into(), IBM278),
327            ("EBCDIC-CP-SE".into(), IBM278),
328            ("CP280".into(), IBM280),
329            ("EBCDIC-CP-IT".into(), IBM280),
330            ("CP284".into(), IBM284),
331            ("EBCDIC-CP-ES".into(), IBM284),
332            ("CP285".into(), IBM285),
333            ("EBCDIC-CP-GB".into(), IBM285),
334            ("CP290".into(), IBM290),
335            ("EBCDIC-JP-KANA".into(), IBM290),
336            ("CP297".into(), IBM297),
337            ("EBCDIC-CP-FR".into(), IBM297),
338            ("CP420".into(), IBM420),
339            ("EBCDIC-CP-AR1".into(), IBM420),
340            ("CP423".into(), IBM423),
341            ("EBCDIC-CP-GR".into(), IBM423),
342            ("CP424".into(), IBM424),
343            ("EBCDIC-CP-HE".into(), IBM424),
344            ("CP437".into(), IBM437),
345            ("437".into(), IBM437),
346            ("PC8CODEPAGE437".into(), IBM437),
347            ("CP500".into(), IBM500),
348            ("EBCDIC-CP-BE".into(), IBM500),
349            ("EBCDIC-CP-CH".into(), IBM500),
350            ("CP851".into(), IBM851),
351            ("851".into(), IBM851),
352            ("CP852".into(), IBM852),
353            ("852".into(), IBM852),
354            ("PCP852".into(), IBM852),
355            ("CP855".into(), IBM855),
356            ("855".into(), IBM855),
357            ("CP857".into(), IBM857),
358            ("857".into(), IBM857),
359            ("CP860".into(), IBM860),
360            ("860".into(), IBM860),
361            ("CP861".into(), IBM861),
362            ("861".into(), IBM861),
363            ("CP-IS".into(), IBM861),
364            ("CP863".into(), IBM863),
365            ("863".into(), IBM863),
366            ("CP864".into(), IBM864),
367            ("CP865".into(), IBM865),
368            ("865".into(), IBM865),
369            ("CP868".into(), IBM868),
370            ("CP-AR".into(), IBM868),
371            ("CP869".into(), IBM869),
372            ("869".into(), IBM869),
373            ("CP-GR".into(), IBM869),
374            ("CP870".into(), IBM870),
375            ("EBCDIC-CP-ROECE".into(), IBM870),
376            ("EBCDIC-CP-YU".into(), IBM870),
377            ("CP871".into(), IBM871),
378            ("EBCDIC-CP-IS".into(), IBM871),
379            ("CP880".into(), IBM880),
380            ("EBCDIC-CYRILLIC".into(), IBM880),
381            ("CP891".into(), IBM891),
382            ("CP903".into(), IBM903),
383            ("CP904".into(), IBM904),
384            ("904".into(), IBM904),
385            // is this correct ????
386            // But since it really says "IBBM", I'll just list it for now...
387            ("IBBM904".into(), IBM904),
388            ("CP905".into(), IBM905),
389            ("EBCDIC-CP-TR".into(), IBM905),
390            ("CP918".into(), IBM918),
391            ("EBCDIC-CP-AR2".into(), IBM918),
392            ("CP1026".into(), IBM1026),
393            ("EUCPKDFMTJAPANESE".into(), EUCJP_NAME),
394            ("EUCKR".into(), EUCKR_NAME),
395        ]))
396    });
397/// Register `alias` as an alias for the encoding name `real`.  \
398/// If `alias` is already an alias for another encoding name, overwrite it and return
399/// the encoding name before the overwrite.
400///
401/// It is assumed that real names and aliases will be linked based on the IANA list,
402/// but this is not required.  \
403/// However, since aliases do not redirect multiple times, `real` must be the name registered
404/// with the encoder/decoder.
405///
406/// If an encoding name becomes both a real name and an alias, searches may not work properly.
407///
408/// Reference: [Charcter sets registered by IANA](https://www.iana.org/assignments/character-sets/character-sets.xhtml)
409pub fn register_encoding_alias(alias: &'static str, real: &'static str) -> Option<&'static str> {
410    let mut table = ENCODING_ALIASES.write().unwrap();
411    if alias.chars().all(|c| c.is_ascii_uppercase()) {
412        table.insert(alias.into(), real)
413    } else {
414        table.insert(alias.to_ascii_uppercase().into(), real)
415    }
416}
417/// Unregister `alias` if it is registerd as an alias for an encoding name.  \
418/// If successfully removed, return the real name.
419pub fn unregister_encoding_alias(alias: &'static str) -> Option<&'static str> {
420    ENCODING_ALIASES
421        .write()
422        .unwrap()
423        .remove(alias.to_ascii_uppercase().as_str())
424}
425/// Retrieve the encoding name from `alias`, which is an alias for a certain encoding name.  \
426/// If retrieval fails, returns [`None`].
427///
428/// Alias comparisons are case-insensitive.
429pub fn resolve_encoding_alias(alias: &str) -> Option<&'static str> {
430    let aliases = ENCODING_ALIASES.read().unwrap();
431    aliases
432        .get(alias)
433        .or_else(|| aliases.get(alias.to_ascii_uppercase().as_str()))
434        .copied()
435}
436
437pub type EncoderFactory = fn() -> Box<dyn Encoder>;
438pub static ENCODER_TABLE: LazyLock<RwLock<BTreeMap<&'static str, EncoderFactory>>> =
439    LazyLock::new(|| {
440        let mut map = BTreeMap::<&'static str, EncoderFactory>::new();
441        map.insert(UTF8_NAME, || Box::new(UTF8Encoder));
442        map.insert(UTF16_NAME, || Box::new(UTF16Encoder::default()));
443        map.insert(UTF16BE_NAME, || Box::new(UTF16BEEncoder));
444        map.insert(UTF16LE_NAME, || Box::new(UTF16LEEncoder));
445        map.insert(ISO_8859_1_NAME, || Box::new(ISO8859_1Encoder));
446        map.insert(ISO_8859_2_NAME, || Box::new(ISO8859_2Encoder));
447        map.insert(ISO_8859_3_NAME, || Box::new(ISO8859_3Encoder));
448        map.insert(ISO_8859_4_NAME, || Box::new(ISO8859_4Encoder));
449        map.insert(ISO_8859_5_NAME, || Box::new(ISO8859_5Encoder));
450        map.insert(ISO_8859_6_NAME, || Box::new(ISO8859_6Encoder));
451        map.insert(ISO_8859_7_NAME, || Box::new(ISO8859_7Encoder));
452        map.insert(ISO_8859_8_NAME, || Box::new(ISO8859_8Encoder));
453        map.insert(ISO_8859_9_NAME, || Box::new(ISO8859_9Encoder));
454        map.insert(ISO_8859_10_NAME, || Box::new(ISO8859_10Encoder));
455        map.insert(ISO_8859_11_NAME, || Box::new(ISO8859_11Encoder));
456        map.insert(ISO_8859_13_NAME, || Box::new(ISO8859_13Encoder));
457        map.insert(ISO_8859_14_NAME, || Box::new(ISO8859_14Encoder));
458        map.insert(ISO_8859_15_NAME, || Box::new(ISO8859_15Encoder));
459        map.insert(ISO_8859_16_NAME, || Box::new(ISO8859_16Encoder));
460        map.insert(UTF32_NAME, || Box::new(UTF32Encoder::default()));
461        map.insert(UTF32BE_NAME, || Box::new(UTF32BEEncoder));
462        map.insert(UTF32LE_NAME, || Box::new(UTF32LEEncoder));
463        map.insert(SHIFT_JIS_NAME, || Box::new(ShiftJISEncoder));
464        map.insert(US_ASCII_NAME, || Box::new(USASCIIEncoder));
465        map.insert(IBM037, || Box::new(IBM037Encoder));
466        map.insert(IBM273, || Box::new(IBM273Encoder));
467        map.insert(IBM274, || Box::new(IBM274Encoder));
468        map.insert(IBM275, || Box::new(IBM275Encoder));
469        map.insert(IBM277, || Box::new(IBM277Encoder));
470        map.insert(IBM278, || Box::new(IBM278Encoder));
471        map.insert(IBM280, || Box::new(IBM280Encoder));
472        map.insert(IBM284, || Box::new(IBM284Encoder));
473        map.insert(IBM285, || Box::new(IBM285Encoder));
474        map.insert(IBM290, || Box::new(IBM290Encoder));
475        map.insert(IBM297, || Box::new(IBM297Encoder));
476        map.insert(IBM420, || Box::new(IBM420Encoder));
477        map.insert(IBM423, || Box::new(IBM423Encoder));
478        map.insert(IBM424, || Box::new(IBM424Encoder));
479        map.insert(IBM437, || Box::new(IBM437Encoder));
480        map.insert(IBM500, || Box::new(IBM500Encoder));
481        map.insert(IBM850, || Box::new(IBM850Encoder));
482        map.insert(IBM851, || Box::new(IBM851Encoder));
483        map.insert(IBM852, || Box::new(IBM852Encoder));
484        map.insert(IBM855, || Box::new(IBM855Encoder));
485        map.insert(IBM857, || Box::new(IBM857Encoder));
486        map.insert(IBM860, || Box::new(IBM860Encoder));
487        map.insert(IBM861, || Box::new(IBM861Encoder));
488        map.insert(IBM862, || Box::new(IBM862Encoder));
489        map.insert(IBM863, || Box::new(IBM863Encoder));
490        map.insert(IBM864, || Box::new(IBM864Encoder));
491        map.insert(IBM865, || Box::new(IBM865Encoder));
492        map.insert(IBM868, || Box::new(IBM868Encoder));
493        map.insert(IBM869, || Box::new(IBM869Encoder));
494        map.insert(IBM870, || Box::new(IBM870Encoder));
495        map.insert(IBM871, || Box::new(IBM871Encoder));
496        map.insert(IBM880, || Box::new(IBM880Encoder));
497        map.insert(IBM891, || Box::new(IBM891Encoder));
498        map.insert(IBM903, || Box::new(IBM903Encoder));
499        map.insert(IBM904, || Box::new(IBM904Encoder));
500        map.insert(IBM905, || Box::new(IBM905Encoder));
501        map.insert(IBM918, || Box::new(IBM918Encoder));
502        map.insert(IBM1026, || Box::new(IBM1026Encoder));
503        map.insert(EUCJP_NAME, eucjp_encoder_factory);
504        map.insert(EUCKR_NAME, euckr_encoder_factory);
505        RwLock::new(map)
506    });
507pub fn find_encoder(encoding_name: &str) -> Option<Box<dyn Encoder>> {
508    let table = ENCODER_TABLE.read().unwrap();
509    if let Some(factory) = table.get(encoding_name) {
510        return Some(factory());
511    }
512    if let Some(factory) = table.get(encoding_name.to_ascii_uppercase().as_str()) {
513        return Some(factory());
514    }
515
516    let alias = resolve_encoding_alias(encoding_name)?;
517    table.get(alias).map(|f| f())
518}
519pub fn register_encoder(
520    encoding_name: &'static str,
521    factory: EncoderFactory,
522) -> Option<EncoderFactory> {
523    ENCODER_TABLE
524        .write()
525        .unwrap()
526        .insert(encoding_name, factory)
527}
528pub fn unregister_encoder(encoding_name: &str) -> Option<EncoderFactory> {
529    ENCODER_TABLE.write().unwrap().remove(encoding_name)
530}
531
532pub type DecoderFactory = fn() -> Box<dyn Decoder>;
533pub static DECODER_TABLE: LazyLock<RwLock<BTreeMap<&'static str, DecoderFactory>>> =
534    LazyLock::new(|| {
535        let mut map = BTreeMap::<&'static str, DecoderFactory>::new();
536        map.insert(UTF8_NAME, || Box::new(UTF8Decoder));
537        map.insert(UTF16_NAME, || Box::new(UTF16Decoder::default()));
538        map.insert(UTF16BE_NAME, || Box::new(UTF16BEDecoder));
539        map.insert(UTF16LE_NAME, || Box::new(UTF16LEDecoder));
540        map.insert(ISO_8859_1_NAME, || Box::new(ISO8859_1Decoder));
541        map.insert(ISO_8859_2_NAME, || Box::new(ISO8859_2Decoder));
542        map.insert(ISO_8859_3_NAME, || Box::new(ISO8859_3Decoder));
543        map.insert(ISO_8859_4_NAME, || Box::new(ISO8859_4Decoder));
544        map.insert(ISO_8859_5_NAME, || Box::new(ISO8859_5Decoder));
545        map.insert(ISO_8859_6_NAME, || Box::new(ISO8859_6Decoder));
546        map.insert(ISO_8859_7_NAME, || Box::new(ISO8859_7Decoder));
547        map.insert(ISO_8859_8_NAME, || Box::new(ISO8859_8Decoder));
548        map.insert(ISO_8859_9_NAME, || Box::new(ISO8859_9Decoder));
549        map.insert(ISO_8859_10_NAME, || Box::new(ISO8859_10Decoder));
550        map.insert(ISO_8859_11_NAME, || Box::new(ISO8859_11Decoder));
551        map.insert(ISO_8859_13_NAME, || Box::new(ISO8859_13Decoder));
552        map.insert(ISO_8859_14_NAME, || Box::new(ISO8859_14Decoder));
553        map.insert(ISO_8859_15_NAME, || Box::new(ISO8859_15Decoder));
554        map.insert(ISO_8859_16_NAME, || Box::new(ISO8859_16Decoder));
555        map.insert(UTF32_NAME, || Box::new(UTF32Decoder::default()));
556        map.insert(UTF32BE_NAME, || Box::new(UTF32BEDecoder));
557        map.insert(UTF32LE_NAME, || Box::new(UTF32LEDecoder));
558        map.insert(SHIFT_JIS_NAME, || Box::new(ShiftJISDecoder));
559        map.insert(US_ASCII_NAME, || Box::new(USASCIIDecoder));
560        map.insert(IBM037, || Box::new(IBM037Decoder));
561        map.insert(IBM273, || Box::new(IBM273Decoder));
562        map.insert(IBM274, || Box::new(IBM274Decoder));
563        map.insert(IBM275, || Box::new(IBM275Decoder));
564        map.insert(IBM277, || Box::new(IBM277Decoder));
565        map.insert(IBM278, || Box::new(IBM278Decoder));
566        map.insert(IBM280, || Box::new(IBM280Decoder));
567        map.insert(IBM284, || Box::new(IBM284Decoder));
568        map.insert(IBM285, || Box::new(IBM285Decoder));
569        map.insert(IBM290, || Box::new(IBM290Decoder));
570        map.insert(IBM297, || Box::new(IBM297Decoder));
571        map.insert(IBM420, || Box::new(IBM420Decoder));
572        map.insert(IBM423, || Box::new(IBM423Decoder));
573        map.insert(IBM424, || Box::new(IBM424Decoder));
574        map.insert(IBM437, || Box::new(IBM437Decoder));
575        map.insert(IBM500, || Box::new(IBM500Decoder));
576        map.insert(IBM850, || Box::new(IBM850Decoder));
577        map.insert(IBM851, || Box::new(IBM851Decoder));
578        map.insert(IBM852, || Box::new(IBM852Decoder));
579        map.insert(IBM855, || Box::new(IBM855Decoder));
580        map.insert(IBM857, || Box::new(IBM857Decoder));
581        map.insert(IBM860, || Box::new(IBM860Decoder));
582        map.insert(IBM861, || Box::new(IBM861Decoder));
583        map.insert(IBM862, || Box::new(IBM862Decoder));
584        map.insert(IBM863, || Box::new(IBM863Decoder));
585        map.insert(IBM864, || Box::new(IBM864Decoder));
586        map.insert(IBM865, || Box::new(IBM865Decoder));
587        map.insert(IBM868, || Box::new(IBM868Decoder));
588        map.insert(IBM869, || Box::new(IBM869Decoder));
589        map.insert(IBM870, || Box::new(IBM870Decoder));
590        map.insert(IBM871, || Box::new(IBM871Decoder));
591        map.insert(IBM880, || Box::new(IBM880Decoder));
592        map.insert(IBM891, || Box::new(IBM891Decoder));
593        map.insert(IBM903, || Box::new(IBM903Decoder));
594        map.insert(IBM904, || Box::new(IBM904Decoder));
595        map.insert(IBM905, || Box::new(IBM905Decoder));
596        map.insert(IBM918, || Box::new(IBM918Decoder));
597        map.insert(IBM1026, || Box::new(IBM1026Decoder));
598        map.insert(EUCJP_NAME, eucjp_decoder_factory);
599        map.insert(EUCKR_NAME, euckr_decoder_factory);
600        RwLock::new(map)
601    });
602pub fn find_decoder(encoding_name: &str) -> Option<Box<dyn Decoder>> {
603    let table = DECODER_TABLE.read().unwrap();
604    if let Some(factory) = table.get(encoding_name) {
605        return Some(factory());
606    }
607    if let Some(factory) = table.get(encoding_name.to_ascii_uppercase().as_str()) {
608        return Some(factory());
609    }
610
611    let alias = resolve_encoding_alias(encoding_name)?;
612    table.get(alias).map(|f| f())
613}
614pub fn register_decoder(
615    encoding_name: &'static str,
616    factory: DecoderFactory,
617) -> Option<DecoderFactory> {
618    DECODER_TABLE
619        .write()
620        .unwrap()
621        .insert(encoding_name, factory)
622}
623pub fn unregister_decoder(encoding_name: &str) -> Option<DecoderFactory> {
624    DECODER_TABLE.write().unwrap().remove(encoding_name)
625}