Skip to main content

anyxml_encoding/
lib.rs

1//! Provide a unified interface for encoders and decoders,
2//! and a default implementation for some encodings.
3//!
//! If a custom decoder is needed for the XML processor, a type implementing
//! the [`Decoder`] trait can be registered using the [`register_decoder`] function.
6//!
7//! By default, the encoding name provided to the [`register_encoder`] or [`register_decoder`]
8//! function is used to search for encoders and decoders.  \
9//! If it is necessary to assign multiple names to a single encoder or decoder,
10//! it is possible to set aliases for encoding names using [`register_encoding_alias`].
11//!
12//! The default encoding names and aliases are based on
13//! [IANA registrations](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
14
15mod ebcdic;
16mod euc;
17mod iso_8859;
18mod jisx;
19mod ksx;
20mod shift_jis;
21mod ucs4;
22mod us_ascii;
23mod utf16;
24mod utf8;
25
26use std::{
27    borrow::Cow,
28    collections::BTreeMap,
29    sync::{LazyLock, RwLock},
30};
31
32pub use ebcdic::*;
33pub use euc::*;
34pub use iso_8859::*;
35pub use shift_jis::*;
36pub use ucs4::*;
37pub use us_ascii::*;
38pub use utf8::*;
39pub use utf16::*;
40
/// Error reported by an [`Encoder`].
#[derive(Debug, Clone)]
pub enum EncodeError {
    /// The input buffer is empty.
    InputIsEmpty,
    /// The output buffer is too short.  \
    /// When this error is returned, it is guaranteed that the encoder is consuming
    /// the input buffer.
    OutputTooShort,
    /// The UTF-8 character `c` cannot be mapped to any code point of the target encoding.
    ///
    /// `read` and `write` are the numbers of bytes consumed from the input buffer and
    /// written to the output buffer respectively.
    Unmappable {
        /// Bytes read from the input, *including* the length of `c`.  \
        /// The correctly read length is therefore `read - c.len_utf8()`.
        read: usize,
        /// Bytes written to the output. The length of `c` is not included because
        /// the encoder cannot write an unmapped character.
        write: usize,
        /// The character that could not be mapped.
        c: char,
    },
    /// Any other error, described by `msg`.
    Other { msg: Cow<'static, str> },
}

impl std::fmt::Display for EncodeError {
    /// Writes the `Debug` representation of the error, using default formatting options.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}

impl std::error::Error for EncodeError {}
66
67pub trait Encoder {
68    fn name(&self) -> &'static str;
69    /// If no error occurs, return `Ok((read_bytes, write_bytes))`.
70    fn encode(
71        &mut self,
72        src: &str,
73        dst: &mut [u8],
74        finish: bool,
75    ) -> Result<(usize, usize), EncodeError>;
76}
77
/// Error reported by a [`Decoder`].
#[derive(Debug, Clone)]
pub enum DecodeError {
    /// The input buffer is empty.
    InputIsEmpty,
    /// The output buffer is too short.  \
    /// When this error is returned, it is guaranteed that the decoder is consuming
    /// the input buffer.
    OutputTooShort,
    /// A malformed byte sequence was found.
    ///
    /// The malformed sequence is `input[read-length-offset..read-offset]`.
    Malformed {
        /// Bytes consumed from the input buffer.
        read: usize,
        /// Bytes written to the output buffer.
        write: usize,
        /// Length of the malformed sequence, as used in the range above.
        length: usize,
        /// Distance from `read` back to the end of the malformed sequence,
        /// as used in the range above.
        offset: usize,
    },
    /// Any other error, described by `msg`.
    Other { msg: Cow<'static, str> },
}

impl std::fmt::Display for DecodeError {
    /// Writes the `Debug` representation of the error, using default formatting options.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}

impl std::error::Error for DecodeError {}
107
108pub trait Decoder {
109    fn name(&self) -> &'static str;
110    /// If no error occurs, return `Ok((read_bytes, write_bytes))`.
111    fn decode(
112        &mut self,
113        src: &[u8],
114        dst: &mut String,
115        finish: bool,
116    ) -> Result<(usize, usize), DecodeError>;
117}
118
119/// Supported encodings.
120///
121/// Encoding names are listed in lexical order.
122pub const DEFAULT_SUPPORTED_ENCODINGS: &[&str] = {
123    const NAMES: &[&str] = &[
124        EUCJP_NAME,
125        EUCKR_NAME,
126        IBM037,
127        IBM1026,
128        IBM273,
129        IBM274,
130        IBM275,
131        IBM277,
132        IBM278,
133        IBM280,
134        IBM284,
135        IBM285,
136        IBM290,
137        IBM297,
138        IBM420,
139        IBM423,
140        IBM424,
141        IBM437,
142        IBM500,
143        IBM850,
144        IBM851,
145        IBM852,
146        IBM855,
147        IBM857,
148        IBM860,
149        IBM861,
150        IBM862,
151        IBM863,
152        IBM864,
153        IBM865,
154        IBM868,
155        IBM869,
156        IBM870,
157        IBM871,
158        IBM880,
159        IBM891,
160        IBM903,
161        IBM904,
162        IBM905,
163        IBM918,
164        ISO_8859_10_NAME,
165        ISO_8859_13_NAME,
166        ISO_8859_14_NAME,
167        ISO_8859_15_NAME,
168        ISO_8859_16_NAME,
169        ISO_8859_1_NAME,
170        ISO_8859_2_NAME,
171        ISO_8859_3_NAME,
172        ISO_8859_4_NAME,
173        ISO_8859_5_NAME,
174        ISO_8859_6_NAME,
175        ISO_8859_7_NAME,
176        ISO_8859_8_NAME,
177        ISO_8859_9_NAME,
178        SHIFT_JIS_NAME,
179        ISO_8859_11_NAME,
180        US_ASCII_NAME,
181        UTF16_NAME,
182        UTF16BE_NAME,
183        UTF16LE_NAME,
184        UTF32_NAME,
185        UTF32BE_NAME,
186        UTF32LE_NAME,
187        UTF8_NAME,
188    ];
189    let len = NAMES.len();
190    let mut i = 0;
191    while i + 1 < len {
192        let x = NAMES[i].as_bytes();
193        let y = NAMES[i + 1].as_bytes();
194        let mut j = 0;
195        while j < x.len() {
196            assert!(x[j] <= y[j]);
197            if x[j] < y[j] {
198                break;
199            }
200            j += 1;
201            if j == x.len() {
202                break;
203            }
204            assert!(j < y.len());
205        }
206        i += 1;
207    }
208    NAMES
209};
210/// Manage aliases for encoding names.
211pub static ENCODING_ALIASES: LazyLock<RwLock<BTreeMap<Cow<'static, str>, &'static str>>> =
212    LazyLock::new(|| {
213        // To perform case-insensitive comparisons, capitalize all aliases.
214        RwLock::new(BTreeMap::from([
215            ("UTF8".into(), UTF8_NAME),
216            ("UTF16".into(), UTF16_NAME),
217            ("UTF16BE".into(), UTF16BE_NAME),
218            ("UTF16LE".into(), UTF16LE_NAME),
219            ("ISO-IR-100".into(), ISO_8859_1_NAME),
220            ("ISO_8859-1".into(), ISO_8859_1_NAME),
221            ("ISO-8859-1".into(), ISO_8859_1_NAME),
222            ("LATIN1".into(), ISO_8859_1_NAME),
223            ("L1".into(), ISO_8859_1_NAME),
224            ("IBM819".into(), ISO_8859_1_NAME),
225            ("CP819".into(), ISO_8859_1_NAME),
226            ("ISOLATIN1".into(), ISO_8859_1_NAME),
227            ("ISO-IR-101".into(), ISO_8859_2_NAME),
228            ("ISO_8859-2".into(), ISO_8859_2_NAME),
229            ("ISO-8859-2".into(), ISO_8859_2_NAME),
230            ("LATIN2".into(), ISO_8859_2_NAME),
231            ("L2".into(), ISO_8859_2_NAME),
232            ("ISOLATIN2".into(), ISO_8859_2_NAME),
233            ("ISO-IR-109".into(), ISO_8859_3_NAME),
234            ("ISO_8859-3".into(), ISO_8859_3_NAME),
235            ("ISO-8859-3".into(), ISO_8859_3_NAME),
236            ("LATIN3".into(), ISO_8859_3_NAME),
237            ("L3".into(), ISO_8859_3_NAME),
238            ("ISOLATIN3".into(), ISO_8859_3_NAME),
239            ("ISO-IR-110".into(), ISO_8859_4_NAME),
240            ("ISO_8859-4".into(), ISO_8859_4_NAME),
241            ("ISO-8859-4".into(), ISO_8859_4_NAME),
242            ("LATIN4".into(), ISO_8859_4_NAME),
243            ("L4".into(), ISO_8859_4_NAME),
244            ("ISOLATIN4".into(), ISO_8859_4_NAME),
245            ("ISO-IR-144".into(), ISO_8859_5_NAME),
246            ("ISO_8859-5".into(), ISO_8859_5_NAME),
247            ("ISO-8859-5".into(), ISO_8859_5_NAME),
248            ("CYRILLIC".into(), ISO_8859_5_NAME),
249            ("ISOLATINCYRILLIC".into(), ISO_8859_5_NAME),
250            ("ISO-IR-127".into(), ISO_8859_6_NAME),
251            ("ISO_8859-6".into(), ISO_8859_6_NAME),
252            ("ISO-8859-6".into(), ISO_8859_6_NAME),
253            ("ECMA-114".into(), ISO_8859_6_NAME),
254            ("ASMO-708".into(), ISO_8859_6_NAME),
255            ("ARABIC".into(), ISO_8859_6_NAME),
256            ("ISOLATINARABIC".into(), ISO_8859_6_NAME),
257            ("ISO-IR-126".into(), ISO_8859_7_NAME),
258            ("ISO_8859-7".into(), ISO_8859_7_NAME),
259            ("ISO-8859-7".into(), ISO_8859_7_NAME),
260            ("ELOT_928".into(), ISO_8859_7_NAME),
261            ("ECMA-118".into(), ISO_8859_7_NAME),
262            ("GREEK".into(), ISO_8859_7_NAME),
263            ("GREEK8".into(), ISO_8859_7_NAME),
264            ("ISOLATINGREEK".into(), ISO_8859_7_NAME),
265            ("ISO-IR-138".into(), ISO_8859_8_NAME),
266            ("ISO_8859-8".into(), ISO_8859_8_NAME),
267            ("ISO-8859-8".into(), ISO_8859_8_NAME),
268            ("HEBREW".into(), ISO_8859_8_NAME),
269            ("ISOLATINHEBREW".into(), ISO_8859_8_NAME),
270            ("ISO-IR-148".into(), ISO_8859_9_NAME),
271            ("ISO_8859-9".into(), ISO_8859_9_NAME),
272            ("ISO-8859-9".into(), ISO_8859_9_NAME),
273            ("LATIN5".into(), ISO_8859_9_NAME),
274            ("L5".into(), ISO_8859_9_NAME),
275            ("ISOLATIN5".into(), ISO_8859_9_NAME),
276            ("ISO-IR-157".into(), ISO_8859_10_NAME),
277            ("L6".into(), ISO_8859_10_NAME),
278            ("ISO_8859-10:1992".into(), ISO_8859_10_NAME),
279            ("ISOLATIN6".into(), ISO_8859_10_NAME),
280            ("LATIN6".into(), ISO_8859_10_NAME),
281            ("TIS620".into(), ISO_8859_11_NAME),
282            ("ISO-8859-11".into(), ISO_8859_11_NAME),
283            ("ISO885913".into(), ISO_8859_13_NAME),
284            ("ISO-IR-199".into(), ISO_8859_14_NAME),
285            ("ISO_8859-14:1998".into(), ISO_8859_14_NAME),
286            ("ISO_8859-14".into(), ISO_8859_14_NAME),
287            ("LATIN8".into(), ISO_8859_14_NAME),
288            ("ISO-CELTIC".into(), ISO_8859_14_NAME),
289            ("L8".into(), ISO_8859_14_NAME),
290            ("ISO885914".into(), ISO_8859_14_NAME),
291            ("ISO_8859-15".into(), ISO_8859_15_NAME),
292            ("LATIN-9".into(), ISO_8859_15_NAME),
293            ("ISO885915".into(), ISO_8859_15_NAME),
294            ("ISO-IR-226".into(), ISO_8859_16_NAME),
295            ("ISO_8859-16:2001".into(), ISO_8859_16_NAME),
296            ("ISO_8859-16".into(), ISO_8859_16_NAME),
297            ("LATIN10".into(), ISO_8859_16_NAME),
298            ("L10".into(), ISO_8859_16_NAME),
299            ("ISO885916".into(), ISO_8859_16_NAME),
300            ("UTF32".into(), UTF32_NAME),
301            ("UTF32BE".into(), UTF32BE_NAME),
302            ("UTF32LE".into(), UTF32LE_NAME),
303            ("MS_KANJI".into(), SHIFT_JIS_NAME),
304            ("SHIFTJIS".into(), SHIFT_JIS_NAME),
305            ("ISO-IR-6".into(), US_ASCII_NAME),
306            ("ANSI_X3.4-1968".into(), US_ASCII_NAME),
307            ("ANSI_X3.4-1986".into(), US_ASCII_NAME),
308            ("ISO_646.IRV:1991".into(), US_ASCII_NAME),
309            ("ISO646-US".into(), US_ASCII_NAME),
310            ("US-ASCII".into(), US_ASCII_NAME),
311            ("US".into(), US_ASCII_NAME),
312            ("IBM367".into(), US_ASCII_NAME),
313            ("CP367".into(), US_ASCII_NAME),
314            ("ASCII".into(), US_ASCII_NAME),
315            ("CP037".into(), IBM037),
316            ("EBCDIC-CP-US".into(), IBM037),
317            ("EBCDIC-CP-CA".into(), IBM037),
318            ("EBCDIC-CP-WT".into(), IBM037),
319            ("EBCDIC-CP-NL".into(), IBM037),
320            ("CP273".into(), IBM273),
321            ("EBCDIC-BE".into(), IBM274),
322            ("CP274".into(), IBM274),
323            ("EBCDIC-BR".into(), IBM275),
324            ("CP275".into(), IBM275),
325            ("EBCDIC-CP-DK".into(), IBM277),
326            ("EBCDIC-CP-NO".into(), IBM277),
327            ("CP278".into(), IBM278),
328            ("EBCDIC-CP-FI".into(), IBM278),
329            ("EBCDIC-CP-SE".into(), IBM278),
330            ("CP280".into(), IBM280),
331            ("EBCDIC-CP-IT".into(), IBM280),
332            ("CP284".into(), IBM284),
333            ("EBCDIC-CP-ES".into(), IBM284),
334            ("CP285".into(), IBM285),
335            ("EBCDIC-CP-GB".into(), IBM285),
336            ("CP290".into(), IBM290),
337            ("EBCDIC-JP-KANA".into(), IBM290),
338            ("CP297".into(), IBM297),
339            ("EBCDIC-CP-FR".into(), IBM297),
340            ("CP420".into(), IBM420),
341            ("EBCDIC-CP-AR1".into(), IBM420),
342            ("CP423".into(), IBM423),
343            ("EBCDIC-CP-GR".into(), IBM423),
344            ("CP424".into(), IBM424),
345            ("EBCDIC-CP-HE".into(), IBM424),
346            ("CP437".into(), IBM437),
347            ("437".into(), IBM437),
348            ("PC8CODEPAGE437".into(), IBM437),
349            ("CP500".into(), IBM500),
350            ("EBCDIC-CP-BE".into(), IBM500),
351            ("EBCDIC-CP-CH".into(), IBM500),
352            ("CP851".into(), IBM851),
353            ("851".into(), IBM851),
354            ("CP852".into(), IBM852),
355            ("852".into(), IBM852),
356            ("PCP852".into(), IBM852),
357            ("CP855".into(), IBM855),
358            ("855".into(), IBM855),
359            ("CP857".into(), IBM857),
360            ("857".into(), IBM857),
361            ("CP860".into(), IBM860),
362            ("860".into(), IBM860),
363            ("CP861".into(), IBM861),
364            ("861".into(), IBM861),
365            ("CP-IS".into(), IBM861),
366            ("CP863".into(), IBM863),
367            ("863".into(), IBM863),
368            ("CP864".into(), IBM864),
369            ("CP865".into(), IBM865),
370            ("865".into(), IBM865),
371            ("CP868".into(), IBM868),
372            ("CP-AR".into(), IBM868),
373            ("CP869".into(), IBM869),
374            ("869".into(), IBM869),
375            ("CP-GR".into(), IBM869),
376            ("CP870".into(), IBM870),
377            ("EBCDIC-CP-ROECE".into(), IBM870),
378            ("EBCDIC-CP-YU".into(), IBM870),
379            ("CP871".into(), IBM871),
380            ("EBCDIC-CP-IS".into(), IBM871),
381            ("CP880".into(), IBM880),
382            ("EBCDIC-CYRILLIC".into(), IBM880),
383            ("CP891".into(), IBM891),
384            ("CP903".into(), IBM903),
385            ("CP904".into(), IBM904),
386            ("904".into(), IBM904),
387            // is this correct ????
388            // But since it really says "IBBM", I'll just list it for now...
389            ("IBBM904".into(), IBM904),
390            ("CP905".into(), IBM905),
391            ("EBCDIC-CP-TR".into(), IBM905),
392            ("CP918".into(), IBM918),
393            ("EBCDIC-CP-AR2".into(), IBM918),
394            ("CP1026".into(), IBM1026),
395            ("EUCPKDFMTJAPANESE".into(), EUCJP_NAME),
396            ("EUCKR".into(), EUCKR_NAME),
397        ]))
398    });
399/// Register `alias` as an alias for the encoding name `real`.  \
400/// If `alias` is already an alias for another encoding name, overwrite it and return
401/// the encoding name before the overwrite.
402///
403/// It is assumed that real names and aliases will be linked based on the IANA list,
404/// but this is not required.  \
405/// However, since aliases do not redirect multiple times, `real` must be the name registered
406/// with the encoder/decoder.
407///
408/// If an encoding name becomes both a real name and an alias, searches may not work properly.
409///
410/// Reference: [Charcter sets registered by IANA](https://www.iana.org/assignments/character-sets/character-sets.xhtml)
411pub fn register_encoding_alias(alias: &'static str, real: &'static str) -> Option<&'static str> {
412    let mut table = ENCODING_ALIASES.write().unwrap();
413    if alias.chars().all(|c| c.is_ascii_uppercase()) {
414        table.insert(alias.into(), real)
415    } else {
416        table.insert(alias.to_ascii_uppercase().into(), real)
417    }
418}
419/// Unregister `alias` if it is registerd as an alias for an encoding name.  \
420/// If successfully removed, return the real name.
421pub fn unregister_encoding_alias(alias: &'static str) -> Option<&'static str> {
422    ENCODING_ALIASES
423        .write()
424        .unwrap()
425        .remove(alias.to_ascii_uppercase().as_str())
426}
427/// Retrieve the encoding name from `alias`, which is an alias for a certain encoding name.  \
428/// If retrieval fails, returns [`None`].
429///
430/// Alias comparisons are case-insensitive.
431pub fn resolve_encoding_alias(alias: &str) -> Option<&'static str> {
432    let aliases = ENCODING_ALIASES.read().unwrap();
433    aliases
434        .get(alias)
435        .or_else(|| aliases.get(alias.to_ascii_uppercase().as_str()))
436        .copied()
437}
438
439pub type EncoderFactory = fn() -> Box<dyn Encoder>;
440pub static ENCODER_TABLE: LazyLock<RwLock<BTreeMap<&'static str, EncoderFactory>>> =
441    LazyLock::new(|| {
442        let mut map = BTreeMap::<&'static str, EncoderFactory>::new();
443        map.insert(UTF8_NAME, || Box::new(UTF8Encoder));
444        map.insert(UTF16_NAME, || Box::new(UTF16Encoder::default()));
445        map.insert(UTF16BE_NAME, || Box::new(UTF16BEEncoder));
446        map.insert(UTF16LE_NAME, || Box::new(UTF16LEEncoder));
447        map.insert(ISO_8859_1_NAME, || Box::new(ISO8859_1Encoder));
448        map.insert(ISO_8859_2_NAME, || Box::new(ISO8859_2Encoder));
449        map.insert(ISO_8859_3_NAME, || Box::new(ISO8859_3Encoder));
450        map.insert(ISO_8859_4_NAME, || Box::new(ISO8859_4Encoder));
451        map.insert(ISO_8859_5_NAME, || Box::new(ISO8859_5Encoder));
452        map.insert(ISO_8859_6_NAME, || Box::new(ISO8859_6Encoder));
453        map.insert(ISO_8859_7_NAME, || Box::new(ISO8859_7Encoder));
454        map.insert(ISO_8859_8_NAME, || Box::new(ISO8859_8Encoder));
455        map.insert(ISO_8859_9_NAME, || Box::new(ISO8859_9Encoder));
456        map.insert(ISO_8859_10_NAME, || Box::new(ISO8859_10Encoder));
457        map.insert(ISO_8859_11_NAME, || Box::new(ISO8859_11Encoder));
458        map.insert(ISO_8859_13_NAME, || Box::new(ISO8859_13Encoder));
459        map.insert(ISO_8859_14_NAME, || Box::new(ISO8859_14Encoder));
460        map.insert(ISO_8859_15_NAME, || Box::new(ISO8859_15Encoder));
461        map.insert(ISO_8859_16_NAME, || Box::new(ISO8859_16Encoder));
462        map.insert(UTF32_NAME, || Box::new(UTF32Encoder::default()));
463        map.insert(UTF32BE_NAME, || Box::new(UTF32BEEncoder));
464        map.insert(UTF32LE_NAME, || Box::new(UTF32LEEncoder));
465        map.insert(SHIFT_JIS_NAME, || Box::new(ShiftJISEncoder));
466        map.insert(US_ASCII_NAME, || Box::new(USASCIIEncoder));
467        map.insert(IBM037, || Box::new(IBM037Encoder));
468        map.insert(IBM273, || Box::new(IBM273Encoder));
469        map.insert(IBM274, || Box::new(IBM274Encoder));
470        map.insert(IBM275, || Box::new(IBM275Encoder));
471        map.insert(IBM277, || Box::new(IBM277Encoder));
472        map.insert(IBM278, || Box::new(IBM278Encoder));
473        map.insert(IBM280, || Box::new(IBM280Encoder));
474        map.insert(IBM284, || Box::new(IBM284Encoder));
475        map.insert(IBM285, || Box::new(IBM285Encoder));
476        map.insert(IBM290, || Box::new(IBM290Encoder));
477        map.insert(IBM297, || Box::new(IBM297Encoder));
478        map.insert(IBM420, || Box::new(IBM420Encoder));
479        map.insert(IBM423, || Box::new(IBM423Encoder));
480        map.insert(IBM424, || Box::new(IBM424Encoder));
481        map.insert(IBM437, || Box::new(IBM437Encoder));
482        map.insert(IBM500, || Box::new(IBM500Encoder));
483        map.insert(IBM850, || Box::new(IBM850Encoder));
484        map.insert(IBM851, || Box::new(IBM851Encoder));
485        map.insert(IBM852, || Box::new(IBM852Encoder));
486        map.insert(IBM855, || Box::new(IBM855Encoder));
487        map.insert(IBM857, || Box::new(IBM857Encoder));
488        map.insert(IBM860, || Box::new(IBM860Encoder));
489        map.insert(IBM861, || Box::new(IBM861Encoder));
490        map.insert(IBM862, || Box::new(IBM862Encoder));
491        map.insert(IBM863, || Box::new(IBM863Encoder));
492        map.insert(IBM864, || Box::new(IBM864Encoder));
493        map.insert(IBM865, || Box::new(IBM865Encoder));
494        map.insert(IBM868, || Box::new(IBM868Encoder));
495        map.insert(IBM869, || Box::new(IBM869Encoder));
496        map.insert(IBM870, || Box::new(IBM870Encoder));
497        map.insert(IBM871, || Box::new(IBM871Encoder));
498        map.insert(IBM880, || Box::new(IBM880Encoder));
499        map.insert(IBM891, || Box::new(IBM891Encoder));
500        map.insert(IBM903, || Box::new(IBM903Encoder));
501        map.insert(IBM904, || Box::new(IBM904Encoder));
502        map.insert(IBM905, || Box::new(IBM905Encoder));
503        map.insert(IBM918, || Box::new(IBM918Encoder));
504        map.insert(IBM1026, || Box::new(IBM1026Encoder));
505        map.insert(EUCJP_NAME, eucjp_encoder_factory);
506        map.insert(EUCKR_NAME, euckr_encoder_factory);
507        RwLock::new(map)
508    });
509pub fn find_encoder(encoding_name: &str) -> Option<Box<dyn Encoder>> {
510    let table = ENCODER_TABLE.read().unwrap();
511    if let Some(factory) = table.get(encoding_name) {
512        return Some(factory());
513    }
514    if let Some(factory) = table.get(encoding_name.to_ascii_uppercase().as_str()) {
515        return Some(factory());
516    }
517
518    let alias = resolve_encoding_alias(encoding_name)?;
519    table.get(alias).map(|f| f())
520}
521pub fn register_encoder(
522    encoding_name: &'static str,
523    factory: EncoderFactory,
524) -> Option<EncoderFactory> {
525    ENCODER_TABLE
526        .write()
527        .unwrap()
528        .insert(encoding_name, factory)
529}
530pub fn unregister_encoder(encoding_name: &str) -> Option<EncoderFactory> {
531    ENCODER_TABLE.write().unwrap().remove(encoding_name)
532}
533
534pub type DecoderFactory = fn() -> Box<dyn Decoder>;
535pub static DECODER_TABLE: LazyLock<RwLock<BTreeMap<&'static str, DecoderFactory>>> =
536    LazyLock::new(|| {
537        let mut map = BTreeMap::<&'static str, DecoderFactory>::new();
538        map.insert(UTF8_NAME, || Box::new(UTF8Decoder));
539        map.insert(UTF16_NAME, || Box::new(UTF16Decoder::default()));
540        map.insert(UTF16BE_NAME, || Box::new(UTF16BEDecoder));
541        map.insert(UTF16LE_NAME, || Box::new(UTF16LEDecoder));
542        map.insert(ISO_8859_1_NAME, || Box::new(ISO8859_1Decoder));
543        map.insert(ISO_8859_2_NAME, || Box::new(ISO8859_2Decoder));
544        map.insert(ISO_8859_3_NAME, || Box::new(ISO8859_3Decoder));
545        map.insert(ISO_8859_4_NAME, || Box::new(ISO8859_4Decoder));
546        map.insert(ISO_8859_5_NAME, || Box::new(ISO8859_5Decoder));
547        map.insert(ISO_8859_6_NAME, || Box::new(ISO8859_6Decoder));
548        map.insert(ISO_8859_7_NAME, || Box::new(ISO8859_7Decoder));
549        map.insert(ISO_8859_8_NAME, || Box::new(ISO8859_8Decoder));
550        map.insert(ISO_8859_9_NAME, || Box::new(ISO8859_9Decoder));
551        map.insert(ISO_8859_10_NAME, || Box::new(ISO8859_10Decoder));
552        map.insert(ISO_8859_11_NAME, || Box::new(ISO8859_11Decoder));
553        map.insert(ISO_8859_13_NAME, || Box::new(ISO8859_13Decoder));
554        map.insert(ISO_8859_14_NAME, || Box::new(ISO8859_14Decoder));
555        map.insert(ISO_8859_15_NAME, || Box::new(ISO8859_15Decoder));
556        map.insert(ISO_8859_16_NAME, || Box::new(ISO8859_16Decoder));
557        map.insert(UTF32_NAME, || Box::new(UTF32Decoder::default()));
558        map.insert(UTF32BE_NAME, || Box::new(UTF32BEDecoder));
559        map.insert(UTF32LE_NAME, || Box::new(UTF32LEDecoder));
560        map.insert(SHIFT_JIS_NAME, || Box::new(ShiftJISDecoder));
561        map.insert(US_ASCII_NAME, || Box::new(USASCIIDecoder));
562        map.insert(IBM037, || Box::new(IBM037Decoder));
563        map.insert(IBM273, || Box::new(IBM273Decoder));
564        map.insert(IBM274, || Box::new(IBM274Decoder));
565        map.insert(IBM275, || Box::new(IBM275Decoder));
566        map.insert(IBM277, || Box::new(IBM277Decoder));
567        map.insert(IBM278, || Box::new(IBM278Decoder));
568        map.insert(IBM280, || Box::new(IBM280Decoder));
569        map.insert(IBM284, || Box::new(IBM284Decoder));
570        map.insert(IBM285, || Box::new(IBM285Decoder));
571        map.insert(IBM290, || Box::new(IBM290Decoder));
572        map.insert(IBM297, || Box::new(IBM297Decoder));
573        map.insert(IBM420, || Box::new(IBM420Decoder));
574        map.insert(IBM423, || Box::new(IBM423Decoder));
575        map.insert(IBM424, || Box::new(IBM424Decoder));
576        map.insert(IBM437, || Box::new(IBM437Decoder));
577        map.insert(IBM500, || Box::new(IBM500Decoder));
578        map.insert(IBM850, || Box::new(IBM850Decoder));
579        map.insert(IBM851, || Box::new(IBM851Decoder));
580        map.insert(IBM852, || Box::new(IBM852Decoder));
581        map.insert(IBM855, || Box::new(IBM855Decoder));
582        map.insert(IBM857, || Box::new(IBM857Decoder));
583        map.insert(IBM860, || Box::new(IBM860Decoder));
584        map.insert(IBM861, || Box::new(IBM861Decoder));
585        map.insert(IBM862, || Box::new(IBM862Decoder));
586        map.insert(IBM863, || Box::new(IBM863Decoder));
587        map.insert(IBM864, || Box::new(IBM864Decoder));
588        map.insert(IBM865, || Box::new(IBM865Decoder));
589        map.insert(IBM868, || Box::new(IBM868Decoder));
590        map.insert(IBM869, || Box::new(IBM869Decoder));
591        map.insert(IBM870, || Box::new(IBM870Decoder));
592        map.insert(IBM871, || Box::new(IBM871Decoder));
593        map.insert(IBM880, || Box::new(IBM880Decoder));
594        map.insert(IBM891, || Box::new(IBM891Decoder));
595        map.insert(IBM903, || Box::new(IBM903Decoder));
596        map.insert(IBM904, || Box::new(IBM904Decoder));
597        map.insert(IBM905, || Box::new(IBM905Decoder));
598        map.insert(IBM918, || Box::new(IBM918Decoder));
599        map.insert(IBM1026, || Box::new(IBM1026Decoder));
600        map.insert(EUCJP_NAME, eucjp_decoder_factory);
601        map.insert(EUCKR_NAME, euckr_decoder_factory);
602        RwLock::new(map)
603    });
604pub fn find_decoder(encoding_name: &str) -> Option<Box<dyn Decoder>> {
605    let table = DECODER_TABLE.read().unwrap();
606    if let Some(factory) = table.get(encoding_name) {
607        return Some(factory());
608    }
609    if let Some(factory) = table.get(encoding_name.to_ascii_uppercase().as_str()) {
610        return Some(factory());
611    }
612
613    let alias = resolve_encoding_alias(encoding_name)?;
614    table.get(alias).map(|f| f())
615}
616pub fn register_decoder(
617    encoding_name: &'static str,
618    factory: DecoderFactory,
619) -> Option<DecoderFactory> {
620    DECODER_TABLE
621        .write()
622        .unwrap()
623        .insert(encoding_name, factory)
624}
625pub fn unregister_decoder(encoding_name: &str) -> Option<DecoderFactory> {
626    DECODER_TABLE.write().unwrap().remove(encoding_name)
627}