Skip to main content

anyxml_encoding/
lib.rs

1//! Provide a unified interface for encoders and decoders,
2//! and a default implementation for some encodings.
3//!
//! If it is necessary to provide a custom decoder for the XML processor, a type implementing
//! the [`Decoder`] trait can be registered using the [`register_decoder`] function.
6//!
7//! By default, the encoding name provided to the [`register_encoder`] or [`register_decoder`]
8//! function is used to search for encoders and decoders.  \
9//! If it is necessary to assign multiple names to a single encoder or decoder,
10//! it is possible to set aliases for encoding names using [`register_encoding_alias`].
11//!
12//! The default encoding names and aliases are based on
13//! [IANA registrations](https://www.iana.org/assignments/character-sets/character-sets.xhtml).
14
15mod ebcdic;
16mod iso_8859;
17mod shift_jis;
18mod ucs4;
19mod us_ascii;
20mod utf16;
21mod utf8;
22
23use std::{
24    borrow::Cow,
25    collections::BTreeMap,
26    sync::{LazyLock, RwLock},
27};
28
29pub use ebcdic::*;
30pub use iso_8859::*;
31pub use shift_jis::*;
32pub use ucs4::*;
33pub use us_ascii::*;
34pub use utf8::*;
35pub use utf16::*;
36
/// Errors reported by an [`Encoder`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EncodeError {
    /// Input buffer is empty.
    InputIsEmpty,
    /// The length of the output buffer is too short.  
    /// If this error is returned, it is guaranteed that the encoder is consuming the input buffer.
    OutputTooShort,
    /// A UTF-8 character `c` cannot be mapped to any codepoint of the target encoding.
    ///
    /// The input and output buffer have consumed `read` and `write` bytes respectively.  
    /// `read` includes the length of `c`. Thus, the correctly read length is `read - c.len_utf8()`.  
    /// `write` does not include the length of `c` because the encoder cannot write unmapped
    /// characters.
    Unmappable { read: usize, write: usize, c: char },
    /// Other errors.
    Other { msg: Cow<'static, str> },
}

impl std::fmt::Display for EncodeError {
    /// Write a human-readable description of the error.
    ///
    /// The `Debug` representation remains available through `{:?}` for diagnostics.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InputIsEmpty => f.write_str("input buffer is empty"),
            Self::OutputTooShort => f.write_str("output buffer is too short"),
            Self::Unmappable { read, write, c } => write!(
                f,
                "character {c:?} cannot be mapped to the target encoding \
                 (read: {read}, write: {write})"
            ),
            Self::Other { msg } => f.write_str(msg),
        }
    }
}

impl std::error::Error for EncodeError {}
61
/// Unified interface for encoders, which take UTF-8 text (`&str`) as input and
/// write bytes into an output buffer.
pub trait Encoder {
    /// Return the name of this encoder.
    // NOTE(review): presumably the encoding name under which the encoder is registered — confirm.
    fn name(&self) -> &'static str;
    /// Encode `src` and write the result into `dst`.
    ///
    /// If no error occurs, return `Ok((read_bytes, write_bytes))`.
    // NOTE(review): `finish` presumably tells the encoder that `src` is the final chunk of
    // input — confirm against the implementations.
    fn encode(
        &mut self,
        src: &str,
        dst: &mut [u8],
        finish: bool,
    ) -> Result<(usize, usize), EncodeError>;
}
72
/// Errors reported by a [`Decoder`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
    /// Input buffer is empty.
    InputIsEmpty,
    /// The length of the output buffer is too short.  
    /// If this error is returned, it is guaranteed that the decoder is consuming the input buffer.
    OutputTooShort,
    /// A malformed byte sequence is found.
    ///
    /// The input and output buffer have consumed `read` and `write` bytes respectively.  
    /// The malformed sequence is `input[read - length - offset..read - offset]`.
    Malformed {
        read: usize,
        write: usize,
        length: usize,
        offset: usize,
    },
    /// Other errors.
    Other { msg: Cow<'static, str> },
}

impl std::fmt::Display for DecodeError {
    /// Write a human-readable description of the error.
    ///
    /// The `Debug` representation remains available through `{:?}` for diagnostics.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InputIsEmpty => f.write_str("input buffer is empty"),
            Self::OutputTooShort => f.write_str("output buffer is too short"),
            Self::Malformed {
                read,
                write,
                length,
                offset,
            } => write!(
                f,
                "malformed byte sequence \
                 (read: {read}, write: {write}, length: {length}, offset: {offset})"
            ),
            Self::Other { msg } => f.write_str(msg),
        }
    }
}

impl std::error::Error for DecodeError {}
101
/// Unified interface for decoders, which take bytes as input and append the
/// decoded text to a [`String`].
pub trait Decoder {
    /// Return the name of this decoder.
    // NOTE(review): presumably the encoding name under which the decoder is registered — confirm.
    fn name(&self) -> &'static str;
    /// Decode `src` and write the result into `dst`.
    ///
    /// If no error occurs, return `Ok((read_bytes, write_bytes))`.
    // NOTE(review): `finish` presumably tells the decoder that `src` is the final chunk of
    // input — confirm against the implementations.
    fn decode(
        &mut self,
        src: &[u8],
        dst: &mut String,
        finish: bool,
    ) -> Result<(usize, usize), DecodeError>;
}
112
113/// Supported encodings.  
114///
115/// Encoding names are listed in lexical order.
116pub const DEFAULT_SUPPORTED_ENCODINGS: &[&str] = {
117    const NAMES: &[&str] = &[
118        IBM037,
119        IBM1026,
120        IBM273,
121        IBM274,
122        IBM275,
123        IBM277,
124        IBM278,
125        IBM280,
126        IBM284,
127        IBM285,
128        IBM290,
129        IBM297,
130        IBM420,
131        IBM423,
132        IBM424,
133        IBM437,
134        IBM500,
135        IBM850,
136        IBM851,
137        IBM852,
138        IBM855,
139        IBM857,
140        IBM860,
141        IBM861,
142        IBM862,
143        IBM863,
144        IBM864,
145        IBM865,
146        IBM868,
147        IBM869,
148        IBM870,
149        IBM871,
150        IBM880,
151        IBM891,
152        IBM903,
153        IBM904,
154        IBM905,
155        IBM918,
156        ISO_8859_10_NAME,
157        ISO_8859_13_NAME,
158        ISO_8859_14_NAME,
159        ISO_8859_15_NAME,
160        ISO_8859_16_NAME,
161        ISO_8859_1_NAME,
162        ISO_8859_2_NAME,
163        ISO_8859_3_NAME,
164        ISO_8859_4_NAME,
165        ISO_8859_5_NAME,
166        ISO_8859_6_NAME,
167        ISO_8859_7_NAME,
168        ISO_8859_8_NAME,
169        ISO_8859_9_NAME,
170        SHIFT_JIS_NAME,
171        ISO_8859_11_NAME,
172        US_ASCII_NAME,
173        UTF16_NAME,
174        UTF16BE_NAME,
175        UTF16LE_NAME,
176        UTF32_NAME,
177        UTF32BE_NAME,
178        UTF32LE_NAME,
179        UTF8_NAME,
180    ];
181    let len = NAMES.len();
182    let mut i = 0;
183    while i + 1 < len {
184        let x = NAMES[i].as_bytes();
185        let y = NAMES[i + 1].as_bytes();
186        let mut j = 0;
187        while j < x.len() {
188            assert!(x[j] <= y[j]);
189            if x[j] < y[j] {
190                break;
191            }
192            j += 1;
193            if j == x.len() {
194                break;
195            }
196            assert!(j < y.len());
197        }
198        i += 1;
199    }
200    NAMES
201};
202/// Manage aliases for encoding names.
203pub static ENCODING_ALIASES: LazyLock<RwLock<BTreeMap<Cow<'static, str>, &'static str>>> =
204    LazyLock::new(|| {
205        // To perform case-insensitive comparisons, capitalize all aliases.
206        RwLock::new(BTreeMap::from([
207            ("UTF8".into(), UTF8_NAME),
208            ("UTF16".into(), UTF16_NAME),
209            ("UTF16BE".into(), UTF16BE_NAME),
210            ("UTF16LE".into(), UTF16LE_NAME),
211            ("ISO-IR-100".into(), ISO_8859_1_NAME),
212            ("ISO_8859-1".into(), ISO_8859_1_NAME),
213            ("ISO-8859-1".into(), ISO_8859_1_NAME),
214            ("LATIN1".into(), ISO_8859_1_NAME),
215            ("L1".into(), ISO_8859_1_NAME),
216            ("IBM819".into(), ISO_8859_1_NAME),
217            ("CP819".into(), ISO_8859_1_NAME),
218            ("ISOLATIN1".into(), ISO_8859_1_NAME),
219            ("ISO-IR-101".into(), ISO_8859_2_NAME),
220            ("ISO_8859-2".into(), ISO_8859_2_NAME),
221            ("ISO-8859-2".into(), ISO_8859_2_NAME),
222            ("LATIN2".into(), ISO_8859_2_NAME),
223            ("L2".into(), ISO_8859_2_NAME),
224            ("ISOLATIN2".into(), ISO_8859_2_NAME),
225            ("ISO-IR-109".into(), ISO_8859_3_NAME),
226            ("ISO_8859-3".into(), ISO_8859_3_NAME),
227            ("ISO-8859-3".into(), ISO_8859_3_NAME),
228            ("LATIN3".into(), ISO_8859_3_NAME),
229            ("L3".into(), ISO_8859_3_NAME),
230            ("ISOLATIN3".into(), ISO_8859_3_NAME),
231            ("ISO-IR-110".into(), ISO_8859_4_NAME),
232            ("ISO_8859-4".into(), ISO_8859_4_NAME),
233            ("ISO-8859-4".into(), ISO_8859_4_NAME),
234            ("LATIN4".into(), ISO_8859_4_NAME),
235            ("L4".into(), ISO_8859_4_NAME),
236            ("ISOLATIN4".into(), ISO_8859_4_NAME),
237            ("ISO-IR-144".into(), ISO_8859_5_NAME),
238            ("ISO_8859-5".into(), ISO_8859_5_NAME),
239            ("ISO-8859-5".into(), ISO_8859_5_NAME),
240            ("CYRILLIC".into(), ISO_8859_5_NAME),
241            ("ISOLATINCYRILLIC".into(), ISO_8859_5_NAME),
242            ("ISO-IR-127".into(), ISO_8859_6_NAME),
243            ("ISO_8859-6".into(), ISO_8859_6_NAME),
244            ("ISO-8859-6".into(), ISO_8859_6_NAME),
245            ("ECMA-114".into(), ISO_8859_6_NAME),
246            ("ASMO-708".into(), ISO_8859_6_NAME),
247            ("ARABIC".into(), ISO_8859_6_NAME),
248            ("ISOLATINARABIC".into(), ISO_8859_6_NAME),
249            ("ISO-IR-126".into(), ISO_8859_7_NAME),
250            ("ISO_8859-7".into(), ISO_8859_7_NAME),
251            ("ISO-8859-7".into(), ISO_8859_7_NAME),
252            ("ELOT_928".into(), ISO_8859_7_NAME),
253            ("ECMA-118".into(), ISO_8859_7_NAME),
254            ("GREEK".into(), ISO_8859_7_NAME),
255            ("GREEK8".into(), ISO_8859_7_NAME),
256            ("ISOLATINGREEK".into(), ISO_8859_7_NAME),
257            ("ISO-IR-138".into(), ISO_8859_8_NAME),
258            ("ISO_8859-8".into(), ISO_8859_8_NAME),
259            ("ISO-8859-8".into(), ISO_8859_8_NAME),
260            ("HEBREW".into(), ISO_8859_8_NAME),
261            ("ISOLATINHEBREW".into(), ISO_8859_8_NAME),
262            ("ISO-IR-148".into(), ISO_8859_9_NAME),
263            ("ISO_8859-9".into(), ISO_8859_9_NAME),
264            ("ISO-8859-9".into(), ISO_8859_9_NAME),
265            ("LATIN5".into(), ISO_8859_9_NAME),
266            ("L5".into(), ISO_8859_9_NAME),
267            ("ISOLATIN5".into(), ISO_8859_9_NAME),
268            ("ISO-IR-157".into(), ISO_8859_10_NAME),
269            ("L6".into(), ISO_8859_10_NAME),
270            ("ISO_8859-10:1992".into(), ISO_8859_10_NAME),
271            ("ISOLATIN6".into(), ISO_8859_10_NAME),
272            ("LATIN6".into(), ISO_8859_10_NAME),
273            ("TIS620".into(), ISO_8859_11_NAME),
274            ("ISO-8859-11".into(), ISO_8859_11_NAME),
275            ("ISO885913".into(), ISO_8859_13_NAME),
276            ("ISO-IR-199".into(), ISO_8859_14_NAME),
277            ("ISO_8859-14:1998".into(), ISO_8859_14_NAME),
278            ("ISO_8859-14".into(), ISO_8859_14_NAME),
279            ("LATIN8".into(), ISO_8859_14_NAME),
280            ("ISO-CELTIC".into(), ISO_8859_14_NAME),
281            ("L8".into(), ISO_8859_14_NAME),
282            ("ISO885914".into(), ISO_8859_14_NAME),
283            ("ISO_8859-15".into(), ISO_8859_15_NAME),
284            ("LATIN-9".into(), ISO_8859_15_NAME),
285            ("ISO885915".into(), ISO_8859_15_NAME),
286            ("ISO-IR-226".into(), ISO_8859_16_NAME),
287            ("ISO_8859-16:2001".into(), ISO_8859_16_NAME),
288            ("ISO_8859-16".into(), ISO_8859_16_NAME),
289            ("LATIN10".into(), ISO_8859_16_NAME),
290            ("L10".into(), ISO_8859_16_NAME),
291            ("ISO885916".into(), ISO_8859_16_NAME),
292            ("UTF32".into(), UTF32_NAME),
293            ("UTF32BE".into(), UTF32BE_NAME),
294            ("UTF32LE".into(), UTF32LE_NAME),
295            ("MS_KANJI".into(), SHIFT_JIS_NAME),
296            ("SHIFTJIS".into(), SHIFT_JIS_NAME),
297            ("ISO-IR-6".into(), US_ASCII_NAME),
298            ("ANSI_X3.4-1968".into(), US_ASCII_NAME),
299            ("ANSI_X3.4-1986".into(), US_ASCII_NAME),
300            ("ISO_646.IRV:1991".into(), US_ASCII_NAME),
301            ("ISO646-US".into(), US_ASCII_NAME),
302            ("US-ASCII".into(), US_ASCII_NAME),
303            ("US".into(), US_ASCII_NAME),
304            ("IBM367".into(), US_ASCII_NAME),
305            ("CP367".into(), US_ASCII_NAME),
306            ("ASCII".into(), US_ASCII_NAME),
307            ("CP037".into(), IBM037),
308            ("EBCDIC-CP-US".into(), IBM037),
309            ("EBCDIC-CP-CA".into(), IBM037),
310            ("EBCDIC-CP-WT".into(), IBM037),
311            ("EBCDIC-CP-NL".into(), IBM037),
312            ("CP273".into(), IBM273),
313            ("EBCDIC-BE".into(), IBM274),
314            ("CP274".into(), IBM274),
315            ("EBCDIC-BR".into(), IBM275),
316            ("CP275".into(), IBM275),
317            ("EBCDIC-CP-DK".into(), IBM277),
318            ("EBCDIC-CP-NO".into(), IBM277),
319            ("CP278".into(), IBM278),
320            ("EBCDIC-CP-FI".into(), IBM278),
321            ("EBCDIC-CP-SE".into(), IBM278),
322            ("CP280".into(), IBM280),
323            ("EBCDIC-CP-IT".into(), IBM280),
324            ("CP284".into(), IBM284),
325            ("EBCDIC-CP-ES".into(), IBM284),
326            ("CP285".into(), IBM285),
327            ("EBCDIC-CP-GB".into(), IBM285),
328            ("CP290".into(), IBM290),
329            ("EBCDIC-JP-KANA".into(), IBM290),
330            ("CP297".into(), IBM297),
331            ("EBCDIC-CP-FR".into(), IBM297),
332            ("CP420".into(), IBM420),
333            ("EBCDIC-CP-AR1".into(), IBM420),
334            ("CP423".into(), IBM423),
335            ("EBCDIC-CP-GR".into(), IBM423),
336            ("CP424".into(), IBM424),
337            ("EBCDIC-CP-HE".into(), IBM424),
338            ("CP437".into(), IBM437),
339            ("437".into(), IBM437),
340            ("PC8CODEPAGE437".into(), IBM437),
341            ("CP500".into(), IBM500),
342            ("EBCDIC-CP-BE".into(), IBM500),
343            ("EBCDIC-CP-CH".into(), IBM500),
344            ("CP851".into(), IBM851),
345            ("851".into(), IBM851),
346            ("CP852".into(), IBM852),
347            ("852".into(), IBM852),
348            ("PCP852".into(), IBM852),
349            ("CP855".into(), IBM855),
350            ("855".into(), IBM855),
351            ("CP857".into(), IBM857),
352            ("857".into(), IBM857),
353            ("CP860".into(), IBM860),
354            ("860".into(), IBM860),
355            ("CP861".into(), IBM861),
356            ("861".into(), IBM861),
357            ("CP-IS".into(), IBM861),
358            ("CP863".into(), IBM863),
359            ("863".into(), IBM863),
360            ("CP864".into(), IBM864),
361            ("CP865".into(), IBM865),
362            ("865".into(), IBM865),
363            ("CP868".into(), IBM868),
364            ("CP-AR".into(), IBM868),
365            ("CP869".into(), IBM869),
366            ("869".into(), IBM869),
367            ("CP-GR".into(), IBM869),
368            ("CP870".into(), IBM870),
369            ("EBCDIC-CP-ROECE".into(), IBM870),
370            ("EBCDIC-CP-YU".into(), IBM870),
371            ("CP871".into(), IBM871),
372            ("EBCDIC-CP-IS".into(), IBM871),
373            ("CP880".into(), IBM880),
374            ("EBCDIC-CYRILLIC".into(), IBM880),
375            ("CP891".into(), IBM891),
376            ("CP903".into(), IBM903),
377            ("CP904".into(), IBM904),
378            ("904".into(), IBM904),
379            // is this correct ????
380            // But since it really says "IBBM", I'll just list it for now...
381            ("IBBM904".into(), IBM904),
382            ("CP905".into(), IBM905),
383            ("EBCDIC-CP-TR".into(), IBM905),
384            ("CP918".into(), IBM918),
385            ("EBCDIC-CP-AR2".into(), IBM918),
386            ("CP1026".into(), IBM1026),
387        ]))
388    });
389/// Register `alias` as an alias for the encoding name `real`.  \
390/// If `alias` is already an alias for another encoding name, overwrite it and return
391/// the encoding name before the overwrite.
392///
393/// It is assumed that real names and aliases will be linked based on the IANA list,
394/// but this is not required.  \
395/// However, since aliases do not redirect multiple times, `real` must be the name registered
396/// with the encoder/decoder.
397///
398/// If an encoding name becomes both a real name and an alias, searches may not work properly.
399///
400/// Reference: [Charcter sets registered by IANA](https://www.iana.org/assignments/character-sets/character-sets.xhtml)
401pub fn register_encoding_alias(alias: &'static str, real: &'static str) -> Option<&'static str> {
402    let mut table = ENCODING_ALIASES.write().unwrap();
403    if alias.chars().all(|c| c.is_ascii_uppercase()) {
404        table.insert(alias.into(), real)
405    } else {
406        table.insert(alias.to_ascii_uppercase().into(), real)
407    }
408}
409/// Unregister `alias` if it is registerd as an alias for an encoding name.  \
410/// If successfully removed, return the real name.
411pub fn unregister_encoding_alias(alias: &'static str) -> Option<&'static str> {
412    ENCODING_ALIASES
413        .write()
414        .unwrap()
415        .remove(alias.to_ascii_uppercase().as_str())
416}
417/// Retrieve the encoding name from `alias`, which is an alias for a certain encoding name.  \
418/// If retrieval fails, returns [`None`].
419///
420/// Alias comparisons are case-insensitive.
421pub fn resolve_encoding_alias(alias: &str) -> Option<&'static str> {
422    let aliases = ENCODING_ALIASES.read().unwrap();
423    aliases
424        .get(alias)
425        .or_else(|| aliases.get(alias.to_ascii_uppercase().as_str()))
426        .copied()
427}
428
429pub type EncoderFactory = fn() -> Box<dyn Encoder>;
430pub static ENCODER_TABLE: LazyLock<RwLock<BTreeMap<&'static str, EncoderFactory>>> =
431    LazyLock::new(|| {
432        let mut map = BTreeMap::<&'static str, EncoderFactory>::new();
433        map.insert(UTF8_NAME, || Box::new(UTF8Encoder));
434        map.insert(UTF16_NAME, || Box::new(UTF16Encoder::default()));
435        map.insert(UTF16BE_NAME, || Box::new(UTF16BEEncoder));
436        map.insert(UTF16LE_NAME, || Box::new(UTF16LEEncoder));
437        map.insert(ISO_8859_1_NAME, || Box::new(ISO8859_1Encoder));
438        map.insert(ISO_8859_2_NAME, || Box::new(ISO8859_2Encoder));
439        map.insert(ISO_8859_3_NAME, || Box::new(ISO8859_3Encoder));
440        map.insert(ISO_8859_4_NAME, || Box::new(ISO8859_4Encoder));
441        map.insert(ISO_8859_5_NAME, || Box::new(ISO8859_5Encoder));
442        map.insert(ISO_8859_6_NAME, || Box::new(ISO8859_6Encoder));
443        map.insert(ISO_8859_7_NAME, || Box::new(ISO8859_7Encoder));
444        map.insert(ISO_8859_8_NAME, || Box::new(ISO8859_8Encoder));
445        map.insert(ISO_8859_9_NAME, || Box::new(ISO8859_9Encoder));
446        map.insert(ISO_8859_10_NAME, || Box::new(ISO8859_10Encoder));
447        map.insert(ISO_8859_11_NAME, || Box::new(ISO8859_11Encoder));
448        map.insert(ISO_8859_13_NAME, || Box::new(ISO8859_13Encoder));
449        map.insert(ISO_8859_14_NAME, || Box::new(ISO8859_14Encoder));
450        map.insert(ISO_8859_15_NAME, || Box::new(ISO8859_15Encoder));
451        map.insert(ISO_8859_16_NAME, || Box::new(ISO8859_16Encoder));
452        map.insert(UTF32_NAME, || Box::new(UTF32Encoder::default()));
453        map.insert(UTF32BE_NAME, || Box::new(UTF32BEEncoder));
454        map.insert(UTF32LE_NAME, || Box::new(UTF32LEEncoder));
455        map.insert(SHIFT_JIS_NAME, || Box::new(ShiftJISEncoder));
456        map.insert(US_ASCII_NAME, || Box::new(USASCIIEncoder));
457        RwLock::new(map)
458    });
459pub fn find_encoder(encoding_name: &str) -> Option<Box<dyn Encoder>> {
460    let table = ENCODER_TABLE.read().unwrap();
461    if let Some(factory) = table.get(encoding_name) {
462        return Some(factory());
463    }
464    if let Some(factory) = table.get(encoding_name.to_ascii_uppercase().as_str()) {
465        return Some(factory());
466    }
467
468    let alias = resolve_encoding_alias(encoding_name)?;
469    table.get(alias).map(|f| f())
470}
471pub fn register_encoder(
472    encoding_name: &'static str,
473    factory: EncoderFactory,
474) -> Option<EncoderFactory> {
475    ENCODER_TABLE
476        .write()
477        .unwrap()
478        .insert(encoding_name, factory)
479}
480pub fn unregister_encoder(encoding_name: &str) -> Option<EncoderFactory> {
481    ENCODER_TABLE.write().unwrap().remove(encoding_name)
482}
483
484pub type DecoderFactory = fn() -> Box<dyn Decoder>;
485pub static DECODER_TABLE: LazyLock<RwLock<BTreeMap<&'static str, DecoderFactory>>> =
486    LazyLock::new(|| {
487        let mut map = BTreeMap::<&'static str, DecoderFactory>::new();
488        map.insert(UTF8_NAME, || Box::new(UTF8Decoder));
489        map.insert(UTF16_NAME, || Box::new(UTF16Decoder::default()));
490        map.insert(UTF16BE_NAME, || Box::new(UTF16BEDecoder));
491        map.insert(UTF16LE_NAME, || Box::new(UTF16LEDecoder));
492        map.insert(ISO_8859_1_NAME, || Box::new(ISO8859_1Decoder));
493        map.insert(ISO_8859_2_NAME, || Box::new(ISO8859_2Decoder));
494        map.insert(ISO_8859_3_NAME, || Box::new(ISO8859_3Decoder));
495        map.insert(ISO_8859_4_NAME, || Box::new(ISO8859_4Decoder));
496        map.insert(ISO_8859_5_NAME, || Box::new(ISO8859_5Decoder));
497        map.insert(ISO_8859_6_NAME, || Box::new(ISO8859_6Decoder));
498        map.insert(ISO_8859_7_NAME, || Box::new(ISO8859_7Decoder));
499        map.insert(ISO_8859_8_NAME, || Box::new(ISO8859_8Decoder));
500        map.insert(ISO_8859_9_NAME, || Box::new(ISO8859_9Decoder));
501        map.insert(ISO_8859_10_NAME, || Box::new(ISO8859_10Decoder));
502        map.insert(ISO_8859_11_NAME, || Box::new(ISO8859_11Decoder));
503        map.insert(ISO_8859_13_NAME, || Box::new(ISO8859_13Decoder));
504        map.insert(ISO_8859_14_NAME, || Box::new(ISO8859_14Decoder));
505        map.insert(ISO_8859_15_NAME, || Box::new(ISO8859_15Decoder));
506        map.insert(ISO_8859_16_NAME, || Box::new(ISO8859_16Decoder));
507        map.insert(UTF32_NAME, || Box::new(UTF32Decoder::default()));
508        map.insert(UTF32BE_NAME, || Box::new(UTF32BEDecoder));
509        map.insert(UTF32LE_NAME, || Box::new(UTF32LEDecoder));
510        map.insert(SHIFT_JIS_NAME, || Box::new(ShiftJISDecoder));
511        map.insert(US_ASCII_NAME, || Box::new(USASCIIDecoder));
512        RwLock::new(map)
513    });
514pub fn find_decoder(encoding_name: &str) -> Option<Box<dyn Decoder>> {
515    let table = DECODER_TABLE.read().unwrap();
516    if let Some(factory) = table.get(encoding_name) {
517        return Some(factory());
518    }
519    if let Some(factory) = table.get(encoding_name.to_ascii_uppercase().as_str()) {
520        return Some(factory());
521    }
522
523    let alias = resolve_encoding_alias(encoding_name)?;
524    table.get(alias).map(|f| f())
525}
526pub fn register_decoder(
527    encoding_name: &'static str,
528    factory: DecoderFactory,
529) -> Option<DecoderFactory> {
530    DECODER_TABLE
531        .write()
532        .unwrap()
533        .insert(encoding_name, factory)
534}
535pub fn unregister_decoder(encoding_name: &str) -> Option<DecoderFactory> {
536    DECODER_TABLE.write().unwrap().remove(encoding_name)
537}