1use core::str;
4
5use unicode_normalization::char::compose;
6
7use crate::{DecodeError, TextDecoder};
8
/// Stateless decoder for MARC-8 encoded text.
///
/// The type carries no fields; all behaviour lives in its `TextDecoder`
/// implementation, which converts MARC-8 bytes (ASCII plus the extended Latin
/// and combining-mark byte ranges) into a Unicode string.
pub struct Marc8Decoder {}
17
18impl TextDecoder for Marc8Decoder {
19 fn decode<'a>(&self, text: &'a [u8]) -> Result<std::borrow::Cow<'a, str>, DecodeError> {
23 if text.iter().all(|ch| *ch <= 127) {
26 return Ok(std::borrow::Cow::Borrowed(
27 str::from_utf8(text).map_err(|e| DecodeError::Utf(e))?,
28 ));
29 }
30
31 let mut out = String::with_capacity(text.len());
32 let mut it = text.into_iter();
33
34 let mut combining_buffer: Vec<char> = Vec::new();
35 while let Some(ch) = it.next() {
36 if *ch <= 127 {
38 if combining_buffer.is_empty() {
39 out.push(char::from_u32(*ch as u32).unwrap());
40 continue;
41 }
42
43 let mut base =
44 char::from_u32(*ch as u32).expect("we already checked it was below 128");
45 let mut combining_it = combining_buffer.iter();
48 while let Some(combining) = combining_it.next() {
49 if let Some(new) = compose(base, *combining) {
50 base = new;
51 } else {
52 return Err(DecodeError::InvalidPair(base, *combining));
53 }
54 }
55
56 out.push(base);
57 combining_buffer.clear();
58 continue;
59 }
60
61 if *ch >= 0xA1 && *ch <= 0xC8 {
62 match ch {
63 0xA1 => out.push('\u{0141}'),
64 0xA2 => out.push('\u{00D8}'),
65 0xA3 => out.push('\u{0110}'),
66 0xA4 => out.push('\u{00DE}'),
67 0xA5 => out.push('\u{00C6}'),
68 0xA6 => out.push('\u{0152}'),
69 0xA7 => out.push('\u{02B9}'),
70 0xA8 => out.push('\u{00B7}'),
71 0xA9 => out.push('\u{266D}'),
72 0xAA => out.push('\u{00AE}'),
73 0xAB => out.push('\u{00B1}'),
74 0xAC => out.push('\u{01A0}'),
75 0xAD => out.push('\u{01AF}'),
76 0xAE => out.push('\u{02BC}'),
77 0xB0 => out.push('\u{02BB}'),
78 0xB1 => out.push('\u{0142}'),
79 0xB2 => out.push('\u{00F8}'),
80 0xB3 => out.push('\u{0111}'),
81 0xB4 => out.push('\u{00FE}'),
82 0xB5 => out.push('\u{00E6}'),
83 0xB6 => out.push('\u{0153}'),
84 0xB7 => out.push('\u{02BA}'),
85 0xB8 => out.push('\u{0131}'),
86 0xB9 => out.push('\u{00A3}'),
87 0xBA => out.push('\u{00F0}'),
88 0xBC => out.push('\u{01A1}'),
89 0xBD => out.push('\u{01B0}'),
90 0xC0 => out.push('\u{00B0}'),
91 0xC1 => out.push('\u{2113}'),
92 0xC2 => out.push('\u{2117}'),
93 0xC3 => out.push('\u{00A9}'),
94 0xC4 => out.push('\u{266F}'),
95 0xC5 => out.push('\u{00BF}'),
96 0xC6 => out.push('\u{00A1}'),
97 0xC7 => out.push('\u{00DF}'), 0xC8 => out.push('\u{20AC}'), _ => return Err(DecodeError::Unknown(*ch)),
100 };
101 continue;
102 }
103
104 if *ch >= 0xE0 && *ch <= 0xFE {
106 let combining = match ch {
115 0xE0 => '\u{0309}',
116 0xE1 => '\u{0300}',
117 0xE2 => '\u{0301}',
118 0xE3 => '\u{0302}',
119 0xE4 => '\u{0303}',
120 0xE5 => '\u{0304}',
121 0xE6 => '\u{0306}',
122 0xE7 => '\u{0307}',
123 0xE8 => '\u{0308}',
124 0xE9 => '\u{030C}',
125 0xEA => '\u{030A}',
126 0xEB => '\u{FE20}',
127 0xEC => '\u{FE21}',
128 0xED => '\u{0315}',
129 0xEE => '\u{030B}',
130 0xEF => '\u{0310}',
131 0xF0 => '\u{0327}',
132 0xF1 => '\u{0328}',
133 0xF2 => '\u{0323}',
134 0xF3 => '\u{0324}',
135 0xF4 => '\u{0325}',
136 0xF5 => '\u{0333}',
137 0xF6 => '\u{0332}',
138 0xF7 => '\u{0326}',
139 0xF8 => '\u{031C}',
140 0xF9 => '\u{032E}',
141 0xFA => '\u{FE22}',
142 0xFB => '\u{FE23}',
143 0xFE => '\u{0313}',
144 _ => return Err(DecodeError::Unknown(*ch)),
145 };
146
147 combining_buffer.push(combining);
148
149 continue;
150 }
151
152 return Err(DecodeError::Unknown(*ch));
153 }
154
155 if !combining_buffer.is_empty() {
157 return Err(DecodeError::InvalidSequence);
158 }
159
160 return Ok(std::borrow::Cow::Owned(out));
161 }
162}