1use crate::format::Encoding as MarcEncoding;
2use encoding_rs::Encoding;
3
4pub fn convert_to_utf8(data: &[u8], encoding: MarcEncoding) -> Result<String, String> {
6 if encoding == MarcEncoding::Iso5426 {
7 return decode_iso5426(data);
8 }
9
10 let enc = get_encoding(encoding);
11 let (cow, _, had_errors) = enc.decode(data);
12
13 if had_errors {
14 return Err("Encoding conversion had errors".to_string());
15 }
16
17 Ok(cow.to_string())
18}
19
20pub fn convert_from_encoding(text: &str, encoding: MarcEncoding) -> Result<Vec<u8>, String> {
22 if encoding == MarcEncoding::Iso5426 {
23 return encode_iso5426(text);
24 }
25
26 let enc = get_encoding(encoding);
27 let (cow, _, had_errors) = enc.encode(text);
28
29 if had_errors {
30 return Err("Encoding conversion had errors".to_string());
31 }
32
33 Ok(cow.to_vec())
34}
35
36fn get_encoding(encoding: MarcEncoding) -> &'static Encoding {
38 match encoding {
39 MarcEncoding::Utf8 => Encoding::for_label(b"utf-8").unwrap_or(encoding_rs::UTF_8),
40 MarcEncoding::Marc8 => {
41 Encoding::for_label(b"iso-8859-1").unwrap_or(encoding_rs::WINDOWS_1252)
44 }
45 MarcEncoding::Iso8859_1 => Encoding::for_label(b"iso-8859-1").unwrap_or(encoding_rs::WINDOWS_1252),
46 MarcEncoding::Iso8859_2 => Encoding::for_label(b"iso-8859-2").unwrap(),
47 MarcEncoding::Iso8859_5 => Encoding::for_label(b"iso-8859-5").unwrap(),
48 MarcEncoding::Iso8859_7 => Encoding::for_label(b"iso-8859-7").unwrap(),
49 MarcEncoding::Iso8859_15 => Encoding::for_label(b"iso-8859-15").unwrap(),
50 MarcEncoding::Iso5426 => {
51 Encoding::for_label(b"iso-8859-1").unwrap_or(encoding_rs::WINDOWS_1252)
54 }
55 }
56}
57
58fn decode_iso5426(data: &[u8]) -> Result<String, String> {
62 let mut result = String::with_capacity(data.len());
63
64 for &byte in data {
65 match byte {
66 0x20..=0x7E => {
68 result.push(byte as char);
69 }
70 0x00..=0x1F => {
72 if byte == 0x09 || byte == 0x0A || byte == 0x0D {
74 result.push(byte as char);
75 }
76 }
77 0x7F => {
79 }
81 0x80..=0x9F => {
83 if let Some(ch) = map_iso5426_special(byte) {
84 result.push(ch);
85 } else {
86 result.push('\u{FFFD}');
88 }
89 }
90 0xA0..=0xFF => {
92 let iso8859_1_enc = Encoding::for_label(b"iso-8859-1").unwrap();
94 let byte_array = [byte];
95 let (cow, _, _) = iso8859_1_enc.decode(&byte_array);
96 let decoded_str = cow.to_string();
97 result.push_str(&decoded_str);
98 }
99 }
100 }
101
102 Ok(result)
103}
104
105fn encode_iso5426(text: &str) -> Result<Vec<u8>, String> {
107 let mut result = Vec::with_capacity(text.len());
108
109 for ch in text.chars() {
110 let code_point = ch as u32;
111
112 match code_point {
113 0x20..=0x7E => {
115 result.push(code_point as u8);
116 }
117 0x00..=0x1F => {
119 if code_point == 0x09 || code_point == 0x0A || code_point == 0x0D {
120 result.push(code_point as u8);
121 }
122 }
123 _ => {
125 if let Some(byte) = map_unicode_to_iso5426(ch) {
126 result.push(byte);
127 } else {
128 let iso8859_1_enc = Encoding::for_label(b"iso-8859-1").unwrap();
130 let ch_str = ch.to_string();
131 let (cow, _, had_errors) = iso8859_1_enc.encode(&ch_str);
132 let encoded_bytes = cow.to_vec();
133 if had_errors || encoded_bytes.is_empty() {
134 return Err(format!("Cannot encode character '{}' to ISO-5426", ch));
135 }
136 result.extend_from_slice(&encoded_bytes);
137 }
138 }
139 }
140 }
141
142 Ok(result)
143}
144
145fn map_iso5426_special(byte: u8) -> Option<char> {
148 match byte {
149 0x80..=0x9F => {
152 let iso8859_1_enc = Encoding::for_label(b"iso-8859-1").unwrap();
155 let byte_array = [byte];
156 let (cow, _, _) = iso8859_1_enc.decode(&byte_array);
157 let decoded_str = cow.to_string();
158 decoded_str.chars().next()
159 }
160 _ => None,
161 }
162}
163
164fn map_unicode_to_iso5426(ch: char) -> Option<u8> {
166 let iso8859_1_enc = Encoding::for_label(b"iso-8859-1").unwrap();
169 let ch_str = ch.to_string();
170 let (cow, _, had_errors) = iso8859_1_enc.encode(&ch_str);
171 let encoded_bytes = cow.to_vec();
172 if !had_errors && encoded_bytes.len() == 1 {
173 Some(encoded_bytes[0])
174 } else {
175 None
176 }
177}