rxing/common/eci_encoder_set.rs
1/*
2 * Copyright 2021 ZXing authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use unicode_segmentation::UnicodeSegmentation;
18
19use super::{CharacterSet, Eci};
20
21// static ENCODERS: Lazy<Vec<CharacterSet>> = Lazy::new(|| {
22// let mut enc_vec = Vec::new();
23// for name in NAMES {
24// if let Some(enc) = CharacterSet::get_character_set_by_name(name) {
25// enc_vec.push(enc);
26// }
27// }
28// enc_vec
29// });
30
31// const NAMES: [&str; 20] = [
32// "IBM437",
33// "ISO-8859-2",
34// "ISO-8859-3",
35// "ISO-8859-4",
36// "ISO-8859-5",
37// "ISO-8859-6",
38// "ISO-8859-7",
39// "ISO-8859-8",
40// "ISO-8859-9",
41// "ISO-8859-10",
42// "ISO-8859-11",
43// "ISO-8859-13",
44// "ISO-8859-14",
45// "ISO-8859-15",
46// "ISO-8859-16",
47// "windows-1250",
48// "windows-1251",
49// "windows-1252",
50// "windows-1256",
51// "Shift_JIS",
52// ];
53
54const ENCODERS: [CharacterSet; 14] = [
55 CharacterSet::Cp437,
56 CharacterSet::ISO8859_2,
57 CharacterSet::ISO8859_3,
58 CharacterSet::ISO8859_4,
59 CharacterSet::ISO8859_5,
60 // CharacterSet::ISO8859_6,
61 CharacterSet::ISO8859_7,
62 // CharacterSet::ISO8859_8,
63 CharacterSet::ISO8859_9,
64 // CharacterSet::ISO8859_10,
65 // CharacterSet::ISO8859_11,
66 // CharacterSet::ISO8859_13,
67 // CharacterSet::ISO8859_14,
68 CharacterSet::ISO8859_15,
69 CharacterSet::ISO8859_16,
70 CharacterSet::Shift_JIS,
71 CharacterSet::Cp1250,
72 CharacterSet::Cp1251,
73 CharacterSet::Cp1252,
74 CharacterSet::Cp1256,
75];
76
77/**
78 * Set of CharsetEncoders for a given input string
79 *
80 * Invariants:
81 * - The list contains only encoders from CharacterSetECI (list is shorter then the list of encoders available on
82 * the platform for which ECI values are defined).
83 * - The list contains encoders at least one encoder for every character in the input.
84 * - The first encoder in the list is always the ISO-8859-1 encoder even of no character in the input can be encoded
85 * by it.
86 * - If the input contains a character that is not in ISO-8859-1 then the last two entries in the list will be the
87 * UTF-8 encoder and the UTF-16BE encoder.
88 *
89 * @author Alex Geller
90 */
91#[derive(Clone)]
92pub struct ECIEncoderSet {
93 encoders: Vec<CharacterSet>,
94 priorityEncoderIndex: Option<usize>,
95}
96
97impl ECIEncoderSet {
98 /**
99 * Constructs an encoder set
100 *
101 * @param stringToEncode the string that needs to be encoded
102 * @param priorityCharset The preferred {@link Charset} or null.
103 * @param fnc1 fnc1 denotes the character in the input that represents the FNC1 character or -1 for a non-GS1 bar
104 * code. When specified, it is considered an error to pass it as argument to the methods canEncode() or encode().
105 */
106 pub fn new(
107 stringToEncodeMain: &str,
108 priorityCharset: Option<CharacterSet>,
109 fnc1: Option<&str>,
110 ) -> Self {
111 // List of encoders that potentially encode characters not in ISO-8859-1 in one byte.
112
113 let mut encoders: Vec<CharacterSet>;
114 let mut priorityEncoderIndexValue = None;
115
116 let mut neededEncoders: Vec<CharacterSet> = Vec::new();
117
118 let stringToEncode = stringToEncodeMain.graphemes(true).collect::<Vec<&str>>();
119
120 //we always need the ISO-8859-1 encoder. It is the default encoding
121 neededEncoders.push(CharacterSet::ISO8859_1);
122 let mut needUnicodeEncoder = if let Some(pc) = priorityCharset {
123 //pc.name().starts_with("UTF") || pc.name().starts_with("utf")
124 pc == CharacterSet::UTF8 || pc == CharacterSet::UTF16BE
125 } else {
126 false
127 };
128
129 //Walk over the input string and see if all characters can be encoded with the list of encoders
130 for i in 0..stringToEncode.len() {
131 // for (int i = 0; i < stringToEncode.length(); i++) {
132 let mut canEncode = false;
133 for encoder in &neededEncoders {
134 // for (CharsetEncoder encoder : neededEncoders) {
135 let c = stringToEncode.get(i).unwrap();
136 if (fnc1.is_some() && c == fnc1.as_ref().unwrap()) || encoder.encode(c).is_ok() {
137 canEncode = true;
138 break;
139 }
140 }
141 if !canEncode {
142 //for the character at position i we don't yet have an encoder in the list
143 for encoder in ENCODERS.iter() {
144 if encoder.encode(stringToEncode.get(i).unwrap()).is_ok() {
145 //Good, we found an encoder that can encode the character. We add him to the list and continue scanning
146 //the input
147 neededEncoders.push(*encoder);
148 canEncode = true;
149 break;
150 }
151 }
152 }
153
154 if !canEncode {
155 //The character is not encodeable by any of the single byte encoders so we remember that we will need a
156 //Unicode encoder.
157 needUnicodeEncoder = true;
158 }
159 }
160
161 if neededEncoders.len() == 1 && !needUnicodeEncoder {
162 //the entire input can be encoded by the ISO-8859-1 encoder
163 encoders = vec![CharacterSet::ISO8859_1];
164 } else {
165 // we need more than one single byte encoder or we need a Unicode encoder.
166 // In this case we append a UTF-8 and UTF-16 encoder to the list
167 // encoders = [] new CharsetEncoder[neededEncoders.size() + 2];
168 encoders = Vec::with_capacity(neededEncoders.len() + 2);
169
170 encoders.extend(neededEncoders);
171
172 encoders.push(CharacterSet::UTF8);
173 encoders.push(CharacterSet::UTF16BE);
174 }
175
176 //Compute priorityEncoderIndex by looking up priorityCharset in encoders
177 if let Some(pc) = priorityCharset.as_ref() {
178 priorityEncoderIndexValue = encoders.iter().position(|enc| enc == pc);
179 }
180 //invariants
181 assert_eq!(encoders[0], CharacterSet::ISO8859_1);
182 Self {
183 encoders,
184 priorityEncoderIndex: priorityEncoderIndexValue,
185 }
186 }
187
188 pub fn len(&self) -> usize {
189 self.encoders.len()
190 }
191
192 pub fn is_empty(&self) -> bool {
193 self.encoders.is_empty()
194 }
195
196 pub fn getCharsetName(&self, index: usize) -> Option<&'static str> {
197 if index < self.len() {
198 Some(self.encoders[index].get_charset_name())
199 } else {
200 None
201 }
202 }
203
204 pub fn getCharset(&self, index: usize) -> Option<CharacterSet> {
205 if index < self.len() {
206 Some(self.encoders[index])
207 } else {
208 None
209 }
210 }
211
212 pub fn get_eci(&self, encoderIndex: usize) -> Eci {
213 self.encoders[encoderIndex].into()
214 // CharacterSetECI::getValue(
215 // &CharacterSetECI::getCharacterSetECI(self.encoders[encoderIndex]).unwrap(),
216 // )
217 }
218
219 /*
220 * returns -1 if no priority charset was defined
221 */
222 pub const fn getPriorityEncoderIndex(&self) -> Option<usize> {
223 self.priorityEncoderIndex
224 }
225
226 pub fn canEncode(&self, c: &str, encoderIndex: usize) -> Option<bool> {
227 if encoderIndex < self.len() {
228 let encoder = self.encoders[encoderIndex];
229 let enc_data = encoder.encode(c);
230
231 Some(enc_data.is_ok())
232 } else {
233 None
234 }
235 }
236
237 pub fn encode_char(&self, c: &str, encoderIndex: usize) -> Option<Vec<u8>> {
238 if encoderIndex < self.len() {
239 let encoder = self.encoders[encoderIndex];
240 let enc_data = encoder.encode(c);
241 enc_data.ok()
242 // assert!(enc_data.is_ok());
243 // enc_data.unwrap()
244 } else {
245 None
246 }
247 }
248
249 pub fn encode_string(&self, s: &str, encoderIndex: usize) -> Option<Vec<u8>> {
250 if encoderIndex < self.len() {
251 let encoder = self.encoders[encoderIndex];
252 encoder.encode(s).ok()
253 } else {
254 None
255 }
256 }
257}