Skip to main content

rxing/common/
eci_encoder_set.rs

1/*
2 * Copyright 2021 ZXing authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use unicode_segmentation::UnicodeSegmentation;
18
19use super::{CharacterSet, Eci};
20
21// static ENCODERS: Lazy<Vec<CharacterSet>> = Lazy::new(|| {
22//     let mut enc_vec = Vec::new();
23//     for name in NAMES {
24//         if let Some(enc) = CharacterSet::get_character_set_by_name(name) {
25//             enc_vec.push(enc);
26//         }
27//     }
28//     enc_vec
29// });
30
31// const NAMES: [&str; 20] = [
32//     "IBM437",
33//     "ISO-8859-2",
34//     "ISO-8859-3",
35//     "ISO-8859-4",
36//     "ISO-8859-5",
37//     "ISO-8859-6",
38//     "ISO-8859-7",
39//     "ISO-8859-8",
40//     "ISO-8859-9",
41//     "ISO-8859-10",
42//     "ISO-8859-11",
43//     "ISO-8859-13",
44//     "ISO-8859-14",
45//     "ISO-8859-15",
46//     "ISO-8859-16",
47//     "windows-1250",
48//     "windows-1251",
49//     "windows-1252",
50//     "windows-1256",
51//     "Shift_JIS",
52// ];
53
/**
 * Candidate character sets that `ECIEncoderSet::new` falls back to when none of the character sets it has
 * already selected can encode a piece of the input.
 *
 * The table is scanned front to back and the first entry whose `encode` call succeeds is kept, so the order of
 * the entries decides which character set is picked when several of them could encode the same text.
 *
 * ISO-8859-1 is deliberately absent: `ECIEncoderSet::new` always adds it up front as the default encoding.
 *
 * NOTE(review): the commented-out entries are disabled candidates; the reason is not visible in this file.
 * Confirm that `CharacterSet` can encode with them before re-enabling, and adjust the array length accordingly.
 */
const ENCODERS: [CharacterSet; 14] = [
    CharacterSet::Cp437,
    CharacterSet::ISO8859_2,
    CharacterSet::ISO8859_3,
    CharacterSet::ISO8859_4,
    CharacterSet::ISO8859_5,
    // CharacterSet::ISO8859_6,
    CharacterSet::ISO8859_7,
    // CharacterSet::ISO8859_8,
    CharacterSet::ISO8859_9,
    // CharacterSet::ISO8859_10,
    // CharacterSet::ISO8859_11,
    // CharacterSet::ISO8859_13,
    // CharacterSet::ISO8859_14,
    CharacterSet::ISO8859_15,
    CharacterSet::ISO8859_16,
    CharacterSet::Shift_JIS,
    CharacterSet::Cp1250,
    CharacterSet::Cp1251,
    CharacterSet::Cp1252,
    CharacterSet::Cp1256,
];
76
77/**
78 * Set of CharsetEncoders for a given input string
79 *
80 * Invariants:
81 * - The list contains only encoders from CharacterSetECI (list is shorter then the list of encoders available on
82 *   the platform for which ECI values are defined).
83 * - The list contains encoders at least one encoder for every character in the input.
84 * - The first encoder in the list is always the ISO-8859-1 encoder even of no character in the input can be encoded
85 *   by it.
86 * - If the input contains a character that is not in ISO-8859-1 then the last two entries in the list will be the
87 *   UTF-8 encoder and the UTF-16BE encoder.
88 *
89 * @author Alex Geller
90 */
91#[derive(Clone)]
92pub struct ECIEncoderSet {
93    encoders: Vec<CharacterSet>,
94    priorityEncoderIndex: Option<usize>,
95}
96
97impl ECIEncoderSet {
98    /**
99     * Constructs an encoder set
100     *
101     * @param stringToEncode the string that needs to be encoded
102     * @param priorityCharset The preferred {@link Charset} or null.
103     * @param fnc1 fnc1 denotes the character in the input that represents the FNC1 character or -1 for a non-GS1 bar
104     * code. When specified, it is considered an error to pass it as argument to the methods canEncode() or encode().
105     */
106    pub fn new(
107        stringToEncodeMain: &str,
108        priorityCharset: Option<CharacterSet>,
109        fnc1: Option<&str>,
110    ) -> Self {
111        // List of encoders that potentially encode characters not in ISO-8859-1 in one byte.
112
113        let mut encoders: Vec<CharacterSet>;
114        let mut priorityEncoderIndexValue = None;
115
116        let mut neededEncoders: Vec<CharacterSet> = Vec::new();
117
118        let stringToEncode = stringToEncodeMain.graphemes(true).collect::<Vec<&str>>();
119
120        //we always need the ISO-8859-1 encoder. It is the default encoding
121        neededEncoders.push(CharacterSet::ISO8859_1);
122        let mut needUnicodeEncoder = if let Some(pc) = priorityCharset {
123            //pc.name().starts_with("UTF") || pc.name().starts_with("utf")
124            pc == CharacterSet::UTF8 || pc == CharacterSet::UTF16BE
125        } else {
126            false
127        };
128
129        //Walk over the input string and see if all characters can be encoded with the list of encoders
130        for i in 0..stringToEncode.len() {
131            // for (int i = 0; i < stringToEncode.length(); i++) {
132            let mut canEncode = false;
133            for encoder in &neededEncoders {
134                //   for (CharsetEncoder encoder : neededEncoders) {
135                let c = stringToEncode.get(i).unwrap();
136                if (fnc1.is_some() && c == fnc1.as_ref().unwrap()) || encoder.encode(c).is_ok() {
137                    canEncode = true;
138                    break;
139                }
140            }
141            if !canEncode {
142                //for the character at position i we don't yet have an encoder in the list
143                for encoder in ENCODERS.iter() {
144                    if encoder.encode(stringToEncode.get(i).unwrap()).is_ok() {
145                        //Good, we found an encoder that can encode the character. We add him to the list and continue scanning
146                        //the input
147                        neededEncoders.push(*encoder);
148                        canEncode = true;
149                        break;
150                    }
151                }
152            }
153
154            if !canEncode {
155                //The character is not encodeable by any of the single byte encoders so we remember that we will need a
156                //Unicode encoder.
157                needUnicodeEncoder = true;
158            }
159        }
160
161        if neededEncoders.len() == 1 && !needUnicodeEncoder {
162            //the entire input can be encoded by the ISO-8859-1 encoder
163            encoders = vec![CharacterSet::ISO8859_1];
164        } else {
165            // we need more than one single byte encoder or we need a Unicode encoder.
166            // In this case we append a UTF-8 and UTF-16 encoder to the list
167            //   encoders = [] new CharsetEncoder[neededEncoders.size() + 2];
168            encoders = Vec::with_capacity(neededEncoders.len() + 2);
169
170            encoders.extend(neededEncoders);
171
172            encoders.push(CharacterSet::UTF8);
173            encoders.push(CharacterSet::UTF16BE);
174        }
175
176        //Compute priorityEncoderIndex by looking up priorityCharset in encoders
177        if let Some(pc) = priorityCharset.as_ref() {
178            priorityEncoderIndexValue = encoders.iter().position(|enc| enc == pc);
179        }
180        //invariants
181        assert_eq!(encoders[0], CharacterSet::ISO8859_1);
182        Self {
183            encoders,
184            priorityEncoderIndex: priorityEncoderIndexValue,
185        }
186    }
187
    /**
     * Returns the number of character sets held by this encoder set.
     */
    pub fn len(&self) -> usize {
        self.encoders.len()
    }
191
    /**
     * Returns `true` when this encoder set holds no character sets.
     *
     * Sets built through `new` always contain ISO-8859-1 as their first entry, so this is `false` for them.
     */
    pub fn is_empty(&self) -> bool {
        self.encoders.is_empty()
    }
195
196    pub fn getCharsetName(&self, index: usize) -> Option<&'static str> {
197        if index < self.len() {
198            Some(self.encoders[index].get_charset_name())
199        } else {
200            None
201        }
202    }
203
204    pub fn getCharset(&self, index: usize) -> Option<CharacterSet> {
205        if index < self.len() {
206            Some(self.encoders[index])
207        } else {
208            None
209        }
210    }
211
    /**
     * Returns the `Eci` value for the character set stored at `encoderIndex`.
     *
     * Unlike the other index-based accessors of this type, this one does not return an `Option`.
     *
     * # Panics
     *
     * Panics if `encoderIndex` is not smaller than `len()`.
     */
    pub fn get_eci(&self, encoderIndex: usize) -> Eci {
        // The value is produced by the `CharacterSet` -> `Eci` conversion.
        self.encoders[encoderIndex].into()
    }
218
    /**
     * Returns the position of the priority charset within the encoder list.
     *
     * @return `None` if no priority charset was passed to `new`, or if the one passed is not part of the list
     */
    pub const fn getPriorityEncoderIndex(&self) -> Option<usize> {
        self.priorityEncoderIndex
    }
225
226    pub fn canEncode(&self, c: &str, encoderIndex: usize) -> Option<bool> {
227        if encoderIndex < self.len() {
228            let encoder = self.encoders[encoderIndex];
229            let enc_data = encoder.encode(c);
230
231            Some(enc_data.is_ok())
232        } else {
233            None
234        }
235    }
236
237    pub fn encode_char(&self, c: &str, encoderIndex: usize) -> Option<Vec<u8>> {
238        if encoderIndex < self.len() {
239            let encoder = self.encoders[encoderIndex];
240            let enc_data = encoder.encode(c);
241            enc_data.ok()
242        // assert!(enc_data.is_ok());
243        // enc_data.unwrap()
244        } else {
245            None
246        }
247    }
248
249    pub fn encode_string(&self, s: &str, encoderIndex: usize) -> Option<Vec<u8>> {
250        if encoderIndex < self.len() {
251            let encoder = self.encoders[encoderIndex];
252            encoder.encode(s).ok()
253        } else {
254            None
255        }
256    }
257}