use unicode_segmentation::UnicodeSegmentation;
use super::{CharacterSet, Eci};
use once_cell::sync::Lazy;
/// Candidate character sets probed, in this exact order, when a character
/// cannot be encoded by any encoder that has already been selected.
///
/// The order matters: the first entry able to encode a character is the one
/// that gets added to the encoder set.
static ENCODERS: Lazy<Vec<CharacterSet>> = Lazy::new(|| {
    Vec::from([
        CharacterSet::Cp437,
        CharacterSet::ISO8859_2,
        CharacterSet::ISO8859_3,
        CharacterSet::ISO8859_4,
        CharacterSet::ISO8859_5,
        CharacterSet::ISO8859_7,
        CharacterSet::ISO8859_9,
        CharacterSet::ISO8859_15,
        CharacterSet::ISO8859_16,
        CharacterSet::Shift_JIS,
        CharacterSet::Cp1250,
        CharacterSet::Cp1251,
        CharacterSet::Cp1252,
        CharacterSet::Cp1256,
    ])
});
/// An ordered collection of character sets selected for encoding one
/// particular input string, addressed by index.
#[derive(Debug, Clone)]
pub struct ECIEncoderSet {
    /// The selected character sets, in selection order.
    encoders: Vec<CharacterSet>,
    /// Index into `encoders` of the caller's preferred character set, or
    /// `None` when no preference was given or it is not part of the set.
    priorityEncoderIndex: Option<usize>,
}
impl ECIEncoderSet {
    /// Builds the set of encoders required to encode `stringToEncodeMain`.
    ///
    /// The input is walked grapheme by grapheme. ISO-8859-1 is always the
    /// first encoder in the set. For every grapheme that none of the encoders
    /// selected so far can encode, the first entry of `ENCODERS` able to
    /// encode it is appended. If no entry can, the set is marked as needing a
    /// Unicode encoder.
    ///
    /// UTF-8 and UTF-16BE are appended (in that order) whenever more than one
    /// encoder was selected, a Unicode encoder is needed, or
    /// `priorityCharset` is itself UTF-8 or UTF-16BE. Otherwise the set
    /// contains ISO-8859-1 only.
    ///
    /// # Arguments
    ///
    /// * `stringToEncodeMain` - the text that will later be encoded.
    /// * `priorityCharset` - a preferred character set; its position in the
    ///   resulting set (if present) is reported by
    ///   `getPriorityEncoderIndex`.
    /// * `fnc1` - when `Some`, a grapheme equal to this string is treated as
    ///   encodable without consulting any encoder.
    pub fn new(
        stringToEncodeMain: &str,
        priorityCharset: Option<CharacterSet>,
        fnc1: Option<&str>,
    ) -> Self {
        // ISO-8859-1 is the default encoder and must stay at index 0.
        let mut encoders = vec![CharacterSet::ISO8859_1];

        let mut needs_unicode = matches!(
            priorityCharset,
            Some(pc) if pc == CharacterSet::UTF8 || pc == CharacterSet::UTF16BE
        );

        for grapheme in stringToEncodeMain.graphemes(true) {
            // The FNC1 comparison does not depend on the encoder, so it is
            // evaluated once per grapheme rather than once per encoder.
            let covered = fnc1 == Some(grapheme)
                || encoders
                    .iter()
                    .any(|encoder| encoder.encode(grapheme).is_ok());
            if covered {
                continue;
            }

            // None of the selected encoders handles this grapheme: pick the
            // first fallback that does, or remember that only a Unicode
            // encoder can represent the input.
            match ENCODERS
                .iter()
                .find(|encoder| encoder.encode(grapheme).is_ok())
            {
                Some(encoder) => encoders.push(*encoder),
                None => needs_unicode = true,
            }
        }

        // With a single encoder and no Unicode requirement the whole input is
        // ISO-8859-1; in every other case the Unicode encoders are offered too.
        if encoders.len() > 1 || needs_unicode {
            encoders.push(CharacterSet::UTF8);
            encoders.push(CharacterSet::UTF16BE);
        }

        let priority_index = priorityCharset
            .and_then(|priority| encoders.iter().position(|encoder| *encoder == priority));

        debug_assert_eq!(
            encoders[0],
            CharacterSet::ISO8859_1,
            "ISO-8859-1 must always be the first encoder"
        );

        Self {
            encoders,
            priorityEncoderIndex: priority_index,
        }
    }

    /// Returns the number of encoders in the set.
    pub fn len(&self) -> usize {
        self.encoders.len()
    }

    /// Returns `true` when the set holds no encoders.
    ///
    /// A set built by [`ECIEncoderSet::new`] always contains at least
    /// ISO-8859-1.
    pub fn is_empty(&self) -> bool {
        self.encoders.is_empty()
    }

    /// Returns the charset name of the encoder at `index`, or `None` when
    /// `index` is out of range.
    pub fn getCharsetName(&self, index: usize) -> Option<&'static str> {
        self.encoders
            .get(index)
            .map(|encoder| encoder.get_charset_name())
    }

    /// Returns the character set at `index`, or `None` when `index` is out of
    /// range.
    pub fn getCharset(&self, index: usize) -> Option<CharacterSet> {
        self.encoders.get(index).copied()
    }

    /// Converts the character set at `encoderIndex` into its [`Eci`].
    ///
    /// # Panics
    ///
    /// Panics when `encoderIndex` is out of range.
    pub fn get_eci(&self, encoderIndex: usize) -> Eci {
        self.encoders[encoderIndex].into()
    }

    /// Returns the index of the priority character set passed to
    /// [`ECIEncoderSet::new`], or `None` when none was given or it is not part
    /// of the set.
    pub fn getPriorityEncoderIndex(&self) -> Option<usize> {
        self.priorityEncoderIndex
    }

    /// Reports whether the encoder at `encoderIndex` can encode `c`.
    ///
    /// Returns `None` when `encoderIndex` is out of range.
    pub fn canEncode(&self, c: &str, encoderIndex: usize) -> Option<bool> {
        self.encoders
            .get(encoderIndex)
            .map(|encoder| encoder.encode(c).is_ok())
    }

    /// Encodes `c` with the encoder at `encoderIndex`.
    ///
    /// Returns `None` when `encoderIndex` is out of range or the encoder
    /// cannot encode `c`.
    pub fn encode_char(&self, c: &str, encoderIndex: usize) -> Option<Vec<u8>> {
        self.encoders
            .get(encoderIndex)
            .and_then(|encoder| encoder.encode(c).ok())
    }

    /// Encodes the string `s` with the encoder at `encoderIndex`.
    ///
    /// Returns `None` when `encoderIndex` is out of range or the encoder
    /// cannot encode `s`.
    pub fn encode_string(&self, s: &str, encoderIndex: usize) -> Option<Vec<u8>> {
        self.encoders
            .get(encoderIndex)
            .and_then(|encoder| encoder.encode(s).ok())
    }
}