use std::borrow::Cow;
use std::collections::HashMap;
use std::io::{Read, Seek};
use quick_xml::{escape::resolve_xml_entity, events::BytesRef};
use zip::read::ZipArchive;
/// Length in bytes of one `_x00HH_` escape sequence as matched by
/// `unescape_xml`: the `_x00` prefix, two hex digits and the closing `_`.
const UNICODE_ESCAPE_LENGTH: usize = 7;
/// Generates `impl From<$from> for $to` that wraps the source value in the
/// `$to::$var` constructor.
///
/// The source value is passed through `Into` first, so the payload of `$var`
/// may be any type that `$from` converts into (including `$from` itself).
macro_rules! from_err {
    ($from:ty, $to:tt, $var:tt) => {
        impl From<$from> for $to {
            fn from(source: $from) -> Self {
                $to::$var(source.into())
            }
        }
    };
}
/// Decodes a byte slice as consecutive little-endian `u32` words.
///
/// # Panics
///
/// Panics at call time (before any item is yielded) if `s.len()` is not a
/// multiple of 4.
pub fn to_u32(s: &[u8]) -> impl ExactSizeIterator<Item = u32> + '_ {
    let trailing = s.len() % 4;
    assert_eq!(trailing, 0);
    s.chunks_exact(4).map(|word| {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(word);
        u32::from_le_bytes(raw)
    })
}
/// Reads a little-endian `u32` from the first four bytes of `s`.
///
/// Any bytes after the fourth are ignored.
///
/// # Panics
///
/// Panics if `s` holds fewer than four bytes.
#[inline]
pub fn read_u32(s: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&s[..4]);
    u32::from_le_bytes(raw)
}
/// Reads a little-endian `i32` from the first four bytes of `s`.
///
/// Any bytes after the fourth are ignored.
///
/// # Panics
///
/// Panics if `s` holds fewer than four bytes.
#[inline]
pub fn read_i32(s: &[u8]) -> i32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&s[..4]);
    i32::from_le_bytes(raw)
}
/// Reads a little-endian `u16` from the first two bytes of `s`.
///
/// Any bytes after the second are ignored.
///
/// # Panics
///
/// Panics if `s` holds fewer than two bytes.
#[inline]
pub fn read_u16(s: &[u8]) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&s[..2]);
    u16::from_le_bytes(raw)
}
/// Reads a little-endian `i16` from the first two bytes of `s`.
///
/// Any bytes after the second are ignored.
///
/// # Panics
///
/// Panics if `s` holds fewer than two bytes.
#[inline]
pub fn read_i16(s: &[u8]) -> i16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&s[..2]);
    i16::from_le_bytes(raw)
}
/// Reads a little-endian `u64` from the first eight bytes of `s`.
///
/// Any bytes after the eighth are ignored.
///
/// # Panics
///
/// Panics if `s` holds fewer than eight bytes.
#[inline]
pub fn read_u64(s: &[u8]) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&s[..8]);
    u64::from_le_bytes(raw)
}
/// Reads a little-endian `u32` from the first four bytes of `s` (via
/// `read_u32`) and converts it to `usize`.
///
/// # Panics
///
/// Panics if `s` holds fewer than four bytes, or if the value does not fit in
/// `usize` on the target platform.
#[inline]
pub fn read_usize(s: &[u8]) -> usize {
    let value = read_u32(s);
    usize::try_from(value).unwrap()
}
/// Reads a little-endian `f64` from the first eight bytes of `s`.
///
/// Any bytes after the eighth are ignored.
///
/// # Panics
///
/// Panics if `s` holds fewer than eight bytes.
#[inline]
pub fn read_f64(s: &[u8]) -> f64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&s[..8]);
    f64::from_le_bytes(raw)
}
/// Appends the spreadsheet letter name of a zero-based column index to `buf`.
///
/// Column names are bijective base 26: `0` is `A`, `25` is `Z`, `26` is `AA`,
/// `701` is `ZZ`, `702` is `AAA`, and so on. Existing contents of `buf` are
/// left untouched; the letters are appended after them.
///
/// The previous implementation stopped as soon as the remaining value dropped
/// below 26 without emitting that most significant letter, and treated the
/// name as plain base 26, so every column from 26 upwards got a wrong name
/// (e.g. `26` produced `A`, the same as column `0`).
pub fn push_column(mut col: u32, buf: &mut String) {
    // `u32::MAX` needs seven letters, so a fixed stack buffer is enough.
    let mut letters = [0u8; 7];
    let mut start = letters.len();
    loop {
        start -= 1;
        letters[start] = b'A' + (col % 26) as u8;
        if col < 26 {
            break;
        }
        // Bijective numeration has no zero digit: after peeling off the
        // lowest letter the remaining prefix is itself a zero-based column
        // name, hence the `- 1`.
        col = col / 26 - 1;
    }
    buf.extend(letters[start..].iter().map(|&b| char::from(b)));
}
#[inline]
pub(crate) fn unescape_entity_to_buffer(
entity: &BytesRef,
buffer: &mut String,
) -> Result<(), quick_xml::Error> {
let decoded = entity.decode()?;
if let Some(unescaped_xml_entity) = resolve_xml_entity(&decoded) {
buffer.push_str(unescaped_xml_entity);
return Ok(());
}
if let Some(unescaped_char) = entity.resolve_char_ref()? {
buffer.push(unescaped_char);
return Ok(());
}
Err(quick_xml::Error::Escape(
quick_xml::escape::EscapeError::UnrecognizedEntity(0..0, format!("&{decoded};")),
))
}
/// Decodes `_x00HH_` escape sequences (where `HH` is two hex digits) into the
/// character with that code point, leaving all other text unchanged.
///
/// Only sequences whose first two hex digits are `00` are decoded; anything
/// else that merely looks similar (`_x597D_`, `_x00ZZ_`, a truncated
/// `_x000D`, ...) is copied through verbatim. Because decoding resumes after
/// the closing `_`, an escaped underscore protects what follows it:
/// `_x005F_x000A_` becomes the literal text `_x000A_`.
///
/// Returns `Cow::Borrowed` when nothing was decoded, so the common case does
/// not allocate a result string.
///
/// The two digits are validated as real hex digits. The earlier
/// `u8::from_str_radix` parse also accepted a leading `+` sign, so input such
/// as `_x00+F_` was wrongly decoded to U+000F.
pub(crate) fn unescape_xml(original: &str) -> Cow<'_, str> {
    /// Numeric value of one ASCII hex digit, or `None` for any other byte.
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    // Fast path: without the prefix there is nothing to decode.
    if !original.contains("_x00") {
        return Cow::Borrowed(original);
    }

    let bytes = original.as_bytes();
    let mut unescaped = String::with_capacity(original.len());
    let mut i = 0;
    let mut has_changes = false;

    while i < bytes.len() {
        // The pattern spells out all seven bytes of an escape, so it only
        // matches a window of exactly `UNICODE_ESCAPE_LENGTH` bytes.
        if let Some([b'_', b'x', b'0', b'0', hi, lo, b'_']) =
            bytes.get(i..i + UNICODE_ESCAPE_LENGTH)
        {
            if let (Some(high), Some(low)) = (hex_value(*hi), hex_value(*lo)) {
                unescaped.push(char::from((high << 4) | low));
                // Every byte of a matched escape is ASCII, so `i` stays on a
                // character boundary.
                i += UNICODE_ESCAPE_LENGTH;
                has_changes = true;
                continue;
            }
        }

        // Not an escape: copy one whole character so multi-byte UTF-8
        // sequences are never split.
        match original[i..].chars().next() {
            Some(ch) => {
                unescaped.push(ch);
                i += ch.len_utf8();
            }
            None => break,
        }
    }

    if has_changes {
        Cow::Owned(unescaped)
    } else {
        Cow::Borrowed(original)
    }
}
/// Builds a lookup table from normalised entry names to the names actually
/// stored in the zip archive.
///
/// Each key is the entry name with every `\` replaced by `/` and ASCII
/// letters lowercased; each value is the entry name exactly as the archive
/// reports it. If several entries normalise to the same key, the one
/// yielded last by `file_names()` wins.
pub fn build_zip_path_cache<RS: Read + Seek>(zip: &ZipArchive<RS>) -> HashMap<String, String> {
    let mut lookup: HashMap<String, String> = HashMap::with_capacity(zip.len());
    lookup.extend(zip.file_names().map(|entry| {
        let key = entry.replace('\\', "/").to_ascii_lowercase();
        (key, entry.to_string())
    }));
    lookup
}
/// Looks up the archive's actual entry name for `path`, ignoring ASCII case
/// and treating `\` and `/` as the same separator.
///
/// `cache` is expected to come from `build_zip_path_cache`, whose keys are
/// lowercased and use `/` as the separator. The lookup key is normalised the
/// same way; previously only the case was folded, so a `path` written with
/// backslashes could never match a cached key.
///
/// Returns the cached entry name on a hit and `path` itself on a miss.
pub fn cached_zip_path<'a>(cache: &'a HashMap<String, String>, path: &'a str) -> &'a str {
    // Must mirror the key normalisation used when the cache is built.
    let key = path.replace('\\', "/").to_ascii_lowercase();
    cache.get(&key).map_or(path, String::as_str)
}
/// Number of entries in each of the parallel [`FTAB`] and [`FTAB_ARGC`] tables.
pub const FTAB_LEN: usize = 485;
/// Table of spreadsheet function names, addressed by position.
///
/// The table has `FTAB_LEN` entries, the same length as `FTAB_ARGC`. A few
/// slots hold an empty string instead of a name.
///
/// NOTE(review): presumably the index is the built-in function identifier
/// used by binary workbook formula tokens, and `FTAB_ARGC[i]` describes the
/// arguments of `FTAB[i]` — confirm against the formula parsers that read it.
pub const FTAB: [&str; FTAB_LEN] = [
"COUNT",
"IF",
"ISNA",
"ISERROR",
"SUM",
"AVERAGE",
"MIN",
"MAX",
"ROW",
"COLUMN",
"NA",
"NPV",
"STDEV",
"DOLLAR",
"FIXED",
"SIN",
"COS",
"TAN",
"ATAN",
"PI",
"SQRT",
"EXP",
"LN",
"LOG10",
"ABS",
"INT",
"SIGN",
"ROUND",
"LOOKUP",
"INDEX",
"REPT",
"MID",
"LEN",
"VALUE",
"TRUE",
"FALSE",
"AND",
"OR",
"NOT",
"MOD",
"DCOUNT",
"DSUM",
"DAVERAGE",
"DMIN",
"DMAX",
"DSTDEV",
"VAR",
"DVAR",
"TEXT",
"LINEST",
"TREND",
"LOGEST",
"GROWTH",
"GOTO",
"HALT",
"RETURN",
"PV",
"FV",
"NPER",
"PMT",
"RATE",
"MIRR",
"IRR",
"RAND",
"MATCH",
"DATE",
"TIME",
"DAY",
"MONTH",
"YEAR",
"WEEKDAY",
"HOUR",
"MINUTE",
"SECOND",
"NOW",
"AREAS",
"ROWS",
"COLUMNS",
"OFFSET",
"ABSREF",
"RELREF",
"ARGUMENT",
"SEARCH",
"TRANSPOSE",
"ERROR",
"STEP",
"TYPE",
"ECHO",
"SET.NAME",
"CALLER",
"DEREF",
"WINDOWS",
"SERIES",
"DOCUMENTS",
"ACTIVE.CELL",
"SELECTION",
"RESULT",
"ATAN2",
"ASIN",
"ACOS",
"CHOOSE",
"HLOOKUP",
"VLOOKUP",
"LINKS",
"INPUT",
"ISREF",
"GET.FORMULA",
"GET.NAME",
"SET.VALUE",
"LOG",
"EXEC",
"CHAR",
"LOWER",
"UPPER",
"PROPER",
"LEFT",
"RIGHT",
"EXACT",
"TRIM",
"REPLACE",
"SUBSTITUTE",
"CODE",
"NAMES",
"DIRECTORY",
"FIND",
"CELL",
"ISERR",
"ISTEXT",
"ISNUMBER",
"ISBLANK",
"T",
"N",
"FOPEN",
"FCLOSE",
"FSIZE",
"FREADLN",
"FREAD",
"FWRITELN",
"FWRITE",
"FPOS",
"DATEVALUE",
"TIMEVALUE",
"SLN",
"SYD",
"DDB",
"GET.DEF",
"REFTEXT",
"TEXTREF",
"INDIRECT",
"REGISTER",
"CALL",
"ADD.BAR",
"ADD.MENU",
"ADD.COMMAND",
"ENABLE.COMMAND",
"CHECK.COMMAND",
"RENAME.COMMAND",
"SHOW.BAR",
"DELETE.MENU",
"DELETE.COMMAND",
"GET.CHART.ITEM",
"DIALOG.BOX",
"CLEAN",
"MDETERM",
"MINVERSE",
"MMULT",
"FILES",
"IPMT",
"PPMT",
"COUNTA",
"CANCEL.KEY",
"FOR",
"WHILE",
"BREAK",
"NEXT",
"INITIATE",
"REQUEST",
"POKE",
"EXECUTE",
"TERMINATE",
"RESTART",
"HELP",
"GET.BAR",
"PRODUCT",
"FACT",
"GET.CELL",
"GET.WORKSPACE",
"GET.WINDOW",
"GET.DOCUMENT",
"DPRODUCT",
"ISNONTEXT",
"GET.NOTE",
"NOTE",
"STDEVP",
"VARP",
"DSTDEVP",
"DVARP",
"TRUNC",
"ISLOGICAL",
"DCOUNTA",
"DELETE.BAR",
"UNREGISTER",
// Indices 202 and 203 carry no name.
"",
"",
"USDOLLAR",
"FINDB",
"SEARCHB",
"REPLACEB",
"LEFTB",
"RIGHTB",
"MIDB",
"LENB",
"ROUNDUP",
"ROUNDDOWN",
"ASC",
"DBCS",
"RANK",
// Indices 217 and 218 carry no name.
"",
"",
"ADDRESS",
"DAYS360",
"TODAY",
"VDB",
"ELSE",
"ELSE.IF",
"END.IF",
"FOR.CELL",
"MEDIAN",
"SUMPRODUCT",
"SINH",
"COSH",
"TANH",
"ASINH",
"ACOSH",
"ATANH",
"DGET",
"CREATE.OBJECT",
"VOLATILE",
"LAST.ERROR",
"CUSTOM.UNDO",
"CUSTOM.REPEAT",
"FORMULA.CONVERT",
"GET.LINK.INFO",
"TEXT.BOX",
"INFO",
"GROUP",
"GET.OBJECT",
"DB",
"PAUSE",
// Indices 249 and 250 carry no name.
"",
"",
"RESUME",
"FREQUENCY",
"ADD.TOOLBAR",
"DELETE.TOOLBAR",
// Index 255. NOTE(review): the only mixed-case entry; presumably the marker
// for a user-defined function rather than a built-in name — confirm.
"User",
"RESET.TOOLBAR",
"EVALUATE",
"GET.TOOLBAR",
"GET.TOOL",
"SPELLING.CHECK",
"ERROR.TYPE",
"APP.TITLE",
"WINDOW.TITLE",
"SAVE.TOOLBAR",
"ENABLE.TOOL",
"PRESS.TOOL",
"REGISTER.ID",
"GET.WORKBOOK",
"AVEDEV",
"BETADIST",
"GAMMALN",
"BETAINV",
"BINOMDIST",
"CHIDIST",
"CHIINV",
"COMBIN",
"CONFIDENCE",
"CRITBINOM",
"EVEN",
"EXPONDIST",
"FDIST",
"FINV",
"FISHER",
"FISHERINV",
"FLOOR",
"GAMMADIST",
"GAMMAINV",
"CEILING",
"HYPGEOMDIST",
"LOGNORMDIST",
"LOGINV",
"NEGBINOMDIST",
"NORMDIST",
"NORMSDIST",
"NORMINV",
"NORMSINV",
"STANDARDIZE",
"ODD",
"PERMUT",
"POISSON",
"TDIST",
"WEIBULL",
"SUMXMY2",
"SUMX2MY2",
"SUMX2PY2",
"CHITEST",
"CORREL",
"COVAR",
"FORECAST",
"FTEST",
"INTERCEPT",
"PEARSON",
"RSQ",
"STEYX",
"SLOPE",
"TTEST",
"PROB",
"DEVSQ",
"GEOMEAN",
"HARMEAN",
"SUMSQ",
"KURT",
"SKEW",
"ZTEST",
"LARGE",
"SMALL",
"QUARTILE",
"PERCENTILE",
"PERCENTRANK",
"MODE",
"TRIMMEAN",
"TINV",
// Index 333 carries no name.
"",
"MOVIE.COMMAND",
"GET.MOVIE",
"CONCATENATE",
"POWER",
"PIVOT.ADD.DATA",
"GET.PIVOT.TABLE",
"GET.PIVOT.FIELD",
"GET.PIVOT.ITEM",
"RADIANS",
"DEGREES",
"SUBTOTAL",
"SUMIF",
"COUNTIF",
"COUNTBLANK",
"SCENARIO.GET",
"OPTIONS.LISTS.GET",
"ISPMT",
"DATEDIF",
"DATESTRING",
"NUMBERSTRING",
"ROMAN",
"OPEN.DIALOG",
"SAVE.DIALOG",
"VIEW.GET",
"GETPIVOTDATA",
"HYPERLINK",
"PHONETIC",
"AVERAGEA",
"MAXA",
"MINA",
"STDEVPA",
"VARPA",
"STDEVA",
"VARA",
"BAHTTEXT",
"THAIDAYOFWEEK",
"THAIDIGIT",
"THAIMONTHOFYEAR",
"THAINUMSOUND",
"THAINUMSTRING",
"THAISTRINGLENGTH",
"ISTHAIDIGIT",
"ROUNDBAHTDOWN",
"ROUNDBAHTUP",
"THAIYEAR",
"RTD",
"CUBEVALUE",
"CUBEMEMBER",
"CUBEMEMBERPROPERTY",
"CUBERANKEDMEMBER",
"HEX2BIN",
"HEX2DEC",
"HEX2OCT",
"DEC2BIN",
"DEC2HEX",
"DEC2OCT",
"OCT2BIN",
"OCT2HEX",
"OCT2DEC",
"BIN2DEC",
"BIN2OCT",
"BIN2HEX",
"IMSUB",
"IMDIV",
"IMPOWER",
"IMABS",
"IMSQRT",
"IMLN",
"IMLOG2",
"IMLOG10",
"IMSIN",
"IMCOS",
"IMEXP",
"IMARGUMENT",
"IMCONJUGATE",
"IMAGINARY",
"IMREAL",
"COMPLEX",
"IMSUM",
"IMPRODUCT",
"SERIESSUM",
"FACTDOUBLE",
"SQRTPI",
"QUOTIENT",
"DELTA",
"GESTEP",
"ISEVEN",
"ISODD",
"MROUND",
"ERF",
"ERFC",
"BESSELJ",
"BESSELK",
"BESSELY",
"BESSELI",
"XIRR",
"XNPV",
"PRICEMAT",
"YIELDMAT",
"INTRATE",
"RECEIVED",
"DISC",
"PRICEDISC",
"YIELDDISC",
"TBILLEQ",
"TBILLPRICE",
"TBILLYIELD",
"PRICE",
"YIELD",
"DOLLARDE",
"DOLLARFR",
"NOMINAL",
"EFFECT",
"CUMPRINC",
"CUMIPMT",
"EDATE",
"EOMONTH",
"YEARFRAC",
"COUPDAYBS",
"COUPDAYS",
"COUPDAYSNC",
"COUPNCD",
"COUPNUM",
"COUPPCD",
"DURATION",
"MDURATION",
"ODDLPRICE",
"ODDLYIELD",
"ODDFPRICE",
"ODDFYIELD",
"RANDBETWEEN",
"WEEKNUM",
"AMORDEGRC",
"AMORLINC",
"CONVERT",
"ACCRINT",
"ACCRINTM",
"WORKDAY",
"NETWORKDAYS",
"GCD",
"MULTINOMIAL",
"LCM",
"FVSCHEDULE",
"CUBEKPIMEMBER",
"CUBESET",
"CUBESETCOUNT",
"IFERROR",
"COUNTIFS",
"SUMIFS",
"AVERAGEIF",
"AVERAGEIFS",
];
/// One `u8` per slot of `FTAB`; both tables have `FTAB_LEN` entries, so
/// `FTAB_ARGC[i]` belongs to `FTAB[i]`.
///
/// NOTE(review): going by the name, each value is presumably the argument
/// count of the function at the same index. Values such as 128, 129, 254 and
/// 255 look like sentinels (e.g. "variable number of arguments") rather than
/// literal counts — confirm against the code that consumes this table.
pub const FTAB_ARGC: [u8; FTAB_LEN] = [
255, 3, 1, 1, 255, 255, 255, 255, 1, 1, 0, 254, 255, 2, 3, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 2, 3, 1, 1, 0, 0, 255, 255, 1, 2, 3, 3, 3, 3, 3, 3, 255, 3, 2, 4, 4, 4, 4, 1, 1, 1, 5, 5, 5, 5, 6, 3, 2, 0, 3, 3, 3, 1, 1, 1, 2, 1, 1, 1, 0, 1, 1, 1, 5, 2, 2, 3, 3, 1, 2, 0, 1, 1, 2, 0, 1, 2, 2, 2, 0, 0, 1, 2, 1, 1, 255, 4, 4, 2, 7, 1, 1, 2, 2, 2, 4, 1, 1, 1, 1, 2, 2, 2, 1, 4, 4, 1, 3, 1, 3, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 3, 4, 5, 3, 2, 2, 2, 255, 255, 1, 4, 5, 5, 5, 5, 1, 3, 4, 3, 1, 1, 1, 1, 1, 2, 6, 6, 255, 2, 4, 1, 0, 0, 2, 2, 3, 2, 1, 1, 1, 4, 255, 1, 2, 1, 2, 2, 3, 1, 3, 4, 255, 255, 3, 3, 2, 1, 3, 1, 1, 0, 0, 2, 3, 3, 4, 2, 2, 3, 3, 2, 2, 1, 1, 3, 0, 0, 5, 3, 0, 7, 0, 1, 0, 3, 255, 255, 1, 1, 1, 1, 1, 1, 3, 11, 1, 0, 2, 3, 5, 4, 4, 1, 0, 5, 5, 1, 0, 0, 1, 2, 2, 1, 255, 1, 1, 2, 3, 3, 1, 1, 1, 2, 3, 3, 3, 2, 255, 5, 1, 5, 4, 2, 2, 2, 3, 3, 1, 3, 3, 3, 1, 1, 2, 4, 3, 2, 4, 3, 3, 3, 4, 1, 3, 1, 3, 1, 2, 3, 3, 4, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 4, 4, 255, 255, 255, 255, 255, 255, 3, 2, 2, 2, 2, 3, 255, 2, 2, 4, 4, 3, 255, 2, 9, 2, 3, 4, 1, 1, 255, 3, 2, 1, 2, 1, 4, 3, 1, 2, 2, 4, 5, 2, 128, 2, 1, 255, 255, 255, 255, 255, 255, 255, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 255, 255, 3, 3, 4, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 255, 255, 4, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 6, 6, 5, 5, 5, 5, 5, 3, 3, 3, 7, 7, 2, 2, 2, 2, 6, 6, 2, 2, 3, 4, 4, 4, 4, 4, 4, 6, 6, 8, 8, 8, 8, 2, 2, 7, 7, 8, 8, 5, 3, 3, 255, 255, 255, 2, 4, 5, 1, 2, 128, 129, 3, 129, ];
#[cfg(test)]
mod tests {
    use super::*;

    // `to_u32` splits the input into little-endian 32-bit words.
    #[test]
    fn sound_to_u32() {
        let words: Vec<u32> = to_u32(b"ABCDEFGH").collect();
        let expected = [u32::from_le_bytes(*b"ABCD"), u32::from_le_bytes(*b"EFGH")];
        assert_eq!(words, expected);
    }

    // Predefined entities and numeric character references resolve; empty and
    // unknown references are rejected.
    #[test]
    fn unescape_entity() {
        let mut buffer = String::new();

        let resolvable = [
            ("amp", "&"),
            ("lt", "<"),
            ("#xA", "\n"),
            ("#x41", "A"),
            ("#65", "A"),
        ];
        for (name, expected) in resolvable {
            let entity = BytesRef::new(name);
            let result = unescape_entity_to_buffer(&entity, &mut buffer);
            assert!(result.is_ok());
            assert_eq!(buffer, expected);
            buffer.clear();
        }

        let entity = BytesRef::new("");
        assert!(unescape_entity_to_buffer(&entity, &mut buffer).is_err());

        let entity = BytesRef::new("not_a_real_entity");
        let result = unescape_entity_to_buffer(&entity, &mut buffer);
        assert!(result.is_err());
        // When the error is the "unrecognized entity" variant, its message
        // must name the offending reference.
        if let Err(quick_xml::Error::Escape(
            quick_xml::escape::EscapeError::UnrecognizedEntity(_, msg),
        )) = result
        {
            assert!(msg.contains("not_a_real_entity"));
        }
    }

    // Pairs of (input, expected output) for `unescape_xml`.
    #[test]
    fn xml_with_escapes() {
        let test_cases = [
            ("_", "_"),
            ("_x", "_x"),
            ("_x0", "_x0"),
            ("_x00", "_x00"),
            ("_x005F_", "_"),
            ("_x000D_", "\r"),
            ("_x000", "_x000"),
            ("_x001F_", "\x1F"),
            ("_x000D", "_x000D"),
            ("_x00ZZ_", "_x00ZZ_"),
            ("_x_x_x", "_x_x_x"),
            ("_x00½_", "_x00½_"),
            ("_x000G_", "_x000G_"),
            ("_x597G_", "_x597G_"),
            ("😀_x000D_😀", "😀\r😀"),
            ("_x005F_x0000_", "_x0000_"),
            ("_x005F_x000a_", "_x000a_"),
            ("_x005F_x000A_", "_x000A_"),
            ("_x005F_x005F_", "_x005F_"),
            ("_x005F_x597D_", "_x597D_"),
            ("_x005F_x597d_", "_x597d_"),
            ("__x005F_x0000__", "__x0000__"),
            ("Hello_x000D_World", "Hello\rWorld"),
            ("Control_x001F_Char", "Control\x1FChar"),
            ("Hello_x000D_World_x000D_", "Hello\rWorld\r"),
            ("Just_a_normal_string", "Just_a_normal_string"),
            ("Hello_x000D_World_x000D__x000D_", "Hello\rWorld\r\r"),
            ("Multiple_x000D__x000D__x000D_Chars", "Multiple\r\r\rChars"),
        ];

        for (input, expected) in test_cases {
            let actual = unescape_xml(input);
            assert_eq!(actual, expected);
        }
    }
}