use std::collections::HashMap;
use std::fs::File;
use std::io::Read;
use quick_xml::events::{BytesStart, Event};
use quick_xml::Reader as XmlReader;
use zip::ZipArchive;
use crate::error::{Error, Result};
/// Looks up an attribute on the start tag `e` by its raw name.
///
/// `key` is compared against the attribute name exactly as written in the
/// document (including any namespace prefix, e.g. `b"r:id"`). The first
/// matching attribute wins. Attributes are iterated with quick-xml's checks
/// switched off, and attributes that fail to parse are skipped.
///
/// Returns the XML-unescaped value. If unescaping fails, the raw value bytes
/// are decoded lossily as UTF-8 instead. Returns `None` when no attribute
/// matches.
pub fn attr_value(e: &BytesStart<'_>, key: &[u8]) -> Option<String> {
    let attr = e
        .attributes()
        .with_checks(false)
        .flatten()
        .find(|candidate| candidate.key.as_ref() == key)?;

    let text = match attr.unescape_value() {
        Ok(unescaped) => unescaped.to_string(),
        // Best effort: keep whatever was written rather than dropping the attribute.
        Err(_) => String::from_utf8_lossy(attr.value.as_ref()).into_owned(),
    };
    Some(text)
}
pub fn zip_read_to_string_opt(zip: &mut ZipArchive<File>, name: &str) -> Result<Option<String>> {
match zip.by_name(name) {
Ok(mut f) => {
let mut out = String::new();
f.read_to_string(&mut out)
.map_err(|e| Error::Xlsx(format!("failed to read {name}: {e}")))?;
Ok(Some(out))
}
Err(zip::result::ZipError::FileNotFound) => Ok(None),
Err(e) => Err(Error::Xlsx(format!("zip error reading {name}: {e}"))),
}
}
pub fn zip_read_to_string(zip: &mut ZipArchive<File>, name: &str) -> Result<String> {
zip_read_to_string_opt(zip, name)?
.ok_or_else(|| Error::Xlsx(format!("missing zip entry: {name}")))
}
pub fn parse_workbook_sheet_rids(xml: &str) -> Result<Vec<(String, String)>> {
let mut reader = XmlReader::from_str(xml);
reader.config_mut().trim_text(true);
let mut buf: Vec<u8> = Vec::new();
let mut out: Vec<(String, String)> = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
if e.local_name().as_ref() == b"sheet" {
let name = attr_value(&e, b"name");
let rid = attr_value(&e, b"r:id");
if let (Some(n), Some(r)) = (name, rid) {
out.push((n, r));
}
}
}
Ok(Event::Eof) => break,
Err(e) => return Err(Error::Xlsx(format!("failed to parse workbook.xml: {e}"))),
_ => {}
}
buf.clear();
}
Ok(out)
}
pub fn parse_relationship_targets(xml: &str) -> Result<HashMap<String, String>> {
let mut reader = XmlReader::from_str(xml);
reader.config_mut().trim_text(true);
let mut buf: Vec<u8> = Vec::new();
let mut out: HashMap<String, String> = HashMap::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
if e.local_name().as_ref() == b"Relationship" {
let id = attr_value(&e, b"Id");
let target = attr_value(&e, b"Target");
if let (Some(i), Some(t)) = (id, target) {
out.insert(i, t);
}
}
}
Ok(Event::Eof) => break,
Err(e) => return Err(Error::Xlsx(format!("failed to parse rels: {e}"))),
_ => {}
}
buf.clear();
}
Ok(out)
}
/// Normalizes a `/`-separated archive path.
///
/// Empty segments (from leading, trailing, or doubled slashes) and `.`
/// segments are dropped. Each `..` removes the previously kept segment; a
/// `..` with nothing left to remove is ignored. The result never starts or
/// ends with `/`.
pub fn normalize_zip_path(path: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();
    for segment in path.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                // Popping an empty stack is a no-op, which clamps the path at the root.
                kept.pop();
            }
            name => kept.push(name),
        }
    }
    kept.join("/")
}
pub fn join_and_normalize(base_dir: &str, target: &str) -> String {
let t = target.trim_start_matches('/');
let combined = if t.starts_with("xl/") {
t.to_string()
} else {
format!("{base_dir}{t}")
};
normalize_zip_path(&combined)
}
/// Converts an A1-style cell reference into zero-based `(row, column)` indices.
///
/// The reference must be one or more ASCII letters (case-insensitive)
/// followed by one or more ASCII digits, with nothing else: `"B7"` becomes
/// `(6, 1)`. Absolute markers (`$`), sheet prefixes, and ranges are not
/// accepted.
///
/// # Errors
///
/// Returns `Err("Invalid cell reference: <input>")` when the input is empty,
/// lacks a column or row part, has digits before or between letters, contains
/// any other character, has row `0`, or when the column or row does not fit
/// in a `u32`.
pub fn a1_to_row_col(a1: &str) -> std::result::Result<(u32, u32), String> {
    let invalid = || format!("Invalid cell reference: {a1}");

    // Split into the leading run of letters and whatever follows. Everything
    // before the split point is ASCII, so the byte index is a char boundary.
    let split = a1
        .find(|ch: char| !ch.is_ascii_alphabetic())
        .unwrap_or(a1.len());
    let (letters, digits) = a1.split_at(split);

    // The tail must be purely digits. This rejects interleaved forms such as
    // "A1B2" and also signs, which `str::parse` would otherwise tolerate.
    if letters.is_empty() || digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return Err(invalid());
    }

    // Bijective base-26: A = 1 .. Z = 26. Checked arithmetic turns an absurdly
    // long column name into an error instead of an overflow.
    let col_1 = letters
        .bytes()
        .try_fold(0u32, |acc, b| {
            let digit = u32::from(b.to_ascii_uppercase() - b'A' + 1);
            acc.checked_mul(26)?.checked_add(digit)
        })
        .ok_or_else(&invalid)?;

    let row_1: u32 = digits.parse().map_err(|_| invalid())?;
    if row_1 == 0 {
        return Err(invalid());
    }

    Ok((row_1 - 1, col_1 - 1))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Well-formed references map to zero-based (row, column) pairs.
    #[test]
    fn a1_basics() {
        let cases = [
            ("A1", (0, 0)),
            ("B7", (6, 1)),
            ("AA10", (9, 26)),
            ("ZZ100", (99, 701)),
        ];
        for (reference, expected) in cases {
            assert_eq!(a1_to_row_col(reference), Ok(expected));
        }
    }

    // Column letters are accepted in either case.
    #[test]
    fn a1_case_insensitive() {
        let lower = a1_to_row_col("ab3").unwrap();
        let upper = a1_to_row_col("AB3").unwrap();
        assert_eq!(lower, upper);
    }

    // Row zero, absolute markers, and the empty string are all rejected.
    #[test]
    fn a1_rejects_garbage() {
        for reference in ["A0", "$A$1", ""] {
            assert!(a1_to_row_col(reference).is_err());
        }
    }

    // `.` and `..` segments and a leading slash are removed.
    #[test]
    fn normalize_collapses_dots() {
        let cases = [
            ("xl/./worksheets/../sheet1.xml", "xl/sheet1.xml"),
            ("/xl/worksheets/sheet1.xml", "xl/worksheets/sheet1.xml"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_zip_path(input), expected);
        }
    }

    // Relative, slash-rooted, and `xl/`-prefixed targets all resolve to the same entry.
    #[test]
    fn join_handles_rooted_target() {
        let targets = [
            "worksheets/sheet1.xml",
            "/xl/worksheets/sheet1.xml",
            "xl/worksheets/sheet1.xml",
        ];
        for target in targets {
            assert_eq!(join_and_normalize("xl/", target), "xl/worksheets/sheet1.xml");
        }
    }

    // Sheets come back as (name, r:id) pairs in document order.
    #[test]
    fn parse_workbook_sheets() {
        let xml = r#"<?xml version="1.0"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheets>
<sheet name="Revenue" sheetId="1" r:id="rId1"/>
<sheet name="Balance Sheet" sheetId="2" r:id="rId2"/>
</sheets>
</workbook>"#;
        let sheets = parse_workbook_sheet_rids(xml).unwrap();
        let expected = vec![
            ("Revenue".to_string(), "rId1".to_string()),
            ("Balance Sheet".to_string(), "rId2".to_string()),
        ];
        assert_eq!(sheets, expected);
    }

    // Attribute order inside a Relationship element does not matter.
    #[test]
    fn parse_rels_happy() {
        let xml = r#"<?xml version="1.0"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
<Relationship Id="rId2" Target="worksheets/sheet2.xml" Type="..."/>
</Relationships>"#;
        let targets = parse_relationship_targets(xml).unwrap();
        let expected = [
            ("rId1", "worksheets/sheet1.xml"),
            ("rId2", "worksheets/sheet2.xml"),
        ];
        for (id, target) in expected {
            assert_eq!(targets.get(id).map(String::as_str), Some(target));
        }
    }
}