use std::collections::{HashMap, HashSet};
use std::io::Cursor;
use crate::ir::Chart;
use crate::parser::chart::parse_chart_xml;
use crate::parser::xml_util;
pub(super) fn extract_charts_with_anchors(data: &[u8]) -> HashMap<String, Vec<(u32, Chart)>> {
let Ok(mut archive) = crate::parser::open_zip(data) else {
return HashMap::new();
};
let workbook_xml = read_zip_entry_string(&mut archive, "xl/workbook.xml");
let sheet_rids = parse_workbook_sheet_rids(&workbook_xml);
let workbook_rels_xml = read_zip_entry_string(&mut archive, "xl/_rels/workbook.xml.rels");
let rid_to_target = parse_rels_targets(&workbook_rels_xml);
let mut result: HashMap<String, Vec<(u32, Chart)>> = HashMap::new();
for (sheet_name, sheet_rid) in &sheet_rids {
let Some(sheet_target) = rid_to_target.get(sheet_rid) else {
continue;
};
let sheet_full_path = format!("xl/{sheet_target}");
let sheet_filename = sheet_full_path.rsplit('/').next().unwrap_or(sheet_target);
let sheet_rels_path = format!("xl/worksheets/_rels/{sheet_filename}.rels");
let sheet_rels_xml = read_zip_entry_string(&mut archive, &sheet_rels_path);
if sheet_rels_xml.is_empty() {
continue;
}
let drawing_targets = parse_rels_by_type(&sheet_rels_xml, "drawing");
for drawing_target in &drawing_targets {
let drawing_path = resolve_relative_xl_path("xl/worksheets", drawing_target);
let drawing_xml = read_zip_entry_string(&mut archive, &drawing_path);
if drawing_xml.is_empty() {
continue;
}
let anchors = parse_drawing_chart_anchors(&drawing_xml);
let drawing_filename = drawing_path.rsplit('/').next().unwrap_or(&drawing_path);
let drawing_dir = drawing_path
.rsplit_once('/')
.map(|(d, _)| d)
.unwrap_or("xl/drawings");
let drawing_rels_path = format!("{drawing_dir}/_rels/{drawing_filename}.rels");
let drawing_rels_xml = read_zip_entry_string(&mut archive, &drawing_rels_path);
let drawing_rid_targets = parse_rels_targets(&drawing_rels_xml);
for (anchor_row, chart_rid) in &anchors {
let Some(chart_target) = drawing_rid_targets.get(chart_rid) else {
continue;
};
let chart_path = resolve_relative_xl_path(drawing_dir, chart_target);
let chart_xml = read_zip_entry_string(&mut archive, &chart_path);
if let Some(chart) = parse_chart_xml(&chart_xml) {
result
.entry(sheet_name.clone())
.or_default()
.push((*anchor_row, chart));
}
}
}
}
let all_positioned_chart_paths: HashSet<String> = result
.values()
.flatten()
.filter_map(|_| None::<String>) .collect();
let _ = all_positioned_chart_paths;
let chart_paths: Vec<String> = (0..archive.len())
.filter_map(|i| {
let entry = archive.by_index(i).ok()?;
let name = entry.name().to_string();
if name.starts_with("xl/charts/chart") && name.ends_with(".xml") {
Some(name)
} else {
None
}
})
.collect();
let positioned_count: usize = result.values().map(|v| v.len()).sum();
if chart_paths.len() > positioned_count {
let positioned_charts: HashSet<String> = collect_positioned_chart_paths(&result, data);
let first_sheet = sheet_rids
.first()
.map(|(name, _)| name.clone())
.unwrap_or_else(|| "Sheet1".to_string());
for path in &chart_paths {
if positioned_charts.contains(path) {
continue;
}
let chart_xml = read_zip_entry_string(&mut archive, path);
if let Some(chart) = parse_chart_xml(&chart_xml) {
result
.entry(first_sheet.clone())
.or_default()
.push((u32::MAX, chart));
}
}
}
result
}
/// Returns the archive paths of all chart parts referenced from drawing
/// anchors on sheets that already have an entry in `chart_map`.
///
/// The workbook in `data` is walked again (workbook -> sheet relationships ->
/// drawing -> drawing relationships); for every anchor whose relationship id
/// resolves, the resolved chart path is added to the set. Sheets whose name is
/// not a key of `chart_map` are ignored. An empty set is returned when `data`
/// cannot be opened as a zip archive.
pub(super) fn collect_positioned_chart_paths(
    chart_map: &HashMap<String, Vec<(u32, Chart)>>,
    data: &[u8],
) -> HashSet<String> {
    let mut archive = match crate::parser::open_zip(data) {
        Ok(opened) => opened,
        Err(_) => return HashSet::new(),
    };

    let workbook = read_zip_entry_string(&mut archive, "xl/workbook.xml");
    let sheets = parse_workbook_sheet_rids(&workbook);
    let workbook_rels = read_zip_entry_string(&mut archive, "xl/_rels/workbook.xml.rels");
    let targets_by_rid = parse_rels_targets(&workbook_rels);

    let mut anchored_paths = HashSet::new();

    // Only sheets that already contributed charts are of interest.
    let charted_sheets = sheets
        .iter()
        .filter(|(name, _)| chart_map.contains_key(name));

    for (_, rid) in charted_sheets {
        let sheet_target = match targets_by_rid.get(rid) {
            Some(target) => target,
            None => continue,
        };

        // Sheet relationships are looked up by the sheet part's file name.
        let sheet_part = format!("xl/{sheet_target}");
        let sheet_file = sheet_part
            .rsplit_once('/')
            .map_or(sheet_part.as_str(), |(_, file)| file);
        let rels_xml = read_zip_entry_string(
            &mut archive,
            &format!("xl/worksheets/_rels/{sheet_file}.rels"),
        );

        for drawing_target in &parse_rels_by_type(&rels_xml, "drawing") {
            let drawing_path = resolve_relative_xl_path("xl/worksheets", drawing_target);
            let drawing_xml = read_zip_entry_string(&mut archive, &drawing_path);
            let anchors = parse_drawing_chart_anchors(&drawing_xml);

            // Split the drawing path once into its directory and file name.
            let (drawing_dir, drawing_file) = match drawing_path.rsplit_once('/') {
                Some((dir, file)) => (dir, file),
                None => ("xl/drawings", drawing_path.as_str()),
            };
            let drawing_rels_xml = read_zip_entry_string(
                &mut archive,
                &format!("{drawing_dir}/_rels/{drawing_file}.rels"),
            );
            let chart_targets = parse_rels_targets(&drawing_rels_xml);

            anchored_paths.extend(
                anchors
                    .iter()
                    .filter_map(|(_, chart_rid)| chart_targets.get(chart_rid))
                    .map(|target| resolve_relative_xl_path(drawing_dir, target)),
            );
        }
    }

    anchored_paths
}
/// Reads the archive entry at `path` into a `String`.
///
/// Returns an empty string when the entry cannot be opened. Errors from the
/// read itself are deliberately ignored: whatever `read_to_string` left in the
/// buffer is returned as is.
pub(super) fn read_zip_entry_string(
    archive: &mut zip::ZipArchive<Cursor<&[u8]>>,
    path: &str,
) -> String {
    let mut contents = String::new();
    if let Ok(mut file) = archive.by_name(path) {
        // Best effort: a failed read is not reported to the caller.
        let _ = std::io::Read::read_to_string(&mut file, &mut contents);
    }
    contents
}
/// Extracts `(name, relationship id)` pairs from workbook XML.
///
/// Every element whose local name is `sheet` is inspected, whether it is
/// self-closing or not. The pair is built from the attributes whose local
/// names are `name` and `id`; elements missing either one are skipped.
/// Pairs are returned in document order. Parsing stops at end of input or at
/// the first XML error, returning what was collected so far.
pub(super) fn parse_workbook_sheet_rids(xml: &str) -> Vec<(String, String)> {
    let mut sheets = Vec::new();
    let mut reader = quick_xml::Reader::from_str(xml);

    loop {
        let element = match reader.read_event() {
            Ok(quick_xml::events::Event::Start(e)) | Ok(quick_xml::events::Event::Empty(e)) => e,
            Ok(quick_xml::events::Event::Eof) | Err(_) => break,
            Ok(_) => continue,
        };
        if element.local_name().as_ref() != b"sheet" {
            continue;
        }

        let mut sheet_name: Option<String> = None;
        let mut rel_id: Option<String> = None;
        for attr in element.attributes().flatten() {
            // Pick the variable this attribute feeds; ignore all others.
            let slot = match attr.key.local_name().as_ref() {
                b"name" => &mut sheet_name,
                b"id" => &mut rel_id,
                _ => continue,
            };
            // A value that fails to unescape leaves the slot untouched.
            if let Ok(value) = attr.unescape_value() {
                *slot = Some(value.into_owned());
            }
        }

        if let (Some(name), Some(rid)) = (sheet_name, rel_id) {
            sheets.push((name, rid));
        }
    }

    sheets
}
/// Parses a relationships (`.rels`) document into a lookup map.
///
/// Thin wrapper that forwards to `xml_util::parse_rels_id_target`. Callers in
/// this module look up relationship ids (sheet and chart `r:id` values) in the
/// returned map and treat the value as the relationship's target path.
// NOTE(review): the exact key/value semantics are defined by
// `xml_util::parse_rels_id_target`, which is not visible here — presumably
// `Id` -> `Target`; confirm against that helper.
pub(super) fn parse_rels_targets(xml: &str) -> HashMap<String, String> {
xml_util::parse_rels_id_target(xml)
}
pub(super) fn parse_rels_by_type(xml: &str, type_substring: &str) -> Vec<String> {
let mut targets = Vec::new();
let mut reader = quick_xml::Reader::from_str(xml);
loop {
match reader.read_event() {
Ok(quick_xml::events::Event::Start(ref e))
| Ok(quick_xml::events::Event::Empty(ref e)) => {
if e.local_name().as_ref() == b"Relationship" {
let mut target = None;
let mut matches_type = false;
for attr in e.attributes().flatten() {
match attr.key.local_name().as_ref() {
b"Target" => {
if let Ok(v) = attr.unescape_value() {
target = Some(v.to_string());
}
}
b"Type" => {
if let Ok(v) = attr.unescape_value()
&& v.contains(type_substring)
{
matches_type = true;
}
}
_ => {}
}
}
if matches_type && let Some(t) = target {
targets.push(t);
}
}
}
Ok(quick_xml::events::Event::Eof) => break,
Err(_) => break,
_ => {}
}
}
targets
}
/// Resolves a relationship target against the directory of the part that
/// declares it.
///
/// Thin wrapper that forwards to `xml_util::resolve_relative_path`. In this
/// module `base_dir` is a directory inside the archive (for example
/// `"xl/worksheets"` or a drawing's directory) and `relative` is a target read
/// from a `.rels` part; the result is used as a zip entry name.
// NOTE(review): handling of `..` segments and absolute targets is decided by
// `xml_util::resolve_relative_path`, which is not visible here — confirm there.
pub(super) fn resolve_relative_xl_path(base_dir: &str, relative: &str) -> String {
xml_util::resolve_relative_path(base_dir, relative)
}
/// Extracts `(row, chart relationship id)` pairs from drawing XML.
///
/// Each `twoCellAnchor` / `oneCellAnchor` element is scanned for
/// * the numeric text of the `row` element inside its `from` element, and
/// * a `chart` element inside a `graphicData` element whose `uri` attribute
///   contains `"chart"`; the chart's `id` attribute is taken as the
///   relationship id.
///
/// An anchor yields a pair only when both values were found. The chart
/// reference is accepted both as a self-closing element and as an open/close
/// pair, since the two spellings are equivalent XML. Pairs are returned in
/// document order. Parsing stops at end of input or at the first XML error,
/// returning what was collected so far.
pub(super) fn parse_drawing_chart_anchors(xml: &str) -> Vec<(u32, String)> {
    /// Returns the last readable `id` / `r:id` attribute value of `element`.
    fn chart_reference_id(element: &quick_xml::events::BytesStart<'_>) -> Option<String> {
        element
            .attributes()
            .flatten()
            .filter(|attr| {
                attr.key.as_ref() == b"r:id" || attr.key.local_name().as_ref() == b"id"
            })
            .filter_map(|attr| attr.unescape_value().ok().map(|value| value.into_owned()))
            .last()
    }

    let mut result = Vec::new();
    let mut reader = quick_xml::Reader::from_str(xml);

    // Parser state: which enclosing elements are currently open.
    let mut in_anchor = false;
    let mut in_from = false;
    let mut in_row = false;
    let mut in_graphic_data = false;

    // Values gathered for the anchor currently being read.
    let mut anchor_row: Option<u32> = None;
    let mut chart_rid: Option<String> = None;

    loop {
        match reader.read_event() {
            Ok(quick_xml::events::Event::Start(ref e)) => {
                let local = e.local_name();
                match local.as_ref() {
                    b"twoCellAnchor" | b"oneCellAnchor" => {
                        // A new anchor starts with a clean slate.
                        in_anchor = true;
                        anchor_row = None;
                        chart_rid = None;
                    }
                    b"from" if in_anchor => {
                        in_from = true;
                    }
                    b"row" if in_from => {
                        in_row = true;
                    }
                    b"graphicData" if in_anchor => {
                        for attr in e.attributes().flatten() {
                            if attr.key.local_name().as_ref() == b"uri"
                                && let Ok(val) = attr.unescape_value()
                                && val.contains("chart")
                            {
                                in_graphic_data = true;
                            }
                        }
                    }
                    // `<c:chart r:id=".."></c:chart>` form of the chart reference.
                    b"chart" if in_graphic_data => {
                        if let Some(rid) = chart_reference_id(e) {
                            chart_rid = Some(rid);
                        }
                    }
                    _ => {}
                }
            }
            Ok(quick_xml::events::Event::Empty(ref e)) => {
                // `<c:chart r:id=".."/>` form of the chart reference.
                if in_graphic_data
                    && e.local_name().as_ref() == b"chart"
                    && let Some(rid) = chart_reference_id(e)
                {
                    chart_rid = Some(rid);
                }
            }
            Ok(quick_xml::events::Event::Text(ref t)) => {
                // Only text directly inside `from` > `row` is a row number.
                if in_row
                    && let Ok(s) = t.xml_content()
                    && let Ok(row) = s.trim().parse::<u32>()
                {
                    anchor_row = Some(row);
                }
            }
            Ok(quick_xml::events::Event::End(ref e)) => {
                let local = e.local_name();
                match local.as_ref() {
                    b"twoCellAnchor" | b"oneCellAnchor" => {
                        // Both values are consumed here whether or not a pair is emitted.
                        if let (Some(row), Some(rid)) = (anchor_row.take(), chart_rid.take()) {
                            result.push((row, rid));
                        }
                        in_anchor = false;
                        in_from = false;
                        in_graphic_data = false;
                    }
                    b"from" => {
                        in_from = false;
                    }
                    b"row" => {
                        in_row = false;
                    }
                    b"graphicData" => {
                        in_graphic_data = false;
                    }
                    _ => {}
                }
            }
            Ok(quick_xml::events::Event::Eof) => break,
            Err(_) => break,
            _ => {}
        }
    }

    result
}