1mod xlsx_eval;
17
18use std::collections::{BTreeMap, BTreeSet};
19use std::path::Path;
20
21use lo_core::{
22 parse_xml_document, serialize_xml_document, CellAddr, LoError, Result, Workbook,
23 XmlItem, XmlNode,
24};
25use lo_zip::{normalize_zip_path, rels_path_for, resolve_part_target, ZipArchive};
26use xlsx_eval::{translate_shared_formula, EvalValue, WorkbookEvaluator};
27
28pub fn docx_to_pdf_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
31 let doc = lo_writer::from_docx_bytes("document", bytes)?;
32 lo_writer::save_as(&doc, "pdf")
33}
34
35pub fn doc_to_docx_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
38 let doc = lo_writer::from_doc_bytes("document", bytes)?;
39 lo_writer::save_as(&doc, "docx")
40}
41
42pub fn pptx_to_pdf_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
45 let deck = lo_impress::from_pptx_bytes("presentation", bytes)?;
46 lo_impress::save_as(&deck, "pdf")
47}
48
/// Application family that a source format is routed to by `convert_bytes`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Family {
    /// Text documents (handled through `lo_writer`).
    Writer,
    /// Spreadsheets (handled through `lo_calc`).
    Calc,
    /// Presentations (handled through `lo_impress`).
    Impress,
    /// Drawings (handled through `lo_draw`).
    Draw,
    /// Formulas (handled through `lo_math`).
    Math,
    /// Databases (handled through `lo_base`).
    Base,
}
62
/// Normalizes a user-supplied format hint into the canonical lowercase name.
///
/// Surrounding whitespace and one leading `.` are removed, anything after the
/// first `:` is discarded, the remainder is ASCII-lowercased, and a handful of
/// aliases (`text`, `markdown`, `htm`, `mml`, `odfmath`, `odf-formula`) are
/// mapped to their canonical spelling. Unknown hints are returned lowercased.
fn canonical_format_hint(format: &str) -> String {
    let without_dot = {
        let stripped = format.trim();
        stripped.strip_prefix('.').unwrap_or(stripped)
    };
    // `split` always yields at least one piece, so the fallback is never taken.
    let head = without_dot.split(':').next().unwrap_or(without_dot).trim();
    let lowered = head.to_ascii_lowercase();
    let canonical = match lowered.as_str() {
        "text" => "txt",
        "markdown" => "md",
        "htm" => "html",
        "mml" => "mathml",
        "odfmath" | "odf-formula" => "odf",
        _ => return lowered,
    };
    canonical.to_string()
}
76
77pub fn sniff_format_from_path(path: &str) -> Option<String> {
79 let ext = Path::new(path).extension()?.to_str()?;
80 Some(canonical_format_hint(ext))
81}
82
83pub fn sniff_format_from_bytes(bytes: &[u8]) -> Option<String> {
92 if bytes.len() >= 4 && &bytes[..4] == b"PK\x03\x04" {
93 let zip = ZipArchive::new(bytes).ok()?;
94 if zip.contains("[Content_Types].xml") {
95 let content_types = zip.read_string("[Content_Types].xml").ok()?;
96 let lower = content_types.to_ascii_lowercase();
97 if lower.contains("wordprocessingml") {
98 return Some("docx".to_string());
99 }
100 if lower.contains("spreadsheetml") {
101 return Some("xlsx".to_string());
102 }
103 if lower.contains("presentationml") {
104 return Some("pptx".to_string());
105 }
106 }
107 if zip.contains("mimetype") {
108 let mimetype = zip.read_string("mimetype").ok()?.to_ascii_lowercase();
109 if mimetype.contains("opendocument.text") {
110 return Some("odt".to_string());
111 }
112 if mimetype.contains("opendocument.spreadsheet") {
113 return Some("ods".to_string());
114 }
115 if mimetype.contains("opendocument.presentation") {
116 return Some("odp".to_string());
117 }
118 }
119 }
120 if bytes.len() >= 8 && bytes[..8] == [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1] {
121 if find_bytes(bytes, b"WordDocument") {
122 return Some("doc".to_string());
123 }
124 if find_bytes(bytes, b"Workbook") {
125 return Some("xls".to_string());
126 }
127 if find_bytes(bytes, b"PowerPoint Document") {
128 return Some("ppt".to_string());
129 }
130 }
131 let header_len = bytes.len().min(1024);
132 if bytes[..header_len].windows(5).any(|window| window == b"%PDF-") {
133 return Some("pdf".to_string());
134 }
135 let text = std::str::from_utf8(bytes).ok()?.trim_start_matches('\u{feff}');
136 if text.starts_with("<svg") || text.contains("<svg") {
137 return Some("svg".to_string());
138 }
139 if text.starts_with("<!doctype html") || text.starts_with("<html") || text.contains("<body") {
140 return Some("html".to_string());
141 }
142 if text.starts_with('#') || text.contains("\n# ") || text.contains("\n- ") {
143 return Some("md".to_string());
144 }
145 if text.contains(',') && text.lines().count() > 1 {
146 return Some("csv".to_string());
147 }
148 if !text.is_empty() {
149 return Some("txt".to_string());
150 }
151 None
152}
153
/// Reports whether `needle` occurs as a contiguous subsequence of `haystack`.
///
/// An empty `needle` matches any haystack.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    // `slice::windows` panics on a window size of zero, so handle the empty
    // needle before scanning.
    needle.is_empty() || haystack.windows(needle.len()).any(|window| window == needle)
}
157
158fn family_for_source(source: &str) -> Option<Family> {
159 match canonical_format_hint(source).as_str() {
160 "txt" | "md" | "html" | "docx" | "doc" | "odt" | "pdf" => Some(Family::Writer),
161 "csv" | "xlsx" | "ods" | "xls" => Some(Family::Calc),
162 "pptx" | "odp" | "ppt" => Some(Family::Impress),
163 "svg" | "odg" => Some(Family::Draw),
164 "latex" | "mathml" | "odf" => Some(Family::Math),
165 "odb" => Some(Family::Base),
166 _ => None,
167 }
168}
169
170pub fn writer_convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
172 let from = canonical_format_hint(from);
173 let to = canonical_format_hint(to);
174 let doc = lo_writer::load_bytes("document", input, &from)?;
175 lo_writer::save_as(&doc, &to)
176}
177
178pub fn calc_convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
180 let from = canonical_format_hint(from);
181 let to = canonical_format_hint(to);
182 let workbook = lo_calc::load_bytes("workbook", input, &from)?;
183 lo_calc::save_as(&workbook, &to)
184}
185
186pub fn impress_convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
188 let from = canonical_format_hint(from);
189 let to = canonical_format_hint(to);
190 let deck = lo_impress::load_bytes("presentation", input, &from)?;
191 lo_impress::save_as(&deck, &to)
192}
193
194pub fn draw_convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
196 let from = canonical_format_hint(from);
197 let to = canonical_format_hint(to);
198 let drawing = lo_draw::load_bytes("drawing", input, &from)?;
199 lo_draw::save_as(&drawing, &to)
200}
201
202pub fn math_convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
208 let from = canonical_format_hint(from);
209 let to = canonical_format_hint(to);
210 let document = lo_math::load_bytes("formula", input, &from)?;
211 if to == "odf" {
212 return lo_odf::save_formula_document_bytes(&document);
213 }
214 lo_math::save_as(&document, &to)
215}
216
217pub fn base_convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
219 let from = canonical_format_hint(from);
220 let to = canonical_format_hint(to);
221 let database = lo_base::load_bytes("database", input, &from, None)?;
222 lo_base::save_as(&database, &to)
223}
224
225pub fn convert_bytes(input: &[u8], from: &str, to: &str) -> Result<Vec<u8>> {
227 let from = canonical_format_hint(from);
228 let to = canonical_format_hint(to);
229 match family_for_source(&from) {
230 Some(Family::Writer) => writer_convert_bytes(input, &from, &to),
231 Some(Family::Calc) => calc_convert_bytes(input, &from, &to),
232 Some(Family::Impress) => impress_convert_bytes(input, &from, &to),
233 Some(Family::Draw) => draw_convert_bytes(input, &from, &to),
234 Some(Family::Math) => math_convert_bytes(input, &from, &to),
235 Some(Family::Base) => base_convert_bytes(input, &from, &to),
236 None => Err(LoError::Unsupported(format!(
237 "generic conversion source format not supported: {from}"
238 ))),
239 }
240}
241
242pub fn convert_path_bytes(path: &str, input: &[u8], to: &str) -> Result<Vec<u8>> {
244 let from = sniff_format_from_path(path).ok_or_else(|| {
245 LoError::InvalidInput(format!("could not infer input format from path: {path}"))
246 })?;
247 convert_bytes(input, &from, to)
248}
249
250pub fn convert_bytes_auto(input: &[u8], to: &str) -> Result<Vec<u8>> {
253 let from = sniff_format_from_bytes(input).ok_or_else(|| {
254 LoError::InvalidInput("could not infer input format from byte stream".to_string())
255 })?;
256 convert_bytes(input, &from, to)
257}
258
259pub fn docx_to_html_bytes(input: &[u8]) -> Result<Vec<u8>> {
262 writer_convert_bytes(input, "docx", "html")
263}
264pub fn docx_to_txt_bytes(input: &[u8]) -> Result<Vec<u8>> {
265 writer_convert_bytes(input, "docx", "txt")
266}
267pub fn pdf_to_txt_bytes(input: &[u8]) -> Result<Vec<u8>> {
268 writer_convert_bytes(input, "pdf", "txt")
269}
270pub fn pdf_to_md_bytes(input: &[u8]) -> Result<Vec<u8>> {
271 writer_convert_bytes(input, "pdf", "md")
272}
273pub fn pdf_to_html_bytes(input: &[u8]) -> Result<Vec<u8>> {
274 writer_convert_bytes(input, "pdf", "html")
275}
276pub fn docx_to_odt_bytes(input: &[u8]) -> Result<Vec<u8>> {
277 writer_convert_bytes(input, "docx", "odt")
278}
279pub fn odt_to_pdf_bytes(input: &[u8]) -> Result<Vec<u8>> {
280 writer_convert_bytes(input, "odt", "pdf")
281}
282pub fn odt_to_docx_bytes(input: &[u8]) -> Result<Vec<u8>> {
283 writer_convert_bytes(input, "odt", "docx")
284}
285pub fn odt_to_html_bytes(input: &[u8]) -> Result<Vec<u8>> {
286 writer_convert_bytes(input, "odt", "html")
287}
288
289pub fn xlsx_to_pdf_bytes(input: &[u8]) -> Result<Vec<u8>> {
292 calc_convert_bytes(input, "xlsx", "pdf")
293}
294pub fn xlsx_to_html_bytes(input: &[u8]) -> Result<Vec<u8>> {
295 calc_convert_bytes(input, "xlsx", "html")
296}
297pub fn xlsx_to_csv_bytes(input: &[u8]) -> Result<Vec<u8>> {
298 calc_convert_bytes(input, "xlsx", "csv")
299}
300pub fn xlsx_to_ods_bytes(input: &[u8]) -> Result<Vec<u8>> {
301 calc_convert_bytes(input, "xlsx", "ods")
302}
303pub fn ods_to_pdf_bytes(input: &[u8]) -> Result<Vec<u8>> {
304 calc_convert_bytes(input, "ods", "pdf")
305}
306pub fn ods_to_xlsx_bytes(input: &[u8]) -> Result<Vec<u8>> {
307 calc_convert_bytes(input, "ods", "xlsx")
308}
309pub fn ods_to_csv_bytes(input: &[u8]) -> Result<Vec<u8>> {
310 calc_convert_bytes(input, "ods", "csv")
311}
312
313pub fn pptx_to_html_bytes(input: &[u8]) -> Result<Vec<u8>> {
316 impress_convert_bytes(input, "pptx", "html")
317}
318pub fn pptx_to_svg_bytes(input: &[u8]) -> Result<Vec<u8>> {
319 impress_convert_bytes(input, "pptx", "svg")
320}
321pub fn pptx_to_odp_bytes(input: &[u8]) -> Result<Vec<u8>> {
322 impress_convert_bytes(input, "pptx", "odp")
323}
324pub fn odp_to_pdf_bytes(input: &[u8]) -> Result<Vec<u8>> {
325 impress_convert_bytes(input, "odp", "pdf")
326}
327pub fn odp_to_pptx_bytes(input: &[u8]) -> Result<Vec<u8>> {
328 impress_convert_bytes(input, "odp", "pptx")
329}
330
/// Occurrences of a single error kind found while re-evaluating a workbook.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct RecalcErrorBucket {
    /// Number of formula cells that produced this error kind.
    pub count: usize,
    /// Locations of those cells, in the order they were recorded.
    pub locations: Vec<String>,
}

/// Summary of a formula re-evaluation pass over a workbook.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct RecalcCheckReport {
    /// Overall status string of the pass.
    pub status: String,
    /// Total number of errors recorded across all kinds.
    pub total_errors: usize,
    /// Total number of formulas that were evaluated.
    pub total_formulas: usize,
    /// Recorded errors, grouped by error kind.
    pub error_summary: BTreeMap<String, RecalcErrorBucket>,
}

impl RecalcCheckReport {
    /// Serializes the report as a compact JSON object.
    ///
    /// Keys are emitted in the order `status`, `total_errors`,
    /// `total_formulas`, `error_summary`; error kinds follow the map's sorted
    /// order. All strings are escaped so the output is valid JSON even when a
    /// sheet name or error kind contains control characters.
    pub fn to_json(&self) -> String {
        /// Escapes `input` for use inside a JSON string literal.
        fn esc(input: &str) -> String {
            let mut out = String::with_capacity(input.len());
            for ch in input.chars() {
                match ch {
                    '\\' => out.push_str("\\\\"),
                    '"' => out.push_str("\\\""),
                    '\n' => out.push_str("\\n"),
                    '\r' => out.push_str("\\r"),
                    '\t' => out.push_str("\\t"),
                    // JSON forbids raw control characters inside strings.
                    ch if (ch as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", ch as u32)),
                    ch => out.push(ch),
                }
            }
            out
        }
        let mut json = String::new();
        json.push('{');
        json.push_str(&format!("\"status\":\"{}\"", esc(&self.status)));
        json.push_str(&format!(",\"total_errors\":{}", self.total_errors));
        json.push_str(&format!(",\"total_formulas\":{}", self.total_formulas));
        json.push_str(",\"error_summary\":{");
        for (kind_index, (kind, bucket)) in self.error_summary.iter().enumerate() {
            if kind_index > 0 {
                json.push(',');
            }
            json.push_str(&format!(
                "\"{}\":{{\"count\":{},\"locations\":[",
                esc(kind),
                bucket.count
            ));
            for (index, location) in bucket.locations.iter().enumerate() {
                if index > 0 {
                    json.push(',');
                }
                json.push_str(&format!("\"{}\"", esc(location)));
            }
            json.push_str("]}");
        }
        json.push_str("}}");
        json
    }

    /// Records one error of `kind` at `location` and bumps the totals.
    fn record_error(&mut self, kind: String, location: String) {
        self.total_errors += 1;
        let bucket = self.error_summary.entry(kind).or_default();
        bucket.count += 1;
        bucket.locations.push(location);
    }
}
389
390pub fn xlsx_recalc_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
395 let zip = ZipArchive::new(bytes)?;
396 let workbook = lo_calc::from_xlsx_bytes("workbook", bytes)?;
397 let evaluator = WorkbookEvaluator::new(&workbook);
398 let sheet_targets = parse_xlsx_sheet_targets(&zip)?;
399
400 let mut entries: Vec<lo_zip::ZipEntry> = Vec::new();
401 for entry_name in zip.entries() {
402 let path = normalize_zip_path(entry_name);
403 if path == "xl/calcChain.xml" {
404 continue;
405 }
406 if path == "[Content_Types].xml" {
407 let xml = zip.read_string(&path)?;
408 let mut root = parse_xml_document(&xml)?;
409 remove_content_type_override(&mut root, "/xl/calcChain.xml");
410 entries.push(lo_zip::ZipEntry::new(path, serialize_xml_document(&root).into_bytes()));
411 continue;
412 }
413 if path == "xl/_rels/workbook.xml.rels" {
414 let xml = zip.read_string(&path)?;
415 let mut root = parse_xml_document(&xml)?;
416 remove_calc_chain_relationships(&mut root);
417 entries.push(lo_zip::ZipEntry::new(path, serialize_xml_document(&root).into_bytes()));
418 continue;
419 }
420 if path == "xl/workbook.xml" {
421 let xml = zip.read_string(&path)?;
422 let mut root = parse_xml_document(&xml)?;
423 mark_workbook_recalculated(&mut root);
424 entries.push(lo_zip::ZipEntry::new(path, serialize_xml_document(&root).into_bytes()));
425 continue;
426 }
427 if let Some(sheet_index) = sheet_targets.iter().position(|(target, _)| target == &path) {
428 let xml = zip.read_string(&path)?;
429 let mut root = parse_xml_document(&xml)?;
430 patch_xlsx_sheet_formula_cache(&mut root, &workbook, sheet_index, &evaluator)?;
431 entries.push(lo_zip::ZipEntry::new(path, serialize_xml_document(&root).into_bytes()));
432 continue;
433 }
434 entries.push(lo_zip::ZipEntry::new(path, zip.read(entry_name)?));
435 }
436 lo_zip::ooxml_package(&entries)
437}
438
439pub fn xlsx_recalc_check_json(bytes: &[u8]) -> Result<String> {
441 Ok(xlsx_recalc_report(bytes)?.to_json())
442}
443
444pub fn xlsx_recalc_report(bytes: &[u8]) -> Result<RecalcCheckReport> {
447 let zip = ZipArchive::new(bytes)?;
448 let workbook = lo_calc::from_xlsx_bytes("workbook", bytes)?;
449 let evaluator = WorkbookEvaluator::new(&workbook);
450 let sheet_targets = parse_xlsx_sheet_targets(&zip)?;
451 let mut report = RecalcCheckReport {
452 status: "ok".to_string(),
453 ..RecalcCheckReport::default()
454 };
455
456 for (sheet_index, (path, sheet_name)) in sheet_targets.iter().enumerate() {
457 if !zip.contains(path) {
458 continue;
459 }
460 let xml = zip.read_string(path)?;
461 let root = parse_xml_document(&xml)?;
462 walk_formula_cells(&root, sheet_name, sheet_index, &evaluator, &mut report)?;
463 }
464
465 if report.total_errors > 0 {
466 report.status = "error".to_string();
467 }
468 Ok(report)
469}
470
471fn parse_xlsx_sheet_targets(zip: &ZipArchive) -> Result<Vec<(String, String)>> {
472 let workbook_root = parse_xml_document(&zip.read_string("xl/workbook.xml")?)?;
473 let rels = parse_relationships(zip, "xl/workbook.xml")?;
474 let mut out = Vec::new();
475 if let Some(sheets) = workbook_root.child("sheets") {
476 for (index, sheet) in sheets.children_named("sheet").enumerate() {
477 let name = sheet.attr("name").unwrap_or("Sheet").to_string();
478 let target = sheet
479 .attr("id")
480 .or_else(|| sheet.attr("r:id"))
481 .and_then(|id| rels.get(id))
482 .cloned()
483 .unwrap_or_else(|| format!("xl/worksheets/sheet{}.xml", index + 1));
484 out.push((normalize_zip_path(&target), name));
485 }
486 }
487 Ok(out)
488}
489
490fn parse_relationships(zip: &ZipArchive, part: &str) -> Result<BTreeMap<String, String>> {
491 let rels_path = rels_path_for(part);
492 if !zip.contains(&rels_path) {
493 return Ok(BTreeMap::new());
494 }
495 let root = parse_xml_document(&zip.read_string(&rels_path)?)?;
496 let mut map = BTreeMap::new();
497 for rel in root.children_named("Relationship") {
498 if let (Some(id), Some(target)) = (rel.attr("Id"), rel.attr("Target")) {
499 map.insert(id.to_string(), resolve_part_target(part, target));
500 }
501 }
502 Ok(map)
503}
504
505fn remove_content_type_override(root: &mut XmlNode, part_name: &str) {
506 root.items.retain(|item| match item {
507 XmlItem::Node(node) if node.local_name() == "Override" => node.attr("PartName") != Some(part_name),
508 _ => true,
509 });
510 sync_node_children(root);
511}
512
513fn remove_calc_chain_relationships(root: &mut XmlNode) {
514 root.items.retain(|item| match item {
515 XmlItem::Node(node) if node.local_name() == "Relationship" => {
516 let target = node.attr("Target").unwrap_or("");
517 let rel_type = node.attr("Type").unwrap_or("");
518 !target.ends_with("calcChain.xml")
519 && !rel_type.to_ascii_lowercase().contains("calcchain")
520 }
521 _ => true,
522 });
523 sync_node_children(root);
524}
525
526fn mark_workbook_recalculated(root: &mut XmlNode) {
527 let mut found = false;
528 for item in &mut root.items {
529 if let XmlItem::Node(node) = item {
530 if node.local_name() == "calcPr" {
531 node.attributes.insert("calcCompleted".to_string(), "1".to_string());
532 node.attributes.insert("fullCalcOnLoad".to_string(), "0".to_string());
533 node.attributes.remove("calcMode");
534 found = true;
535 }
536 }
537 }
538 if !found {
539 let mut attrs = BTreeMap::new();
540 attrs.insert("calcCompleted".to_string(), "1".to_string());
541 attrs.insert("fullCalcOnLoad".to_string(), "0".to_string());
542 root.items.push(XmlItem::Node(XmlNode {
543 name: "calcPr".to_string(),
544 attributes: attrs,
545 children: Vec::new(),
546 items: Vec::new(),
547 text: String::new(),
548 }));
549 }
550 sync_node_children(root);
551}
552
553fn patch_xlsx_sheet_formula_cache(
554 root: &mut XmlNode,
555 workbook: &Workbook,
556 sheet_index: usize,
557 evaluator: &WorkbookEvaluator<'_>,
558) -> Result<()> {
559 let Some(sheet_data) = child_mut(root, "sheetData") else {
560 return Ok(());
561 };
562 let mut shared_formulas: BTreeMap<String, (CellAddr, String)> = BTreeMap::new();
563 for row in &mut sheet_data.children {
564 if row.local_name() != "row" {
565 continue;
566 }
567 let row_number = row
568 .attr("r")
569 .and_then(|value| value.parse::<usize>().ok())
570 .unwrap_or(1);
571 for cell in &mut row.children {
572 if cell.local_name() == "c" {
573 patch_formula_cell(cell, row_number, workbook, sheet_index, evaluator, &mut shared_formulas)?;
574 }
575 }
576 sync_node_items_from_children(row);
577 }
578 sync_node_items_from_children(sheet_data);
579 sync_node_items_from_children(root);
580 Ok(())
581}
582
583fn walk_formula_cells(
584 root: &XmlNode,
585 sheet_name: &str,
586 sheet_index: usize,
587 evaluator: &WorkbookEvaluator<'_>,
588 report: &mut RecalcCheckReport,
589) -> Result<()> {
590 let Some(sheet_data) = root.child("sheetData") else {
591 return Ok(());
592 };
593 let mut shared_formulas: BTreeMap<String, (CellAddr, String)> = BTreeMap::new();
594 for row in &sheet_data.children {
595 if row.local_name() != "row" {
596 continue;
597 }
598 let row_number = row
599 .attr("r")
600 .and_then(|value| value.parse::<usize>().ok())
601 .unwrap_or(1);
602 for cell in &row.children {
603 if cell.local_name() != "c" {
604 continue;
605 }
606 let (row_1, col_1) = cell
607 .attr("r")
608 .and_then(parse_a1_cell_ref)
609 .unwrap_or((row_number, 1));
610 let addr = CellAddr::new(row_1.saturating_sub(1) as u32, col_1.saturating_sub(1) as u32);
611 let Some(formula) = resolve_formula_for_cell(cell, addr, &mut shared_formulas) else {
612 continue;
613 };
614 report.total_formulas += 1;
615 let value = evaluator
616 .evaluate_formula(sheet_index, &formula)
617 .unwrap_or_else(|_| EvalValue::Error("#VALUE!".to_string()));
618 if let EvalValue::Error(kind) = value {
619 report.record_error(kind, format!("{}!{}", sheet_name, addr.to_a1()));
620 }
621 }
622 }
623 Ok(())
624}
625
626fn patch_formula_cell(
627 cell: &mut XmlNode,
628 fallback_row: usize,
629 workbook: &Workbook,
630 sheet_index: usize,
631 evaluator: &WorkbookEvaluator<'_>,
632 shared_formulas: &mut BTreeMap<String, (CellAddr, String)>,
633) -> Result<()> {
634 let (row_1, col_1) = cell
635 .attr("r")
636 .and_then(parse_a1_cell_ref)
637 .unwrap_or((fallback_row, 1));
638 let addr = CellAddr::new(row_1.saturating_sub(1) as u32, col_1.saturating_sub(1) as u32);
639 let Some(formula) = resolve_formula_for_cell(cell, addr, shared_formulas) else {
640 return Ok(());
641 };
642 if formula.trim().is_empty() {
643 return Ok(());
644 }
645 let _ = workbook; let value = evaluator
647 .evaluate_formula(sheet_index, &formula)
648 .unwrap_or_else(|_| EvalValue::Error("#VALUE!".to_string()));
649 let mut new_items = Vec::new();
650 for item in &cell.items {
651 match item {
652 XmlItem::Text(text) => new_items.push(XmlItem::Text(text.clone())),
653 XmlItem::Node(node) if matches!(node.local_name(), "v" | "is") => {}
654 XmlItem::Node(node) => new_items.push(XmlItem::Node(node.clone())),
655 }
656 }
657 new_items.push(XmlItem::Node(make_value_node(&value)));
658 cell.items = new_items;
659 sync_node_children(cell);
660 apply_formula_cache_type(cell, &value);
661 Ok(())
662}
663
664fn resolve_formula_for_cell(
665 cell: &XmlNode,
666 addr: CellAddr,
667 shared_formulas: &mut BTreeMap<String, (CellAddr, String)>,
668) -> Option<String> {
669 let mut formula_text = None;
670 let mut formula_kind = None;
671 let mut shared_index = None;
672 for child in &cell.children {
673 if child.local_name() == "f" {
674 formula_text = Some(text_content(child));
675 formula_kind = child.attr("t").map(str::to_string);
676 shared_index = child.attr("si").map(str::to_string);
677 break;
678 }
679 }
680 let text = formula_text.unwrap_or_default();
681 if !text.trim().is_empty() {
682 if formula_kind.as_deref() == Some("shared") {
683 if let Some(si) = shared_index.clone() {
684 shared_formulas.insert(si, (addr, text.clone()));
685 }
686 }
687 return Some(text);
688 }
689 if formula_kind.as_deref() == Some("shared") {
690 if let Some(si) = shared_index {
691 if let Some((base_addr, base_formula)) = shared_formulas.get(&si) {
692 return Some(translate_shared_formula(base_formula, *base_addr, addr));
693 }
694 }
695 }
696 None
697}
698
699fn apply_formula_cache_type(cell: &mut XmlNode, value: &EvalValue) {
700 match value {
701 EvalValue::Number(_) | EvalValue::Blank => {
702 cell.attributes.remove("t");
703 }
704 EvalValue::Text(_) => {
705 cell.attributes.insert("t".to_string(), "str".to_string());
706 }
707 EvalValue::Bool(_) => {
708 cell.attributes.insert("t".to_string(), "b".to_string());
709 }
710 EvalValue::Error(_) => {
711 cell.attributes.insert("t".to_string(), "e".to_string());
712 }
713 }
714}
715
716fn make_value_node(value: &EvalValue) -> XmlNode {
717 let text = match value {
718 EvalValue::Blank => String::new(),
719 EvalValue::Number(number) => {
720 if number.fract() == 0.0 && number.is_finite() {
721 format!("{}", *number as i64)
722 } else {
723 number.to_string()
724 }
725 }
726 EvalValue::Text(text) => text.clone(),
727 EvalValue::Bool(value) => {
728 if *value { "1".to_string() } else { "0".to_string() }
729 }
730 EvalValue::Error(text) => text.clone(),
731 };
732 XmlNode {
733 name: "v".to_string(),
734 attributes: BTreeMap::new(),
735 children: Vec::new(),
736 items: if text.is_empty() { Vec::new() } else { vec![XmlItem::Text(text.clone())] },
737 text,
738 }
739}
740
/// Parses an A1-style cell reference into 1-based `(row, column)`.
///
/// `$` markers are ignored and letters are case-insensitive. Returns `None`
/// when the reference is not "letters followed by digits", when either part
/// is missing, or when the row or column does not fit in `usize`.
fn parse_a1_cell_ref(input: &str) -> Option<(usize, usize)> {
    let mut col = 0usize;
    let mut saw_letter = false;
    let mut digits = String::new();
    for ch in input.chars().filter(|&ch| ch != '$') {
        if ch.is_ascii_alphabetic() && digits.is_empty() {
            let letter_value = usize::from(ch.to_ascii_uppercase() as u8 - b'A') + 1;
            // Checked arithmetic: an absurdly long letter run must not overflow.
            col = col.checked_mul(26)?.checked_add(letter_value)?;
            saw_letter = true;
        } else if ch.is_ascii_digit() {
            digits.push(ch);
        } else {
            // Covers foreign characters as well as letters after the digits.
            return None;
        }
    }
    if !saw_letter || digits.is_empty() {
        return None;
    }
    let row = digits.parse().ok()?;
    Some((row, col))
}
766
767fn text_content(node: &XmlNode) -> String {
768 let mut out = String::new();
769 if !node.text.is_empty() {
770 out.push_str(&node.text);
771 }
772 for child in &node.children {
773 out.push_str(&text_content(child));
774 }
775 out
776}
777
778pub fn accept_all_tracked_changes_docx_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
791 let zip = ZipArchive::new(bytes)?;
792 let mut xml_parts: Vec<(String, XmlNode)> = Vec::new();
793 let mut passthrough: Vec<lo_zip::ZipEntry> = Vec::new();
794
795 for entry_name in zip.entries() {
796 let path = normalize_zip_path(entry_name);
797 if is_wordprocessing_xml(&path) {
798 let xml = zip.read_string(&path)?;
799 let root = parse_xml_document(&xml)?;
800 let accepted = accept_revision_root(&root, &path);
801 xml_parts.push((path, accepted));
802 } else {
803 passthrough.push(lo_zip::ZipEntry::new(path, zip.read(entry_name)?));
804 }
805 }
806
807 let mut live_comment_ids = BTreeSet::new();
808 for (path, root) in &xml_parts {
809 if !path.ends_with("comments.xml") {
810 collect_comment_ids(root, &mut live_comment_ids);
811 }
812 }
813
814 let mut entries: Vec<lo_zip::ZipEntry> =
815 Vec::with_capacity(xml_parts.len() + passthrough.len());
816 for (path, root) in xml_parts {
817 let root = if path.ends_with("comments.xml") {
818 filter_comment_part(&root, &live_comment_ids)
819 } else {
820 root
821 };
822 entries.push(lo_zip::ZipEntry::new(
823 path,
824 serialize_xml_document(&root).into_bytes(),
825 ));
826 }
827 entries.extend(passthrough);
828 lo_zip::ooxml_package(&entries)
829}
830
831#[deprecated(note = "use accept_all_tracked_changes_docx_bytes")]
833pub fn accept_tracked_changes_docx_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
834 accept_all_tracked_changes_docx_bytes(bytes)
835}
836
837#[deprecated(note = "use xlsx_recalc_bytes")]
839pub fn recalc_existing_xlsx_bytes(bytes: &[u8]) -> Result<Vec<u8>> {
840 xlsx_recalc_bytes(bytes)
841}
842
/// Reports whether `path` names an XML part under `word/` that should be
/// processed for revisions: relationship parts (`_rels/`) and the font table
/// are excluded.
fn is_wordprocessing_xml(path: &str) -> bool {
    let is_word_xml = path.starts_with("word/") && path.ends_with(".xml");
    let is_excluded = path.contains("_rels/") || path.ends_with("fontTable.xml");
    is_word_xml && !is_excluded
}
849
850fn accept_revision_root(root: &XmlNode, path: &str) -> XmlNode {
851 let items = accept_revision_items(&root.items);
852 let mut node = rebuild_node(root, items, root.attributes.clone());
853 if path.ends_with("settings.xml") {
854 node.items.retain(
855 |item| !matches!(item, XmlItem::Node(child) if child.local_name() == "trackRevisions"),
856 );
857 sync_node_children(&mut node);
858 }
859 node
860}
861
862fn accept_revision_items(items: &[XmlItem]) -> Vec<XmlItem> {
863 let mut out = Vec::new();
864 for item in items {
865 match item {
866 XmlItem::Text(text) => out.push(XmlItem::Text(text.clone())),
867 XmlItem::Node(node) => out.extend(accept_revision_node(node)),
868 }
869 }
870 out
871}
872
873fn accept_revision_node(node: &XmlNode) -> Vec<XmlItem> {
874 let local = node.local_name();
875 if matches!(
876 local,
877 "del"
878 | "delText"
879 | "delInstrText"
880 | "cellDel"
881 | "moveFrom"
882 | "moveFromRangeStart"
883 | "moveFromRangeEnd"
884 | "moveToRangeStart"
885 | "moveToRangeEnd"
886 | "customXmlDelRangeStart"
887 | "customXmlDelRangeEnd"
888 | "customXmlMoveFromRangeStart"
889 | "customXmlMoveFromRangeEnd"
890 | "customXmlMoveToRangeStart"
891 | "customXmlMoveToRangeEnd"
892 | "trackRevisions"
893 ) {
894 return Vec::new();
895 }
896 if matches!(
897 local,
898 "ins"
899 | "moveTo"
900 | "customXmlInsRangeStart"
901 | "customXmlInsRangeEnd"
902 | "cellIns"
903 ) {
904 return accept_revision_items(&node.items);
905 }
906 if local.ends_with("Change") || local == "numberingChange" || local == "cellMerge" {
907 return Vec::new();
908 }
909 if row_deleted(node) || cell_deleted(node) {
910 return Vec::new();
911 }
912 let items = accept_revision_items(&node.items);
913 vec![XmlItem::Node(rebuild_node(node, items, node.attributes.clone()))]
914}
915
916fn row_deleted(node: &XmlNode) -> bool {
917 if node.local_name() != "tr" {
918 return false;
919 }
920 node.child("trPr")
921 .map(|trpr| {
922 trpr.children
923 .iter()
924 .any(|child| matches!(child.local_name(), "del" | "cellDel" | "cellMerge"))
925 })
926 .unwrap_or(false)
927}
928
929fn cell_deleted(node: &XmlNode) -> bool {
930 if node.local_name() != "tc" {
931 return false;
932 }
933 node.child("tcPr")
934 .map(|tcpr| {
935 tcpr.children
936 .iter()
937 .any(|child| matches!(child.local_name(), "cellDel" | "del"))
938 })
939 .unwrap_or(false)
940}
941
942fn collect_comment_ids(node: &XmlNode, out: &mut BTreeSet<String>) {
943 let local = node.local_name();
944 if matches!(
945 local,
946 "commentRangeStart" | "commentRangeEnd" | "commentReference"
947 ) {
948 if let Some(id) = attribute_local(node, "id") {
949 out.insert(id.to_string());
950 }
951 }
952 for child in &node.children {
953 collect_comment_ids(child, out);
954 }
955}
956
957fn filter_comment_part(root: &XmlNode, live_comment_ids: &BTreeSet<String>) -> XmlNode {
958 if root.local_name() != "comments" {
959 return root.clone();
960 }
961 let items = root
962 .items
963 .iter()
964 .filter_map(|item| match item {
965 XmlItem::Text(text) => Some(XmlItem::Text(text.clone())),
966 XmlItem::Node(node) => filter_comment_node(node, live_comment_ids).map(XmlItem::Node),
967 })
968 .collect();
969 rebuild_node(root, items, root.attributes.clone())
970}
971
972fn filter_comment_node(node: &XmlNode, live_comment_ids: &BTreeSet<String>) -> Option<XmlNode> {
973 if node.local_name() == "comment" {
974 let keep = attribute_local(node, "id")
975 .map(|id| live_comment_ids.contains(id))
976 .unwrap_or(true);
977 if !keep {
978 return None;
979 }
980 }
981 let items = node
982 .items
983 .iter()
984 .filter_map(|item| match item {
985 XmlItem::Text(text) => Some(XmlItem::Text(text.clone())),
986 XmlItem::Node(child) => filter_comment_node(child, live_comment_ids).map(XmlItem::Node),
987 })
988 .collect();
989 Some(rebuild_node(node, items, node.attributes.clone()))
990}
991
992fn attribute_local<'a>(node: &'a XmlNode, local_name: &str) -> Option<&'a str> {
993 let suffix = format!(":{local_name}");
994 node.attributes.iter().find_map(|(key, value)| {
995 if key == local_name || key.ends_with(&suffix) {
996 Some(value.as_str())
997 } else {
998 None
999 }
1000 })
1001}
1002
1003fn rebuild_node(
1008 template: &XmlNode,
1009 items: Vec<XmlItem>,
1010 attributes: BTreeMap<String, String>,
1011) -> XmlNode {
1012 let mut node = XmlNode {
1013 name: template.name.clone(),
1014 attributes,
1015 children: Vec::new(),
1016 items,
1017 text: String::new(),
1018 };
1019 sync_node_children(&mut node);
1020 node
1021}
1022
1023fn sync_node_children(node: &mut XmlNode) {
1024 node.children = node
1025 .items
1026 .iter()
1027 .filter_map(|item| match item {
1028 XmlItem::Node(child) => Some(child.clone()),
1029 _ => None,
1030 })
1031 .collect();
1032 node.text = node
1033 .items
1034 .iter()
1035 .filter_map(|item| match item {
1036 XmlItem::Text(text) => Some(text.clone()),
1037 _ => None,
1038 })
1039 .collect::<Vec<_>>()
1040 .join("");
1041}
1042
1043fn sync_node_items_from_children(node: &mut XmlNode) {
1044 let mut child_index = 0usize;
1045 let mut new_items = Vec::with_capacity(node.items.len().max(node.children.len()));
1046 for item in &node.items {
1047 match item {
1048 XmlItem::Text(text) => new_items.push(XmlItem::Text(text.clone())),
1049 XmlItem::Node(_) => {
1050 if let Some(updated) = node.children.get(child_index) {
1051 new_items.push(XmlItem::Node(updated.clone()));
1052 child_index += 1;
1053 }
1054 }
1055 }
1056 }
1057 while let Some(updated) = node.children.get(child_index) {
1058 new_items.push(XmlItem::Node(updated.clone()));
1059 child_index += 1;
1060 }
1061 node.items = new_items;
1062 sync_node_children(node);
1063}
1064
1065fn child_mut<'a>(node: &'a mut XmlNode, name: &str) -> Option<&'a mut XmlNode> {
1066 node.children
1067 .iter_mut()
1068 .find(|child| child.local_name() == name || child.name == name)
1069}
1070
1071#[allow(dead_code)]
1072fn _assert_send_sync() {
1073 fn assert<T: Send + Sync>() {}
1074 assert::<Result<Vec<u8>>>();
1075 let _ = LoError::Parse(String::new());
1076}
1077
1078
1079pub fn docx_to_md_bytes(input: &[u8]) -> Result<Vec<u8>> {
1083 let doc = lo_writer::from_docx_bytes("document", input)?;
1084 Ok(lo_writer::to_markdown(&doc).into_bytes())
1085}
1086
1087pub fn pptx_to_md_bytes(input: &[u8]) -> Result<Vec<u8>> {
1089 let deck = lo_impress::from_pptx_bytes("presentation", input)?;
1090 Ok(lo_impress::to_markdown(&deck).into_bytes())
1091}
1092
1093pub fn xlsx_to_md_bytes(input: &[u8]) -> Result<Vec<u8>> {
1095 let workbook = lo_calc::from_xlsx_bytes("workbook", input)?;
1096 Ok(lo_calc::to_markdown(&workbook).into_bytes())
1097}
1098
1099pub fn docx_to_png_pages(input: &[u8], dpi: u32) -> Result<Vec<Vec<u8>>> {
1103 let doc = lo_writer::from_docx_bytes("document", input)?;
1104 Ok(lo_writer::render_png_pages(&doc, dpi.max(72)))
1105}
1106
1107pub fn docx_to_jpeg_pages(input: &[u8], dpi: u32, quality: u8) -> Result<Vec<Vec<u8>>> {
1109 let doc = lo_writer::from_docx_bytes("document", input)?;
1110 Ok(lo_writer::render_jpeg_pages(&doc, dpi.max(72), quality.max(1)))
1111}
1112
1113pub fn pptx_to_png_pages(input: &[u8], dpi: u32) -> Result<Vec<Vec<u8>>> {
1115 let deck = lo_impress::from_pptx_bytes("presentation", input)?;
1116 Ok(lo_impress::render_png_pages(&deck, dpi.max(72)))
1117}
1118
1119pub fn pptx_to_jpeg_pages(input: &[u8], dpi: u32, quality: u8) -> Result<Vec<Vec<u8>>> {
1121 let deck = lo_impress::from_pptx_bytes("presentation", input)?;
1122 Ok(lo_impress::render_jpeg_pages(&deck, dpi.max(72), quality.max(1)))
1123}