use crate::error::{KreuzbergError, Result};
use serde_json::Value;
use std::collections::HashMap;
use std::io::Read;
use zip::ZipArchive;
pub type CustomProperties = HashMap<String, Value>;
pub fn extract_custom_properties<R: Read + std::io::Seek>(archive: &mut ZipArchive<R>) -> Result<CustomProperties> {
let xml_content = match super::read_zip_entry_to_string(archive, "docProps/custom.xml", "custom.xml")? {
Some(content) => content,
None => return Ok(HashMap::new()),
};
let doc = roxmltree::Document::parse(&xml_content)
.map_err(|e| KreuzbergError::parsing(format!("Failed to parse custom.xml: {}", e)))?;
let root = doc.root_element();
let mut properties = HashMap::new();
for property_node in root.descendants().filter(|n| n.has_tag_name("property")) {
if let Some(name) = property_node.attribute("name")
&& let Some(value) = extract_vt_value(property_node)
{
properties.insert(name.to_string(), value);
}
}
Ok(properties)
}
fn extract_vt_value(node: roxmltree::Node) -> Option<Value> {
for child in node.children().filter(|n| n.is_element()) {
let tag = child.tag_name().name();
match tag {
"lpwstr" | "lpstr" => {
return child.text().map(|s| Value::String(s.to_string()));
}
"i4" => {
return child
.text()
.and_then(|s| s.trim().parse::<i64>().ok().map(|n| Value::Number(n.into())));
}
"r8" => {
return child.text().and_then(|s| {
s.trim()
.parse::<f64>()
.ok()
.and_then(|f| serde_json::Number::from_f64(f).map(Value::Number))
});
}
"bool" => {
return child.text().and_then(|s| match s.trim().to_lowercase().as_str() {
"true" | "1" => Some(Value::Bool(true)),
"false" | "0" => Some(Value::Bool(false)),
_ => None,
});
}
"filetime" => {
return child.text().map(|s| Value::String(s.to_string()));
}
_ => {
continue;
}
}
}
None
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::{Cursor, Write};
fn create_test_zip_with_custom_xml(custom_xml: &str) -> ZipArchive<Cursor<Vec<u8>>> {
let buffer = Vec::new();
let cursor = Cursor::new(buffer);
let mut zip = zip::ZipWriter::new(cursor);
let options = zip::write::FileOptions::<()>::default().compression_method(zip::CompressionMethod::Stored);
zip.start_file("docProps/custom.xml", options).unwrap();
zip.write_all(custom_xml.as_bytes()).unwrap();
let cursor = zip.finish().unwrap();
ZipArchive::new(cursor).unwrap()
}
#[test]
fn test_extract_custom_properties_string() {
let custom_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="2" name="ProjectName">
<vt:lpwstr>Alpha Project</vt:lpwstr>
</property>
</Properties>"#;
let mut archive = create_test_zip_with_custom_xml(custom_xml);
let props = extract_custom_properties(&mut archive).unwrap();
assert_eq!(
props.get("ProjectName"),
Some(&Value::String("Alpha Project".to_string()))
);
}
#[test]
fn test_extract_custom_properties_multiple_types() {
let custom_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="2" name="StringProp">
<vt:lpwstr>Test String</vt:lpwstr>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="3" name="IntProp">
<vt:i4>42</vt:i4>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="4" name="BoolProp">
<vt:bool>true</vt:bool>
</property>
<property fmtid="{D5CDD505-2E9C-101B-9397-08002B2CF9AE}" pid="5" name="FloatProp">
<vt:r8>3.14159</vt:r8>
</property>
</Properties>"#;
let mut archive = create_test_zip_with_custom_xml(custom_xml);
let props = extract_custom_properties(&mut archive).unwrap();
assert_eq!(props.get("StringProp"), Some(&Value::String("Test String".to_string())));
assert_eq!(props.get("IntProp"), Some(&Value::Number(42.into())));
assert_eq!(props.get("BoolProp"), Some(&Value::Bool(true)));
assert!(matches!(props.get("FloatProp"), Some(Value::Number(_))));
}
#[test]
fn test_extract_custom_properties_missing_file() {
let buffer = Vec::new();
let cursor = Cursor::new(buffer);
let zip = zip::ZipWriter::new(cursor);
let cursor = zip.finish().unwrap();
let mut archive = ZipArchive::new(cursor).unwrap();
let props = extract_custom_properties(&mut archive).unwrap();
assert_eq!(props, HashMap::new());
}
#[test]
fn test_extract_vt_value_lpstr() {
let xml = r#"<property xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<vt:lpstr>Test</vt:lpstr>
</property>"#;
let doc = roxmltree::Document::parse(xml).unwrap();
let value = extract_vt_value(doc.root_element()).unwrap();
assert_eq!(value, Value::String("Test".to_string()));
}
#[test]
fn test_extract_vt_value_bool_variants() {
let xml_true1 = r#"<property xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<vt:bool>true</vt:bool>
</property>"#;
let xml_true2 = r#"<property xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<vt:bool>1</vt:bool>
</property>"#;
let xml_false1 = r#"<property xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<vt:bool>false</vt:bool>
</property>"#;
let xml_false2 = r#"<property xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<vt:bool>0</vt:bool>
</property>"#;
let doc1 = roxmltree::Document::parse(xml_true1).unwrap();
assert_eq!(extract_vt_value(doc1.root_element()), Some(Value::Bool(true)));
let doc2 = roxmltree::Document::parse(xml_true2).unwrap();
assert_eq!(extract_vt_value(doc2.root_element()), Some(Value::Bool(true)));
let doc3 = roxmltree::Document::parse(xml_false1).unwrap();
assert_eq!(extract_vt_value(doc3.root_element()), Some(Value::Bool(false)));
let doc4 = roxmltree::Document::parse(xml_false2).unwrap();
assert_eq!(extract_vt_value(doc4.root_element()), Some(Value::Bool(false)));
}
}