use crate::error::{KreuzbergError, Result};
use std::io::Read;
use zip::ZipArchive;
/// Document metadata read from the `docProps/core.xml` entry of a ZIP-based document.
///
/// Each field holds the text that `extract_core_properties` looks up under the
/// element name noted on that field. Every value is kept as a `String`; nothing
/// here (including the timestamp-like fields) is parsed into a richer type.
/// A field is `None` when the lookup for its element produced no value; the
/// derived `Default` therefore yields a value with every field `None`.
#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CoreProperties {
/// Text of the `title` element.
pub title: Option<String>,
/// Text of the `subject` element.
pub subject: Option<String>,
/// Text of the `creator` element.
pub creator: Option<String>,
/// Text of the `keywords` element, kept as a single string (not split).
pub keywords: Option<String>,
/// Text of the `description` element.
pub description: Option<String>,
/// Text of the `lastModifiedBy` element.
pub last_modified_by: Option<String>,
/// Text of the `revision` element, stored as text rather than a number.
pub revision: Option<String>,
/// Text of the `created` element, stored as text rather than a date type.
pub created: Option<String>,
/// Text of the `modified` element, stored as text rather than a date type.
pub modified: Option<String>,
/// Text of the `category` element.
pub category: Option<String>,
/// Text of the `contentStatus` element.
pub content_status: Option<String>,
/// Text of the `language` element.
pub language: Option<String>,
/// Text of the `identifier` element.
pub identifier: Option<String>,
/// Text of the `version` element.
pub version: Option<String>,
/// Text of the `lastPrinted` element, stored as text rather than a date type.
pub last_printed: Option<String>,
}
/// Reads the `docProps/core.xml` entry of `archive` into a [`CoreProperties`].
///
/// A missing entry is not treated as an error: when the entry lookup yields
/// `None`, `CoreProperties::default()` is returned.
///
/// # Errors
///
/// Propagates any error produced while reading the ZIP entry, and returns a
/// parsing error when the entry's content cannot be parsed as XML.
pub fn extract_core_properties<R: Read + std::io::Seek>(archive: &mut ZipArchive<R>) -> Result<CoreProperties> {
    // No core.xml entry: report "no metadata" instead of failing.
    let Some(xml_content) = super::read_zip_entry_to_string(archive, "docProps/core.xml", "core.xml")? else {
        return Ok(CoreProperties::default());
    };

    let doc = roxmltree::Document::parse(&xml_content)
        .map_err(|e| KreuzbergError::parsing(format!("Failed to parse core.xml: {}", e)))?;
    let root = doc.root_element();

    // Every property is fetched the same way: one lookup by element name
    // starting from the document's root element.
    let text_of = |tag: &str| super::parse_xml_text(root, tag);

    Ok(CoreProperties {
        title: text_of("title"),
        subject: text_of("subject"),
        creator: text_of("creator"),
        keywords: text_of("keywords"),
        description: text_of("description"),
        last_modified_by: text_of("lastModifiedBy"),
        revision: text_of("revision"),
        created: text_of("created"),
        modified: text_of("modified"),
        category: text_of("category"),
        content_status: text_of("contentStatus"),
        language: text_of("language"),
        identifier: text_of("identifier"),
        version: text_of("version"),
        last_printed: text_of("lastPrinted"),
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Write};

    /// Builds an in-memory ZIP archive whose only entry is `docProps/core.xml`
    /// containing `core_xml`, written with the `Stored` compression method.
    fn create_test_zip_with_core_xml(core_xml: &str) -> ZipArchive<Cursor<Vec<u8>>> {
        let buffer = Vec::new();
        let cursor = Cursor::new(buffer);
        let mut zip = zip::ZipWriter::new(cursor);
        let options = zip::write::FileOptions::<()>::default().compression_method(zip::CompressionMethod::Stored);
        zip.start_file("docProps/core.xml", options).unwrap();
        zip.write_all(core_xml.as_bytes()).unwrap();
        let cursor = zip.finish().unwrap();
        ZipArchive::new(cursor).unwrap()
    }

    /// Every field of `CoreProperties` is populated from its element,
    /// including `identifier`, `version` and `lastPrinted`.
    #[test]
    fn test_extract_core_properties_full() {
        let core_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dc:title>Test Document</dc:title>
<dc:subject>Testing</dc:subject>
<dc:creator>John Doe</dc:creator>
<cp:keywords>test, metadata</cp:keywords>
<dc:description>A test document</dc:description>
<cp:lastModifiedBy>Jane Doe</cp:lastModifiedBy>
<cp:revision>5</cp:revision>
<dcterms:created xsi:type="dcterms:W3CDTF">2024-01-01T10:00:00Z</dcterms:created>
<dcterms:modified xsi:type="dcterms:W3CDTF">2024-01-02T15:30:00Z</dcterms:modified>
<cp:category>Documents</cp:category>
<cp:contentStatus>Final</cp:contentStatus>
<dc:language>en-US</dc:language>
<dc:identifier>DOC-0001</dc:identifier>
<cp:version>1.2</cp:version>
<cp:lastPrinted>2024-01-03T08:15:00Z</cp:lastPrinted>
</cp:coreProperties>"#;
        let mut archive = create_test_zip_with_core_xml(core_xml);
        let props = extract_core_properties(&mut archive).unwrap();
        assert_eq!(props.title, Some("Test Document".to_string()));
        assert_eq!(props.subject, Some("Testing".to_string()));
        assert_eq!(props.creator, Some("John Doe".to_string()));
        assert_eq!(props.keywords, Some("test, metadata".to_string()));
        assert_eq!(props.description, Some("A test document".to_string()));
        assert_eq!(props.last_modified_by, Some("Jane Doe".to_string()));
        assert_eq!(props.revision, Some("5".to_string()));
        assert_eq!(props.created, Some("2024-01-01T10:00:00Z".to_string()));
        assert_eq!(props.modified, Some("2024-01-02T15:30:00Z".to_string()));
        assert_eq!(props.category, Some("Documents".to_string()));
        assert_eq!(props.content_status, Some("Final".to_string()));
        assert_eq!(props.language, Some("en-US".to_string()));
        // These three fields were previously extracted but never asserted.
        assert_eq!(props.identifier, Some("DOC-0001".to_string()));
        assert_eq!(props.version, Some("1.2".to_string()));
        assert_eq!(props.last_printed, Some("2024-01-03T08:15:00Z".to_string()));
    }

    /// With only `creator` present, every other field must stay `None`.
    #[test]
    fn test_extract_core_properties_minimal() {
        let core_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:creator>Alice</dc:creator>
</cp:coreProperties>"#;
        let mut archive = create_test_zip_with_core_xml(core_xml);
        let props = extract_core_properties(&mut archive).unwrap();
        // Compare the whole struct so that all absent fields are checked,
        // not just a sample of them.
        let expected = CoreProperties {
            creator: Some("Alice".to_string()),
            ..CoreProperties::default()
        };
        assert_eq!(props, expected);
    }

    /// An element that is present but empty is reported as `None`.
    #[test]
    fn test_extract_core_properties_empty_elements() {
        let core_xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title></dc:title>
<dc:creator>Bob</dc:creator>
</cp:coreProperties>"#;
        let mut archive = create_test_zip_with_core_xml(core_xml);
        let props = extract_core_properties(&mut archive).unwrap();
        assert_eq!(props.title, None);
        assert_eq!(props.creator, Some("Bob".to_string()));
    }

    /// An archive without `docProps/core.xml` yields default properties, not an error.
    #[test]
    fn test_extract_core_properties_missing_file() {
        let buffer = Vec::new();
        let cursor = Cursor::new(buffer);
        let zip = zip::ZipWriter::new(cursor);
        let cursor = zip.finish().unwrap();
        let mut archive = ZipArchive::new(cursor).unwrap();
        let props = extract_core_properties(&mut archive).unwrap();
        assert_eq!(props, CoreProperties::default());
    }

    /// Content that is not well-formed XML surfaces as an `Err`.
    #[test]
    fn test_extract_core_properties_malformed_xml() {
        let core_xml = "not valid xml <";
        let mut archive = create_test_zip_with_core_xml(core_xml);
        let result = extract_core_properties(&mut archive);
        assert!(result.is_err());
    }
}