// Source path: linch_docx_rs/document/properties.rs

//! Core properties (core.xml) - Dublin Core metadata
2
3use crate::error::Result;
4use crate::xml::{RawXmlElement, RawXmlNode};
5use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
6use quick_xml::{Reader, Writer};
7use std::io::Cursor;
8
9/// Core properties from core.xml (Dublin Core metadata)
10#[derive(Clone, Debug, Default)]
11pub struct CoreProperties {
12    pub title: Option<String>,
13    pub subject: Option<String>,
14    pub creator: Option<String>,
15    pub keywords: Option<String>,
16    pub description: Option<String>,
17    pub last_modified_by: Option<String>,
18    pub revision: Option<String>,
19    pub created: Option<String>,
20    pub modified: Option<String>,
21    pub category: Option<String>,
22    pub content_status: Option<String>,
23    /// Unknown children (preserved for round-trip)
24    pub unknown_children: Vec<RawXmlNode>,
25}
26
27impl CoreProperties {
28    /// Parse from XML string
29    pub fn from_xml(xml: &str) -> Result<Self> {
30        let mut reader = Reader::from_str(xml);
31        reader.config_mut().trim_text(true);
32
33        let mut props = CoreProperties::default();
34        let mut buf = Vec::new();
35        let mut current_element: Option<String> = None;
36
37        loop {
38            match reader.read_event_into(&mut buf)? {
39                Event::Start(e) => {
40                    let local = e.name().local_name();
41                    let local_str = String::from_utf8_lossy(local.as_ref()).to_string();
42
43                    match local_str.as_str() {
44                        "coreProperties" => {
45                            // Root element, continue
46                        }
47                        "title" | "subject" | "creator" | "description" | "keywords"
48                        | "lastModifiedBy" | "revision" | "created" | "modified" | "category"
49                        | "contentStatus" => {
50                            current_element = Some(local_str);
51                        }
52                        _ => {
53                            let raw = RawXmlElement::from_reader(&mut reader, &e)?;
54                            props.unknown_children.push(RawXmlNode::Element(raw));
55                        }
56                    }
57                }
58                Event::Text(t) => {
59                    if let Some(ref elem) = current_element {
60                        let text = t.unescape()?.to_string();
61                        match elem.as_str() {
62                            "title" => props.title = Some(text),
63                            "subject" => props.subject = Some(text),
64                            "creator" => props.creator = Some(text),
65                            "keywords" => props.keywords = Some(text),
66                            "description" => props.description = Some(text),
67                            "lastModifiedBy" => props.last_modified_by = Some(text),
68                            "revision" => props.revision = Some(text),
69                            "created" => props.created = Some(text),
70                            "modified" => props.modified = Some(text),
71                            "category" => props.category = Some(text),
72                            "contentStatus" => props.content_status = Some(text),
73                            _ => {}
74                        }
75                    }
76                }
77                Event::End(e) => {
78                    let local = e.name().local_name();
79                    let local_str = String::from_utf8_lossy(local.as_ref()).to_string();
80                    if current_element.as_deref() == Some(&local_str) {
81                        current_element = None;
82                    }
83                }
84                Event::Eof => break,
85                _ => {}
86            }
87            buf.clear();
88        }
89
90        Ok(props)
91    }
92
93    /// Serialize to XML string
94    pub fn to_xml(&self) -> Result<String> {
95        let mut buffer = Cursor::new(Vec::new());
96        let mut writer = Writer::new(&mut buffer);
97
98        writer.write_event(Event::Decl(BytesDecl::new(
99            "1.0",
100            Some("UTF-8"),
101            Some("yes"),
102        )))?;
103
104        let mut start = BytesStart::new("cp:coreProperties");
105        start.push_attribute(("xmlns:cp", crate::xml::CP));
106        start.push_attribute(("xmlns:dc", crate::xml::DC));
107        start.push_attribute(("xmlns:dcterms", crate::xml::DCTERMS));
108        start.push_attribute(("xmlns:dcmitype", "http://purl.org/dc/dcmitype/"));
109        start.push_attribute(("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"));
110        writer.write_event(Event::Start(start))?;
111
112        write_dc_element(&mut writer, "dc:title", &self.title)?;
113        write_dc_element(&mut writer, "dc:subject", &self.subject)?;
114        write_dc_element(&mut writer, "dc:creator", &self.creator)?;
115        write_cp_element(&mut writer, "cp:keywords", &self.keywords)?;
116        write_dc_element(&mut writer, "dc:description", &self.description)?;
117        write_cp_element(&mut writer, "cp:lastModifiedBy", &self.last_modified_by)?;
118        write_cp_element(&mut writer, "cp:revision", &self.revision)?;
119        write_datetime_element(&mut writer, "dcterms:created", &self.created)?;
120        write_datetime_element(&mut writer, "dcterms:modified", &self.modified)?;
121        write_cp_element(&mut writer, "cp:category", &self.category)?;
122        write_cp_element(&mut writer, "cp:contentStatus", &self.content_status)?;
123
124        for child in &self.unknown_children {
125            child.write_to(&mut writer)?;
126        }
127
128        writer.write_event(Event::End(BytesEnd::new("cp:coreProperties")))?;
129
130        let xml_bytes = buffer.into_inner();
131        String::from_utf8(xml_bytes)
132            .map_err(|e| crate::error::Error::InvalidDocument(e.to_string()))
133    }
134}
135
136fn write_dc_element<W: std::io::Write>(
137    writer: &mut Writer<W>,
138    name: &str,
139    value: &Option<String>,
140) -> Result<()> {
141    if let Some(ref v) = value {
142        writer.write_event(Event::Start(BytesStart::new(name)))?;
143        writer.write_event(Event::Text(BytesText::new(v)))?;
144        writer.write_event(Event::End(BytesEnd::new(name)))?;
145    }
146    Ok(())
147}
148
149fn write_cp_element<W: std::io::Write>(
150    writer: &mut Writer<W>,
151    name: &str,
152    value: &Option<String>,
153) -> Result<()> {
154    write_dc_element(writer, name, value)
155}
156
157fn write_datetime_element<W: std::io::Write>(
158    writer: &mut Writer<W>,
159    name: &str,
160    value: &Option<String>,
161) -> Result<()> {
162    if let Some(ref v) = value {
163        let mut start = BytesStart::new(name);
164        start.push_attribute(("xsi:type", "dcterms:W3CDTF"));
165        writer.write_event(Event::Start(start))?;
166        writer.write_event(Event::Text(BytesText::new(v)))?;
167        writer.write_event(Event::End(BytesEnd::new(name)))?;
168    }
169    Ok(())
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Small `core.xml` document with four populated properties.
    const SAMPLE_CORE_XML: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/">
  <dc:title>Test Document</dc:title>
  <dc:creator>Test Author</dc:creator>
  <cp:revision>3</cp:revision>
  <dcterms:created>2024-01-15T10:30:00Z</dcterms:created>
</cp:coreProperties>"#;

    /// Parsing fills the fields present in the document and leaves the rest unset.
    #[test]
    fn test_parse_core_properties() {
        let parsed = CoreProperties::from_xml(SAMPLE_CORE_XML).unwrap();

        let populated = [
            (&parsed.title, "Test Document"),
            (&parsed.creator, "Test Author"),
            (&parsed.revision, "3"),
            (&parsed.created, "2024-01-15T10:30:00Z"),
        ];
        for (actual, expected) in populated {
            assert_eq!(actual.as_deref(), Some(expected));
        }

        assert!(parsed.subject.is_none());
    }

    /// Values written by `to_xml` are read back unchanged by `from_xml`.
    #[test]
    fn test_core_properties_roundtrip() {
        let original = CoreProperties {
            title: Some("My Doc".into()),
            creator: Some("Author".into()),
            modified: Some("2024-06-01T12:00:00Z".into()),
            ..Default::default()
        };

        let serialized = original.to_xml().unwrap();
        let restored = CoreProperties::from_xml(&serialized).unwrap();

        let expected_fields = [
            (&restored.title, "My Doc"),
            (&restored.creator, "Author"),
            (&restored.modified, "2024-06-01T12:00:00Z"),
        ];
        for (actual, expected) in expected_fields {
            assert_eq!(actual.as_deref(), Some(expected));
        }
    }
}