docx_handlebars/
docx.rs

1use crate::error::{DocxHandlebarsError, Result};
2use quick_xml::events::{Event, BytesText};
3use quick_xml::{Reader, Writer};
4use std::collections::HashMap;
5use std::io::{Cursor, Read, Write};
6use zip::{ZipArchive, ZipWriter, write::FileOptions};
7
/// DOCX document processor.
///
/// Holds the raw ZIP archive bytes together with the unpacked main document
/// part (`word/document.xml`) and every other archive entry.
#[derive(Debug, Clone)]
pub struct DocxProcessor {
    /// Raw bytes of the ZIP archive.
    zip_content: Vec<u8>,
    /// Text of the main document part.
    document_content: String,
    /// Contents of the other archive entries (styles, relationships, ...),
    /// keyed by entry name.
    other_files: HashMap<String, Vec<u8>>,
}
17
18impl DocxProcessor {
19    /// 创建新的 DOCX 处理器
20    pub fn new() -> Self {
21        Self {
22            zip_content: Vec::new(),
23            document_content: String::new(),
24            other_files: HashMap::new(),
25        }
26    }
27
28    /// 从字节数组加载 DOCX 文件
29    pub fn load_from_bytes(&mut self, bytes: &[u8]) -> Result<()> {
30        self.zip_content = bytes.to_vec();
31        self.extract_contents()?;
32        Ok(())
33    }
34
35    /// 提取 DOCX 文件内容
36    fn extract_contents(&mut self) -> Result<()> {
37        let cursor = Cursor::new(&self.zip_content);
38        let mut archive = ZipArchive::new(cursor)?;
39
40        // 清空之前的内容
41        self.other_files.clear();
42        self.document_content.clear();
43
44        for i in 0..archive.len() {
45            let mut file = archive.by_index(i)?;
46            let mut contents = Vec::new();
47            file.read_to_end(&mut contents)?;
48
49            let file_name = file.name().to_string();
50            
51            if file_name == "word/document.xml" {
52                // 提取主文档内容
53                self.document_content = String::from_utf8_lossy(&contents).to_string();
54            } else {
55                // 存储其他文件
56                self.other_files.insert(file_name, contents);
57            }
58        }
59
60        if self.document_content.is_empty() {
61            return Err(DocxHandlebarsError::document_format(
62                "无法找到 word/document.xml 文件"
63            ));
64        }
65
66        Ok(())
67    }
68
69    /// 获取文档内容
70    pub fn get_content(&self) -> &str {
71        &self.document_content
72    }
73
74    /// 使用新内容创建 DOCX 文件
75    pub fn create_docx_with_content(&self, new_content: &str) -> Result<Vec<u8>> {
76        let mut result = Vec::new();
77        {
78            let cursor = Cursor::new(&mut result);
79            let mut zip = ZipWriter::new(cursor);
80            let options = FileOptions::<()>::default()
81                .compression_method(zip::CompressionMethod::Deflated);
82
83            // 写入新的文档内容
84            zip.start_file("word/document.xml", options)?;
85            zip.write_all(new_content.as_bytes())?;
86
87            // 写入其他文件
88            for (file_name, contents) in &self.other_files {
89                zip.start_file(file_name, options)?;
90                zip.write_all(contents)?;
91            }
92
93            zip.finish()?;
94        }
95
96        Ok(result)
97    }
98
99    /// 提取纯文本内容(用于模板变量提取)
100    pub fn extract_text_content(&self) -> Result<String> {
101        let mut text_content = String::new();
102        let mut reader = Reader::from_str(&self.document_content);
103
104        let mut buf = Vec::new();
105        let mut in_text = false;
106
107        loop {
108            match reader.read_event_into(&mut buf) {
109                Ok(Event::Start(ref e)) => {
110                    if e.name().as_ref() == b"w:t" {
111                        in_text = true;
112                    }
113                }
114                Ok(Event::Text(e)) => {
115                    if in_text {
116                        text_content.push_str(e.unescape()?.as_ref());
117                    }
118                }
119                Ok(Event::End(ref e)) => {
120                    if e.name().as_ref() == b"w:t" {
121                        in_text = false;
122                    }
123                }
124                Ok(Event::Eof) => break,
125                Err(e) => return Err(DocxHandlebarsError::Xml(e)),
126                _ => {}
127            }
128            buf.clear();
129        }
130
131        Ok(text_content)
132    }
133
134    /// 处理 XML 内容中的 Handlebars 模板
135    pub fn process_template_in_xml(&self, _template_content: &str) -> Result<String> {
136        let mut result = Vec::new();
137        let mut reader = Reader::from_str(&self.document_content);
138        let mut writer = Writer::new(Cursor::new(&mut result));
139
140        let mut buf = Vec::new();
141        let mut in_text = false;
142        let mut current_text = String::new();
143
144        loop {
145            match reader.read_event_into(&mut buf) {
146                Ok(Event::Start(ref e)) => {
147                    if e.name().as_ref() == b"w:t" {
148                        in_text = true;
149                        writer.write_event(Event::Start(e.clone()))?;
150                    } else {
151                        writer.write_event(Event::Start(e.clone()))?;
152                    }
153                }
154                Ok(Event::Text(e)) => {
155                    if in_text {
156                        current_text = e.unescape()?.to_string();
157                        // 这里会在模板引擎中处理
158                        let processed_text = current_text.clone();
159                        writer.write_event(Event::Text(BytesText::new(&processed_text)))?;
160                    } else {
161                        writer.write_event(Event::Text(e))?;
162                    }
163                }
164                Ok(Event::End(ref e)) => {
165                    if e.name().as_ref() == b"w:t" {
166                        in_text = false;
167                        current_text.clear();
168                    }
169                    writer.write_event(Event::End(e.clone()))?;
170                }
171                Ok(Event::Eof) => break,
172                Err(e) => return Err(DocxHandlebarsError::Xml(e)),
173                _ => {
174                    let event = reader.read_event_into(&mut buf)?;
175                    writer.write_event(event)?;
176                }
177            }
178            buf.clear();
179        }
180
181        let result_xml = String::from_utf8_lossy(&result).to_string();
182        Ok(result_xml)
183    }
184}
185
186impl Default for DocxProcessor {
187    fn default() -> Self {
188        Self::new()
189    }
190}