linch_docx_rs/document/
mod.rs1mod body;
4mod numbering;
5mod paragraph;
6mod run;
7mod table;
8
9pub use body::{BlockContent, Body};
10pub use numbering::{AbstractNum, Level, LevelOverride, Num, NumberFormat, Numbering};
11pub use paragraph::{Hyperlink, Paragraph, ParagraphContent, ParagraphProperties};
12pub use run::{BreakType, Run, RunContent, RunProperties};
13pub use table::{GridColumn, Table, TableCell, TableCellProperties, TableRow, VMerge};
14
15use crate::error::{Error, Result};
16use crate::opc::{Package, Part, PartUri};
17use crate::xml;
18use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, Event};
19use quick_xml::{Reader, Writer};
20use std::io::{BufRead, Cursor};
21use std::path::Path;
22
23#[derive(Debug)]
25pub struct Document {
26 package: Package,
28 body: Body,
30 numbering: Option<Numbering>,
32}
33
34impl Document {
35 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
37 let package = Package::open(path)?;
38 Self::from_package(package)
39 }
40
41 pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
43 let package = Package::from_bytes(bytes)?;
44 Self::from_package(package)
45 }
46
47 fn from_package(package: Package) -> Result<Self> {
49 let doc_part = package
51 .main_document_part()
52 .ok_or_else(|| Error::MissingPart("Main document part not found".into()))?;
53
54 let xml = doc_part.data_as_str()?;
56 let body = parse_document_xml(xml)?;
57
58 let numbering = Self::load_numbering(&package);
60
61 Ok(Self {
62 package,
63 body,
64 numbering,
65 })
66 }
67
68 fn load_numbering(package: &Package) -> Option<Numbering> {
70 let doc_part = package.main_document_part()?;
72 let rels = doc_part.relationships()?;
73 let numbering_rel = rels.by_type(crate::opc::rel_types::NUMBERING)?;
74
75 let target = &numbering_rel.target;
77 let numbering_uri = if target.starts_with('/') {
78 PartUri::new(target).ok()?
79 } else {
80 PartUri::new(&format!("/word/{}", target)).ok()?
81 };
82
83 let numbering_part = package.part(&numbering_uri)?;
85 let xml = numbering_part.data_as_str().ok()?;
86
87 Numbering::from_xml(xml).ok()
89 }
90
91 pub fn new() -> Self {
93 Self {
94 package: Package::new(),
95 body: Body::default(),
96 numbering: None,
97 }
98 }
99
100 pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
102 self.update_package()?;
103 self.package.save(path)
104 }
105
106 pub fn to_bytes(&mut self) -> Result<Vec<u8>> {
108 self.update_package()?;
109 self.package.to_bytes()
110 }
111
112 fn update_package(&mut self) -> Result<()> {
114 let xml = serialize_document_xml(&self.body)?;
115 let uri = PartUri::new("/word/document.xml")?;
116
117 let part = Part::new(
119 uri.clone(),
120 crate::opc::MAIN_DOCUMENT.to_string(),
121 xml.into_bytes(),
122 );
123 self.package.add_part(part);
124
125 if self.package.main_document_part().is_none() {
127 use crate::opc::rel_types;
128 self.package
129 .add_relationship(rel_types::OFFICE_DOCUMENT, uri.as_str());
130 }
131
132 if let Some(ref numbering) = self.numbering {
134 let numbering_xml = numbering.to_xml()?;
135 let numbering_uri = PartUri::new("/word/numbering.xml")?;
136 let numbering_part = Part::new(
137 numbering_uri,
138 crate::opc::NUMBERING.to_string(),
139 numbering_xml.into_bytes(),
140 );
141 self.package.add_part(numbering_part);
142 }
143
144 Ok(())
145 }
146
147 pub fn paragraphs(&self) -> impl Iterator<Item = &Paragraph> {
149 self.body.paragraphs()
150 }
151
152 pub fn paragraph_count(&self) -> usize {
154 self.body
155 .content
156 .iter()
157 .filter(|c| matches!(c, BlockContent::Paragraph(_)))
158 .count()
159 }
160
161 pub fn paragraph(&self, index: usize) -> Option<&Paragraph> {
163 self.body.paragraphs().nth(index)
164 }
165
166 pub fn tables(&self) -> impl Iterator<Item = &Table> {
168 self.body.tables()
169 }
170
171 pub fn table_count(&self) -> usize {
173 self.body
174 .content
175 .iter()
176 .filter(|c| matches!(c, BlockContent::Table(_)))
177 .count()
178 }
179
180 pub fn table(&self, index: usize) -> Option<&Table> {
182 self.body.tables().nth(index)
183 }
184
185 pub fn text(&self) -> String {
187 self.body
188 .paragraphs()
189 .map(|p| p.text())
190 .collect::<Vec<_>>()
191 .join("\n")
192 }
193
194 pub fn package(&self) -> &Package {
196 &self.package
197 }
198
199 pub fn body_mut(&mut self) -> &mut Body {
201 &mut self.body
202 }
203
204 pub fn add_paragraph(&mut self, text: impl Into<String>) -> &mut Paragraph {
206 let para = Paragraph::new(text);
207 self.body.add_paragraph(para);
208 self.body
210 .content
211 .iter_mut()
212 .rev()
213 .find_map(|c| {
214 if let BlockContent::Paragraph(p) = c {
215 Some(p)
216 } else {
217 None
218 }
219 })
220 .expect("Just added paragraph")
221 }
222
223 pub fn add_empty_paragraph(&mut self) -> &mut Paragraph {
225 self.body.add_paragraph(Paragraph::default());
226 self.body
227 .content
228 .iter_mut()
229 .rev()
230 .find_map(|c| {
231 if let BlockContent::Paragraph(p) = c {
232 Some(p)
233 } else {
234 None
235 }
236 })
237 .expect("Just added paragraph")
238 }
239
240 pub fn numbering(&self) -> Option<&Numbering> {
242 self.numbering.as_ref()
243 }
244
245 pub fn numbering_mut(&mut self) -> Option<&mut Numbering> {
247 self.numbering.as_mut()
248 }
249
250 pub fn is_list_item(&self, para: &Paragraph) -> bool {
252 para.properties.as_ref().and_then(|p| p.num_id).is_some()
253 }
254
255 pub fn is_bullet_list_item(&self, para: &Paragraph) -> bool {
257 if let Some(num_id) = para.properties.as_ref().and_then(|p| p.num_id) {
258 if let Some(ref numbering) = self.numbering {
259 return numbering.is_bullet_list(num_id);
260 }
261 }
262 false
263 }
264
265 pub fn list_level(&self, para: &Paragraph) -> Option<u32> {
267 para.properties.as_ref().and_then(|p| {
268 if p.num_id.is_some() {
269 Some(p.num_level.unwrap_or(0))
270 } else {
271 None
272 }
273 })
274 }
275
276 pub fn list_format(&self, para: &Paragraph) -> Option<&NumberFormat> {
278 let props = para.properties.as_ref()?;
279 let num_id = props.num_id?;
280 let level = props.num_level.unwrap_or(0) as u8;
281 self.numbering.as_ref()?.get_format(num_id, level)
282 }
283
284 pub fn add_table(&mut self, table: Table) -> &mut Table {
286 self.body.add_table(table);
287 self.body
289 .content
290 .iter_mut()
291 .rev()
292 .find_map(|c| {
293 if let BlockContent::Table(t) = c {
294 Some(t)
295 } else {
296 None
297 }
298 })
299 .expect("Just added table")
300 }
301
302 pub fn add_table_with_size(&mut self, rows: usize, cols: usize) -> &mut Table {
304 self.add_table(Table::new(rows, cols))
305 }
306
307 pub fn table_mut(&mut self, index: usize) -> Option<&mut Table> {
309 self.body
310 .content
311 .iter_mut()
312 .filter_map(|c| {
313 if let BlockContent::Table(t) = c {
314 Some(t)
315 } else {
316 None
317 }
318 })
319 .nth(index)
320 }
321}
322
323impl Default for Document {
324 fn default() -> Self {
325 Self::new()
326 }
327}
328
329fn parse_document_xml(xml: &str) -> Result<Body> {
331 let mut reader = Reader::from_str(xml);
332 reader.config_mut().trim_text(true);
333
334 let mut buf = Vec::new();
335 let mut body = None;
336
337 loop {
338 match reader.read_event_into(&mut buf)? {
339 Event::Start(e) => {
340 let name = e.name();
341 let local = name.local_name();
342
343 match local.as_ref() {
344 b"body" => {
345 body = Some(Body::from_reader(&mut reader)?);
346 }
347 b"document" => {
348 }
350 _ => {
351 skip_element(&mut reader, &e)?;
353 }
354 }
355 }
356 Event::Eof => break,
357 _ => {}
358 }
359 buf.clear();
360 }
361
362 body.ok_or_else(|| Error::InvalidDocument("Missing w:body element".into()))
363}
364
365fn serialize_document_xml(body: &Body) -> Result<String> {
367 let mut buffer = Cursor::new(Vec::new());
368 let mut writer = Writer::new(&mut buffer);
369
370 writer.write_event(Event::Decl(BytesDecl::new(
372 "1.0",
373 Some("UTF-8"),
374 Some("yes"),
375 )))?;
376
377 let mut doc_start = BytesStart::new("w:document");
379 for (attr, value) in xml::document_namespaces() {
380 doc_start.push_attribute((attr, value));
381 }
382 writer.write_event(Event::Start(doc_start))?;
383
384 body.write_to(&mut writer)?;
386
387 writer.write_event(Event::End(BytesEnd::new("w:document")))?;
389
390 let xml_bytes = buffer.into_inner();
391 String::from_utf8(xml_bytes).map_err(|e| Error::InvalidDocument(e.to_string()))
392}
393
394fn skip_element<R: BufRead>(
396 reader: &mut Reader<R>,
397 start: &quick_xml::events::BytesStart,
398) -> Result<()> {
399 let target = start.name().as_ref().to_vec();
400 let mut depth = 1;
401 let mut buf = Vec::new();
402
403 loop {
404 match reader.read_event_into(&mut buf)? {
405 Event::Start(e) if e.name().as_ref() == target => depth += 1,
406 Event::End(e) if e.name().as_ref() == target => {
407 depth -= 1;
408 if depth == 0 {
409 break;
410 }
411 }
412 Event::Eof => break,
413 _ => {}
414 }
415 buf.clear();
416 }
417
418 Ok(())
419}
420
#[cfg(test)]
mod tests {
    use super::*;

    /// Two paragraphs: plain text, then a bold run styled as `Heading1`.
    const SIMPLE_DOC: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <w:body>
    <w:p>
      <w:r>
        <w:t>Hello, World!</w:t>
      </w:r>
    </w:p>
    <w:p>
      <w:pPr>
        <w:pStyle w:val="Heading1"/>
      </w:pPr>
      <w:r>
        <w:rPr>
          <w:b/>
        </w:rPr>
        <w:t>This is a heading</w:t>
      </w:r>
    </w:p>
  </w:body>
</w:document>"#;

    #[test]
    fn test_parse_simple_document() {
        let body = parse_document_xml(SIMPLE_DOC).unwrap();
        let paras: Vec<_> = body.paragraphs().collect();
        assert_eq!(paras.len(), 2);

        // First paragraph: plain text only.
        let plain = paras[0];
        assert_eq!(plain.text(), "Hello, World!");

        // Second paragraph: styled heading with a single bold run.
        let heading = paras[1];
        assert_eq!(heading.text(), "This is a heading");
        assert_eq!(heading.style(), Some("Heading1"));

        let runs: Vec<_> = heading.runs().collect();
        assert_eq!(runs.len(), 1);
        assert!(runs[0].bold());
    }

    #[test]
    fn test_parse_with_formatting() {
        let xml = r#"<?xml version="1.0"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
  <w:body>
    <w:p>
      <w:r>
        <w:rPr>
          <w:b/>
          <w:i/>
          <w:sz w:val="28"/>
          <w:color w:val="FF0000"/>
        </w:rPr>
        <w:t>Formatted text</w:t>
      </w:r>
    </w:p>
  </w:body>
</w:document>"#;

        let body = parse_document_xml(xml).unwrap();
        let para = body.paragraphs().next().unwrap();
        let run = para.runs().next().unwrap();

        assert!(run.bold());
        assert!(run.italic());
        // `w:sz` of 28 is expected to be reported as 14 points.
        assert_eq!(run.font_size_pt(), Some(14.0));
        assert_eq!(run.color(), Some("FF0000"));
    }
}