1use std::fs::File;
11use std::io::{Cursor, Read, Seek};
12use std::path::Path;
13
14use extractor::anchors::apply_comment_anchors;
15use extractor::apply_text_spans;
16use extractor::changes::resolve_tracked_changes;
17use parser::body::{
18 BodyParseResult, parse_document_body, parse_endnotes_part, parse_footer_part,
19 parse_footnotes_part, parse_header_part,
20};
21use parser::comments::{apply_comment_metadata, parse_comments_part};
22use parser::relationships::Relationships;
23use zip::{MAIN_DOCUMENT_PART, require_part, unpack_docx};
24
25pub mod error;
26pub mod extractor;
27pub mod model;
28pub mod parser;
29pub mod zip;
30
31pub use error::Error;
32pub use model::{
33 Block, BlockId, BlockKind, ChangeId, Comment, CommentId, DocMetadata, Document, RawRevisionIds,
34 SpanChangeKind, SpanTrackedChange, Story, TextAnchor, TextSpan, TrackedChange,
35 TrackedChangeKind,
36};
37
38#[derive(Clone, Debug)]
39#[non_exhaustive]
40pub struct ExtractOptions {
41 pub track_changes_mode: TrackChangesMode,
43 pub include_comments: bool,
45 pub include_text_spans: bool,
47 pub include_raw_ids: bool,
49}
50
/// Selects how tracked changes are represented in the extracted document.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub enum TrackChangesMode {
    /// The default mode. The crate's tests show an adjacent deletion and
    /// insertion reported as a single replacement in this mode.
    #[default]
    Paired,
    /// The crate's tests show an insertion keeping its `Insert` span kind in
    /// this mode.
    Raw,
    /// NOTE(review): presumably reports both the paired and the raw view;
    /// confirm against the tracked-change resolver.
    Both,
}
66
67pub fn extract(input: impl Read + Seek) -> Result<Document, Error> {
69 extract_with_opts(input, ExtractOptions::default())
70}
71
72pub fn extract_with_opts(input: impl Read + Seek, opts: ExtractOptions) -> Result<Document, Error> {
74 let files = unpack_docx(input)?;
75 let relationships = Relationships::from_files(&files)?;
76 let main_document_path = relationships.main_document_path().unwrap_or(MAIN_DOCUMENT_PART);
77 let document_xml = require_part(&files, main_document_path)?;
78 let document_xml =
79 std::str::from_utf8(document_xml).map_err(|_| Error::Unsupported("non-utf8 document xml"))?;
80 let mut parsed = parse_document_body(document_xml)?;
81
82 if let Some(footnotes_path) = relationships.find_footnotes_part() {
83 if let Some(footnotes_xml) = get_utf8_part(&files, footnotes_path)? {
84 merge_parse_results(&mut parsed, parse_footnotes_part(&footnotes_xml)?);
85 }
86 } else {
87 tracing::debug!("no footnotes part in document");
88 }
89
90 if let Some(endnotes_path) = relationships.find_endnotes_part() {
91 if let Some(endnotes_xml) = get_utf8_part(&files, endnotes_path)? {
92 merge_parse_results(&mut parsed, parse_endnotes_part(&endnotes_xml)?);
93 }
94 } else {
95 tracing::debug!("no endnotes part in document");
96 }
97
98 for (index, header_path) in relationships.find_header_parts().into_iter().enumerate() {
99 if let Some(header_xml) = get_utf8_part(&files, header_path)? {
100 merge_parse_results(&mut parsed, parse_header_part(&header_xml, index as u32 + 1)?);
101 }
102 }
103
104 for (index, footer_path) in relationships.find_footer_parts().into_iter().enumerate() {
105 if let Some(footer_xml) = get_utf8_part(&files, footer_path)? {
106 merge_parse_results(&mut parsed, parse_footer_part(&footer_xml, index as u32 + 1)?);
107 }
108 }
109
110 let parser_raw_changes = parsed.raw_tracked_changes.clone();
111 let (tracked_changes, raw_changes) = resolve_tracked_changes(parsed.raw_tracked_changes, &opts);
112 let mut blocks = parsed.blocks;
113 apply_text_spans(
114 &mut blocks,
115 &tracked_changes,
116 &parser_raw_changes,
117 &opts.track_changes_mode,
118 opts.include_text_spans,
119 );
120 let mut comments = if opts.include_comments {
121 if let Some(comments_path) = relationships.find_comments_part() {
122 let comments_xml = require_part(&files, comments_path)?;
123 let comments_xml = std::str::from_utf8(comments_xml)
124 .map_err(|_| Error::Unsupported("non-utf8 comments xml"))?;
125 let parsed_comments = parse_comments_part(comments_xml)?;
126 let mut comments = parsed_comments.comments;
127
128 let comments_extended_xml = relationships
129 .find_comments_extended_part()
130 .and_then(|path| get_utf8_part(&files, path).transpose())
131 .transpose()?;
132 if comments_extended_xml.is_none() {
133 tracing::debug!("no commentsExtended part in document");
134 }
135
136 apply_comment_metadata(
137 &mut comments,
138 &parsed_comments.para_id_by_comment_id,
139 comments_extended_xml.as_deref(),
140 )?;
141 comments
142 } else {
143 tracing::debug!("no comments part in document");
144 Vec::new()
145 }
146 } else {
147 Vec::new()
148 };
149 if opts.include_comments {
150 apply_comment_anchors(&mut comments, &parsed.comment_anchors, &blocks);
151 }
152
153 Ok(Document {
154 metadata: DocMetadata::default(),
155 blocks,
156 tracked_changes,
157 comments,
158 raw_changes,
159 })
160}
161
162fn get_utf8_part(
163 files: &zip::FileRegistry,
164 path: &str,
165) -> Result<Option<String>, Error> {
166 match zip::get_part(files, path) {
167 Some(bytes) => {
168 let xml =
169 std::str::from_utf8(bytes).map_err(|_| Error::Unsupported("non-utf8 xml part"))?;
170 Ok(Some(xml.to_string()))
171 }
172 None => Ok(None),
173 }
174}
175
176fn merge_parse_results(into: &mut BodyParseResult, mut other: BodyParseResult) {
177 into.blocks.append(&mut other.blocks);
178 into.raw_tracked_changes.append(&mut other.raw_tracked_changes);
179 into.comment_anchors.append(&mut other.comment_anchors);
180}
181
182pub fn extract_from_path(path: impl AsRef<Path>) -> Result<Document, Error> {
184 extract_from_path_with_opts(path, ExtractOptions::default())
185}
186
187pub fn extract_from_path_with_opts(
189 path: impl AsRef<Path>,
190 opts: ExtractOptions,
191) -> Result<Document, Error> {
192 let file = File::open(path)?;
193 extract_with_opts(file, opts)
194}
195
196pub fn extract_from_bytes(bytes: &[u8]) -> Result<Document, Error> {
198 extract_from_bytes_with_opts(bytes, ExtractOptions::default())
199}
200
201pub fn extract_from_bytes_with_opts(bytes: &[u8], opts: ExtractOptions) -> Result<Document, Error> {
203 let cursor = Cursor::new(bytes);
204 extract_with_opts(cursor, opts)
205}
206
207impl Default for ExtractOptions {
208 fn default() -> Self {
209 Self {
210 track_changes_mode: TrackChangesMode::Paired,
211 include_comments: true,
212 include_text_spans: true,
213 include_raw_ids: false,
214 }
215 }
216}
217
#[cfg(test)]
mod tests {
    use ::zip::ZipWriter;
    use ::zip::write::SimpleFileOptions;
    use chrono::{TimeZone, Utc};
    use std::io::Write;

    use super::*;

    /// Package-level relationships shared by every fixture: they point at
    /// `word/document.xml` as the main document part.
    const ROOT_RELS: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
          <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
        </Relationships>"#;

    /// Builds an in-memory ZIP archive containing the given
    /// `(part path, part contents)` pairs, written in the order supplied.
    fn docx_bytes(parts: &[(&str, &str)]) -> Vec<u8> {
        let mut buffer = Cursor::new(Vec::new());
        let mut writer = ZipWriter::new(&mut buffer);
        let options = SimpleFileOptions::default();

        for (path, contents) in parts {
            writer.start_file(*path, options).unwrap();
            writer.write_all(contents.as_bytes()).unwrap();
        }
        writer.finish().unwrap();

        buffer.into_inner()
    }

    #[test]
    fn document_json_roundtrip() {
        let block_id = "body:block:000001";
        let change_id = "body:tc:ins:000001";
        let timestamp = Utc.with_ymd_and_hms(2026, 5, 29, 10, 0, 0).unwrap();

        let document = Document {
            metadata: DocMetadata {
                title: Some("Example".to_string()),
                author: Some("OpenAI".to_string()),
                created: Some(timestamp),
                modified: None,
                revision: Some(3),
            },
            blocks: vec![Block {
                id: block_id.to_string(),
                story: Story::Body,
                kind: BlockKind::Paragraph,
                text: "Hello".to_string(),
                style: Some("Normal".to_string()),
                text_spans: Some(vec![TextSpan {
                    text: "Hello".to_string(),
                    tracked_changes: vec![SpanTrackedChange {
                        id: change_id.to_string(),
                        kind: SpanChangeKind::Insert,
                    }],
                }]),
                footnote_refs: Vec::new(),
                endnote_refs: Vec::new(),
            }],
            tracked_changes: vec![TrackedChange {
                id: change_id.to_string(),
                kind: TrackedChangeKind::Insert {
                    text: "Hello".to_string(),
                },
                author: Some("OpenAI".to_string()),
                date: Some(timestamp),
                block_ids: vec![block_id.to_string()],
                excerpt: Some("Hello".to_string()),
                move_pair_id: None,
                raw_revision_ids: RawRevisionIds::default(),
            }],
            comments: vec![Comment {
                id: "comment:1".to_string(),
                imported_id: Some("1".to_string()),
                author: Some("Reviewer".to_string()),
                date: None,
                text: "Looks good".to_string(),
                anchors: vec![TextAnchor {
                    block_id: block_id.to_string(),
                    char_start: 0,
                    char_end: 5,
                }],
                anchored_text: Some("Hello".to_string()),
                resolved: Some(false),
                parent_id: None,
                replies: Vec::new(),
            }],
            raw_changes: Vec::new(),
        };

        let json = serde_json::to_string(&document).unwrap();
        let reparsed: Document = serde_json::from_str(&json).unwrap();

        assert_eq!(reparsed.blocks[0].text, "Hello");
        assert_eq!(reparsed.blocks[0].text_spans.as_ref().unwrap()[0].tracked_changes.len(), 1);
        assert_eq!(reparsed.comments[0].anchored_text.as_deref(), Some("Hello"));
        assert!(matches!(
            reparsed.tracked_changes[0].kind,
            TrackedChangeKind::Insert { .. }
        ));
    }

    #[test]
    fn extracts_blocks_from_docx_bytes() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Hello </w:t></w:r>
                      <w:del><w:r><w:delText>old</w:delText></w:r></w:del>
                      <w:ins><w:r><w:t>new</w:t></w:r></w:ins>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.blocks.len(), 1);
        assert_eq!(document.blocks[0].text, "Hello new");
        assert_eq!(
            document.blocks[0]
                .text_spans
                .as_ref()
                .unwrap()
                .iter()
                .map(|span| span.text.as_str())
                .collect::<String>(),
            document.blocks[0].text
        );
        assert_eq!(document.tracked_changes.len(), 1);
        assert!(matches!(
            document.tracked_changes[0].kind,
            TrackedChangeKind::Replacement { .. }
        ));
        assert!(matches!(
            document.blocks[0].text_spans.as_ref().unwrap()[1].tracked_changes[0].kind,
            SpanChangeKind::Replacement
        ));
        assert!(document.comments.is_empty());
    }

    #[test]
    fn extracts_comments_with_anchors_from_docx_bytes() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/_rels/document.xml.rels",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rIdComments" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>
                </Relationships>"#,
            ),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Hello </w:t></w:r>
                      <w:commentRangeStart w:id="1"/>
                      <w:r><w:t>world</w:t></w:r>
                      <w:commentRangeEnd w:id="1"/>
                      <w:r><w:commentReference w:id="1"/></w:r>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
            (
                "word/comments.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:comment w:id="1" w:author="Reviewer" w:date="2026-05-29T10:00:00Z">
                    <w:p><w:r><w:t>Looks good</w:t></w:r></w:p>
                  </w:comment>
                </w:comments>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.comments.len(), 1);
        assert_eq!(document.comments[0].text, "Looks good");
        assert_eq!(document.comments[0].anchors.len(), 1);
        assert_eq!(document.comments[0].anchors[0].char_start, 6);
        assert_eq!(document.comments[0].anchors[0].char_end, 11);
        assert_eq!(document.comments[0].anchored_text.as_deref(), Some("world"));
    }

    #[test]
    fn extracts_footnote_and_endnote_blocks_from_docx_bytes() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/_rels/document.xml.rels",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rIdFootnotes" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes" Target="footnotes.xml"/>
                  <Relationship Id="rIdEndnotes" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes" Target="endnotes.xml"/>
                </Relationships>"#,
            ),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Body text</w:t></w:r>
                      <w:r><w:footnoteReference w:id="2"/></w:r>
                      <w:r><w:endnoteReference w:id="5"/></w:r>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
            (
                "word/footnotes.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:footnotes xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:footnote w:type="separator" w:id="-1"><w:p><w:r><w:t>skip</w:t></w:r></w:p></w:footnote>
                  <w:footnote w:id="2">
                    <w:p><w:r><w:t>Footnote text</w:t></w:r></w:p>
                  </w:footnote>
                </w:footnotes>"#,
            ),
            (
                "word/endnotes.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:endnotes xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:endnote w:id="5">
                    <w:p><w:r><w:t>Endnote text</w:t></w:r></w:p>
                  </w:endnote>
                </w:endnotes>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.blocks.len(), 3);
        assert_eq!(document.blocks[0].story, Story::Body);
        assert_eq!(document.blocks[0].footnote_refs, vec![2]);
        assert_eq!(document.blocks[0].endnote_refs, vec![5]);
        assert_eq!(document.blocks[1].story, Story::Footnote { index: 2 });
        assert_eq!(document.blocks[1].id, "footnote:2:block:000001");
        assert_eq!(document.blocks[1].text, "Footnote text");
        assert_eq!(document.blocks[2].story, Story::Endnote { index: 5 });
        assert_eq!(document.blocks[2].id, "endnote:5:block:000001");
        assert_eq!(document.blocks[2].text, "Endnote text");
    }

    #[test]
    fn extracts_header_and_footer_blocks_from_docx_bytes() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/_rels/document.xml.rels",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rIdHeader" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header1.xml"/>
                  <Relationship Id="rIdFooter" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" Target="footer1.xml"/>
                </Relationships>"#,
            ),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p><w:r><w:t>Body text</w:t></w:r></w:p>
                  </w:body>
                </w:document>"#,
            ),
            (
                "word/header1.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:hdr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:p><w:r><w:t>Header text</w:t></w:r></w:p>
                </w:hdr>"#,
            ),
            (
                "word/footer1.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:p><w:r><w:t>Footer text</w:t></w:r></w:p>
                </w:ftr>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.blocks.len(), 3);
        assert_eq!(document.blocks[0].story, Story::Body);
        assert_eq!(document.blocks[1].story, Story::Header { index: 1 });
        assert_eq!(document.blocks[1].id, "header:1:block:000001");
        assert_eq!(document.blocks[1].text, "Header text");
        assert_eq!(document.blocks[2].story, Story::Footer { index: 1 });
        assert_eq!(document.blocks[2].id, "footer:1:block:000001");
        assert_eq!(document.blocks[2].text, "Footer text");
    }

    #[test]
    fn raw_mode_keeps_insert_span_kind() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Hello </w:t></w:r>
                      <w:ins w:id="4"><w:r><w:t>new</w:t></w:r></w:ins>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let opts = ExtractOptions {
            track_changes_mode: TrackChangesMode::Raw,
            ..ExtractOptions::default()
        };
        let document = extract_from_bytes_with_opts(&bytes, opts).unwrap();

        assert!(matches!(
            document.blocks[0].text_spans.as_ref().unwrap()[1].tracked_changes[0].kind,
            SpanChangeKind::Insert
        ));
    }

    #[test]
    fn extracts_format_changes_from_docx_bytes() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r>
                        <w:rPr>
                          <w:rPrChange w:id="9" w:author="Alice" w:date="2026-05-29T10:00:00Z">
                            <w:rPr><w:b/></w:rPr>
                          </w:rPrChange>
                        </w:rPr>
                        <w:t>Styled</w:t>
                      </w:r>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.tracked_changes.len(), 1);
        assert!(matches!(
            &document.tracked_changes[0].kind,
            TrackedChangeKind::Format { previous_props_summary }
                if previous_props_summary == "b"
        ));
        assert_eq!(document.tracked_changes[0].excerpt.as_deref(), Some("b"));
        assert_eq!(
            document.blocks[0].text_spans.as_ref().unwrap()[0].tracked_changes[0].kind,
            SpanChangeKind::Format
        );
    }

    #[test]
    fn no_text_spans_option_omits_spans() {
        let bytes = docx_bytes(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p><w:r><w:t>Hello</w:t></w:r></w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let opts = ExtractOptions {
            include_text_spans: false,
            ..ExtractOptions::default()
        };
        let document = extract_from_bytes_with_opts(&bytes, opts).unwrap();

        assert!(document.blocks[0].text_spans.is_none());
    }
}