docx_review_core/
lib.rs

1//! Review-oriented DOCX extraction for Rust.
2//!
3//! `docx-review-core` reads `.docx` files directly from OOXML and returns a
4//! normalized [`Document`] model with structural blocks, tracked changes,
5//! comments, and optional raw revision metadata.
6//!
7//! The crate is intended for automation, review workflows, and downstream
8//! tooling that needs more than plain text extraction.
9
10use std::fs::File;
11use std::io::{Cursor, Read, Seek};
12use std::path::Path;
13
14use extractor::anchors::apply_comment_anchors;
15use extractor::apply_text_spans;
16use extractor::changes::resolve_tracked_changes;
17use parser::body::{
18    BodyParseResult, parse_document_body, parse_endnotes_part, parse_footer_part,
19    parse_footnotes_part, parse_header_part,
20};
21use parser::comments::{apply_comment_metadata, parse_comments_part};
22use parser::relationships::Relationships;
23use zip::{MAIN_DOCUMENT_PART, require_part, unpack_docx};
24
25pub mod error;
26pub mod extractor;
27pub mod model;
28pub mod parser;
29pub mod zip;
30
31pub use error::Error;
32pub use model::{
33    Block, BlockId, BlockKind, ChangeId, Comment, CommentId, DocMetadata, Document, RawRevisionIds,
34    SpanChangeKind, SpanTrackedChange, Story, TextAnchor, TextSpan, TrackedChange,
35    TrackedChangeKind,
36};
37
38#[derive(Clone, Debug)]
39#[non_exhaustive]
40pub struct ExtractOptions {
41    /// How tracked changes should be represented in the output.
42    pub track_changes_mode: TrackChangesMode,
43    /// Whether comments and their anchor metadata should be extracted.
44    pub include_comments: bool,
45    /// Whether blocks should include [`TextSpan`] markers.
46    pub include_text_spans: bool,
47    /// Whether raw OOXML revision ids should be preserved.
48    pub include_raw_ids: bool,
49}
50
/// Strategy used to report tracked changes in an extracted [`Document`].
///
/// The type implements `PartialEq`/`Eq`, so a mode can be compared directly
/// with `==` instead of requiring a `match` or `matches!`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum TrackChangesMode {
    /// Prefer review-oriented paired changes when possible.
    ///
    /// In this mode adjacent delete/insert pairs may become a single
    /// replacement change.
    #[default]
    Paired,
    /// Emit raw tracked changes as they appear in the OOXML stream.
    Raw,
    /// Emit paired tracked changes and also retain raw tracked changes in
    /// [`Document::raw_changes`].
    Both,
}
66
67/// Extract a [`Document`] from any seekable DOCX reader using default options.
68pub fn extract(input: impl Read + Seek) -> Result<Document, Error> {
69    extract_with_opts(input, ExtractOptions::default())
70}
71
72/// Extract a [`Document`] from any seekable DOCX reader with explicit options.
73pub fn extract_with_opts(input: impl Read + Seek, opts: ExtractOptions) -> Result<Document, Error> {
74    let files = unpack_docx(input)?;
75    let relationships = Relationships::from_files(&files)?;
76    let main_document_path = relationships.main_document_path().unwrap_or(MAIN_DOCUMENT_PART);
77    let document_xml = require_part(&files, main_document_path)?;
78    let document_xml =
79        std::str::from_utf8(document_xml).map_err(|_| Error::Unsupported("non-utf8 document xml"))?;
80    let mut parsed = parse_document_body(document_xml)?;
81
82    if let Some(footnotes_path) = relationships.find_footnotes_part() {
83        if let Some(footnotes_xml) = get_utf8_part(&files, footnotes_path)? {
84            merge_parse_results(&mut parsed, parse_footnotes_part(&footnotes_xml)?);
85        }
86    } else {
87        tracing::debug!("no footnotes part in document");
88    }
89
90    if let Some(endnotes_path) = relationships.find_endnotes_part() {
91        if let Some(endnotes_xml) = get_utf8_part(&files, endnotes_path)? {
92            merge_parse_results(&mut parsed, parse_endnotes_part(&endnotes_xml)?);
93        }
94    } else {
95        tracing::debug!("no endnotes part in document");
96    }
97
98    for (index, header_path) in relationships.find_header_parts().into_iter().enumerate() {
99        if let Some(header_xml) = get_utf8_part(&files, header_path)? {
100            merge_parse_results(&mut parsed, parse_header_part(&header_xml, index as u32 + 1)?);
101        }
102    }
103
104    for (index, footer_path) in relationships.find_footer_parts().into_iter().enumerate() {
105        if let Some(footer_xml) = get_utf8_part(&files, footer_path)? {
106            merge_parse_results(&mut parsed, parse_footer_part(&footer_xml, index as u32 + 1)?);
107        }
108    }
109
110    let parser_raw_changes = parsed.raw_tracked_changes.clone();
111    let (tracked_changes, raw_changes) = resolve_tracked_changes(parsed.raw_tracked_changes, &opts);
112    let mut blocks = parsed.blocks;
113    apply_text_spans(
114        &mut blocks,
115        &tracked_changes,
116        &parser_raw_changes,
117        &opts.track_changes_mode,
118        opts.include_text_spans,
119    );
120    let mut comments = if opts.include_comments {
121        if let Some(comments_path) = relationships.find_comments_part() {
122            let comments_xml = require_part(&files, comments_path)?;
123            let comments_xml = std::str::from_utf8(comments_xml)
124                .map_err(|_| Error::Unsupported("non-utf8 comments xml"))?;
125            let parsed_comments = parse_comments_part(comments_xml)?;
126            let mut comments = parsed_comments.comments;
127
128            let comments_extended_xml = relationships
129                .find_comments_extended_part()
130                .and_then(|path| get_utf8_part(&files, path).transpose())
131                .transpose()?;
132            if comments_extended_xml.is_none() {
133                tracing::debug!("no commentsExtended part in document");
134            }
135
136            apply_comment_metadata(
137                &mut comments,
138                &parsed_comments.para_id_by_comment_id,
139                comments_extended_xml.as_deref(),
140            )?;
141            comments
142        } else {
143            tracing::debug!("no comments part in document");
144            Vec::new()
145        }
146    } else {
147        Vec::new()
148    };
149    if opts.include_comments {
150        apply_comment_anchors(&mut comments, &parsed.comment_anchors, &blocks);
151    }
152
153    Ok(Document {
154        metadata: DocMetadata::default(),
155        blocks,
156        tracked_changes,
157        comments,
158        raw_changes,
159    })
160}
161
162fn get_utf8_part(
163    files: &zip::FileRegistry,
164    path: &str,
165) -> Result<Option<String>, Error> {
166    match zip::get_part(files, path) {
167        Some(bytes) => {
168            let xml =
169                std::str::from_utf8(bytes).map_err(|_| Error::Unsupported("non-utf8 xml part"))?;
170            Ok(Some(xml.to_string()))
171        }
172        None => Ok(None),
173    }
174}
175
176fn merge_parse_results(into: &mut BodyParseResult, mut other: BodyParseResult) {
177    into.blocks.append(&mut other.blocks);
178    into.raw_tracked_changes.append(&mut other.raw_tracked_changes);
179    into.comment_anchors.append(&mut other.comment_anchors);
180}
181
182/// Extract a [`Document`] from a DOCX file path using default options.
183pub fn extract_from_path(path: impl AsRef<Path>) -> Result<Document, Error> {
184    extract_from_path_with_opts(path, ExtractOptions::default())
185}
186
187/// Extract a [`Document`] from a DOCX file path with explicit options.
188pub fn extract_from_path_with_opts(
189    path: impl AsRef<Path>,
190    opts: ExtractOptions,
191) -> Result<Document, Error> {
192    let file = File::open(path)?;
193    extract_with_opts(file, opts)
194}
195
196/// Extract a [`Document`] from in-memory DOCX bytes using default options.
197pub fn extract_from_bytes(bytes: &[u8]) -> Result<Document, Error> {
198    extract_from_bytes_with_opts(bytes, ExtractOptions::default())
199}
200
201/// Extract a [`Document`] from in-memory DOCX bytes with explicit options.
202pub fn extract_from_bytes_with_opts(bytes: &[u8], opts: ExtractOptions) -> Result<Document, Error> {
203    let cursor = Cursor::new(bytes);
204    extract_with_opts(cursor, opts)
205}
206
207impl Default for ExtractOptions {
208    fn default() -> Self {
209        Self {
210            track_changes_mode: TrackChangesMode::Paired,
211            include_comments: true,
212            include_text_spans: true,
213            include_raw_ids: false,
214        }
215    }
216}
217
#[cfg(test)]
mod tests {
    use chrono::{TimeZone, Utc};
    use std::io::Write;
    use ::zip::ZipWriter;
    use ::zip::write::SimpleFileOptions;

    use super::*;

    /// Package-level relationships shared by every fixture; they name
    /// `word/document.xml` as the office document.
    const ROOT_RELS: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
                </Relationships>"#;

    /// Builds an in-memory zip archive containing `parts`, written in the
    /// given order as `(archive path, contents)` pairs with default file
    /// options.
    fn build_docx(parts: &[(&str, &str)]) -> Vec<u8> {
        let mut buffer = Cursor::new(Vec::new());
        let mut writer = ZipWriter::new(&mut buffer);
        let options = SimpleFileOptions::default();

        for (name, contents) in parts {
            writer.start_file(*name, options).unwrap();
            writer.write_all(contents.as_bytes()).unwrap();
        }
        writer.finish().unwrap();

        buffer.into_inner()
    }

    /// A fully populated `Document` survives a serde_json round trip.
    #[test]
    fn document_json_roundtrip() {
        let document = Document {
            metadata: DocMetadata {
                title: Some("Example".to_string()),
                author: Some("OpenAI".to_string()),
                created: Some(Utc.with_ymd_and_hms(2026, 5, 29, 10, 0, 0).unwrap()),
                modified: None,
                revision: Some(3),
            },
            blocks: vec![Block {
                id: "body:block:000001".to_string(),
                story: Story::Body,
                kind: BlockKind::Paragraph,
                text: "Hello".to_string(),
                style: Some("Normal".to_string()),
                text_spans: Some(vec![TextSpan {
                    text: "Hello".to_string(),
                    tracked_changes: vec![SpanTrackedChange {
                        id: "body:tc:ins:000001".to_string(),
                        kind: SpanChangeKind::Insert,
                    }],
                }]),
                footnote_refs: Vec::new(),
                endnote_refs: Vec::new(),
            }],
            tracked_changes: vec![TrackedChange {
                id: "body:tc:ins:000001".to_string(),
                kind: TrackedChangeKind::Insert {
                    text: "Hello".to_string(),
                },
                author: Some("OpenAI".to_string()),
                date: Some(Utc.with_ymd_and_hms(2026, 5, 29, 10, 0, 0).unwrap()),
                block_ids: vec!["body:block:000001".to_string()],
                excerpt: Some("Hello".to_string()),
                move_pair_id: None,
                raw_revision_ids: RawRevisionIds::default(),
            }],
            comments: vec![Comment {
                id: "comment:1".to_string(),
                imported_id: Some("1".to_string()),
                author: Some("Reviewer".to_string()),
                date: None,
                text: "Looks good".to_string(),
                anchors: vec![TextAnchor {
                    block_id: "body:block:000001".to_string(),
                    char_start: 0,
                    char_end: 5,
                }],
                anchored_text: Some("Hello".to_string()),
                resolved: Some(false),
                parent_id: None,
                replies: Vec::new(),
            }],
            raw_changes: Vec::new(),
        };

        let json = serde_json::to_string(&document).unwrap();
        let reparsed: Document = serde_json::from_str(&json).unwrap();

        assert_eq!(reparsed.blocks[0].text, "Hello");
        assert_eq!(reparsed.blocks[0].text_spans.as_ref().unwrap()[0].tracked_changes.len(), 1);
        assert_eq!(reparsed.comments[0].anchored_text.as_deref(), Some("Hello"));
        assert!(matches!(
            reparsed.tracked_changes[0].kind,
            TrackedChangeKind::Insert { .. }
        ));
    }

    /// An adjacent delete/insert pair is reported as one replacement in the
    /// default mode, and the span texts concatenate to the block text.
    #[test]
    fn extracts_blocks_from_docx_bytes() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Hello </w:t></w:r>
                      <w:del><w:r><w:delText>old</w:delText></w:r></w:del>
                      <w:ins><w:r><w:t>new</w:t></w:r></w:ins>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.blocks.len(), 1);
        assert_eq!(document.blocks[0].text, "Hello new");
        let joined_spans = document.blocks[0]
            .text_spans
            .as_ref()
            .unwrap()
            .iter()
            .map(|span| span.text.as_str())
            .collect::<String>();
        assert_eq!(joined_spans, document.blocks[0].text);
        assert_eq!(document.tracked_changes.len(), 1);
        assert!(matches!(
            document.tracked_changes[0].kind,
            TrackedChangeKind::Replacement { .. }
        ));
        assert!(matches!(
            document.blocks[0].text_spans.as_ref().unwrap()[1].tracked_changes[0].kind,
            SpanChangeKind::Replacement
        ));
        assert!(document.comments.is_empty());
    }

    /// A comment range in the body is resolved to character offsets and the
    /// anchored text.
    #[test]
    fn extracts_comments_with_anchors_from_docx_bytes() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/_rels/document.xml.rels",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rIdComments" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>
                </Relationships>"#,
            ),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Hello </w:t></w:r>
                      <w:commentRangeStart w:id="1"/>
                      <w:r><w:t>world</w:t></w:r>
                      <w:commentRangeEnd w:id="1"/>
                      <w:r><w:commentReference w:id="1"/></w:r>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
            (
                "word/comments.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:comment w:id="1" w:author="Reviewer" w:date="2026-05-29T10:00:00Z">
                    <w:p><w:r><w:t>Looks good</w:t></w:r></w:p>
                  </w:comment>
                </w:comments>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.comments.len(), 1);
        assert_eq!(document.comments[0].text, "Looks good");
        assert_eq!(document.comments[0].anchors.len(), 1);
        assert_eq!(document.comments[0].anchors[0].char_start, 6);
        assert_eq!(document.comments[0].anchors[0].char_end, 11);
        assert_eq!(document.comments[0].anchored_text.as_deref(), Some("world"));
    }

    /// Footnote and endnote parts become blocks after the body, and the
    /// separator footnote is skipped.
    #[test]
    fn extracts_footnote_and_endnote_blocks_from_docx_bytes() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/_rels/document.xml.rels",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rIdFootnotes" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes" Target="footnotes.xml"/>
                  <Relationship Id="rIdEndnotes" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes" Target="endnotes.xml"/>
                </Relationships>"#,
            ),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Body text</w:t></w:r>
                      <w:r><w:footnoteReference w:id="2"/></w:r>
                      <w:r><w:endnoteReference w:id="5"/></w:r>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
            (
                "word/footnotes.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:footnotes xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:footnote w:type="separator" w:id="-1"><w:p><w:r><w:t>skip</w:t></w:r></w:p></w:footnote>
                  <w:footnote w:id="2">
                    <w:p><w:r><w:t>Footnote text</w:t></w:r></w:p>
                  </w:footnote>
                </w:footnotes>"#,
            ),
            (
                "word/endnotes.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:endnotes xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:endnote w:id="5">
                    <w:p><w:r><w:t>Endnote text</w:t></w:r></w:p>
                  </w:endnote>
                </w:endnotes>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.blocks.len(), 3);
        assert_eq!(document.blocks[0].story, Story::Body);
        assert_eq!(document.blocks[0].footnote_refs, vec![2]);
        assert_eq!(document.blocks[0].endnote_refs, vec![5]);
        assert_eq!(document.blocks[1].story, Story::Footnote { index: 2 });
        assert_eq!(document.blocks[1].id, "footnote:2:block:000001");
        assert_eq!(document.blocks[1].text, "Footnote text");
        assert_eq!(document.blocks[2].story, Story::Endnote { index: 5 });
        assert_eq!(document.blocks[2].id, "endnote:5:block:000001");
        assert_eq!(document.blocks[2].text, "Endnote text");
    }

    /// Header and footer parts become blocks after the body, numbered from 1.
    #[test]
    fn extracts_header_and_footer_blocks_from_docx_bytes() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/_rels/document.xml.rels",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                  <Relationship Id="rIdHeader" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header1.xml"/>
                  <Relationship Id="rIdFooter" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" Target="footer1.xml"/>
                </Relationships>"#,
            ),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p><w:r><w:t>Body text</w:t></w:r></w:p>
                  </w:body>
                </w:document>"#,
            ),
            (
                "word/header1.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:hdr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:p><w:r><w:t>Header text</w:t></w:r></w:p>
                </w:hdr>"#,
            ),
            (
                "word/footer1.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:p><w:r><w:t>Footer text</w:t></w:r></w:p>
                </w:ftr>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.blocks.len(), 3);
        assert_eq!(document.blocks[0].story, Story::Body);
        assert_eq!(document.blocks[1].story, Story::Header { index: 1 });
        assert_eq!(document.blocks[1].id, "header:1:block:000001");
        assert_eq!(document.blocks[1].text, "Header text");
        assert_eq!(document.blocks[2].story, Story::Footer { index: 1 });
        assert_eq!(document.blocks[2].id, "footer:1:block:000001");
        assert_eq!(document.blocks[2].text, "Footer text");
    }

    /// In raw mode an insertion keeps its `Insert` span kind.
    #[test]
    fn raw_mode_keeps_insert_span_kind() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r><w:t>Hello </w:t></w:r>
                      <w:ins w:id="4"><w:r><w:t>new</w:t></w:r></w:ins>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let document = extract_from_bytes_with_opts(
            &bytes,
            ExtractOptions {
                track_changes_mode: TrackChangesMode::Raw,
                ..ExtractOptions::default()
            },
        )
        .unwrap();

        assert!(matches!(
            document.blocks[0].text_spans.as_ref().unwrap()[1].tracked_changes[0].kind,
            SpanChangeKind::Insert
        ));
    }

    /// A run-property change is reported as a format change whose summary and
    /// excerpt name the previous property.
    #[test]
    fn extracts_format_changes_from_docx_bytes() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p>
                      <w:r>
                        <w:rPr>
                          <w:rPrChange w:id="9" w:author="Alice" w:date="2026-05-29T10:00:00Z">
                            <w:rPr><w:b/></w:rPr>
                          </w:rPrChange>
                        </w:rPr>
                        <w:t>Styled</w:t>
                      </w:r>
                    </w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let document = extract_from_bytes(&bytes).unwrap();

        assert_eq!(document.tracked_changes.len(), 1);
        assert!(matches!(
            &document.tracked_changes[0].kind,
            TrackedChangeKind::Format { previous_props_summary }
                if previous_props_summary == "b"
        ));
        assert_eq!(document.tracked_changes[0].excerpt.as_deref(), Some("b"));
        assert_eq!(
            document.blocks[0].text_spans.as_ref().unwrap()[0].tracked_changes[0].kind,
            SpanChangeKind::Format
        );
    }

    /// Disabling `include_text_spans` leaves `text_spans` unset on blocks.
    #[test]
    fn no_text_spans_option_omits_spans() {
        let bytes = build_docx(&[
            ("_rels/.rels", ROOT_RELS),
            (
                "word/document.xml",
                r#"<?xml version="1.0" encoding="UTF-8"?>
                <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
                  <w:body>
                    <w:p><w:r><w:t>Hello</w:t></w:r></w:p>
                  </w:body>
                </w:document>"#,
            ),
        ]);

        let document = extract_from_bytes_with_opts(
            &bytes,
            ExtractOptions {
                include_text_spans: false,
                ..ExtractOptions::default()
            },
        )
        .unwrap();

        assert!(document.blocks[0].text_spans.is_none());
    }
}
docx_review_core/lib.rs

docx_review_core/
lib.rs