Skip to main content

docspec_docx_reader/
lib.rs

1#![forbid(unsafe_code)]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3//! DOCX to `DocSpec` event stream reader.
4//!
5//! This crate provides a [`DocxReader`] that implements [`EventSource`] to convert
6//! DOCX documents into the `DocSpec` event stream format. It uses `quick-xml` for
7//! streaming XML parsing and `zip` for archive extraction.
8//!
9//! # Scope
10//!
//! **In scope**: Paragraphs (`<w:p>`, with alignment read from `<w:jc>` inside
//! `<w:pPr>`), direct text (`<w:t>` inside `<w:r>`, carrying the run's bold,
//! italic, strikethrough, underline, and sub/superscript styling from `<w:rPr>`),
//! line breaks (`<w:br>` — including `w:type="page"` and `w:type="column"`, all
//! emitted as `LineBreak`), tabs (`<w:tab>`, emitted as a `Text` event whose
//! content is the single character `"\t"`), and tables (`<w:tbl>`, `<w:tr>`,
//! `<w:tc>` — emitted as structural events only; cell merging, header rows, and
//! table styles are not represented).
17//! Emits exactly: `StartDocument`, `StartParagraph`, `Text`, `LineBreak`,
18//! `EndParagraph`, `StartTable`, `StartTableRow`, `StartTableCell`,
19//! `EndTableCell`, `EndTableRow`, `EndTable`, `EndDocument`.
20//!
21//! **Out of scope (silently dropped)**:
//! - Run styling other than bold, italic, strikethrough, underline, and
//!   sub/superscript (fonts, colors, sizes, etc.)
23//! - Headings (any `<w:pStyle>` value — every paragraph is `StartParagraph`)
24//! - Cell merging (`<w:gridSpan>`, `<w:vMerge>`) — every cell emits with
25//!   `colspan: None` and `rowspan: None`
26//! - Header rows (`<w:tblHeader>`) — every cell emits as `StartTableCell`,
27//!   never `StartTableHeader`
28//! - Table, row, and cell properties (`<w:tblPr>`, `<w:trPr>`, `<w:tcPr>`,
29//!   `<w:tblGrid>`)
30//! - Lists
31//! - Hyperlinks (`<w:hyperlink>`)
32//! - Drawings and images (`<w:drawing>`, `<w:pict>`)
33//! - Structured document tags (`<w:sdt>`)
34//! - Comments, footnotes, headers, footers
35//! - Document metadata
36//! - Tracked changes (`<w:ins>`, `<w:del>`, `<w:moveFrom>`, `<w:moveTo>`)
37//!
38//! # Streaming Guarantee
39//!
40//! `DocxReader` streams `document.xml` event by event using constant memory
41//! regardless of document size. Only `_rels/.rels` (a few hundred bytes) is
42//! fully read into memory to discover the document target path.
43//!
44//! # Quick Start
45//!
46//! ```no_run
47//! use docspec_docx_reader::{DocxReader, EventSource};
48//!
49//! let mut reader = DocxReader::from_path("document.docx")?;
50//! while let Some(event) = reader.next_event()? {
51//!     println!("{event:?}");
52//! }
53//! # Ok::<(), docspec_core::Error>(())
54//! ```
55
56extern crate alloc;
57
58mod properties;
59mod rels;
60
61use alloc::collections::VecDeque;
62use core::fmt;
63use std::io::{BufReader, Read, Seek};
64use std::path::Path;
65
66pub use docspec_core::EventSource;
67use docspec_core::{Error, Event, Result, TextAlignment, TextStyle};
68use quick_xml::events::{BytesCData, BytesRef, BytesStart, BytesText};
69
/// Lifecycle stage of the reader's event stream.
///
/// The stream moves from `NotStarted` through `Running` to `Finished`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
    /// Terminal stage: `EndDocument` has been emitted.
    Finished,
    /// Initial stage: `StartDocument` has not been emitted yet.
    NotStarted,
    /// Middle stage: events between `StartDocument` and `EndDocument` are
    /// being produced.
    Running,
}
80
81/// A streaming DOCX reader that implements [`EventSource`].
82///
83/// `DocxReader` parses a DOCX archive and emits `DocSpec` events one at a time.
84/// `<w:p>` paragraphs, `<w:t>` text, `<w:br>` line breaks, `<w:tab>` tabs, and
85/// table elements (`<w:tbl>`, `<w:tr>`, `<w:tc>`) are recognized; all other
86/// elements are silently ignored.
87///
88/// # Streaming
89///
90/// The reader streams `document.xml` event by event using constant memory.
91/// Only `_rels/.rels` (a few hundred bytes) is buffered to discover the
92/// document target path.
93///
94/// # Errors
95///
96/// Returns [`Error::Io`] for I/O failures and [`Error::Parse`] for malformed
97/// archives or XML.
98#[expect(
99    clippy::struct_excessive_bools,
100    reason = "DocxReader tracks six independent boolean parser states; grouping them would obscure the streaming state machine"
101)]
102pub struct DocxReader {
103    /// Reusable buffer for quick-xml event reading.
104    buf: Vec<u8>,
105    /// Depth counter for ignored subtrees (tracked changes, hyperlinks,
106    /// drawings, table/row/cell property containers, etc.).
107    /// Incremented on Start of an ignored container, decremented on End.
108    in_ignored_subtree: u32,
109    /// Whether the reader is currently inside a `<w:p>` element.
110    in_paragraph: bool,
111    /// Whether the reader is currently inside a `<w:t>` element.
112    in_text: bool,
113    /// Whether currently inside a `<w:pPr>` element that is still legal (first child of paragraph).
114    in_ppr: bool,
115    /// Paragraph alignment captured from `<w:jc>` while inside `<w:pPr>`.
116    pending_paragraph_alignment: Option<TextAlignment>,
117    /// True once `StartParagraph` has been queued for the current paragraph.
118    paragraph_started_emitted: bool,
119    /// Whether currently inside a `<w:rPr>` element that is still legal (first child of run).
120    in_rpr: bool,
121    /// Run style accumulated while inside `<w:rPr>`.
122    pending_run_style: TextStyle,
123    /// Text collected for the current `<w:t>` element.
124    pending_text: String,
125    /// Run style frozen at `</w:rPr>`, applied to subsequent text emissions in the same run.
126    current_run_style: TextStyle,
127    /// Document processing phase.
128    phase: Phase,
129    /// Queue of `DocSpec` events to emit.
130    queue: VecDeque<Event>,
131    /// True once the first content event of the current run has been queued.
132    run_content_emitted: bool,
133    /// The quick-xml reader streaming from the document entry.
134    xml: quick_xml::Reader<BufReader<Box<dyn Read + Send>>>,
135}
136
137impl fmt::Debug for DocxReader {
138    #[inline]
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        f.debug_struct("DocxReader")
141            .field("buf", &self.buf)
142            .field("in_ignored_subtree", &self.in_ignored_subtree)
143            .field("in_paragraph", &self.in_paragraph)
144            .field("in_text", &self.in_text)
145            .field("in_ppr", &self.in_ppr)
146            .field(
147                "pending_paragraph_alignment",
148                &self.pending_paragraph_alignment,
149            )
150            .field("paragraph_started_emitted", &self.paragraph_started_emitted)
151            .field("in_rpr", &self.in_rpr)
152            .field("pending_run_style", &self.pending_run_style)
153            .field("pending_text", &self.pending_text)
154            .field("current_run_style", &self.current_run_style)
155            .field("phase", &"<phase>")
156            .field("queue", &self.queue)
157            .field("run_content_emitted", &self.run_content_emitted)
158            .field("xml", &"<quick_xml::Reader>")
159            .finish()
160    }
161}
162
163impl DocxReader {
164    /// Creates a `DocxReader` from a file path.
165    ///
166    /// # Errors
167    ///
168    /// Returns [`Error::Io`] if the file cannot be opened. See [`from_reader`](Self::from_reader)
169    /// for additional error conditions.
170    #[inline]
171    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
172        let file = std::fs::File::open(path.as_ref()).map_err(Error::from)?;
173        Self::from_reader(file)
174    }
175
176    /// Creates a `DocxReader` from any `Read + Seek` source.
177    ///
178    /// The reader must be positioned at the start of a valid DOCX (ZIP) archive.
179    ///
180    /// # Errors
181    ///
182    /// Returns [`Error::Parse`] if the input is not a valid ZIP archive, if
183    /// `_rels/.rels` is missing or malformed, or if the document target entry
184    /// cannot be opened. Returns [`Error::Io`] for I/O failures.
185    #[inline]
186    pub fn from_reader<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self> {
187        let mut archive = zip::ZipArchive::new(&mut reader).map_err(|err| match err {
188            zip::result::ZipError::InvalidArchive(_)
189            | zip::result::ZipError::UnsupportedArchive(_) => Error::Parse {
190                message: "not a valid ZIP archive".to_string(),
191                position: None,
192            },
193            zip::result::ZipError::Io(source) => Error::Io { source },
194            zip::result::ZipError::FileNotFound
195            | zip::result::ZipError::InvalidPassword
196            | zip::result::ZipError::CompressionMethodNotSupported(_)
197            | _ => parse_error(format!("not a valid ZIP archive: {err}")),
198        })?;
199
200        let document_path = rels::find_document_path(&mut archive)?;
201
202        let (data_start, compressed_size, method) = {
203            let entry = archive
204                .by_name(&document_path)
205                .map_err(|_err| Error::Parse {
206                    message: format!("document target not found: {document_path}"),
207                    position: None,
208                })?;
209            let data_start = entry
210                .data_start()
211                .ok_or_else(|| parse_error("document.xml has no data offset".to_string()))?;
212            (data_start, entry.compressed_size(), entry.compression())
213        };
214        drop(archive);
215
216        reader
217            .seek(std::io::SeekFrom::Start(data_start))
218            .map_err(Error::from)?;
219
220        let limited = reader.take(compressed_size);
221
222        let stream: Box<dyn Read + Send> = if method == zip::CompressionMethod::Stored {
223            Box::new(limited)
224        } else if method == zip::CompressionMethod::Deflated {
225            Box::new(flate2::read::DeflateDecoder::new(limited))
226        } else {
227            return Err(Error::Parse {
228                message: format!("unsupported compression: {method:?}"),
229                position: None,
230            });
231        };
232
233        let xml = quick_xml::Reader::from_reader(BufReader::new(stream));
234
235        Ok(Self {
236            buf: Vec::with_capacity(4096),
237            in_ignored_subtree: 0,
238            in_paragraph: false,
239            in_text: false,
240            in_ppr: false,
241            pending_paragraph_alignment: None,
242            paragraph_started_emitted: false,
243            in_rpr: false,
244            pending_run_style: TextStyle::default(),
245            pending_text: String::new(),
246            current_run_style: TextStyle::default(),
247            phase: Phase::NotStarted,
248            queue: VecDeque::new(),
249            run_content_emitted: false,
250            xml,
251        })
252    }
253}
254
255impl DocxReader {
256    fn can_collect_text(&self) -> bool {
257        self.in_ignored_subtree == 0 && self.in_paragraph && self.in_text
258    }
259
260    fn emit_line_break(&mut self) {
261        self.ensure_paragraph_started();
262        self.flush_pending_text();
263        self.run_content_emitted = true;
264        self.queue.push_back(Event::LineBreak);
265    }
266
267    fn emit_tab(&mut self) {
268        self.ensure_paragraph_started();
269        self.flush_pending_text();
270        self.run_content_emitted = true;
271        self.queue.push_back(Event::Text {
272            content: "\t".to_string(),
273            style: TextStyle::default(),
274        });
275    }
276
277    fn end_paragraph(&mut self) {
278        self.ensure_paragraph_started();
279        self.queue.push_back(Event::EndParagraph);
280        self.in_paragraph = false;
281        self.in_text = false;
282        self.pending_text.clear();
283        self.in_ppr = false;
284        self.pending_paragraph_alignment = None;
285        self.paragraph_started_emitted = false;
286    }
287
288    fn flush_pending_text(&mut self) {
289        if !self.pending_text.is_empty() {
290            self.queue.push_back(Event::Text {
291                content: core::mem::take(&mut self.pending_text),
292                style: self.current_run_style.clone(),
293            });
294        }
295    }
296
297    fn handle_cdata(&mut self, cdata: BytesCData<'_>) -> Result<()> {
298        if self.can_collect_text() {
299            let bytes = cdata.into_inner();
300            let content = core::str::from_utf8(&bytes)
301                .map_err(|err| parse_error(format!("malformed document.xml: {err}")))?;
302            self.pending_text.push_str(content);
303        }
304        Ok(())
305    }
306
307    fn handle_empty(&mut self, tag: &BytesStart<'_>) {
308        let local_name = tag.local_name();
309        let local = local_name.as_ref();
310        match local {
311            value if self.in_ignored_subtree > 0 || is_ignored_container(value) => {}
312            b"pPr" if self.in_paragraph && !self.paragraph_started_emitted => {
313                self.ensure_paragraph_started();
314            }
315            b"jc" if self.in_ppr => {
316                let val = read_val_attribute(tag);
317                self.pending_paragraph_alignment =
318                    val.as_deref().and_then(properties::parse_alignment);
319            }
320            b"rPr" if self.in_ppr => {}
321            b"rPr" if self.in_paragraph && !self.in_ppr && !self.in_rpr => {}
322            b"b" if self.in_rpr => {
323                self.pending_run_style.bold = parse_on_off_attribute(tag);
324            }
325            b"i" if self.in_rpr => {
326                self.pending_run_style.italic = parse_on_off_attribute(tag);
327            }
328            b"strike" | b"dstrike" if self.in_rpr => {
329                self.pending_run_style.strikethrough = parse_on_off_attribute(tag);
330            }
331            b"u" if self.in_rpr => {
332                let val = read_val_attribute(tag);
333                self.pending_run_style.underline = properties::parse_underline_on(val.as_deref());
334            }
335            b"vertAlign" if self.in_rpr => {
336                let val = read_val_attribute(tag);
337                match properties::parse_vert_align(val.as_deref()) {
338                    properties::VertAlign::Subscript => {
339                        self.pending_run_style.subscript = true;
340                        self.pending_run_style.superscript = false;
341                    }
342                    properties::VertAlign::Superscript => {
343                        self.pending_run_style.superscript = true;
344                        self.pending_run_style.subscript = false;
345                    }
346                    properties::VertAlign::None => {
347                        self.pending_run_style.subscript = false;
348                        self.pending_run_style.superscript = false;
349                    }
350                }
351            }
352            b"p" if !self.in_paragraph => {
353                self.queue.push_back(Event::StartParagraph {
354                    alignment: None,
355                    id: None,
356                });
357                self.queue.push_back(Event::EndParagraph);
358            }
359            b"br" if self.in_paragraph => self.emit_line_break(),
360            b"tab" if self.in_paragraph => self.emit_tab(),
361            _ => {}
362        }
363    }
364
365    fn handle_end(&mut self, local: &[u8]) {
366        if self.in_ignored_subtree > 0 {
367            self.in_ignored_subtree = self.in_ignored_subtree.saturating_sub(1);
368            return;
369        }
370
371        match local {
372            b"p" if self.in_paragraph => self.end_paragraph(),
373            b"pPr" if self.in_ppr => {
374                self.ensure_paragraph_started();
375                self.in_ppr = false;
376            }
377            b"rPr" if self.in_rpr => {
378                self.current_run_style = self.pending_run_style.clone();
379                self.in_rpr = false;
380            }
381            b"r" => {
382                self.current_run_style = TextStyle::default();
383                self.pending_run_style = TextStyle::default();
384                self.run_content_emitted = false;
385                self.in_rpr = false;
386            }
387            b"t" if self.in_text => {
388                self.flush_pending_text();
389                self.in_text = false;
390            }
391            b"tbl" => self.queue.push_back(Event::EndTable),
392            b"tr" => self.queue.push_back(Event::EndTableRow),
393            b"tc" => self.queue.push_back(Event::EndTableCell),
394            _ => {}
395        }
396    }
397
398    fn handle_eof(&mut self) {
399        if self.in_text {
400            self.flush_pending_text();
401        }
402        if self.in_paragraph {
403            self.end_paragraph();
404        }
405        self.queue.push_back(Event::EndDocument);
406        self.phase = Phase::Finished;
407    }
408
409    fn handle_general_ref(&mut self, reference: &BytesRef<'_>) -> Result<()> {
410        if self.can_collect_text() {
411            let decoded = reference
412                .decode()
413                .map_err(|err| parse_error(format!("malformed document.xml: {err}")))?;
414            let escaped = format!("&{decoded};");
415            let unescaped = quick_xml::escape::unescape(&escaped)
416                .map_err(|err| parse_error(format!("malformed document.xml: {err}")))?;
417            self.pending_text.push_str(&unescaped);
418        }
419        Ok(())
420    }
421
422    fn handle_start(&mut self, tag: &BytesStart<'_>) {
423        let local_name = tag.local_name();
424        let local = local_name.as_ref();
425        if self.in_ignored_subtree > 0 {
426            self.in_ignored_subtree = self.in_ignored_subtree.saturating_add(1);
427            return;
428        }
429
430        match local {
431            value if is_ignored_container(value) => self.in_ignored_subtree = 1,
432            b"pPr" if self.in_paragraph => {
433                if self.paragraph_started_emitted {
434                    // Out-of-order pPr: StartParagraph already emitted; silently consume
435                    self.in_ignored_subtree = 1;
436                } else {
437                    self.in_ppr = true;
438                    self.pending_paragraph_alignment = None;
439                }
440            }
441            b"jc" if self.in_ppr => {
442                let val = read_val_attribute(tag);
443                self.pending_paragraph_alignment =
444                    val.as_deref().and_then(properties::parse_alignment);
445            }
446            b"rPr" if self.in_ppr => {
447                self.in_ignored_subtree = 1;
448            }
449            b"rPr" if self.in_paragraph && !self.in_ppr && !self.in_rpr => {
450                if self.run_content_emitted {
451                    // Out-of-order rPr: content already emitted in this run; silently consume
452                    self.in_ignored_subtree = 1;
453                } else {
454                    self.in_rpr = true;
455                    self.pending_run_style = TextStyle::default();
456                }
457            }
458            b"b" if self.in_rpr => {
459                self.pending_run_style.bold = parse_on_off_attribute(tag);
460            }
461            b"i" if self.in_rpr => {
462                self.pending_run_style.italic = parse_on_off_attribute(tag);
463            }
464            b"strike" | b"dstrike" if self.in_rpr => {
465                self.pending_run_style.strikethrough = parse_on_off_attribute(tag);
466            }
467            b"u" if self.in_rpr => {
468                let val = read_val_attribute(tag);
469                self.pending_run_style.underline = properties::parse_underline_on(val.as_deref());
470            }
471            b"vertAlign" if self.in_rpr => {
472                let val = read_val_attribute(tag);
473                match properties::parse_vert_align(val.as_deref()) {
474                    properties::VertAlign::Subscript => {
475                        self.pending_run_style.subscript = true;
476                        self.pending_run_style.superscript = false;
477                    }
478                    properties::VertAlign::Superscript => {
479                        self.pending_run_style.superscript = true;
480                        self.pending_run_style.subscript = false;
481                    }
482                    properties::VertAlign::None => {
483                        self.pending_run_style.subscript = false;
484                        self.pending_run_style.superscript = false;
485                    }
486                }
487            }
488            b"p" if !self.in_paragraph => self.start_paragraph(),
489            b"r" if self.in_paragraph => {
490                self.ensure_paragraph_started();
491            }
492            b"t" if self.in_paragraph => {
493                self.ensure_paragraph_started();
494                self.in_text = true;
495                self.pending_text.clear();
496                self.run_content_emitted = true;
497            }
498            b"br" if self.in_paragraph => self.emit_line_break(),
499            b"tab" if self.in_paragraph => self.emit_tab(),
500            b"tbl" => self.queue.push_back(Event::StartTable { id: None }),
501            b"tr" => self.queue.push_back(Event::StartTableRow { id: None }),
502            b"tc" => self.queue.push_back(Event::StartTableCell {
503                colspan: None,
504                id: None,
505                rowspan: None,
506            }),
507            _ => {}
508        }
509    }
510
511    fn handle_text(&mut self, text: &BytesText<'_>) -> Result<()> {
512        if self.can_collect_text() {
513            let decoded = text
514                .decode()
515                .map_err(|err| parse_error(format!("malformed document.xml: {err}")))?;
516            let unescaped = quick_xml::escape::unescape(&decoded)
517                .map_err(|err| parse_error(format!("malformed document.xml: {err}")))?;
518            self.pending_text.push_str(&unescaped);
519        }
520        Ok(())
521    }
522
523    fn read_until_event(&mut self) -> Result<()> {
524        let event = self
525            .xml
526            .read_event_into(&mut self.buf)
527            .map_err(|err| match err {
528                quick_xml::Error::Io(source) => Error::Io {
529                    source: std::io::Error::new(source.kind(), source.to_string()),
530                },
531                other => Error::Parse {
532                    message: format!("malformed document.xml: {other}"),
533                    position: None,
534                },
535            })?
536            .into_owned();
537
538        match event {
539            quick_xml::events::Event::Start(tag) => self.handle_start(&tag),
540            quick_xml::events::Event::End(tag) => self.handle_end(tag.local_name().as_ref()),
541            quick_xml::events::Event::Empty(tag) => self.handle_empty(&tag),
542            quick_xml::events::Event::Text(text) => {
543                self.handle_text(&text)?;
544            }
545            quick_xml::events::Event::GeneralRef(reference) => {
546                self.handle_general_ref(&reference)?;
547            }
548            quick_xml::events::Event::CData(cdata) => self.handle_cdata(cdata)?,
549            quick_xml::events::Event::Eof => self.handle_eof(),
550            quick_xml::events::Event::Comment(_)
551            | quick_xml::events::Event::Decl(_)
552            | quick_xml::events::Event::PI(_)
553            | quick_xml::events::Event::DocType(_) => {}
554        }
555
556        self.buf.clear();
557        Ok(())
558    }
559
560    fn start_paragraph(&mut self) {
561        self.in_paragraph = true;
562        self.in_text = false;
563        self.pending_text.clear();
564        self.paragraph_started_emitted = false;
565        self.pending_paragraph_alignment = None;
566    }
567
568    /// Queues `StartParagraph` once paragraph properties have been parsed.
569    fn ensure_paragraph_started(&mut self) {
570        if self.in_paragraph && !self.paragraph_started_emitted {
571            self.queue.push_back(Event::StartParagraph {
572                alignment: self.pending_paragraph_alignment.clone(),
573                id: None,
574            });
575            self.paragraph_started_emitted = true;
576        }
577    }
578}
579
580impl EventSource for DocxReader {
581    #[inline]
582    fn next_event(&mut self) -> Result<Option<Event>> {
583        loop {
584            if let Some(event) = self.queue.pop_front() {
585                return Ok(Some(event));
586            }
587
588            match self.phase {
589                Phase::NotStarted => {
590                    self.phase = Phase::Running;
591                    self.queue.push_back(Event::StartDocument {
592                        id: None,
593                        language: None,
594                        metadata: None,
595                    });
596                }
597                Phase::Finished => return Ok(None),
598                Phase::Running => self.read_until_event()?,
599            }
600        }
601    }
602}
603
/// Reports whether `local` (an element's local name, without namespace prefix)
/// names a container whose whole subtree is skipped.
///
/// Besides unsupported content (structured document tags, hyperlinks,
/// drawings, tracked insertions/deletions/moves, table property containers),
/// this covers property children whose descendants would otherwise be mistaken
/// for content or current formatting:
///
/// - `tabs`: tab-stop definitions inside `<w:pPr>` contain `<w:tab>` elements
///   that are not tab characters;
/// - `pPrChange` / `rPrChange`: tracked formatting changes wrap a nested
///   `<w:pPr>` / `<w:rPr>` holding the *previous* formatting, which must not
///   overwrite the current alignment or run style.
fn is_ignored_container(local: &[u8]) -> bool {
    matches!(
        local,
        b"sdt"
            | b"hyperlink"
            | b"drawing"
            | b"pict"
            | b"object"
            | b"ins"
            | b"del"
            | b"moveFrom"
            | b"moveTo"
            | b"tblPr"
            | b"trPr"
            | b"tcPr"
            | b"tblGrid"
            | b"tabs"
            | b"pPrChange"
            | b"rPrChange"
    )
}
622
623fn read_val_attribute(tag: &BytesStart<'_>) -> Option<String> {
624    let a = tag.try_get_attribute(b"w:val").ok().flatten()?;
625    core::str::from_utf8(a.value.as_ref())
626        .ok()
627        .map(str::to_owned)
628}
629
630fn parse_on_off_attribute(tag: &BytesStart<'_>) -> bool {
631    let val = read_val_attribute(tag);
632    properties::parse_on_off(val.as_deref())
633}
634
635fn parse_error(message: String) -> Error {
636    Error::Parse {
637        message,
638        position: None,
639    }
640}
641
#[cfg(test)]
#[cfg(not(coverage))]
mod tests {
    use std::io::{Cursor, Write as _};

    use zip::{write::SimpleFileOptions, CompressionMethod, ZipWriter};

    use super::*;

    /// Result type shared by the fallible tests and helpers below.
    type TestResult<T = ()> = core::result::Result<T, Box<dyn core::error::Error>>;

    /// Minimal `_rels/.rels` pointing at `word/document.xml`.
    const SIMPLE_RELS: &str = r#"<?xml version="1.0"?><Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/></Relationships>"#;

    /// The reader must be movable to another thread and own all of its data.
    #[test]
    fn docx_reader_is_send_static() {
        fn assert_send_static<T: Send + 'static>() {}
        assert_send_static::<DocxReader>();
    }

    /// Builds an in-memory DOCX archive holding the given relationships and
    /// document parts, both deflate-compressed.
    fn synth_docx_for_unit_test(rels_xml: &str, document_xml: &str) -> TestResult<Vec<u8>> {
        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
        let parts = [
            ("_rels/.rels", rels_xml),
            ("word/document.xml", document_xml),
        ];
        for (name, body) in parts {
            let options =
                SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
            writer.start_file(name, options)?;
            writer.write_all(body.as_bytes())?;
        }
        Ok(writer.finish()?.into_inner())
    }

    /// Wraps `document_xml` in a synthetic archive and opens a reader on it.
    fn make_reader(document_xml: &str) -> TestResult<DocxReader> {
        let bytes = synth_docx_for_unit_test(SIMPLE_RELS, document_xml)?;
        let reader = DocxReader::from_reader(Cursor::new(bytes))?;
        Ok(reader)
    }

    /// Streaming check: across 1000 paragraphs the event queue never holds
    /// more than three events.
    #[test]
    fn queue_length_never_exceeds_three() -> TestResult {
        let mut doc = String::from(
            r#"<?xml version="1.0"?><w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:body>"#,
        );
        doc.push_str(&"<w:p><w:r><w:t>hello</w:t></w:r></w:p>".repeat(1000));
        doc.push_str("</w:body></w:document>");

        let mut reader = make_reader(&doc)?;
        loop {
            let queued = reader.queue.len();
            if queued > 3 {
                return Err(Box::new(Error::Other {
                    message: format!("queue grew to {}", queued),
                }));
            }
            if reader.next_event()?.is_none() {
                return Ok(());
            }
        }
    }

    /// The reusable XML buffer must be empty after every event handed out.
    #[test]
    fn buf_is_cleared_per_iteration() -> TestResult {
        let doc = r#"<?xml version="1.0"?><w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:body><w:p><w:r><w:t>hello</w:t></w:r></w:p></w:body></w:document>"#;
        let mut reader = make_reader(doc)?;
        while let Some(_event) = reader.next_event()? {
            if !reader.buf.is_empty() {
                return Err(Box::new(Error::Other {
                    message: "buf not cleared after event".to_string(),
                }));
            }
        }
        Ok(())
    }
}