docspec_docx_reader/
asset_provider.rs1use std::borrow::Cow;
4use std::io::{self, Read, Seek, Write};
5use std::path::Path;
6use std::sync::Mutex;
7
8use docspec_core::{AssetProvider, Error, Result};
9use zip::result::ZipError;
10use zip::ZipArchive;
11
12use crate::content_types::{self, ContentTypes};
13
/// Object-safe combination of [`Read`], [`Seek`] and [`Send`].
///
/// A trait object may name only one non-auto trait, so this helper trait
/// bundles the bounds into a single name that can be used as `dyn ReadSeek`.
trait ReadSeek: Read + Seek + Send {}

/// Blanket implementation: every seekable, sendable reader is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek + Send {}
17
18pub struct DocxAssetProvider {
23 archive: Mutex<ZipArchive<Box<dyn ReadSeek + 'static>>>,
24 content_types: ContentTypes,
25}
26
27impl DocxAssetProvider {
28 #[inline]
38 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
39 let file = std::fs::File::open(path.as_ref()).map_err(Error::from)?;
40 Self::from_reader(file)
41 }
42
43 #[inline]
53 pub fn from_reader<R: Read + Seek + Send + 'static>(reader: R) -> Result<Self> {
54 let boxed: Box<dyn ReadSeek + 'static> = Box::new(reader);
55 let mut archive = ZipArchive::new(boxed).map_err(|err| match err {
56 ZipError::InvalidArchive(_) | ZipError::UnsupportedArchive(_) => Error::Parse {
57 message: "not a valid ZIP archive".to_string(),
58 position: None,
59 },
60 ZipError::Io(source) => Error::Io { source },
61 ZipError::FileNotFound
62 | ZipError::InvalidPassword
63 | ZipError::CompressionMethodNotSupported(_)
64 | _ => Error::Parse {
65 message: format!("not a valid ZIP archive: {err}"),
66 position: None,
67 },
68 })?;
69
70 let ct_bytes = match archive.by_name("[Content_Types].xml") {
71 Ok(mut entry) => {
72 let mut bytes: Vec<u8> = Vec::new();
73 io::copy(&mut entry, &mut bytes).map_err(Error::from)?;
74 bytes
75 }
76 Err(_) => Vec::new(),
77 };
78
79 let content_types = content_types::parse(&ct_bytes)?;
80
81 Ok(Self {
82 archive: Mutex::new(archive),
83 content_types,
84 })
85 }
86}
87
88impl AssetProvider for DocxAssetProvider {
89 #[inline]
95 fn content_type(&self, asset_id: &str) -> Option<Cow<'_, str>> {
96 asset_id
97 .strip_prefix("zip://")
98 .and_then(|p| self.content_types.lookup(p))
99 .map(Cow::Borrowed)
100 }
101
102 #[inline]
113 fn stream_to(&self, asset_id: &str, writer: &mut dyn Write) -> Option<io::Result<u64>> {
114 let path = asset_id.strip_prefix("zip://")?;
115 let mut archive = self.archive.lock().ok()?;
116 let mut entry = archive.by_name(path).ok()?;
117 Some(io::copy(&mut entry, writer))
118 }
119}
120
#[cfg(test)]
#[cfg(not(coverage))]
mod tests {
    #![allow(
        clippy::unwrap_used,
        clippy::expect_used,
        clippy::separated_literal_suffix,
        clippy::unseparated_literal_suffix
    )]
    use std::borrow::Cow;
    use std::io::{Cursor, Write as _};
    use zip::write::SimpleFileOptions;
    use zip::CompressionMethod;

    use super::DocxAssetProvider;
    use docspec_core::AssetProvider as _;

    /// Asset id of the PNG entry used by the fixtures.
    const PNG_ASSET: &str = "zip://word/media/image1.png";

    /// Payload stored in the PNG entry (first four bytes of the PNG signature).
    const PNG_BYTES: &[u8] = &[0x89, 0x50, 0x4E, 0x47];

    /// Content-types part that registers `png` as `image/png`.
    const CONTENT_TYPES_PNG_XML: &[u8] = br#"<?xml version="1.0"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
 <Default Extension="png" ContentType="image/png"/>
</Types>"#;

    /// Builds an in-memory ZIP archive holding `entries` uncompressed.
    fn synth_zip(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let stored = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
        let mut zip_writer = zip::ZipWriter::new(Cursor::new(Vec::new()));
        for &(entry_name, payload) in entries {
            zip_writer.start_file(entry_name, stored).unwrap();
            zip_writer.write_all(payload).unwrap();
        }
        zip_writer.finish().unwrap().into_inner()
    }

    /// Builds a minimal package: a content-types part plus one PNG entry.
    fn synth_png_docx() -> Vec<u8> {
        synth_zip(&[
            ("[Content_Types].xml", CONTENT_TYPES_PNG_XML),
            ("word/media/image1.png", PNG_BYTES),
        ])
    }

    /// Opens a provider over in-memory archive bytes.
    fn open(zip_bytes: Vec<u8>) -> DocxAssetProvider {
        DocxAssetProvider::from_reader(Cursor::new(zip_bytes)).expect("should open")
    }

    #[test]
    fn is_send_sync() {
        fn assert_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        assert_send_sync::<DocxAssetProvider>();
    }

    #[test]
    fn stream_to_exact_bytes() {
        let provider = open(synth_png_docx());
        let mut sink = Vec::new();
        let copied = provider
            .stream_to(PNG_ASSET, &mut sink)
            .expect("should return Some")
            .expect("should be Ok");
        assert_eq!(copied, 4u64);
        assert_eq!(sink, PNG_BYTES);
    }

    #[test]
    fn content_type_from_default() {
        let provider = open(synth_png_docx());
        assert_eq!(
            provider.content_type(PNG_ASSET),
            Some(Cow::Borrowed("image/png"))
        );
    }

    #[test]
    fn non_zip_scheme_returns_none() {
        let provider = open(synth_png_docx());
        assert_eq!(provider.content_type("rId99"), None);
        let mut sink = Vec::new();
        let outcome = provider.stream_to("rId99", &mut sink);
        assert!(outcome.is_none());
    }

    #[test]
    fn missing_asset_stream_returns_none() {
        let provider = open(synth_png_docx());
        let mut sink = Vec::new();
        let outcome = provider.stream_to("zip://word/media/noexist.png", &mut sink);
        assert!(outcome.is_none());
    }

    #[test]
    fn content_type_returns_none_for_unregistered_extension() {
        let provider = open(synth_png_docx());
        assert_eq!(provider.content_type("zip://word/document.xml"), None);
    }

    #[test]
    fn from_path_opens_file() {
        let dir = tempfile::tempdir().expect("tempdir");
        let docx_path = dir.path().join("test.docx");
        std::fs::write(&docx_path, synth_png_docx()).expect("write file");
        let provider = DocxAssetProvider::from_path(&docx_path).expect("should open");
        assert_eq!(
            provider.content_type(PNG_ASSET),
            Some(Cow::Borrowed("image/png"))
        );
    }

    #[test]
    fn missing_content_types_yields_empty_lookup() {
        let provider = open(synth_zip(&[("word/media/image1.png", PNG_BYTES)]));
        assert_eq!(provider.content_type(PNG_ASSET), None);
        let mut sink = Vec::new();
        let copied = provider
            .stream_to(PNG_ASSET, &mut sink)
            .expect("should return Some")
            .expect("should be Ok");
        assert_eq!(copied, 4u64);
        assert_eq!(sink, PNG_BYTES);
    }
}