1mod error;
19pub mod models;
20mod parsers;
21pub mod vfs;
22
23use std::fs::File;
24use std::io::{Cursor, Read, Seek, Write};
25use std::path::Path;
26
27pub use error::{OxdocError, Result};
28pub use models::{
29 AuditSignal, DocumentAudit, DocumentInfo, DocumentType, Extraction, OutputWarning,
30 StructuredText, TextBlock, XlsxCsvOptions, XlsxSheet, XlsxSheetVisibility, XlsxValueMode,
31};
32#[doc(hidden)]
33pub use parsers::docx::fuzz_extract_text as fuzz_docx_text;
34#[doc(hidden)]
35pub use parsers::fuzz_parse_relationships as fuzz_relationships;
36#[doc(hidden)]
37pub use parsers::metadata::fuzz_parse_metadata as fuzz_metadata;
38#[doc(hidden)]
39pub use parsers::pptx::fuzz_extract_text as fuzz_pptx_text;
40#[doc(hidden)]
41pub use parsers::xlsx::{fuzz_parse_shared_strings, fuzz_parse_sheet};
42use parsers::{docx, metadata, pptx, xlsx};
43use vfs::OoxmlPackage;
44
45pub fn extract_docx_text(path: impl AsRef<Path>) -> Result<Extraction<String>> {
46 let file = File::open(path)?;
47 extract_docx_text_from_reader(file)
48}
49
50pub fn extract_docx_text_from_reader<R: Read + Seek>(reader: R) -> Result<Extraction<String>> {
51 let mut package = OoxmlPackage::new(reader)?;
52 docx::extract_text(&mut package)
53}
54
55pub fn extract_docx_structured_text(path: impl AsRef<Path>) -> Result<Extraction<StructuredText>> {
56 let file = File::open(path)?;
57 extract_docx_structured_text_from_reader(file)
58}
59
60pub fn extract_docx_structured_text_from_reader<R: Read + Seek>(
61 reader: R,
62) -> Result<Extraction<StructuredText>> {
63 let mut package = OoxmlPackage::new(reader)?;
64 docx::extract_structured_text(&mut package)
65}
66
67pub fn extract_pptx_text(path: impl AsRef<Path>) -> Result<Extraction<String>> {
68 let file = File::open(path)?;
69 extract_pptx_text_from_reader(file)
70}
71
72pub fn extract_pptx_text_from_reader<R: Read + Seek>(reader: R) -> Result<Extraction<String>> {
73 let mut package = OoxmlPackage::new(reader)?;
74 pptx::extract_text(&mut package)
75}
76
77pub fn extract_pptx_structured_text(path: impl AsRef<Path>) -> Result<Extraction<StructuredText>> {
78 let file = File::open(path)?;
79 extract_pptx_structured_text_from_reader(file)
80}
81
82pub fn extract_pptx_structured_text_from_reader<R: Read + Seek>(
83 reader: R,
84) -> Result<Extraction<StructuredText>> {
85 let mut package = OoxmlPackage::new(reader)?;
86 pptx::extract_structured_text(&mut package)
87}
88
89pub fn extract_xlsx_csv<W: Write>(
90 path: impl AsRef<Path>,
91 options: XlsxCsvOptions<'_>,
92 writer: W,
93) -> Result<Extraction<()>> {
94 let file = File::open(path)?;
95 extract_xlsx_csv_from_reader(file, options, writer)
96}
97
98pub fn extract_xlsx_csv_with_value_mode<W: Write>(
99 path: impl AsRef<Path>,
100 options: XlsxCsvOptions<'_>,
101 value_mode: XlsxValueMode,
102 writer: W,
103) -> Result<Extraction<()>> {
104 let file = File::open(path)?;
105 extract_xlsx_csv_from_reader_with_value_mode(file, options, value_mode, writer)
106}
107
108pub fn extract_xlsx_csv_from_reader<R: Read + Seek, W: Write>(
109 reader: R,
110 options: XlsxCsvOptions<'_>,
111 writer: W,
112) -> Result<Extraction<()>> {
113 extract_xlsx_csv_from_reader_with_value_mode(reader, options, XlsxValueMode::Raw, writer)
114}
115
116pub fn extract_xlsx_csv_from_reader_with_value_mode<R: Read + Seek, W: Write>(
117 reader: R,
118 options: XlsxCsvOptions<'_>,
119 value_mode: XlsxValueMode,
120 writer: W,
121) -> Result<Extraction<()>> {
122 let mut package = OoxmlPackage::new(reader)?;
123 xlsx::write_csv(&mut package, options, value_mode, writer)
124}
125
126pub fn list_xlsx_sheets(path: impl AsRef<Path>) -> Result<Extraction<Vec<XlsxSheet>>> {
127 let file = File::open(path)?;
128 list_xlsx_sheets_from_reader(file)
129}
130
131pub fn list_xlsx_sheets_from_reader<R: Read + Seek>(
132 reader: R,
133) -> Result<Extraction<Vec<XlsxSheet>>> {
134 list_xlsx_sheets_from_reader_with_hidden(reader, false)
135}
136
137pub fn list_xlsx_sheets_with_hidden(
138 path: impl AsRef<Path>,
139 include_hidden: bool,
140) -> Result<Extraction<Vec<XlsxSheet>>> {
141 let file = File::open(path)?;
142 list_xlsx_sheets_from_reader_with_hidden(file, include_hidden)
143}
144
145pub fn list_xlsx_sheets_from_reader_with_hidden<R: Read + Seek>(
146 reader: R,
147 include_hidden: bool,
148) -> Result<Extraction<Vec<XlsxSheet>>> {
149 let mut package = OoxmlPackage::new(reader)?;
150 xlsx::list_sheets(&mut package, include_hidden)
151}
152
153pub fn detect_document_type(path: impl AsRef<Path>) -> Result<DocumentType> {
154 let file = File::open(path)?;
155 detect_document_type_from_reader(file)
156}
157
158pub fn detect_document_type_from_reader<R: Read>(mut reader: R) -> Result<DocumentType> {
159 let mut bytes = Vec::new();
160 reader.read_to_end(&mut bytes)?;
161 let mut package = OoxmlPackage::new(Cursor::new(bytes))?;
162 parsers::detect_document_type(&mut package)
163}
164
165pub fn read_info(path: impl AsRef<Path>) -> Result<Extraction<DocumentInfo>> {
166 let path = path.as_ref();
167 let file_name = path
168 .file_name()
169 .and_then(|name| name.to_str())
170 .unwrap_or_default()
171 .to_owned();
172
173 let file = File::open(path)?;
174 read_info_from_reader(file, file_name)
175}
176
177pub fn read_info_from_reader<R: Read + Seek>(
178 reader: R,
179 file_name: impl Into<String>,
180) -> Result<Extraction<DocumentInfo>> {
181 let mut package = OoxmlPackage::new(reader)?;
182 metadata::read_info(&mut package, file_name.into())
183}
184
185pub fn read_audit(path: impl AsRef<Path>) -> Result<Extraction<DocumentAudit>> {
186 let path = path.as_ref();
187 let file_name = path
188 .file_name()
189 .and_then(|name| name.to_str())
190 .unwrap_or_default()
191 .to_owned();
192
193 let file = File::open(path)?;
194 read_audit_from_reader(file, file_name)
195}
196
197pub fn read_audit_from_reader<R: Read + Seek>(
198 reader: R,
199 file_name: impl Into<String>,
200) -> Result<Extraction<DocumentAudit>> {
201 let mut package = OoxmlPackage::new(reader)?;
202 parsers::audit::read_audit(&mut package, file_name.into())
203}