Skip to main content

oxdoc_core/
models.rs

1use std::collections::BTreeMap;
2
3use serde::Serialize;
4
/// A non-fatal problem recorded while producing output.
///
/// The warning is plain data: a location and a description. Its code and
/// category are not stored; [`OutputWarning::code`] and
/// [`OutputWarning::category`] derive them from the text of `message`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OutputWarning {
    /// Location the warning refers to (the tests in this file use values
    /// such as `word/document.xml`).
    pub path: String,
    /// Human-readable description of what went wrong.
    pub message: String,
}
10
/// Coarse grouping of warnings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WarningCategory {
    /// Labelled `"parser"`.
    Parser,
    /// Labelled `"data"`.
    Data,
    /// Labelled `"custom"`.
    Custom,
}

impl WarningCategory {
    /// Returns the fixed lowercase label for this category.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Parser => "parser",
            Self::Data => "data",
            Self::Custom => "custom",
        }
    }
}
27
/// Identifier for a specific kind of warning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WarningCode {
    /// Reported as `W001`.
    MalformedXml,
    /// Reported as `W002`.
    IgnoredWorkbookSheet,
    /// Reported as `W003`.
    SharedStringIndexOutOfBounds,
    /// Reported as `W004`.
    InvalidSharedStringIndex,
    /// Reported as `W999`.
    Custom,
}

impl WarningCode {
    /// Returns the fixed `W…` identifier string for this code.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::MalformedXml => "W001",
            Self::IgnoredWorkbookSheet => "W002",
            Self::SharedStringIndexOutOfBounds => "W003",
            Self::InvalidSharedStringIndex => "W004",
            Self::Custom => "W999",
        }
    }
}
48
49impl OutputWarning {
50    pub fn new(path: impl Into<String>, message: impl Into<String>) -> Self {
51        Self {
52            path: path.into(),
53            message: message.into(),
54        }
55    }
56
57    pub fn malformed_xml(path: impl Into<String>, source: impl std::fmt::Display) -> Self {
58        Self::new(path, format!("stopped after malformed XML: {source}"))
59    }
60
61    pub fn ignored_workbook_sheet(path: impl Into<String>) -> Self {
62        Self::new(
63            path,
64            "ignored workbook sheet without name or relationship id",
65        )
66    }
67
68    pub fn shared_string_index_out_of_bounds(path: impl Into<String>, index: usize) -> Self {
69        Self::new(
70            path,
71            format!("shared string index {index} is out of bounds"),
72        )
73    }
74
75    pub fn invalid_shared_string_index(path: impl Into<String>, value: impl Into<String>) -> Self {
76        Self::new(
77            path,
78            format!("invalid shared string index '{}'", value.into()),
79        )
80    }
81
82    pub fn category(&self) -> WarningCategory {
83        match self.code() {
84            WarningCode::MalformedXml => WarningCategory::Parser,
85            WarningCode::IgnoredWorkbookSheet
86            | WarningCode::SharedStringIndexOutOfBounds
87            | WarningCode::InvalidSharedStringIndex => WarningCategory::Data,
88            WarningCode::Custom => WarningCategory::Custom,
89        }
90    }
91
92    pub fn code(&self) -> WarningCode {
93        match self.message.as_str() {
94            message if message.starts_with("stopped after malformed XML: ") => {
95                WarningCode::MalformedXml
96            }
97            "ignored workbook sheet without name or relationship id" => {
98                WarningCode::IgnoredWorkbookSheet
99            }
100            message
101                if message.starts_with("shared string index ")
102                    && message.ends_with(" is out of bounds") =>
103            {
104                WarningCode::SharedStringIndexOutOfBounds
105            }
106            message if message.starts_with("invalid shared string index '") => {
107                WarningCode::InvalidSharedStringIndex
108            }
109            _ => WarningCode::Custom,
110        }
111    }
112}
113
/// Kind of document being handled.
///
/// NOTE(review): how a type is detected is not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocumentType {
    /// A DOCX document.
    Docx,
    /// A PPTX document.
    Pptx,
    /// An XLSX document.
    Xlsx,
    /// None of the recognised kinds.
    Unknown,
}
121
/// A worksheet entry: its position and its name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XlsxSheet {
    /// Position of the sheet.
    ///
    /// NOTE(review): whether this is zero- or one-based is not established
    /// in this file — confirm against the code that fills it in.
    pub index: usize,
    /// Name of the sheet.
    pub name: String,
}
127
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct Extraction<T> {
130    pub value: T,
131    pub warnings: Vec<OutputWarning>,
132}
133
134impl<T> Extraction<T> {
135    pub fn new(value: T) -> Self {
136        Self {
137            value,
138            warnings: Vec::new(),
139        }
140    }
141
142    pub fn with_warnings(value: T, warnings: Vec<OutputWarning>) -> Self {
143        Self { value, warnings }
144    }
145
146    pub fn map<U>(self, f: impl FnOnce(T) -> U) -> Extraction<U> {
147        Extraction {
148            value: f(self.value),
149            warnings: self.warnings,
150        }
151    }
152}
153
/// Options for XLSX-to-CSV output.
///
/// NOTE(review): how `sheet_name` and `sheet_index` interact when both are
/// set is decided by the consumer of these options, not in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct XlsxCsvOptions<'a> {
    /// Sheet to select by name, if any.
    pub sheet_name: Option<&'a str>,
    /// Sheet to select by index, if any.
    pub sheet_index: Option<usize>,
    /// Field delimiter as a single byte.
    pub delimiter: u8,
}

impl Default for XlsxCsvOptions<'_> {
    /// No sheet selected by name or index; comma as the delimiter.
    fn default() -> Self {
        Self {
            delimiter: b',',
            sheet_name: None,
            sheet_index: None,
        }
    }
}
170
171#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
172pub struct DocumentInfo {
173    pub file: String,
174    #[serde(skip_serializing_if = "Option::is_none")]
175    pub author: Option<String>,
176    #[serde(skip_serializing_if = "Option::is_none")]
177    pub last_modified_by: Option<String>,
178    #[serde(skip_serializing_if = "Option::is_none")]
179    pub created_at: Option<String>,
180    #[serde(skip_serializing_if = "Option::is_none")]
181    pub modified_at: Option<String>,
182    #[serde(skip_serializing_if = "Option::is_none")]
183    pub application: Option<String>,
184    #[serde(skip_serializing_if = "Option::is_none")]
185    pub company: Option<String>,
186    #[serde(skip_serializing_if = "Option::is_none")]
187    pub custom_properties: Option<BTreeMap<String, String>>,
188    pub has_macros: bool,
189    #[serde(skip_serializing_if = "Option::is_none")]
190    pub word_count: Option<u64>,
191    #[serde(skip_serializing_if = "Option::is_none")]
192    pub page_count: Option<u64>,
193    #[serde(skip_serializing_if = "Option::is_none")]
194    pub slide_count: Option<u64>,
195    #[serde(skip_serializing_if = "Option::is_none")]
196    pub worksheet_count: Option<u64>,
197    #[serde(skip_serializing_if = "Option::is_none")]
198    pub revision: Option<String>,
199}
200
#[cfg(test)]
mod tests {
    use super::{Extraction, OutputWarning, WarningCategory, WarningCode, XlsxCsvOptions};

    /// `map` transforms the value and carries the warnings through untouched;
    /// `new` starts with no warnings.
    #[test]
    fn builds_and_maps_extractions() {
        let expected_warning = OutputWarning::malformed_xml("word/document.xml", "parse error");
        let original =
            Extraction::with_warnings("hello".to_owned(), vec![expected_warning.clone()]);

        let lengths = original.map(|text| text.len());

        assert_eq!(lengths.value, 5);
        assert_eq!(lengths.warnings, vec![expected_warning]);
        assert!(Extraction::new(()).warnings.is_empty());
    }

    /// Each built-in constructor is classified with its own code, and the
    /// string forms of codes and categories are stable.
    #[test]
    fn classifies_warning_codes_and_categories() {
        let malformed_xml = OutputWarning::malformed_xml("word/document.xml", "parse error");
        let ignored_sheet = OutputWarning::ignored_workbook_sheet("xl/workbook.xml");
        let out_of_bounds = OutputWarning::shared_string_index_out_of_bounds("xl/sheet.xml", 7);
        let invalid_index = OutputWarning::invalid_shared_string_index("xl/sheet.xml", "abc");

        // Classification of constructed warnings.
        assert_eq!(malformed_xml.category(), WarningCategory::Parser);
        assert_eq!(malformed_xml.code(), WarningCode::MalformedXml);
        assert_eq!(ignored_sheet.category(), WarningCategory::Data);
        assert_eq!(ignored_sheet.code(), WarningCode::IgnoredWorkbookSheet);
        assert_eq!(
            out_of_bounds.code(),
            WarningCode::SharedStringIndexOutOfBounds
        );
        assert_eq!(invalid_index.code(), WarningCode::InvalidSharedStringIndex);

        // String forms.
        assert_eq!(WarningCategory::Parser.as_str(), "parser");
        assert_eq!(WarningCategory::Data.as_str(), "data");
        assert_eq!(WarningCode::MalformedXml.as_str(), "W001");
        assert_eq!(WarningCode::IgnoredWorkbookSheet.as_str(), "W002");
        assert_eq!(WarningCode::SharedStringIndexOutOfBounds.as_str(), "W003");
        assert_eq!(WarningCode::InvalidSharedStringIndex.as_str(), "W004");
    }

    /// A message that matches no built-in shape falls back to the custom
    /// code and category.
    #[test]
    fn classifies_unknown_warnings_as_custom() {
        let unrecognised = OutputWarning::new("custom.xml", "partial extraction");

        assert_eq!(unrecognised.category(), WarningCategory::Custom);
        assert_eq!(unrecognised.code(), WarningCode::Custom);
        assert_eq!(unrecognised.category().as_str(), "custom");
        assert_eq!(unrecognised.code().as_str(), "W999");
    }

    /// Defaults: no sheet selection and a comma delimiter.
    #[test]
    fn defaults_xlsx_csv_options() {
        let defaults = XlsxCsvOptions::default();

        assert_eq!(defaults.sheet_name, None);
        assert_eq!(defaults.sheet_index, None);
        assert_eq!(defaults.delimiter, b',');
    }
}