Skip to main content

oxdoc_core/
models.rs

1use std::collections::BTreeMap;
2
3use serde::Serialize;
4
/// A warning attached to a path, carrying a human-readable message.
///
/// No code or category is stored on the value itself: `OutputWarning::code` and
/// `OutputWarning::category` derive both from the text of `message`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OutputWarning {
    /// Path the warning refers to (the tests in this file pass package part paths such as
    /// `word/document.xml`).
    pub path: String,
    /// Warning text; this is also the input used when classifying the warning.
    pub message: String,
}
10
/// Coarse grouping of warnings, as returned by `OutputWarning::category`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WarningCategory {
    /// Reported for malformed-XML warnings.
    Parser,
    /// Reported for ignored-sheet and shared-string-index warnings.
    Data,
    /// Reported for warnings whose message matches none of the built-in patterns.
    Custom,
}

impl WarningCategory {
    /// Returns the lowercase label of this category.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Parser => "parser",
            Self::Data => "data",
            Self::Custom => "custom",
        }
    }
}
27
/// Identifier of a warning, as returned by `OutputWarning::code`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WarningCode {
    /// Processing stopped after malformed XML (`W001`).
    MalformedXml,
    /// A workbook sheet without name or relationship id was ignored (`W002`).
    IgnoredWorkbookSheet,
    /// A shared string index was out of bounds (`W003`).
    SharedStringIndexOutOfBounds,
    /// A shared string index value was invalid (`W004`).
    InvalidSharedStringIndex,
    /// Any warning that matches none of the built-in messages (`W999`).
    Custom,
}

impl WarningCode {
    /// Returns the `W`-prefixed textual identifier of this code.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::MalformedXml => "W001",
            Self::IgnoredWorkbookSheet => "W002",
            Self::SharedStringIndexOutOfBounds => "W003",
            Self::InvalidSharedStringIndex => "W004",
            Self::Custom => "W999",
        }
    }
}
48
49impl OutputWarning {
50    pub fn new(path: impl Into<String>, message: impl Into<String>) -> Self {
51        Self {
52            path: path.into(),
53            message: message.into(),
54        }
55    }
56
57    pub fn malformed_xml(path: impl Into<String>, source: impl std::fmt::Display) -> Self {
58        Self::new(path, format!("stopped after malformed XML: {source}"))
59    }
60
61    pub fn ignored_workbook_sheet(path: impl Into<String>) -> Self {
62        Self::new(
63            path,
64            "ignored workbook sheet without name or relationship id",
65        )
66    }
67
68    pub fn shared_string_index_out_of_bounds(path: impl Into<String>, index: usize) -> Self {
69        Self::new(
70            path,
71            format!("shared string index {index} is out of bounds"),
72        )
73    }
74
75    pub fn invalid_shared_string_index(path: impl Into<String>, value: impl Into<String>) -> Self {
76        Self::new(
77            path,
78            format!("invalid shared string index '{}'", value.into()),
79        )
80    }
81
82    pub fn category(&self) -> WarningCategory {
83        match self.code() {
84            WarningCode::MalformedXml => WarningCategory::Parser,
85            WarningCode::IgnoredWorkbookSheet
86            | WarningCode::SharedStringIndexOutOfBounds
87            | WarningCode::InvalidSharedStringIndex => WarningCategory::Data,
88            WarningCode::Custom => WarningCategory::Custom,
89        }
90    }
91
92    pub fn code(&self) -> WarningCode {
93        match self.message.as_str() {
94            message if message.starts_with("stopped after malformed XML: ") => {
95                WarningCode::MalformedXml
96            }
97            "ignored workbook sheet without name or relationship id" => {
98                WarningCode::IgnoredWorkbookSheet
99            }
100            message
101                if message.starts_with("shared string index ")
102                    && message.ends_with(" is out of bounds") =>
103            {
104                WarningCode::SharedStringIndexOutOfBounds
105            }
106            message if message.starts_with("invalid shared string index '") => {
107                WarningCode::InvalidSharedStringIndex
108            }
109            _ => WarningCode::Custom,
110        }
111    }
112}
113
/// Document kinds distinguished by this module.
///
/// NOTE(review): the variant names suggest Word, PowerPoint and Excel packages plus a
/// catch-all; confirm against the code that produces these values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DocumentType {
    Docx,
    Pptx,
    Xlsx,
    Unknown,
}
121
122#[derive(Debug, Clone, PartialEq, Eq)]
123pub struct XlsxSheet {
124    pub index: usize,
125    pub name: String,
126    pub visibility: XlsxSheetVisibility,
127}
128
/// Visibility state of a workbook sheet.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum XlsxSheetVisibility {
    Visible,
    Hidden,
    VeryHidden,
}

impl XlsxSheetVisibility {
    /// Returns the textual form of this state: `visible`, `hidden` or `veryHidden`.
    pub fn as_str(self) -> &'static str {
        match self {
            XlsxSheetVisibility::Visible => "visible",
            XlsxSheetVisibility::Hidden => "hidden",
            XlsxSheetVisibility::VeryHidden => "veryHidden",
        }
    }
}
145
146#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
147pub struct StructuredText {
148    pub document_type: String,
149    pub blocks: Vec<TextBlock>,
150}
151
152#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
153pub struct TextBlock {
154    pub part_type: String,
155    pub part_path: String,
156    pub ordinal: usize,
157    pub text: String,
158}
159
160impl TextBlock {
161    pub fn new(
162        part_type: impl Into<String>,
163        part_path: impl Into<String>,
164        ordinal: usize,
165        text: impl Into<String>,
166    ) -> Self {
167        Self {
168            part_type: part_type.into(),
169            part_path: part_path.into(),
170            ordinal,
171            text: text.into(),
172        }
173    }
174}
175
/// Selects between two ways of reporting values; `Raw` is the default.
///
/// NOTE(review): what "raw" and "formatted" mean for a cell is defined by the consumer of
/// this enum, not in this file.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum XlsxValueMode {
    #[default]
    Raw,
    Formatted,
}
182
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct Extraction<T> {
185    pub value: T,
186    pub warnings: Vec<OutputWarning>,
187}
188
189impl<T> Extraction<T> {
190    pub fn new(value: T) -> Self {
191        Self {
192            value,
193            warnings: Vec::new(),
194        }
195    }
196
197    pub fn with_warnings(value: T, warnings: Vec<OutputWarning>) -> Self {
198        Self { value, warnings }
199    }
200
201    pub fn map<U>(self, f: impl FnOnce(T) -> U) -> Extraction<U> {
202        Extraction {
203            value: f(self.value),
204            warnings: self.warnings,
205        }
206    }
207}
208
/// Options for XLSX CSV output; the sheet name is borrowed for `'a`.
///
/// NOTE(review): how `sheet_name` and `sheet_index` interact when both are set is decided
/// by the consumer of these options and is not visible in this file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct XlsxCsvOptions<'a> {
    /// Optional sheet name.
    pub sheet_name: Option<&'a str>,
    /// Optional sheet index.
    pub sheet_index: Option<usize>,
    /// Hidden-sheet flag; `false` by default.
    pub include_hidden: bool,
    /// Delimiter as a single byte; `b','` by default.
    pub delimiter: u8,
}

impl Default for XlsxCsvOptions<'_> {
    /// No sheet selection, hidden sheets excluded, comma as delimiter.
    fn default() -> Self {
        const COMMA: u8 = b',';

        XlsxCsvOptions {
            sheet_name: None,
            sheet_index: None,
            include_hidden: false,
            delimiter: COMMA,
        }
    }
}
227
228#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
229pub struct DocumentInfo {
230    pub file: String,
231    #[serde(skip_serializing_if = "Option::is_none")]
232    pub author: Option<String>,
233    #[serde(skip_serializing_if = "Option::is_none")]
234    pub last_modified_by: Option<String>,
235    #[serde(skip_serializing_if = "Option::is_none")]
236    pub created_at: Option<String>,
237    #[serde(skip_serializing_if = "Option::is_none")]
238    pub modified_at: Option<String>,
239    #[serde(skip_serializing_if = "Option::is_none")]
240    pub application: Option<String>,
241    #[serde(skip_serializing_if = "Option::is_none")]
242    pub company: Option<String>,
243    #[serde(skip_serializing_if = "Option::is_none")]
244    pub custom_properties: Option<BTreeMap<String, String>>,
245    pub has_macros: bool,
246    #[serde(skip_serializing_if = "Option::is_none")]
247    pub word_count: Option<u64>,
248    #[serde(skip_serializing_if = "Option::is_none")]
249    pub page_count: Option<u64>,
250    #[serde(skip_serializing_if = "Option::is_none")]
251    pub slide_count: Option<u64>,
252    #[serde(skip_serializing_if = "Option::is_none")]
253    pub worksheet_count: Option<u64>,
254    #[serde(skip_serializing_if = "Option::is_none")]
255    pub revision: Option<String>,
256}
257
258#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
259pub struct DocumentAudit {
260    pub file: String,
261    pub document_type: String,
262    pub metadata: DocumentInfo,
263    pub signals: Vec<AuditSignal>,
264}
265
266#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
267pub struct AuditSignal {
268    pub kind: String,
269    pub severity: String,
270    pub path: String,
271    pub message: String,
272}
273
274impl AuditSignal {
275    pub fn new(
276        kind: impl Into<String>,
277        severity: impl Into<String>,
278        path: impl Into<String>,
279        message: impl Into<String>,
280    ) -> Self {
281        Self {
282            kind: kind.into(),
283            severity: severity.into(),
284            path: path.into(),
285            message: message.into(),
286        }
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::{Extraction, OutputWarning, WarningCategory, WarningCode, XlsxCsvOptions};

    /// `map` transforms the value and hands the warnings through untouched; `new` starts
    /// with no warnings.
    #[test]
    fn builds_and_maps_extractions() {
        let warning = OutputWarning::malformed_xml("word/document.xml", "parse error");
        let expected_warnings = vec![warning.clone()];
        let original = Extraction::with_warnings(String::from("hello"), vec![warning]);

        let mapped = original.map(|text| text.len());

        assert_eq!(mapped.value, 5);
        assert_eq!(mapped.warnings, expected_warnings);
        assert!(Extraction::new(()).warnings.is_empty());
    }

    /// Each built-in constructor yields a warning that classifies back to its own code.
    #[test]
    fn classifies_warning_codes_and_categories() {
        let malformed = OutputWarning::malformed_xml("word/document.xml", "parse error");
        assert_eq!(malformed.category(), WarningCategory::Parser);
        assert_eq!(malformed.code(), WarningCode::MalformedXml);

        let sheet = OutputWarning::ignored_workbook_sheet("xl/workbook.xml");
        assert_eq!(sheet.category(), WarningCategory::Data);
        assert_eq!(sheet.code(), WarningCode::IgnoredWorkbookSheet);

        let shared = OutputWarning::shared_string_index_out_of_bounds("xl/sheet.xml", 7);
        assert_eq!(shared.code(), WarningCode::SharedStringIndexOutOfBounds);

        let invalid = OutputWarning::invalid_shared_string_index("xl/sheet.xml", "abc");
        assert_eq!(invalid.code(), WarningCode::InvalidSharedStringIndex);

        // Textual forms of categories and codes.
        let category_labels = [
            (WarningCategory::Parser, "parser"),
            (WarningCategory::Data, "data"),
        ];
        for (category, label) in category_labels.iter() {
            assert_eq!(category.as_str(), *label);
        }

        let code_labels = [
            (WarningCode::MalformedXml, "W001"),
            (WarningCode::IgnoredWorkbookSheet, "W002"),
            (WarningCode::SharedStringIndexOutOfBounds, "W003"),
            (WarningCode::InvalidSharedStringIndex, "W004"),
        ];
        for (code, label) in code_labels.iter() {
            assert_eq!(code.as_str(), *label);
        }
    }

    /// A free-form message falls back to the custom code and category.
    #[test]
    fn classifies_unknown_warnings_as_custom() {
        let warning = OutputWarning::new("custom.xml", "partial extraction");
        let category = warning.category();
        let code = warning.code();

        assert_eq!(category, WarningCategory::Custom);
        assert_eq!(code, WarningCode::Custom);
        assert_eq!(category.as_str(), "custom");
        assert_eq!(code.as_str(), "W999");
    }

    /// The default options select no sheet, skip hidden sheets and use a comma.
    #[test]
    fn defaults_xlsx_csv_options() {
        let XlsxCsvOptions {
            sheet_name,
            sheet_index,
            include_hidden,
            delimiter,
        } = XlsxCsvOptions::default();

        assert_eq!(sheet_name, None);
        assert_eq!(sheet_index, None);
        assert!(!include_hidden);
        assert_eq!(delimiter, b',');
    }
}