// docx_lite/types.rs

1#[derive(Debug, Clone, Default)]
2pub struct Document {
3    pub paragraphs: Vec<Paragraph>,
4    pub tables: Vec<Table>,
5    pub lists: Vec<ListItem>,
6    pub headers: Vec<HeaderFooter>,
7    pub footers: Vec<HeaderFooter>,
8    pub footnotes: Vec<Note>,
9    pub endnotes: Vec<Note>,
10}
11
12#[derive(Debug, Clone, Default)]
13pub struct Paragraph {
14    pub runs: Vec<Run>,
15    pub style: Option<String>,
16    pub numbering_id: Option<i64>,
17    pub numbering_level: Option<i64>,
18}
19
/// A contiguous span of text sharing the same character formatting flags.
#[derive(Debug, Clone, Default)]
pub struct Run {
    /// The text carried by this run.
    pub text: String,
    /// `true` when the run is marked bold.
    pub bold: bool,
    /// `true` when the run is marked italic.
    pub italic: bool,
    /// `true` when the run is marked underlined.
    pub underline: bool,
}
27
28#[derive(Debug, Clone, Default)]
29pub struct Table {
30    pub rows: Vec<TableRow>,
31}
32
33#[derive(Debug, Clone, Default)]
34pub struct TableRow {
35    pub cells: Vec<TableCell>,
36}
37
38#[derive(Debug, Clone, Default)]
39pub struct TableCell {
40    pub paragraphs: Vec<Paragraph>,
41}
42
// Types added in v0.2.0: list items, headers/footers, notes, and extraction options.
44
45#[derive(Debug, Clone)]
46pub struct ListItem {
47    pub level: u32,
48    pub list_type: ListType,
49    pub number: Option<String>,
50    pub text: String,
51}
52
/// Kind of marker a [`ListItem`] is rendered with.
///
/// The enum is fieldless, so it is `Copy` and fully comparable/hashable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ListType {
    /// Item is introduced by a bullet marker.
    Bullet,
    /// Item is introduced by its number, when one is known.
    Numbered,
}
58
59#[derive(Debug, Clone, Default)]
60pub struct HeaderFooter {
61    pub paragraphs: Vec<Paragraph>,
62    pub tables: Vec<Table>,
63    pub header_type: HeaderFooterType,
64}
65
/// Variant of a header or footer.
///
/// NOTE(review): the variants presumably mirror the header/footer reference
/// types of the source document format (default, first page, even/odd
/// pages) — confirm against the parser.
///
/// The enum is fieldless, so it is `Copy` and fully comparable/hashable.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum HeaderFooterType {
    /// The variant used when nothing more specific is requested; also the
    /// value produced by `HeaderFooterType::default()`.
    #[default]
    Default,
    /// First-page variant.
    First,
    /// Even-page variant.
    Even,
    /// Odd-page variant.
    Odd,
}
74
75#[derive(Debug, Clone)]
76pub struct Note {
77    pub id: String,
78    pub note_type: NoteType,
79    pub paragraphs: Vec<Paragraph>,
80}
81
/// Distinguishes the two kinds of [`Note`].
///
/// The enum is fieldless, so it is `Copy` and fully comparable/hashable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NoteType {
    /// The note is a footnote.
    Footnote,
    /// The note is an endnote.
    Endnote,
}
87
/// Switches selecting which optional sections text extraction emits.
///
/// Every flag defaults to `false`.
#[derive(Debug, Clone, Default)]
pub struct ExtractOptions {
    /// Emit the headers section.
    pub include_headers: bool,
    /// Emit the footers section.
    pub include_footers: bool,
    /// Emit the footnotes section.
    pub include_footnotes: bool,
    /// Emit the endnotes section.
    pub include_endnotes: bool,
    /// Render list paragraphs with indentation and a bullet/number marker.
    pub include_list_markers: bool,
}

impl ExtractOptions {
    /// Returns options with every flag switched on.
    pub fn all() -> Self {
        let enabled = true;
        Self {
            include_headers: enabled,
            include_footers: enabled,
            include_footnotes: enabled,
            include_endnotes: enabled,
            include_list_markers: enabled,
        }
    }

    /// Returns options with every flag switched off (same as the default value).
    pub fn none() -> Self {
        let disabled = false;
        Self {
            include_headers: disabled,
            include_footers: disabled,
            include_footnotes: disabled,
            include_endnotes: disabled,
            include_list_markers: disabled,
        }
    }
}
112
113impl Document {
114    pub fn new() -> Self {
115        Self::default()
116    }
117
118    pub fn extract_text(&self) -> String {
119        self.extract_text_with_options(&ExtractOptions::none())
120    }
121
122    pub fn extract_text_with_options(&self, options: &ExtractOptions) -> String {
123        let mut text = String::new();
124
125        // Headers
126        if options.include_headers && !self.headers.is_empty() {
127            text.push_str("--- Headers ---\n");
128            for header in &self.headers {
129                text.push_str(&header.extract_text());
130                text.push('\n');
131            }
132            text.push('\n');
133        }
134
135        // Main content - paragraphs and lists interspersed
136        let mut list_index = 0;
137        for paragraph in &self.paragraphs {
138            // Check if this paragraph is a list item
139            if let (Some(_num_id), Some(level)) = (paragraph.numbering_id, paragraph.numbering_level) {
140                // This is a list item
141                if options.include_list_markers && list_index < self.lists.len() {
142                    let list_item = &self.lists[list_index];
143                    let indent = "  ".repeat(level as usize);
144                    let marker = match list_item.list_type {
145                        ListType::Bullet => "• ".to_string(),
146                        ListType::Numbered => {
147                            if let Some(ref num) = list_item.number {
148                                format!("{}. ", num)
149                            } else {
150                                "• ".to_string()
151                            }
152                        }
153                    };
154                    text.push_str(&format!("{}{}{}\n", indent, marker, list_item.text));
155                    list_index += 1;
156                } else {
157                    // Include as regular paragraph without marker
158                    let para_text = paragraph.to_text();
159                    if !para_text.is_empty() {
160                        text.push_str(&para_text);
161                        text.push('\n');
162                    }
163                }
164            } else {
165                // Regular paragraph
166                let para_text = paragraph.to_text();
167                if !para_text.is_empty() {
168                    text.push_str(&para_text);
169                    text.push('\n');
170                }
171            }
172        }
173
174        // Tables
175        for table in &self.tables {
176            for row in &table.rows {
177                for cell in &row.cells {
178                    for paragraph in &cell.paragraphs {
179                        let para_text = paragraph.to_text();
180                        if !para_text.is_empty() {
181                            text.push_str(&para_text);
182                            text.push('\t');
183                        }
184                    }
185                }
186                text.push('\n');
187            }
188            text.push('\n');
189        }
190
191        // Footnotes
192        if options.include_footnotes && !self.footnotes.is_empty() {
193            text.push_str("\n--- Footnotes ---\n");
194            for (i, note) in self.footnotes.iter().enumerate() {
195                text.push_str(&format!("[{}] ", i + 1));
196                for para in &note.paragraphs {
197                    text.push_str(&para.to_text());
198                }
199                text.push('\n');
200            }
201        }
202
203        // Endnotes
204        if options.include_endnotes && !self.endnotes.is_empty() {
205            text.push_str("\n--- Endnotes ---\n");
206            for (i, note) in self.endnotes.iter().enumerate() {
207                text.push_str(&format!("[{}] ", i + 1));
208                for para in &note.paragraphs {
209                    text.push_str(&para.to_text());
210                }
211                text.push('\n');
212            }
213        }
214
215        // Footers
216        if options.include_footers && !self.footers.is_empty() {
217            text.push_str("\n--- Footers ---\n");
218            for footer in &self.footers {
219                text.push_str(&footer.extract_text());
220                text.push('\n');
221            }
222        }
223
224        text
225    }
226}
227
228impl Paragraph {
229    pub fn new() -> Self {
230        Self::default()
231    }
232
233    pub fn to_text(&self) -> String {
234        self.runs.iter()
235            .map(|run| run.text.as_str())
236            .collect::<Vec<_>>()
237            .join("")
238    }
239
240    pub fn add_run(&mut self, run: Run) {
241        self.runs.push(run);
242    }
243}
244
245impl Run {
246    pub fn new(text: String) -> Self {
247        Self {
248            text,
249            ..Default::default()
250        }
251    }
252}
253
254impl Table {
255    pub fn new() -> Self {
256        Self::default()
257    }
258}
259
260impl HeaderFooter {
261    pub fn extract_text(&self) -> String {
262        let mut text = String::new();
263
264        for paragraph in &self.paragraphs {
265            let para_text = paragraph.to_text();
266            if !para_text.is_empty() {
267                text.push_str(&para_text);
268                text.push('\n');
269            }
270        }
271
272        for table in &self.tables {
273            for row in &table.rows {
274                for cell in &row.cells {
275                    for paragraph in &cell.paragraphs {
276                        let para_text = paragraph.to_text();
277                        if !para_text.is_empty() {
278                            text.push_str(&para_text);
279                            text.push('\t');
280                        }
281                    }
282                }
283                text.push('\n');
284            }
285        }
286
287        text
288    }
289}