docx_parser/
lib.rs

1//! A library that parses DOCX files into a simpler format, suitable for exporting them as Markdown or JSON.
2//!
3//! # Examples
4//!
5//! ```
6//! use docx_parser::MarkdownDocument;
7//!
8//! let markdown_doc = MarkdownDocument::from_file("./test/tables.docx");
9//! let markdown = markdown_doc.to_markdown(true);
10//! let json = markdown_doc.to_json(true);
11//! println!("\n\n{}", markdown);
12//! println!("\n\n{}", json);
13//! ```
14
15mod utils;
16
17use docx_rust::document::BodyContent::{Paragraph, Sdt, SectionProperty, Table, TableCell};
18use docx_rust::document::{ParagraphContent, RunContent, TableCellContent, TableRowContent};
19use docx_rust::formatting::{NumberFormat, OnOffOnlyType, ParagraphProperty};
20use docx_rust::media::MediaType;
21use docx_rust::styles::StyleType;
22use docx_rust::DocxFile;
23use serde::Serialize;
24use serde_json;
25use std::collections::HashMap;
26use std::path::Path;
27use std::str::FromStr;
28use utils::{max_lengths_per_column, save_image_to_file, serialize_images, table_row_to_markdown};
29
30#[derive(Debug, PartialEq, Eq, Clone, Serialize)]
31#[serde(rename_all = "camelCase")]
32pub struct BlockStyle {
33    /// Use bold
34    pub bold: bool,
35    /// Use italics
36    pub italics: bool,
37    /// Use underline
38    pub underline: bool,
39    /// Use strikethrough
40    pub strike: bool,
41    #[serde(skip_serializing_if = "Option::is_none")]
42    /// Size is specified in points x 2, so size 19 is equal to 9.5pt
43    pub size: Option<isize>,
44}
45
46impl BlockStyle {
47    pub fn new() -> Self {
48        BlockStyle {
49            bold: false,
50            italics: false,
51            underline: false,
52            strike: false,
53            size: None,
54        }
55    }
56
57    pub fn combine_with(&mut self, other: &BlockStyle) {
58        self.bold = other.bold;
59        self.italics = other.italics;
60        self.underline = other.underline;
61        self.strike = other.strike;
62        if let Some(size) = other.size {
63            self.size = Some(size);
64        }
65    }
66}
67
68#[derive(Debug, Clone, Serialize)]
69#[serde(rename_all = "camelCase")]
70
71pub struct MarkdownNumbering {
72    #[serde(skip_serializing_if = "Option::is_none")]
73    pub id: Option<isize>,
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub indent_level: Option<isize>,
76    #[serde(skip_serializing_if = "Option::is_none")]
77    pub format: Option<String>, // NumberFormat
78    #[serde(skip_serializing_if = "Option::is_none")]
79    pub level_text: Option<String>,
80}
81
82#[derive(Debug, Default, Clone, Serialize)]
83#[serde(rename_all = "camelCase")]
84pub struct ParagraphStyle {
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub style_id: Option<String>,
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub outline_lvl: Option<isize>,
89    #[serde(skip_serializing_if = "Option::is_none")]
90    pub numbering: Option<MarkdownNumbering>,
91    #[serde(skip_serializing_if = "Option::is_none")]
92    pub page_break_before: Option<bool>,
93    #[serde(skip_serializing_if = "Option::is_none")]
94    pub style: Option<BlockStyle>,
95}
96
97impl ParagraphStyle {
98    pub fn new() -> Self {
99        ParagraphStyle {
100            style_id: None,
101            outline_lvl: None,
102            numbering: None,
103            page_break_before: None,
104            style: None,
105        }
106    }
107
108    pub fn combine_with(&mut self, other: &ParagraphStyle) {
109        self.style_id = self.style_id.clone().or_else(|| other.style_id.clone());
110        self.outline_lvl = self.outline_lvl.or_else(|| other.outline_lvl);
111        self.page_break_before = self.page_break_before.or_else(|| other.page_break_before);
112        if self.numbering.is_none() {
113            self.numbering = other.numbering.clone()
114        }
115        if let Some(ref mut style) = self.style {
116            if let Some(ref other_style) = other.style {
117                style.combine_with(other_style);
118            }
119        } else {
120            self.style = other.style.clone();
121        }
122    }
123}
124
125impl<'a> From<&'a ParagraphProperty<'a>> for ParagraphStyle {
126    fn from(paragraph_property: &'a ParagraphProperty) -> Self {
127        // Extract properties from ParagraphProperty and create a new ParagraphStyle
128        let mut paragraph_style = ParagraphStyle::new();
129        if let Some(style_id) = &paragraph_property.style_id {
130            paragraph_style.style_id = Some(style_id.value.to_string());
131        }
132        if let Some(outline_lvl) = &paragraph_property.outline_lvl {
133            paragraph_style.outline_lvl = Some(outline_lvl.value);
134        }
135        if let Some(page_break_before) = &paragraph_property.page_break_before {
136            paragraph_style.page_break_before = page_break_before.value;
137        }
138        if let Some(numbering) = &paragraph_property.numbering {
139            paragraph_style.numbering = Some(MarkdownNumbering {
140                id: numbering.id.as_ref().map(|ni| ni.value),
141                indent_level: numbering.level.as_ref().map(|level| level.value),
142                format: None,
143                level_text: None,
144            });
145        }
146        if paragraph_property.r_pr.len() > 0 {
147            let mut block_style = BlockStyle::new();
148            paragraph_property
149                .r_pr
150                .iter()
151                .for_each(|character_property| {
152                    if let Some(size) = &character_property.size {
153                        block_style.size = Some(size.value);
154                    }
155                    if character_property.bold.is_some() {
156                        block_style.bold = true;
157                    }
158                    if character_property.underline.is_some() {
159                        block_style.underline = true;
160                    }
161                    if character_property.italics.is_some() || character_property.emphasis.is_some()
162                    {
163                        block_style.italics = true;
164                    }
165                    if character_property.strike.is_some() || character_property.dstrike.is_some() {
166                        block_style.strike = true;
167                    }
168                });
169            paragraph_style.style = Some(block_style);
170        }
171        paragraph_style
172    }
173}
174
175#[derive(Debug, PartialEq, Eq, Serialize)]
176pub enum TextType {
177    Text,
178    Image,
179    Link,
180    Code,
181    Quote,
182    List,
183    Table,
184    Header,
185    HorizontalRule,
186    BlockQuote,
187    CodeBlock,
188    HeaderBlock,
189    BookmarkLink,
190}
191
192#[derive(Debug, PartialEq, Eq, Serialize)]
193#[serde(rename_all = "camelCase")]
194pub struct TextBlock {
195    pub text_type: TextType,
196    #[serde(skip_serializing_if = "Option::is_none")]
197    pub style: Option<BlockStyle>,
198    pub text: String,
199}
200
201impl TextBlock {
202    pub fn new(text: String, style: Option<BlockStyle>, text_type: TextType) -> Self {
203        TextBlock {
204            style,
205            text,
206            text_type,
207        }
208    }
209
210    pub fn to_markdown(&self, paragraph_style: &ParagraphStyle) -> String {
211        let mut markdown = self.text.clone();
212
213        let mut style = if self.style.is_some() {
214            self.style.as_ref().unwrap().clone()
215        } else {
216            BlockStyle::new()
217        };
218
219        if let Some(block_style) = &paragraph_style.style {
220            style.combine_with(block_style);
221        };
222
223        // Add bold formatting if enabled
224        if style.bold {
225            markdown = format!("**{markdown}**");
226        }
227
228        // Add italic formatting if enabled
229        if style.italics {
230            markdown = format!("*{markdown}*");
231        }
232
233        // Add underline formatting if enabled
234        if style.underline {
235            markdown = format!("__{markdown}__");
236        }
237
238        // Add strike-through formatting if enabled
239        if style.strike {
240            markdown = format!("~~{markdown}~~");
241        }
242        markdown
243    }
244}
245
246#[derive(Debug, Serialize)]
247pub struct MarkdownParagraph {
248    #[serde(skip_serializing_if = "Option::is_none")]
249    pub style: Option<ParagraphStyle>,
250    pub blocks: Vec<TextBlock>,
251}
252
253impl MarkdownParagraph {
254    pub fn new() -> Self {
255        MarkdownParagraph {
256            style: None,
257            blocks: vec![],
258        }
259    }
260
261    /// Convert a MarkdownParagraph to a Markdown string.
262    pub fn to_markdown(
263        &self,
264        styles: &HashMap<String, ParagraphStyle>,
265        numberings: &mut HashMap<isize, usize>,
266        doc: &MarkdownDocument,
267    ) -> String {
268        let mut markdown = String::new();
269
270        let mut style = if self.style.is_some() {
271            self.style.as_ref().unwrap().clone()
272        } else {
273            ParagraphStyle::default()
274        };
275
276        if let Some(style_id) = &style.style_id {
277            if let Some(doc_style) = styles.get(style_id) {
278                style.combine_with(doc_style);
279            }
280            // markdown += &format!("[{}]", style_id);
281        };
282
283        // Add outline level if available
284        if let Some(outline_lvl) = style.outline_lvl {
285            // Convert outline level to appropriate Markdown heading level
286            let heading_level = match outline_lvl {
287                0 => "# ",
288                1 => "## ",
289                2 => "### ",
290                3 => "#### ",
291                4 => "##### ",
292                _ => "###### ", // Use the smallest heading level for higher levels
293            };
294            markdown += heading_level;
295        }
296
297        // Add numbering if available
298        if let Some(numbering) = &style.numbering {
299            if let Some(level) = numbering.indent_level {
300                if level > 0 {
301                    markdown += &"    ".repeat(level as usize); // Start numbering from 1
302                }
303            }
304            if let Some(id) = numbering.id {
305                let format = match &doc.numberings[&id].format {
306                    Some(entry) => NumberFormat::from_str(entry).unwrap_or(NumberFormat::Decimal),
307                    None => NumberFormat::Decimal,
308                };
309                let count = numberings.entry(id).or_insert(0); // Start numbering from 1
310                let numbering_symbol = match format {
311                    NumberFormat::UpperRoman => format!("{}.", ((*count) as u8 + b'I') as char),
312                    NumberFormat::LowerRoman => format!("{}.", ((*count) as u8 + b'i') as char),
313                    NumberFormat::UpperLetter => format!("{}.", ((*count) as u8 + b'A') as char),
314                    NumberFormat::LowerLetter => format!("{}.", ((*count) as u8 + b'a') as char),
315                    NumberFormat::Bullet => match &doc.numberings[&id].level_text {
316                        Some(level_text) if level_text.trim().is_empty() => " ".to_string(),
317                        _ => "-".to_string(),
318                    },
319                    _ => format!("{}.", *count + 1),
320                };
321                *count += 1;
322                markdown += &format!("{numbering_symbol} ");
323            }
324        }
325
326        for block in &self.blocks {
327            markdown += &block.to_markdown(&style);
328        }
329        markdown
330    }
331
332    /// Convert a docx::Paragraph to a MarkdownParagraph
333    fn from_paragraph(
334        paragraph: &docx_rust::document::Paragraph,
335        docx: &docx_rust::Docx,
336    ) -> MarkdownParagraph {
337        let mut markdown_paragraph = MarkdownParagraph::new();
338        if let Some(paragraph_property) = &paragraph.property {
339            let paragraph_style: ParagraphStyle = paragraph_property.into();
340            markdown_paragraph.style = Some(paragraph_style);
341        }
342        for paragraph_content in &paragraph.content {
343            match paragraph_content {
344                ParagraphContent::Run(run) => {
345                    let block_style = match &run.property {
346                        Some(character_property) => {
347                            let mut block_style = BlockStyle::new();
348                            if let Some(size) = &character_property.size {
349                                block_style.size = Some(size.value);
350                            }
351                            if character_property.bold.is_some() {
352                                block_style.bold = true;
353                            }
354                            if character_property.underline.is_some() {
355                                block_style.underline = true;
356                            }
357                            if character_property.italics.is_some()
358                                || character_property.emphasis.is_some()
359                            {
360                                block_style.italics = true;
361                            }
362                            if character_property.strike.is_some()
363                                || character_property.dstrike.is_some()
364                            {
365                                block_style.strike = true;
366                            }
367                            Some(block_style)
368                        }
369                        None => None,
370                    };
371
372                    let is_same_style = |style: &Option<BlockStyle>| style == &block_style;
373
374                    for run_content in &run.content {
375                        match run_content {
376                            RunContent::Text(text) => {
377                                let text = text.text.to_string();
378                                let mut could_extend_text = false;
379                                if let Some(prev_block) = markdown_paragraph.blocks.last_mut() {
380                                    if is_same_style(&prev_block.style)
381                                        && prev_block.text_type == TextType::Text
382                                    {
383                                        prev_block.text.push_str(&text);
384                                        could_extend_text = true
385                                    }
386                                };
387                                if !could_extend_text {
388                                    let text_block =
389                                        TextBlock::new(text, block_style.clone(), TextType::Text);
390                                    markdown_paragraph.blocks.push(text_block);
391                                }
392                            }
393                            RunContent::Drawing(drawing) => {
394                                if let Some(inline) = &drawing.inline {
395                                    if let Some(graphic) = &inline.graphic {
396                                        let id = graphic.data.pic.fill.blip.embed.to_string();
397                                        if let Some(relationships) = &docx.document_rels {
398                                            if let Some(target) = relationships.get_target(&id) {
399                                                let descr = match &inline.doc_property.descr {
400                                                    Some(descr) => descr.to_string(),
401                                                    None => "".to_string(),
402                                                };
403                                                let img_text =
404                                                    format!("![{}](./{})", descr, target);
405                                                let text_block =
406                                                    TextBlock::new(img_text, None, TextType::Image);
407                                                markdown_paragraph.blocks.push(text_block);
408                                            }
409                                        }
410                                    }
411                                }
412                            }
413                            _ => (),
414                        }
415                    }
416                }
417                ParagraphContent::Link(link) => {
418                    let descr = link.content.content.first();
419                    let target = match &link.anchor {
420                        Some(anchor) => Some(format!("#{}", anchor.to_string())),
421                        None => match &link.id {
422                            Some(id) => match &docx.document_rels {
423                                Some(doc_relationships) => {
424                                    doc_relationships.relationships.iter().find_map(|r| {
425                                        if r.id == *id {
426                                            Some(r.target.to_string())
427                                        } else {
428                                            None
429                                        }
430                                    })
431                                }
432                                None => None,
433                            },
434                            None => None,
435                        },
436                    };
437                    if let (Some(RunContent::Text(descr)), Some(target)) = (descr, target) {
438                        let link = format!("[{}]({})", descr.text, target);
439                        let text_block = TextBlock::new(link, None, TextType::Link);
440                        markdown_paragraph.blocks.push(text_block);
441                    }
442                }
443                ParagraphContent::BookmarkStart(bookmark_start) => {
444                    if let Some(name) = &bookmark_start.name {
445                        let bookmark = format!(r#"<a name="{}"></a>"#, name);
446                        let text_block = TextBlock::new(bookmark, None, TextType::BookmarkLink);
447                        markdown_paragraph.blocks.push(text_block);
448                    }
449                }
450                _ => (),
451            }
452        }
453        markdown_paragraph
454    }
455}
456
457#[derive(Debug, Serialize)]
458#[serde(rename_all = "camelCase")]
459pub struct MarkdownDocument {
460    #[serde(skip_serializing_if = "Option::is_none")]
461    pub creator: Option<String>,
462    #[serde(skip_serializing_if = "Option::is_none")]
463    pub last_editor: Option<String>,
464    #[serde(skip_serializing_if = "Option::is_none")]
465    pub company: Option<String>,
466    #[serde(skip_serializing_if = "Option::is_none")]
467    pub title: Option<String>,
468    #[serde(skip_serializing_if = "Option::is_none")]
469    pub description: Option<String>,
470    #[serde(skip_serializing_if = "Option::is_none")]
471    pub subject: Option<String>,
472    #[serde(skip_serializing_if = "Option::is_none")]
473    pub keywords: Option<String>,
474    pub content: Vec<MarkdownContent>,
475    pub styles: HashMap<String, ParagraphStyle>,
476    pub numberings: HashMap<isize, MarkdownNumbering>,
477    #[serde(serialize_with = "serialize_images")]
478    pub images: HashMap<String, Vec<u8>>,
479}
480
481impl MarkdownDocument {
482    pub fn new() -> Self {
483        MarkdownDocument {
484            creator: None,
485            last_editor: None,
486            company: None,
487            title: None,
488            description: None,
489            subject: None,
490            keywords: None,
491            content: vec![],
492            styles: HashMap::new(),
493            numberings: HashMap::new(),
494            images: HashMap::new(),
495        }
496    }
497
498    pub fn from_file<P: AsRef<Path>>(path: P) -> Self {
499        let mut markdown_doc = MarkdownDocument::new();
500
501        let docx = match DocxFile::from_file(path) {
502            Ok(docx_file) => docx_file,
503            Err(err) => {
504                panic!("Error processing file: {:?}", err)
505            }
506        };
507        let docx = match docx.parse() {
508            Ok(docx) => docx,
509            Err(err) => {
510                panic!("Exiting: {:?}", err);
511            }
512        };
513
514        // println!("{:?}", &docx);
515
516        if let Some(app) = &docx.app {
517            if let Some(company) = &app.company {
518                if !company.is_empty() {
519                    markdown_doc.company = Some(company.to_string());
520                }
521            }
522        }
523
524        if let Some(core) = &docx.core {
525            if let Some(title) = &core.title {
526                if !title.is_empty() {
527                    markdown_doc.title = Some(title.to_string());
528                }
529            }
530            if let Some(subject) = &core.subject {
531                if !subject.is_empty() {
532                    markdown_doc.subject = Some(subject.to_string());
533                }
534            }
535            if let Some(keywords) = &core.keywords {
536                if !keywords.is_empty() {
537                    markdown_doc.keywords = Some(keywords.to_string());
538                }
539            }
540            if let Some(description) = &core.description {
541                if !description.is_empty() {
542                    markdown_doc.description = Some(description.to_string());
543                }
544            }
545            if let Some(creator) = &core.creator {
546                if !creator.is_empty() {
547                    markdown_doc.creator = Some(creator.to_string());
548                }
549            }
550            if let Some(last_modified_by) = &core.last_modified_by {
551                if !last_modified_by.is_empty() {
552                    markdown_doc.last_editor = Some(last_modified_by.to_string());
553                }
554            }
555        }
556
557        if let Some(numbering) = &docx.numbering {
558            numbering.numberings.iter().for_each(|n| {
559                if let Some(id) = n.num_id {
560                    if let Some(details) = numbering.numbering_details(id) {
561                        markdown_doc.numberings.insert(
562                            id,
563                            MarkdownNumbering {
564                                id: Some(id),
565                                indent_level: None,
566                                format: details.levels[0]
567                                    .number_format
568                                    .as_ref()
569                                    .map(|i| i.value.to_string()),
570                                level_text: details.levels[0]
571                                    .level_text
572                                    .as_ref()
573                                    .map(|i| i.value.to_string()),
574                            },
575                        );
576                        ()
577                    }
578                }
579            })
580        }
581
582        for (id, (MediaType::Image, media_data)) in &docx.media {
583            markdown_doc.images.insert(id.clone(), media_data.to_vec());
584        }
585
586        for style in &docx.styles.styles {
587            match style.ty {
588                Some(StyleType::Paragraph) => {
589                    if let Some(paragraph_property) = &style.paragraph {
590                        let paragraph_style: ParagraphStyle = paragraph_property.into();
591                        markdown_doc
592                            .styles
593                            .insert(style.style_id.to_string(), paragraph_style);
594                    }
595                }
596                _ => (),
597            }
598        }
599
600        for content in &docx.document.body.content {
601            match content {
602                Paragraph(paragraph) => {
603                    let markdown_paragraph = MarkdownParagraph::from_paragraph(&paragraph, &docx);
604                    if markdown_paragraph.blocks.len() > 0 {
605                        markdown_doc
606                            .content
607                            .push(MarkdownContent::Paragraph(markdown_paragraph));
608                    }
609                }
610                Table(table) => {
611                    let rows_columns: MarkdownTable = table
612                        .rows
613                        .iter()
614                        .map(|row| {
615                            let is_header = match &row.property.table_header {
616                                Some(table_header) => match table_header.value {
617                                    Some(OnOffOnlyType::On) => true,
618                                    _ => false,
619                                },
620                                None => false,
621                            };
622                            let cells: Vec<Vec<MarkdownParagraph>> = row
623                                .cells
624                                .iter()
625                                .filter_map(|row_content| match row_content {
626                                    TableRowContent::TableCell(cell) => {
627                                        let cells: Vec<MarkdownParagraph> = cell
628                                            .content
629                                            .iter()
630                                            .filter_map(|content| match content {
631                                                TableCellContent::Paragraph(paragraph) => {
632                                                    Some(MarkdownParagraph::from_paragraph(
633                                                        &paragraph, &docx,
634                                                    ))
635                                                } // _ => None,
636                                            })
637                                            .collect();
638                                        if cells.len() > 0 {
639                                            Some(cells)
640                                        } else {
641                                            None
642                                        }
643                                    }
644                                    _ => None,
645                                })
646                                .collect();
647                            MarkdownTableRow { is_header, cells }
648                        })
649                        .collect();
650
651                    markdown_doc
652                        .content
653                        .push(MarkdownContent::Table(rows_columns));
654                }
655                Sdt(_) => {
656                    // println!("Sdt");
657                }
658                SectionProperty(_sp) => {
659                    // println!("SectionProperty: {:?}", sp);
660                }
661                TableCell(tc) => {
662                    println!("TableCell: {:?}", tc);
663                }
664            }
665        }
666
667        markdown_doc
668    }
669
670    pub fn to_json(&self, pretty: bool) -> String {
671        if pretty {
672            serde_json::to_string_pretty(self).expect("Serialization failed")
673        } else {
674            serde_json::to_string(self).expect("Serialization failed")
675        }
676    }
677
678    pub fn to_markdown(&self, export_images: bool) -> String {
679        let mut markdown = String::new();
680
681        if let Some(title) = &self.title {
682            markdown += &format!("# {}\n\n", title);
683        }
684
685        let mut numberings: HashMap<isize, usize> = HashMap::new();
686
687        for (index, content) in self.content.iter().enumerate() {
688            match content {
689                MarkdownContent::Paragraph(paragraph) => {
690                    markdown += &paragraph.to_markdown(&self.styles, &mut numberings, &self);
691                    markdown += "\n";
692                }
693                MarkdownContent::Table(table) => {
694                    let table_with_simple_cells: Vec<(bool, Vec<String>)> = table
695                        .iter()
696                        .map(|MarkdownTableRow { is_header, cells }| {
697                            let row_content: &Vec<String> = &cells
698                                .iter()
699                                .map(|cell| {
700                                    let cell_content = &cell.iter().enumerate().fold(
701                                        "".to_string(),
702                                        |mut content, (i, paragraph)| {
703                                            let paragraph_as_markdown = &paragraph.to_markdown(
704                                                &self.styles,
705                                                &mut numberings,
706                                                &self,
707                                            );
708                                            if i + 1 < cell.len() {
709                                                content +=
710                                                    &format!("{}<br/>", paragraph_as_markdown);
711                                            } else {
712                                                content += paragraph_as_markdown;
713                                            }
714                                            content
715                                        },
716                                    );
717                                    cell_content.clone()
718                                })
719                                .collect();
720                            (is_header.clone(), row_content.clone())
721                        })
722                        .collect();
723                    let column_lengths = max_lengths_per_column(&table_with_simple_cells, 3);
724                    let divider = &table_row_to_markdown(
725                        &column_lengths,
726                        &column_lengths.iter().map(|i| "-".repeat(*i)).collect(),
727                    );
728                    let table = &table_with_simple_cells.iter().enumerate().fold(
729                        "".to_string(),
730                        |mut acc, (i, (is_header, row))| {
731                            let markdown_row = &table_row_to_markdown(&column_lengths, row);
732                            if i == 0 {
733                                if *is_header {
734                                    acc.push_str(markdown_row);
735                                    acc.push_str(divider);
736                                } else {
737                                    acc.push_str(&table_row_to_markdown(
738                                        &column_lengths,
739                                        &column_lengths.iter().map(|_| "".to_string()).collect(),
740                                    ));
741                                    acc.push_str(divider);
742                                    acc.push_str(markdown_row);
743                                }
744                            } else {
745                                acc.push_str(markdown_row);
746                            }
747                            if i == table_with_simple_cells.len() {
748                                acc.push_str("\n");
749                            }
750                            acc
751                        },
752                    );
753                    markdown += table;
754                }
755            };
756            if index != self.content.len() - 1 {
757                markdown += "\n";
758            }
759        }
760
761        if export_images {
762            for (image, data) in &self.images {
763                match save_image_to_file(image, data) {
764                    Ok(_) => (),
765                    Err(err) => eprintln!("{err}"),
766                };
767            }
768        }
769
770        markdown
771    }
772}
773
/// One block of document content: either a paragraph or a table.
///
/// Variants:
/// - `Paragraph` wraps a [`MarkdownParagraph`].
/// - `Table` wraps a [`MarkdownTable`] (a list of rows).
///
/// Because of `rename_all = "camelCase"`, the variant names are serialized as
/// `paragraph` and `table`.
774#[derive(Debug, Serialize)]
775#[serde(rename_all = "camelCase")]
776pub enum MarkdownContent {
777    Paragraph(MarkdownParagraph),
778    Table(MarkdownTable),
779}
780
/// A table, represented as an ordered list of [`MarkdownTableRow`]s.
781pub type MarkdownTable = Vec<MarkdownTableRow>;
782
/// A single row of a [`MarkdownTable`].
///
/// Fields (both private to this module):
/// - `is_header`: flags the row as a header row; serialized as `isHeader`
///   because of `rename_all = "camelCase"`.
///   NOTE(review): the markdown renderer appears to consult this flag only for
///   the first row of a table, emitting an empty header row when it is
///   `false` — confirm against `to_markdown`.
/// - `cells`: the cells of the row, in column order.
783#[derive(Debug, Serialize)]
784#[serde(rename_all = "camelCase")]
785pub struct MarkdownTableRow {
786    is_header: bool,
787    cells: Vec<MarkdownTableCell>,
788}
789
/// A table cell, represented as an ordered list of [`MarkdownParagraph`]s.
790pub type MarkdownTableCell = Vec<MarkdownParagraph>;
791
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Renders the `.docx` fixture at `docx_path` to markdown (image export
    /// disabled) and asserts that the result equals the reference markdown
    /// stored at `expected_md_path`.
    ///
    /// The reference file is read first, so a missing `.md` fixture fails
    /// before the document is parsed. `#[track_caller]` makes a failure point
    /// at the calling test rather than at this helper.
    #[track_caller]
    fn assert_renders_as(expected_md_path: &str, docx_path: &str) {
        let expected = fs::read_to_string(expected_md_path).unwrap();
        let rendered = MarkdownDocument::from_file(docx_path).to_markdown(false);
        assert_eq!(expected, rendered);
    }

    #[test]
    fn test_headers() {
        assert_renders_as("./test/headers.md", "./test/headers.docx");
    }

    #[test]
    fn test_bullets() {
        assert_renders_as("./test/lists.md", "./test/lists.docx");
    }

    #[test]
    fn test_images() {
        assert_renders_as("./test/image.md", "./test/image.docx");
    }

    #[test]
    fn test_links() {
        assert_renders_as("./test/links.md", "./test/links.docx");
    }

    #[test]
    fn test_tables() {
        assert_renders_as("./test/tables.md", "./test/tables.docx");
    }

    #[test]
    fn test_one_row_table() {
        assert_renders_as("./test/table_one_row.md", "./test/table_one_row.docx");
    }

    #[test]
    fn test_table_with_list_cell() {
        assert_renders_as(
            "./test/table_with_list_cell.md",
            "./test/table_with_list_cell.docx",
        );
    }

    #[test]
    fn test_tables_separated_with_rawblock() {
        assert_renders_as(
            "./test/tables_separated_with_rawblock.md",
            "./test/tables_separated_with_rawblock.docx",
        );
    }
}