use crate::markdown::{to_markdown, to_markdown_images};
use crate::ImageMode;
/// A named document holding a flat list of top-level [`Node`]s.
#[derive(Clone, Debug, PartialEq)]
pub struct DoclingDocument {
    /// Name given to the document at construction time.
    pub name: String,
    /// Top-level content nodes of the document.
    pub nodes: Vec<Node>,
    /// Strictness flag forwarded to the markdown exporter by
    /// `export_to_markdown` and `export_to_markdown_with_images`.
    pub strict_markdown: bool,
}
/// One piece of document content.
#[derive(Clone, Debug, PartialEq)]
pub enum Node {
    /// A heading with a numeric `level` and its `text`.
    Heading { level: u8, text: String },
    /// A paragraph of text.
    Paragraph { text: String },
    /// A single list entry.
    ListItem {
        /// Flags the item as ordered (presumably numbered rather than
        /// bulleted; confirm against the renderer).
        ordered: bool,
        /// Number attached to the item (presumably its ordinal in an ordered
        /// list; confirm against the producer).
        number: u64,
        /// Flags the item as the first one of its list, going by the name;
        /// verify against the producer.
        first_in_list: bool,
        /// Text of the item.
        text: String,
        /// Level of the item (presumably its nesting depth; TODO confirm).
        level: u8,
    },
    /// A code block with an optional language tag.
    Code {
        language: Option<String>,
        text: String,
    },
    /// Tabular content; see [`Table`].
    Table(Table),
    /// A picture with an optional caption and optional image payload.
    Picture {
        caption: Option<String>,
        image: Option<PictureImage>,
    },
    /// A labelled group of child nodes.
    Group { label: String, children: Vec<Node> },
}
/// Image payload attached to a picture: its bytes plus descriptive metadata.
#[derive(Clone, Debug, PartialEq)]
pub struct PictureImage {
    /// MIME type string; emitted verbatim by `data_uri`.
    pub mimetype: String,
    /// Image width (presumably in pixels; confirm against the producer).
    pub width: u32,
    /// Image height (presumably in pixels; confirm against the producer).
    pub height: u32,
    /// Image bytes; base64-encoded by `data_uri`.
    pub data: Vec<u8>,
}
impl PictureImage {
    /// Renders the image as a `data:` URI of the form
    /// `data:<mimetype>;base64,<payload>`, where the payload is the result of
    /// passing `self.data` through `crate::base64::encode`.
    pub fn data_uri(&self) -> String {
        let payload = crate::base64::encode(&self.data);
        format!("data:{};base64,{}", self.mimetype, payload)
    }
}
/// Tabular content stored as rows of cell strings.
#[derive(Clone, Debug, PartialEq)]
pub struct Table {
    /// Table rows; each inner vector holds the cell strings of one row.
    pub rows: Vec<Vec<String>>,
}
impl DoclingDocument {
    /// Creates a document called `name` with no nodes and `strict_markdown`
    /// set to `false`.
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        Self {
            name,
            nodes: Vec::new(),
            strict_markdown: false,
        }
    }

    /// Appends `node` to the end of the document's node list.
    pub fn push(&mut self, node: Node) {
        self.nodes.push(node);
    }

    /// Appends a [`Node::Heading`] built from `level` and `text`.
    pub fn add_heading(&mut self, level: u8, text: impl Into<String>) {
        let text = text.into();
        self.push(Node::Heading { level, text });
    }

    /// Appends a [`Node::Paragraph`] containing `text`.
    pub fn add_paragraph(&mut self, text: impl Into<String>) {
        let text = text.into();
        self.push(Node::Paragraph { text });
    }

    /// Exports the document to markdown using the document's own
    /// `strict_markdown` flag.
    pub fn export_to_markdown(&self) -> String {
        self.export_to_markdown_with(self.strict_markdown)
    }

    /// Exports the document to markdown, passing `strict` to the exporter
    /// instead of the document's `strict_markdown` flag.
    pub fn export_to_markdown_with(&self, strict: bool) -> String {
        to_markdown(self, strict)
    }

    /// Exports the document as pretty-printed JSON.
    ///
    /// # Panics
    ///
    /// Panics if `serde_json` fails to serialize the value produced by
    /// `crate::json::to_json`; the code treats that as an invariant violation.
    pub fn export_to_json(&self) -> String {
        let value = crate::json::to_json(self);
        serde_json::to_string_pretty(&value)
            .expect("DoclingDocument JSON is always serializable")
    }

    /// Exports the document via `to_markdown_images`, using the document's own
    /// `strict_markdown` flag together with `image_mode` and `artifacts_dir`.
    ///
    /// Returns the exporter's result unchanged: a string plus a list of
    /// `(String, Vec<u8>)` pairs (presumably the markdown text and the image
    /// artifacts as name/bytes pairs; confirm against `crate::markdown`).
    pub fn export_to_markdown_with_images(
        &self,
        image_mode: ImageMode,
        artifacts_dir: &str,
    ) -> (String, Vec<(String, Vec<u8>)>) {
        let strict = self.strict_markdown;
        to_markdown_images(self, strict, image_mode, artifacts_dir)
    }
}